airbyte-internal-ops 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31):
  1. {airbyte_internal_ops-0.1.3.dist-info → airbyte_internal_ops-0.1.4.dist-info}/METADATA +8 -5
  2. {airbyte_internal_ops-0.1.3.dist-info → airbyte_internal_ops-0.1.4.dist-info}/RECORD +31 -11
  3. airbyte_ops_mcp/_legacy/airbyte_ci/connector_pipelines/airbyte_ci/connectors/test/steps/common.py +1 -1
  4. airbyte_ops_mcp/cli/cloud.py +309 -38
  5. airbyte_ops_mcp/cloud_admin/connection_config.py +131 -0
  6. airbyte_ops_mcp/live_tests/__init__.py +16 -0
  7. airbyte_ops_mcp/live_tests/_connection_retriever/__init__.py +35 -0
  8. airbyte_ops_mcp/live_tests/_connection_retriever/audit_logging.py +88 -0
  9. airbyte_ops_mcp/live_tests/_connection_retriever/consts.py +33 -0
  10. airbyte_ops_mcp/live_tests/_connection_retriever/db_access.py +82 -0
  11. airbyte_ops_mcp/live_tests/_connection_retriever/retrieval.py +391 -0
  12. airbyte_ops_mcp/live_tests/_connection_retriever/secrets_resolution.py +130 -0
  13. airbyte_ops_mcp/live_tests/config.py +190 -0
  14. airbyte_ops_mcp/live_tests/connection_fetcher.py +159 -2
  15. airbyte_ops_mcp/live_tests/connection_secret_retriever.py +173 -0
  16. airbyte_ops_mcp/live_tests/evaluation_modes.py +45 -0
  17. airbyte_ops_mcp/live_tests/http_metrics.py +81 -0
  18. airbyte_ops_mcp/live_tests/message_cache/__init__.py +15 -0
  19. airbyte_ops_mcp/live_tests/message_cache/duckdb_cache.py +415 -0
  20. airbyte_ops_mcp/live_tests/obfuscation.py +126 -0
  21. airbyte_ops_mcp/live_tests/regression/__init__.py +29 -0
  22. airbyte_ops_mcp/live_tests/regression/comparators.py +466 -0
  23. airbyte_ops_mcp/live_tests/schema_generation.py +154 -0
  24. airbyte_ops_mcp/live_tests/validation/__init__.py +43 -0
  25. airbyte_ops_mcp/live_tests/validation/catalog_validators.py +389 -0
  26. airbyte_ops_mcp/live_tests/validation/record_validators.py +227 -0
  27. airbyte_ops_mcp/mcp/_mcp_utils.py +3 -0
  28. airbyte_ops_mcp/mcp/live_tests.py +500 -0
  29. airbyte_ops_mcp/mcp/server.py +3 -0
  30. {airbyte_internal_ops-0.1.3.dist-info → airbyte_internal_ops-0.1.4.dist-info}/WHEEL +0 -0
  31. {airbyte_internal_ops-0.1.3.dist-info → airbyte_internal_ops-0.1.4.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,131 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+ """Fetch connection configuration from Airbyte Cloud.
3
+
4
+ This module provides utilities for fetching connection configuration
5
+ from Airbyte Cloud, optionally including unmasked secrets.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import logging
12
+ from pathlib import Path
13
+
14
+ from pydantic import BaseModel, Field
15
+
16
+ from airbyte_ops_mcp.live_tests.connection_fetcher import fetch_connection_data
17
+ from airbyte_ops_mcp.live_tests.connection_secret_retriever import (
18
+ retrieve_unmasked_config,
19
+ )
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class FetchConnectionConfigResult(BaseModel):
    """Outcome report for one connection-config fetch.

    The same model is used for successful and failed fetches; ``success``
    tells the two apart and ``message`` explains the outcome. The optional
    fields default to ``None`` / ``False`` when they are not supplied.
    """

    # Outcome and explanation.
    success: bool = Field(description="Whether the operation succeeded")
    message: str = Field(description="Human-readable status message")

    # Identity of the connection and its source.
    connection_id: str = Field(description="The connection ID that was fetched")
    source_id: str | None = Field(
        description="The source ID for this connection",
        default=None,
    )
    source_name: str | None = Field(
        description="The name of the source",
        default=None,
    )

    # Where the config went and whether it carries secrets.
    output_path: str | None = Field(
        description="Path where the config was written",
        default=None,
    )
    with_secrets: bool = Field(
        description="Whether secrets were included",
        default=False,
    )
40
+
41
+
42
def fetch_connection_config(
    connection_id: str,
    output_path: Path | None = None,
    with_secrets: bool = False,
    oc_issue_url: str | None = None,
) -> FetchConnectionConfigResult:
    """Fetch a connection's source configuration and write it to a JSON file.

    Connection data is always fetched through ``fetch_connection_data``. When
    ``with_secrets`` is True the config written to disk comes from
    ``retrieve_unmasked_config`` instead of the fetched connection data.

    Args:
        connection_id: The UUID of the Airbyte Cloud connection.
        output_path: Path to output file or directory. If it is an existing
            directory, writes ``connection-<id>-config.json`` inside it.
            Default: ``./connection-<id>-config.json``.
        with_secrets: If True, fetch unmasked secrets from the internal database.
            Requires appropriate GCP credentials and Cloud SQL Proxy access.
        oc_issue_url: Required (non-blank) when ``with_secrets`` is True. The OC
            issue URL, recorded in the retrieval reason for audit logging.

    Returns:
        FetchConnectionConfigResult with operation status and details. Problems
        detected by this function (missing ``oc_issue_url``, unmasked config
        not retrievable) are reported as a result with ``success=False``
        rather than raised; in those cases nothing is written to disk.
    """
    # A whitespace-only URL is as useless for auditing as a missing one, so
    # treat both the same way.
    if with_secrets and not (oc_issue_url and oc_issue_url.strip()):
        return FetchConnectionConfigResult(
            success=False,
            message="--oc-issue-url is required when using --with-secrets for audit logging",
            connection_id=connection_id,
            with_secrets=with_secrets,
        )

    # Resolve output path: default file in the CWD, a file inside the given
    # directory, or the given path used verbatim as the file name.
    default_file_name = f"connection-{connection_id}-config.json"
    if output_path is None:
        resolved_path = Path(f"./{default_file_name}")
    elif output_path.is_dir():
        resolved_path = output_path / default_file_name
    else:
        resolved_path = output_path

    # Fetch connection data via public Cloud API
    connection_data = fetch_connection_data(connection_id)

    # Get the config - either masked or unmasked
    if with_secrets:
        # Use the internal connection-retriever to get unmasked secrets
        retrieval_reason = f"CLI fetch-connection-config: {oc_issue_url}"
        unmasked_config = retrieve_unmasked_config(
            connection_id=connection_id,
            retrieval_reason=retrieval_reason,
        )

        if unmasked_config is None:
            return FetchConnectionConfigResult(
                success=False,
                message=(
                    f"Failed to retrieve unmasked secrets for connection {connection_id}. "
                    "Ensure you have GCP credentials and Cloud SQL Proxy access."
                ),
                connection_id=connection_id,
                source_id=connection_data.source_id,
                source_name=connection_data.source_name,
                with_secrets=True,
            )

        config = unmasked_config
        logger.info(
            "Retrieved unmasked config for connection %s (reason: %s)",
            connection_id,
            retrieval_reason,
        )
    else:
        # Use the masked config from the public API
        config = connection_data.config

    # Write config to file, creating missing parent directories first.
    resolved_path.parent.mkdir(parents=True, exist_ok=True)
    resolved_path.write_text(json.dumps(config, indent=2), encoding="utf-8")

    return FetchConnectionConfigResult(
        success=True,
        message=f"Successfully wrote config to {resolved_path}",
        connection_id=connection_id,
        source_id=connection_data.source_id,
        source_name=connection_data.source_name,
        output_path=str(resolved_path),
        with_secrets=with_secrets,
    )
@@ -5,6 +5,16 @@ This module provides tools for testing Airbyte connectors against live data
5
5
  without using Dagger. It uses Docker SDK directly for container orchestration.
6
6
  """
7
7
 
8
+ from airbyte_ops_mcp.live_tests.connection_fetcher import (
9
+ ConnectionData,
10
+ fetch_connection_data,
11
+ )
12
+ from airbyte_ops_mcp.live_tests.connection_secret_retriever import (
13
+ enrich_config_with_secrets,
14
+ is_secret_retriever_enabled,
15
+ retrieve_unmasked_config,
16
+ should_use_secret_retriever,
17
+ )
8
18
  from airbyte_ops_mcp.live_tests.models import (
9
19
  Command,
10
20
  ConnectorUnderTest,
@@ -14,7 +24,13 @@ from airbyte_ops_mcp.live_tests.models import (
14
24
 
15
25
  __all__ = [
16
26
  "Command",
27
+ "ConnectionData",
17
28
  "ConnectorUnderTest",
18
29
  "ExecutionResult",
19
30
  "TargetOrControl",
31
+ "enrich_config_with_secrets",
32
+ "fetch_connection_data",
33
+ "is_secret_retriever_enabled",
34
+ "retrieve_unmasked_config",
35
+ "should_use_secret_retriever",
20
36
  ]
@@ -0,0 +1,35 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+ """Vendored subset of connection-retriever from airbyte-platform-internal.
3
+
4
+ This module contains a minimal subset of the connection-retriever tool,
5
+ vendored to avoid depending on an unpublished internal package. It provides
6
+ functionality to retrieve unmasked source configuration from Airbyte Cloud's
7
+ internal database.
8
+
9
+ Original source: airbyte-platform-internal/tools/connection-retriever
10
+ Vendored: 2025-01-XX
11
+
12
+ Only the following functionality is included:
13
+ - Retrieve unmasked source config for a given connection ID
14
+ - Secret resolution from GCP Secret Manager
15
+ - Audit logging to GCP Cloud Logging
16
+
17
+ NOT included (see issue #91 for future work):
18
+ - retrieve_testing_candidates() - BigQuery-based candidate discovery
19
+ - Destination config retrieval
20
+ - CLI interface
21
+ """
22
+
23
+ from airbyte_ops_mcp.live_tests._connection_retriever.consts import (
24
+ ConnectionObject,
25
+ )
26
+ from airbyte_ops_mcp.live_tests._connection_retriever.retrieval import (
27
+ TestingCandidate,
28
+ retrieve_objects,
29
+ )
30
+
31
+ __all__ = [
32
+ "ConnectionObject",
33
+ "TestingCandidate",
34
+ "retrieve_objects",
35
+ ]
@@ -0,0 +1,88 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+ """Audit logging for vendored connection-retriever.
3
+
4
+ Vendored from: airbyte-platform-internal/tools/connection-retriever/src/connection_retriever/audit_logging.py
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ import subprocess
11
+ from typing import TYPE_CHECKING, Any, Callable
12
+
13
+ from google.cloud import logging as gcloud_logging
14
+
15
+ from airbyte_ops_mcp.live_tests._connection_retriever.consts import (
16
+ GCP_PROJECT_NAME,
17
+ )
18
+
19
+ if TYPE_CHECKING:
20
+ from airbyte_ops_mcp.live_tests._connection_retriever.retrieval import (
21
+ RetrievalMetadata,
22
+ )
23
+
24
+ LOGGER = logging.getLogger(__name__)
25
+
26
+ # Lazy-initialized to avoid import-time GCP calls
27
+ _logging_client: gcloud_logging.Client | None = None
28
+ _airbyte_gcloud_logger: Any = None
29
+
30
+
31
def _get_logger() -> Any:
    """Return the module's cached GCP Cloud Logging logger.

    The client and logger are created on the first call and stored in module
    globals; later calls return the stored logger without creating a new one.
    """
    global _logging_client, _airbyte_gcloud_logger

    if _airbyte_gcloud_logger is None:
        # First use: build the client, then derive the named logger from it.
        _logging_client = gcloud_logging.Client(project=GCP_PROJECT_NAME)
        _airbyte_gcloud_logger = _logging_client.logger(
            "airbyte-cloud-connection-retriever"
        )

    return _airbyte_gcloud_logger
43
+
44
+
45
def get_user_email() -> str:
    """Return the account printed by ``gcloud auth list`` for active credentials.

    Runs the ``gcloud`` CLI as a subprocess and returns its stripped stdout.
    Because the command runs with ``check=True``, a non-zero exit status
    raises ``subprocess.CalledProcessError``.
    """
    # HACK: shelling out to gcloud; service account impersonation would be
    # the cleaner approach:
    # https://cloud.google.com/iam/docs/impersonating-service-accounts
    completed = subprocess.run(
        [
            "gcloud",
            "auth",
            "list",
            "--filter=status:ACTIVE",
            "--format=value(account)",
        ],
        capture_output=True,
        text=True,
        check=True,
    )
    account = completed.stdout.strip()
    return account
58
+
59
+
60
def get_audit_log_message(
    retrieval_metadata: RetrievalMetadata,
) -> dict:
    """Assemble the structured audit entry describing a retrieval.

    The entry names the current user (from ``get_user_email``), the kind of
    object being accessed, the connection ID and the stated reason, both as
    individual keys and folded into one human-readable ``message``.
    """
    # Resolve the user first, before touching the metadata.
    user_email = get_user_email()

    summary = (
        f"{user_email} is accessing {retrieval_metadata.connection_object.value} "
        f"for connection_id {retrieval_metadata.connection_id} "
        f"for {retrieval_metadata.retrieval_reason}"
    )

    entry: dict = {"message": summary}
    entry["retrieval_reason"] = retrieval_metadata.retrieval_reason
    entry["connection_object_type"] = retrieval_metadata.connection_object.value
    entry["user"] = user_email
    entry["connection_id"] = retrieval_metadata.connection_id
    return entry
76
+
77
+
78
def audit(function_to_audit: Callable) -> Callable:
    """Decorator that writes an audit entry to GCP Cloud Logging before a call.

    The returned wrapper takes a ``RetrievalMetadata`` as its first positional
    argument, logs a structured entry built from it, and then invokes
    ``function_to_audit`` with the remaining positional and keyword arguments.

    Args:
        function_to_audit: The callable to run after the audit entry is logged.

    Returns:
        A wrapper with signature ``(retrieval_metadata, *args, **kwargs)`` that
        returns whatever ``function_to_audit`` returns.
    """

    def wrapper(
        retrieval_metadata: RetrievalMetadata, *args: Any, **kwargs: Any
    ) -> Any:
        # Build and send the audit entry first: if either step raises, the
        # audited function is never called.
        audit_log_message = get_audit_log_message(retrieval_metadata)
        _get_logger().log_struct(audit_log_message)
        # NOTE(review): retrieval_metadata is consumed here and NOT forwarded;
        # a decorated function that needs it must receive it again through
        # *args/**kwargs. Confirm this is intended at the call sites.
        return function_to_audit(*args, **kwargs)

    return wrapper
@@ -0,0 +1,33 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+ """Constants for vendored connection-retriever.
3
+
4
+ Vendored from: airbyte-platform-internal/tools/connection-retriever/src/connection_retriever/consts.py
5
+ """
6
+
7
+ from enum import Enum
8
+
9
+ GCP_PROJECT_NAME = "prod-ab-cloud-proj"
10
+
11
+ CLOUD_REGISTRY_URL = (
12
+ "https://connectors.airbyte.com/files/registries/v0/cloud_registry.json"
13
+ )
14
+
15
+ CONNECTION_RETRIEVER_PG_CONNECTION_DETAILS_SECRET_ID = (
16
+ "projects/587336813068/secrets/CONNECTION_RETRIEVER_PG_CONNECTION_DETAILS"
17
+ )
18
+
19
+
20
class ConnectionObject(Enum):
    """Kinds of connection-related objects that can be requested.

    Each member's value is the kebab-case string form of its name.
    """

    # The connection itself and the IDs of its two ends.
    CONNECTION = "connection"
    SOURCE_ID = "source-id"
    DESTINATION_ID = "destination-id"

    # Configuration payloads.
    DESTINATION_CONFIG = "destination-config"
    SOURCE_CONFIG = "source-config"

    # Catalogs and state.
    CATALOG = "catalog"
    CONFIGURED_CATALOG = "configured-catalog"
    STATE = "state"

    # Workspace and docker images.
    WORKSPACE_ID = "workspace-id"
    DESTINATION_DOCKER_IMAGE = "destination-docker-image"
    SOURCE_DOCKER_IMAGE = "source-docker-image"
@@ -0,0 +1,82 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+ """Database access for vendored connection-retriever.
3
+
4
+ Vendored from: airbyte-platform-internal/tools/connection-retriever/src/connection_retriever/db_access.py
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import os
11
+ import traceback
12
+ from typing import Any, Callable
13
+
14
+ import sqlalchemy
15
+ from google.cloud import secretmanager
16
+ from google.cloud.sql.connector import Connector
17
+ from google.cloud.sql.connector.enums import IPTypes
18
+
19
+ from airbyte_ops_mcp.live_tests._connection_retriever.consts import (
20
+ CONNECTION_RETRIEVER_PG_CONNECTION_DETAILS_SECRET_ID,
21
+ )
22
+ from airbyte_ops_mcp.live_tests._connection_retriever.secrets_resolution import (
23
+ get_secret_value,
24
+ )
25
+
26
+ PG_DRIVER = "pg8000"
27
+
28
+ # Lazy-initialized to avoid import-time GCP auth
29
+ _connector: Connector | None = None
30
+
31
+
32
def _get_connector() -> Connector:
    """Return the module-wide Cloud SQL ``Connector``, creating it on first use.

    The instance is cached in the ``_connector`` module global, so it is
    constructed at most once per process by this function.
    """
    global _connector

    if _connector is not None:
        return _connector

    _connector = Connector()
    return _connector
38
+
39
+
40
def get_database_creator(pg_connection_details: dict) -> Callable:
    """Build a zero-argument callable that opens a database connection.

    The returned ``creator`` reads ``database_address``, ``pg_user``,
    ``pg_password`` and ``database_name`` from ``pg_connection_details`` each
    time it is called and connects through the shared Cloud SQL connector
    using the ``PG_DRIVER`` driver over a private IP.
    """

    def creator() -> Any:
        # Obtain the connector before reading the details, as nothing is
        # looked up until a connection is actually requested.
        connector = _get_connector()
        instance_address = pg_connection_details["database_address"]
        connect_options = {
            "user": pg_connection_details["pg_user"],
            "password": pg_connection_details["pg_password"],
            "db": pg_connection_details["database_name"],
            "ip_type": IPTypes.PRIVATE,
        }
        return connector.connect(instance_address, PG_DRIVER, **connect_options)

    return creator
54
+
55
+
56
def get_pool(
    secret_manager_client: secretmanager.SecretManagerServiceClient,
) -> sqlalchemy.Engine:
    """Get a SQLAlchemy engine for the Airbyte Cloud database.

    Connection details are loaded as JSON from the secret identified by
    ``CONNECTION_RETRIEVER_PG_CONNECTION_DETAILS_SECRET_ID``.

    Args:
        secret_manager_client: Client used to read the connection-details secret.

    Returns:
        An engine that targets a proxy on localhost when the ``CI`` environment
        variable is set, and otherwise obtains connections from the creator
        returned by ``get_database_creator``.

    Raises:
        AssertionError: In the ``CI`` branch, if building the URL or creating
            the engine raises any exception (the original is chained as the
            cause).
    """
    pg_connection_details = json.loads(
        get_secret_value(
            secret_manager_client, CONNECTION_RETRIEVER_PG_CONNECTION_DETAILS_SECRET_ID
        )
    )

    if not os.getenv("CI"):
        return sqlalchemy.create_engine(
            f"postgresql+{PG_DRIVER}://",
            creator=get_database_creator(pg_connection_details),
        )

    # In CI we connect via Cloud SQL Auth Proxy, running on localhost
    host = "127.0.0.1"
    try:
        # Build the URL from components instead of string interpolation so
        # that credentials containing URL-special characters (@, /, :, %)
        # are passed through intact rather than corrupting the URL.
        proxy_url = sqlalchemy.engine.URL.create(
            drivername=f"postgresql+{PG_DRIVER}",
            username=pg_connection_details["pg_user"],
            password=pg_connection_details["pg_password"],
            host=host,
            database=pg_connection_details["database_name"],
        )
        return sqlalchemy.create_engine(proxy_url)
    except Exception as e:
        raise AssertionError(
            f"sqlalchemy.create_engine exception; could not connect to the proxy at {host}. "
            f"Error: {traceback.format_exception(e)}"
        ) from e