airbyte-internal-ops 0.4.2__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff compares publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (130)
  1. {airbyte_internal_ops-0.4.2.dist-info → airbyte_internal_ops-0.5.1.dist-info}/METADATA +2 -1
  2. {airbyte_internal_ops-0.4.2.dist-info → airbyte_internal_ops-0.5.1.dist-info}/RECORD +21 -129
  3. airbyte_ops_mcp/cli/cloud.py +31 -2
  4. airbyte_ops_mcp/cloud_admin/api_client.py +506 -33
  5. airbyte_ops_mcp/cloud_admin/models.py +56 -0
  6. airbyte_ops_mcp/constants.py +58 -0
  7. airbyte_ops_mcp/{_legacy/airbyte_ci/metadata_service/docker_hub.py → docker_hub.py} +16 -10
  8. airbyte_ops_mcp/mcp/cloud_connector_versions.py +491 -10
  9. airbyte_ops_mcp/mcp/prerelease.py +5 -44
  10. airbyte_ops_mcp/mcp/prod_db_queries.py +128 -4
  11. airbyte_ops_mcp/mcp/regression_tests.py +10 -5
  12. airbyte_ops_mcp/{_legacy/airbyte_ci/metadata_service/validators/metadata_validator.py → metadata_validator.py} +18 -12
  13. airbyte_ops_mcp/prod_db_access/queries.py +51 -0
  14. airbyte_ops_mcp/prod_db_access/sql.py +76 -0
  15. airbyte_ops_mcp/regression_tests/ci_output.py +8 -4
  16. airbyte_ops_mcp/regression_tests/connection_fetcher.py +16 -5
  17. airbyte_ops_mcp/regression_tests/http_metrics.py +21 -2
  18. airbyte_ops_mcp/regression_tests/models.py +7 -1
  19. airbyte_ops_mcp/telemetry.py +162 -0
  20. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/.gitignore +0 -1
  21. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/README.md +0 -420
  22. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/__init__.py +0 -2
  23. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/__init__.py +0 -1
  24. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/__init__.py +0 -8
  25. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/base_backend.py +0 -16
  26. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/duckdb_backend.py +0 -87
  27. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/file_backend.py +0 -165
  28. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connection_objects_retrieval.py +0 -377
  29. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connector_runner.py +0 -247
  30. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/errors.py +0 -7
  31. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/evaluation_modes.py +0 -25
  32. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/hacks.py +0 -23
  33. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/json_schema_helper.py +0 -384
  34. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/mitm_addons.py +0 -37
  35. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/models.py +0 -595
  36. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/proxy.py +0 -207
  37. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/secret_access.py +0 -47
  38. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/segment_tracking.py +0 -45
  39. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/utils.py +0 -214
  40. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/conftest.py.disabled +0 -751
  41. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/consts.py +0 -4
  42. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/poetry.lock +0 -4480
  43. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/pytest.ini +0 -9
  44. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/__init__.py +0 -1
  45. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_check.py +0 -61
  46. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_discover.py +0 -117
  47. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_read.py +0 -627
  48. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_spec.py +0 -43
  49. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/report.py +0 -542
  50. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/stash_keys.py +0 -38
  51. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/__init__.py +0 -0
  52. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/private_details.html.j2 +0 -305
  53. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/report.html.j2 +0 -515
  54. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/utils.py +0 -187
  55. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/__init__.py +0 -0
  56. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_check.py +0 -61
  57. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_discover.py +0 -217
  58. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_read.py +0 -177
  59. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_spec.py +0 -631
  60. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/README.md +0 -91
  61. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/bin/bundle-schemas.js +0 -48
  62. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/bin/generate-metadata-models.sh +0 -36
  63. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ActorDefinitionResourceRequirements.py +0 -54
  64. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/AirbyteInternal.py +0 -22
  65. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/AllowedHosts.py +0 -18
  66. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorBreakingChanges.py +0 -65
  67. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorBuildOptions.py +0 -15
  68. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorIPCOptions.py +0 -25
  69. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorMetadataDefinitionV0.json +0 -897
  70. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorMetadataDefinitionV0.py +0 -478
  71. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorMetrics.py +0 -24
  72. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorPackageInfo.py +0 -12
  73. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorRegistryDestinationDefinition.py +0 -407
  74. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorRegistryReleases.py +0 -406
  75. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorRegistrySourceDefinition.py +0 -407
  76. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorRegistryV0.py +0 -413
  77. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorReleases.py +0 -98
  78. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorTestSuiteOptions.py +0 -58
  79. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/GeneratedFields.py +0 -62
  80. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/GitInfo.py +0 -31
  81. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/JobType.py +0 -23
  82. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/NormalizationDestinationDefinitionConfig.py +0 -24
  83. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/RegistryOverrides.py +0 -111
  84. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ReleaseStage.py +0 -15
  85. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/RemoteRegistries.py +0 -23
  86. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ResourceRequirements.py +0 -18
  87. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/RolloutConfiguration.py +0 -29
  88. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/Secret.py +0 -34
  89. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/SecretStore.py +0 -22
  90. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/SourceFileInfo.py +0 -16
  91. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/SuggestedStreams.py +0 -18
  92. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/SupportLevel.py +0 -15
  93. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/TestConnections.py +0 -14
  94. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/__init__.py +0 -31
  95. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/airbyte-connector-metadata-schema.json +0 -0
  96. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ActorDefinitionResourceRequirements.yaml +0 -30
  97. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/AirbyteInternal.yaml +0 -32
  98. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/AllowedHosts.yaml +0 -13
  99. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorBreakingChanges.yaml +0 -65
  100. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorBuildOptions.yaml +0 -10
  101. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorIPCOptions.yaml +0 -29
  102. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorMetadataDefinitionV0.yaml +0 -172
  103. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorMetrics.yaml +0 -30
  104. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorPackageInfo.yaml +0 -9
  105. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorRegistryDestinationDefinition.yaml +0 -90
  106. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorRegistryReleases.yaml +0 -35
  107. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorRegistrySourceDefinition.yaml +0 -92
  108. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorRegistryV0.yaml +0 -18
  109. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorReleases.yaml +0 -16
  110. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorTestSuiteOptions.yaml +0 -28
  111. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/GeneratedFields.yaml +0 -16
  112. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/GitInfo.yaml +0 -21
  113. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/JobType.yaml +0 -14
  114. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/NormalizationDestinationDefinitionConfig.yaml +0 -21
  115. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/RegistryOverrides.yaml +0 -38
  116. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ReleaseStage.yaml +0 -11
  117. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/RemoteRegistries.yaml +0 -25
  118. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ResourceRequirements.yaml +0 -16
  119. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/RolloutConfiguration.yaml +0 -29
  120. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/Secret.yaml +0 -19
  121. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/SecretStore.yaml +0 -16
  122. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/SourceFileInfo.yaml +0 -17
  123. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/SuggestedStreams.yaml +0 -13
  124. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/SupportLevel.yaml +0 -10
  125. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/TestConnections.yaml +0 -17
  126. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/package-lock.json +0 -62
  127. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/package.json +0 -12
  128. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/transform.py +0 -71
  129. {airbyte_internal_ops-0.4.2.dist-info → airbyte_internal_ops-0.5.1.dist-info}/WHEEL +0 -0
  130. {airbyte_internal_ops-0.4.2.dist-info → airbyte_internal_ops-0.5.1.dist-info}/entry_points.txt +0 -0

airbyte_ops_mcp/mcp/prod_db_queries.py

@@ -16,12 +16,13 @@ from airbyte.exceptions import PyAirbyteInputError
 from fastmcp import FastMCP
 from pydantic import BaseModel, Field
 
-from airbyte_ops_mcp.constants import OrganizationAliasEnum
+from airbyte_ops_mcp.constants import OrganizationAliasEnum, WorkspaceAliasEnum
 from airbyte_ops_mcp.mcp._mcp_utils import mcp_tool, register_mcp_tools
 from airbyte_ops_mcp.prod_db_access.queries import (
     query_actors_pinned_to_version,
     query_connections_by_connector,
     query_connections_by_destination_connector,
+    query_connections_by_stream,
     query_connector_versions,
     query_dataplanes_list,
     query_destination_connection_stats,
@@ -299,8 +300,11 @@ def query_prod_dataplanes() -> list[dict[str, Any]]:
 )
 def query_prod_workspace_info(
     workspace_id: Annotated[
-        str,
-        Field(description="Workspace UUID to look up"),
+        str | WorkspaceAliasEnum,
+        Field(
+            description="Workspace UUID or alias to look up. "
+            "Accepts '@devin-ai-sandbox' as an alias for the Devin AI sandbox workspace."
+        ),
     ],
 ) -> dict[str, Any] | None:
     """Get workspace information including dataplane group.
@@ -313,7 +317,11 @@ def query_prod_workspace_info(
         dataplane_group_id, dataplane_name, created_at, tombstone
     Or None if workspace not found.
     """
-    return query_workspace_info(workspace_id)
+    # Resolve workspace ID alias (workspace_id is required, so resolved value is never None)
+    resolved_workspace_id = WorkspaceAliasEnum.resolve(workspace_id)
+    assert resolved_workspace_id is not None  # Type narrowing: workspace_id is required
+
+    return query_workspace_info(resolved_workspace_id)
 
 
 @mcp_tool(
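
Note: the hunks above route workspace lookups through WorkspaceAliasEnum.resolve() from the updated constants.py (+58 lines, not shown in this diff). The sketch below illustrates one plausible shape for that helper: a str-valued enum mapping '@'-prefixed aliases to workspace UUIDs. The member name, placeholder UUID, and resolve() body are assumptions for illustration, not the actual constants.py contents.

from __future__ import annotations

from enum import Enum


class WorkspaceAliasEnum(str, Enum):
    """Hypothetical sketch: maps '@'-prefixed aliases to workspace UUIDs."""

    DEVIN_AI_SANDBOX = "@devin-ai-sandbox"

    @classmethod
    def resolve(cls, workspace_id: str | WorkspaceAliasEnum | None) -> str | None:
        """Translate a known alias to its UUID; pass other values through unchanged."""
        if workspace_id is None:
            return None
        # Placeholder UUID; the real mapping lives in airbyte_ops_mcp/constants.py.
        alias_to_uuid = {cls.DEVIN_AI_SANDBOX: "00000000-0000-0000-0000-000000000000"}
        try:
            return alias_to_uuid[cls(workspace_id)]
        except ValueError:
            return str(workspace_id)  # Not a known alias: treat as a literal UUID.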
@@ -868,6 +876,122 @@ def query_prod_connections_by_connector(
     ]
 
 
+@mcp_tool(
+    read_only=True,
+    idempotent=True,
+    open_world=True,
+)
+def query_prod_connections_by_stream(
+    stream_name: Annotated[
+        str,
+        Field(
+            description=(
+                "Name of the stream to search for in connection catalogs. "
+                "This must match the exact stream name as configured in the connection. "
+                "Examples: 'global_exclusions', 'campaigns', 'users'."
+            ),
+        ),
+    ],
+    source_definition_id: Annotated[
+        str | None,
+        Field(
+            description=(
+                "Source connector definition ID (UUID) to search for. "
+                "Provide this OR source_canonical_name (exactly one required). "
+                "Example: 'afa734e4-3571-11ec-991a-1e0031268139' for YouTube Analytics."
+            ),
+            default=None,
+        ),
+    ],
+    source_canonical_name: Annotated[
+        str | None,
+        Field(
+            description=(
+                "Canonical source connector name to search for. "
+                "Provide this OR source_definition_id (exactly one required). "
+                "Examples: 'source-klaviyo', 'Klaviyo', 'source-youtube-analytics'."
+            ),
+            default=None,
+        ),
+    ],
+    organization_id: Annotated[
+        str | OrganizationAliasEnum | None,
+        Field(
+            description=(
+                "Optional organization ID (UUID) or alias to filter results. "
+                "If provided, only connections in this organization will be returned. "
+                "Accepts '@airbyte-internal' as an alias for the Airbyte internal org."
+            ),
+            default=None,
+        ),
+    ],
+    limit: Annotated[
+        int,
+        Field(description="Maximum number of results (default: 100)", default=100),
+    ],
+) -> list[dict[str, Any]]:
+    """Find connections that have a specific stream enabled in their catalog.
+
+    This tool searches the connection's configured catalog (JSONB) for streams
+    matching the specified name. It's particularly useful when validating
+    connector fixes that affect specific streams - you can quickly find
+    customer connections that use the affected stream.
+
+    Use cases:
+    - Finding connections with a specific stream enabled for regression testing
+    - Validating connector fixes that affect particular streams
+    - Identifying which customers use rarely-enabled streams
+
+    Returns a list of connection dicts with workspace context and clickable Cloud UI URLs.
+    """
+    provided_params = [source_definition_id, source_canonical_name]
+    num_provided = sum(p is not None for p in provided_params)
+    if num_provided != 1:
+        raise PyAirbyteInputError(
+            message=(
+                "Exactly one of source_definition_id or source_canonical_name "
+                "must be provided."
+            ),
+        )
+
+    resolved_definition_id: str
+    if source_canonical_name:
+        resolved_definition_id = _resolve_canonical_name_to_definition_id(
+            canonical_name=source_canonical_name,
+        )
+    else:
+        assert source_definition_id is not None
+        resolved_definition_id = source_definition_id
+
+    resolved_organization_id = OrganizationAliasEnum.resolve(organization_id)
+
+    return [
+        {
+            "organization_id": str(row.get("organization_id", "")),
+            "workspace_id": str(row["workspace_id"]),
+            "workspace_name": row.get("workspace_name", ""),
+            "connection_id": str(row["connection_id"]),
+            "connection_name": row.get("connection_name", ""),
+            "connection_status": row.get("connection_status", ""),
+            "connection_url": (
+                f"{CLOUD_UI_BASE_URL}/workspaces/{row['workspace_id']}"
+                f"/connections/{row['connection_id']}/status"
+            ),
+            "source_id": str(row["source_id"]),
+            "source_name": row.get("source_name", ""),
+            "source_definition_id": str(row["source_definition_id"]),
+            "dataplane_group_id": str(row.get("dataplane_group_id", "")),
+            "dataplane_name": row.get("dataplane_name", ""),
+        }
+        for row in query_connections_by_stream(
+            connector_definition_id=resolved_definition_id,
+            stream_name=stream_name,
+            organization_id=resolved_organization_id,
+            limit=limit,
+        )
+    ]
+
+
 @mcp_tool(
     read_only=True,
     idempotent=True,
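
The new tool accepts exactly one of source_definition_id or source_canonical_name. A hedged usage sketch, calling the tool function directly rather than over MCP (assumes prod DB credentials are configured; all parameter values are illustrative):

rows = query_prod_connections_by_stream(
    stream_name="campaigns",
    source_definition_id=None,
    source_canonical_name="source-klaviyo",
    organization_id="@airbyte-internal",
    limit=25,
)
for row in rows:
    # Each row carries workspace context plus a clickable Cloud UI URL.
    print(row["connection_name"], row["connection_url"])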

airbyte_ops_mcp/mcp/regression_tests.py

@@ -31,6 +31,7 @@ from airbyte.exceptions import (
 from fastmcp import FastMCP
 from pydantic import BaseModel, Field
 
+from airbyte_ops_mcp.constants import WorkspaceAliasEnum
 from airbyte_ops_mcp.github_actions import trigger_workflow_dispatch
 from airbyte_ops_mcp.github_api import GITHUB_API_BASE, resolve_github_token
 from airbyte_ops_mcp.mcp._mcp_utils import mcp_tool, register_mcp_tools
@@ -341,9 +342,10 @@ def run_regression_tests(
         "Ignored if skip_compare=True.",
     ] = None,
     workspace_id: Annotated[
-        str | None,
-        "Optional Airbyte Cloud workspace ID. If provided with connection_id, validates "
-        "that the connection belongs to this workspace before triggering tests.",
+        str | WorkspaceAliasEnum | None,
+        "Optional Airbyte Cloud workspace ID (UUID) or alias. If provided with connection_id, "
+        "validates that the connection belongs to this workspace before triggering tests. "
+        "Accepts '@devin-ai-sandbox' as an alias for the Devin AI sandbox workspace.",
     ] = None,
 ) -> RunRegressionTestsResponse:
     """Start a regression test run via GitHub Actions workflow.
@@ -368,6 +370,9 @@ def run_regression_tests(
     Requires GITHUB_CI_WORKFLOW_TRIGGER_PAT or GITHUB_TOKEN environment variable
     with 'actions:write' permission.
     """
+    # Resolve workspace ID alias
+    resolved_workspace_id = WorkspaceAliasEnum.resolve(workspace_id)
+
     # Generate a unique run ID for tracking
     run_id = str(uuid.uuid4())
 
@@ -383,9 +388,9 @@ def run_regression_tests(
     )
 
     # Validate workspace membership if workspace_id and connection_id are provided
-    if workspace_id and connection_id:
+    if resolved_workspace_id and connection_id:
         try:
-            validate_connection_workspace(connection_id, workspace_id)
+            validate_connection_workspace(connection_id, resolved_workspace_id)
         except (
             ValueError,
             AirbyteWorkspaceMismatchError,

airbyte_ops_mcp/metadata_validator.py (renamed from airbyte_ops_mcp/_legacy/airbyte_ci/metadata_service/validators/metadata_validator.py)

@@ -1,6 +1,12 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
+"""Connector metadata validation for Airbyte connectors.
+
+Provides validation functions for connector metadata.yaml files, including checks
+for Docker image availability, version constraints, breaking changes, and registry
+configuration. Uses Pydantic models from airbyte-connector-models for schema validation.
+"""
 
 import logging
 import pathlib
@@ -9,14 +15,13 @@ from typing import Callable, List, Optional, Tuple, Union
 
 import semver
 import yaml
+from airbyte_connector_models.metadata.v0.connector_metadata_definition_v0 import (
+    ConnectorMetadataDefinitionV0,
+)
 from pydantic import ValidationError
 from pydash.objects import get
 
-from airbyte_ops_mcp._legacy.airbyte_ci.metadata_models.generated.ConnectorMetadataDefinitionV0 import (
-    ConnectorMetadataDefinitionV0,
-)  # type: ignore
-
-from ..docker_hub import get_latest_version_on_dockerhub, is_image_on_docker_hub
+from .docker_hub import get_latest_version_on_dockerhub, is_image_on_docker_hub
 
 logger = logging.getLogger(__name__)
 
@@ -42,7 +47,7 @@ def validate_metadata_images_in_dockerhub(
     if validator_opts.disable_dockerhub_checks:
         return True, None
 
-    metadata_definition_dict = metadata_definition.dict()
+    metadata_definition_dict = metadata_definition.model_dump(exclude_unset=True)
     base_docker_image = get(metadata_definition_dict, "data.dockerRepository")
     base_docker_version = get(metadata_definition_dict, "data.dockerImageTag")
 
@@ -111,7 +116,7 @@ def validate_at_least_one_language_tag(
 ) -> ValidationResult:
     """Ensure that there is at least one tag in the data.tags field that matches language:<LANG>."""
     tags = get(metadata_definition, "data.tags", [])
-    if not any([tag.startswith("language:") for tag in tags]):
+    if not any(tag.startswith("language:") for tag in tags):
        return False, "At least one tag must be of the form language:<LANG>"
 
     return True, None
@@ -145,7 +150,7 @@ def validate_major_version_bump_has_breaking_change_entry(
     _validator_opts: ValidatorOptions,
 ) -> ValidationResult:
     """Ensure that if the major version is incremented, there is a breaking change entry for that version."""
-    metadata_definition_dict = metadata_definition.dict()
+    metadata_definition_dict = metadata_definition.model_dump(exclude_unset=True)
     image_tag = get(metadata_definition_dict, "data.dockerImageTag")
 
     if not is_major_version(image_tag):
@@ -169,7 +174,7 @@ def validate_major_version_bump_has_breaking_change_entry(
     )
 
     breaking_changes = get(metadata_definition_dict, "data.releases.breakingChanges")
-    if breaking_changes is None or image_tag not in breaking_changes.keys():
+    if breaking_changes is None or image_tag not in breaking_changes:
         return (
             False,
             f"Major version {image_tag} needs a 'releases.breakingChanges' entry indicating what changed.",
@@ -194,7 +199,7 @@ def validate_metadata_base_images_in_dockerhub(
     if validator_opts.disable_dockerhub_checks:
         return True, None
 
-    metadata_definition_dict = metadata_definition.dict()
+    metadata_definition_dict = metadata_definition.model_dump(exclude_unset=True)
 
     image_address = get(
         metadata_definition_dict, "data.connectorBuildOptions.baseImage"
@@ -365,7 +370,8 @@ PRE_UPLOAD_VALIDATORS = [
 ]
 
 
-POST_UPLOAD_VALIDATORS = PRE_UPLOAD_VALIDATORS + [
+POST_UPLOAD_VALIDATORS = [
+    *PRE_UPLOAD_VALIDATORS,
     validate_metadata_images_in_dockerhub,
 ]
 
@@ -384,7 +390,7 @@ def validate_and_load(
     try:
         # Load the metadata file - this implicitly runs jsonschema validation
         metadata = yaml.safe_load(file_path.read_text())
-        metadata_model = ConnectorMetadataDefinitionV0.parse_obj(metadata)
+        metadata_model = ConnectorMetadataDefinitionV0.model_validate(metadata)
     except ValidationError as e:
         return None, f"Validation error: {e}"

airbyte_ops_mcp/prod_db_access/queries.py

@@ -24,6 +24,8 @@ from airbyte_ops_mcp.prod_db_access.sql import (
     SELECT_CONNECTIONS_BY_CONNECTOR_AND_ORG,
     SELECT_CONNECTIONS_BY_DESTINATION_CONNECTOR,
     SELECT_CONNECTIONS_BY_DESTINATION_CONNECTOR_AND_ORG,
+    SELECT_CONNECTIONS_BY_SOURCE_CONNECTOR_AND_STREAM,
+    SELECT_CONNECTIONS_BY_SOURCE_CONNECTOR_AND_STREAM_AND_ORG,
     SELECT_CONNECTOR_VERSIONS,
     SELECT_DATAPLANES_LIST,
     SELECT_DESTINATION_CONNECTION_STATS,
@@ -565,3 +567,52 @@ def query_destination_connection_stats(
         query_name="SELECT_DESTINATION_CONNECTION_STATS",
         gsm_client=gsm_client,
     )
+
+
+def query_connections_by_stream(
+    connector_definition_id: str,
+    stream_name: str,
+    organization_id: str | None = None,
+    limit: int = 100,
+    *,
+    gsm_client: secretmanager.SecretManagerServiceClient | None = None,
+) -> list[dict[str, Any]]:
+    """Query connections by source connector type that have a specific stream enabled.
+
+    This searches the connection's configured catalog (JSONB) for streams matching
+    the specified name. Useful for finding connections that use a particular stream
+    when validating connector fixes that affect specific streams.
+
+    Args:
+        connector_definition_id: Source connector definition UUID to filter by
+        stream_name: Name of the stream to search for in the connection's catalog
+        organization_id: Optional organization UUID to filter results by
+        limit: Maximum number of results (default: 100)
+        gsm_client: GCP Secret Manager client. If None, a new client will be instantiated.
+
+    Returns:
+        List of connection records with workspace and dataplane info
+    """
+    if organization_id is None:
+        return _run_sql_query(
+            SELECT_CONNECTIONS_BY_SOURCE_CONNECTOR_AND_STREAM,
+            parameters={
+                "connector_definition_id": connector_definition_id,
+                "stream_name": stream_name,
+                "limit": limit,
+            },
+            query_name="SELECT_CONNECTIONS_BY_SOURCE_CONNECTOR_AND_STREAM",
+            gsm_client=gsm_client,
+        )
+
+    return _run_sql_query(
+        SELECT_CONNECTIONS_BY_SOURCE_CONNECTOR_AND_STREAM_AND_ORG,
+        parameters={
+            "connector_definition_id": connector_definition_id,
+            "stream_name": stream_name,
+            "organization_id": organization_id,
+            "limit": limit,
+        },
+        query_name="SELECT_CONNECTIONS_BY_SOURCE_CONNECTOR_AND_STREAM_AND_ORG",
+        gsm_client=gsm_client,
+    )

airbyte_ops_mcp/prod_db_access/sql.py

@@ -934,3 +934,79 @@ SELECT_DESTINATION_CONNECTION_STATS = sqlalchemy.text(
     ORDER BY total_connections DESC
     """
 )
+
+# =============================================================================
+# Stream-based Connection Queries
+# =============================================================================
+
+# Query connections by source connector type that have a specific stream enabled
+# The catalog field is JSONB with structure: {"streams": [{"stream": {"name": "..."}, ...}, ...]}
+SELECT_CONNECTIONS_BY_SOURCE_CONNECTOR_AND_STREAM = sqlalchemy.text(
+    """
+    SELECT
+        connection.id AS connection_id,
+        connection.name AS connection_name,
+        connection.source_id,
+        connection.status AS connection_status,
+        workspace.id AS workspace_id,
+        workspace.name AS workspace_name,
+        workspace.organization_id,
+        workspace.dataplane_group_id,
+        dataplane_group.name AS dataplane_name,
+        source_actor.actor_definition_id AS source_definition_id,
+        source_actor.name AS source_name
+    FROM connection
+    JOIN actor AS source_actor
+        ON connection.source_id = source_actor.id
+        AND source_actor.tombstone = false
+    JOIN workspace
+        ON source_actor.workspace_id = workspace.id
+        AND workspace.tombstone = false
+    LEFT JOIN dataplane_group
+        ON workspace.dataplane_group_id = dataplane_group.id
+    WHERE
+        source_actor.actor_definition_id = :connector_definition_id
+        AND connection.status = 'active'
+        AND EXISTS (
+            SELECT 1 FROM jsonb_array_elements(connection.catalog->'streams') AS stream
+            WHERE stream->'stream'->>'name' = :stream_name
+        )
+    LIMIT :limit
+    """
+)
+
+# Query connections by source connector type and stream, filtered by organization
+SELECT_CONNECTIONS_BY_SOURCE_CONNECTOR_AND_STREAM_AND_ORG = sqlalchemy.text(
+    """
+    SELECT
+        connection.id AS connection_id,
+        connection.name AS connection_name,
+        connection.source_id,
+        connection.status AS connection_status,
+        workspace.id AS workspace_id,
+        workspace.name AS workspace_name,
+        workspace.organization_id,
+        workspace.dataplane_group_id,
+        dataplane_group.name AS dataplane_name,
+        source_actor.actor_definition_id AS source_definition_id,
+        source_actor.name AS source_name
+    FROM connection
+    JOIN actor AS source_actor
+        ON connection.source_id = source_actor.id
+        AND source_actor.tombstone = false
+    JOIN workspace
+        ON source_actor.workspace_id = workspace.id
+        AND workspace.tombstone = false
+    LEFT JOIN dataplane_group
+        ON workspace.dataplane_group_id = dataplane_group.id
+    WHERE
+        source_actor.actor_definition_id = :connector_definition_id
+        AND workspace.organization_id = :organization_id
+        AND connection.status = 'active'
+        AND EXISTS (
+            SELECT 1 FROM jsonb_array_elements(connection.catalog->'streams') AS stream
+            WHERE stream->'stream'->>'name' = :stream_name
+        )
+    LIMIT :limit
+    """
+)
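
The EXISTS/jsonb_array_elements pattern above unnests the configured catalog's streams array and matches on the nested stream name. A minimal sketch of executing the same query shape with SQLAlchemy (connection string and values are illustrative; the real statements run through _run_sql_query against the prod replica, and jsonb_array_elements is Postgres-specific):

import sqlalchemy

engine = sqlalchemy.create_engine("postgresql+psycopg2://user:pass@host/airbyte")

stmt = sqlalchemy.text(
    """
    SELECT connection.id
    FROM connection
    WHERE EXISTS (
        SELECT 1 FROM jsonb_array_elements(connection.catalog->'streams') AS stream
        WHERE stream->'stream'->>'name' = :stream_name
    )
    LIMIT :limit
    """
)

with engine.connect() as conn:
    # Bound parameters (:stream_name, :limit) keep the query safe from injection.
    rows = conn.execute(stmt, {"stream_name": "campaigns", "limit": 10}).fetchall()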

airbyte_ops_mcp/regression_tests/ci_output.py

@@ -308,10 +308,10 @@ def generate_action_test_comparison_report(
         [
             "#### HTTP Metrics",
             "",
-            "| Version | Flow Count | Duplicate Flows |",
-            "|---------|------------|-----------------|",
-            f"| Control | {control_http.get('flow_count', 0)} | {control_http.get('duplicate_flow_count', 0)} |",
-            f"| Target | {target_http.get('flow_count', 0)} | {target_http.get('duplicate_flow_count', 0)} |",
+            "| Version | Flow Count | Duplicate Flows | Cache Hit Ratio |",
+            "|---------|------------|-----------------|-----------------|",
+            f"| Control | {control_http.get('flow_count', 0)} | {control_http.get('duplicate_flow_count', 0)} | {control_http.get('cache_hit_ratio', 'N/A')} |",
+            f"| Target | {target_http.get('flow_count', 0)} | {target_http.get('duplicate_flow_count', 0)} | {target_http.get('cache_hit_ratio', 'N/A')} |",
             "",
         ]
     )
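
Rendered in the CI comment, the widened table looks like this (values illustrative):

| Version | Flow Count | Duplicate Flows | Cache Hit Ratio |
|---------|------------|-----------------|-----------------|
| Control | 120        | 14              | 11.67%          |
| Target  | 96         | 3               | 3.12%           |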
@@ -370,6 +370,9 @@ def generate_single_version_report(
     run_url = _get_github_run_url()
     artifacts_url = _get_github_artifacts_url()
 
+    # Get tester identity from environment (GitHub Actions sets GITHUB_ACTOR)
+    tester = os.getenv("GITHUB_ACTOR") or os.getenv("USER") or "unknown"
+
     # Start with L2 header containing the command name (no L1 header)
     lines: list[str] = [
         f"## `{command.upper()}` Test Results",
@@ -377,6 +380,7 @@ def generate_single_version_report(
         "### Context",
         "",
         f"- **Test Date:** {datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}",
+        f"- **Tester:** `{tester}`",
         f"- **Connector:** `{connector_name}`",
         f"- **Version:** `{version}`",
         f"- **Command:** `{command.upper()}`",

airbyte_ops_mcp/regression_tests/connection_fetcher.py

@@ -96,7 +96,7 @@ def fetch_connection_data(
     )
 
     access_token = _get_access_token(client_id, client_secret)
-    api_root = constants.CLOUD_API_ROOT
+    public_api_root = constants.CLOUD_API_ROOT
     headers = {
         "Authorization": f"Bearer {access_token}",
         "Content-Type": "application/json",
@@ -104,7 +104,7 @@ def fetch_connection_data(
 
     # Get connection details
     conn_response = requests.get(
-        f"{api_root}/connections/{connection_id}",
+        f"{public_api_root}/connections/{connection_id}",
         headers=headers,
         timeout=30,
     )
@@ -120,7 +120,7 @@ def fetch_connection_data(
 
     # Get source details (includes config)
     source_response = requests.get(
-        f"{api_root}/sources/{source_id}",
+        f"{public_api_root}/sources/{source_id}",
         headers=headers,
         timeout=30,
     )
@@ -160,7 +160,9 @@ def fetch_connection_data(
     stream_names = [s["name"] for s in streams_config]
 
     # Build Airbyte protocol catalog format
-    catalog = _build_configured_catalog(streams_config, source_id, headers, api_root)
+    catalog = _build_configured_catalog(
+        streams_config, source_id, headers, public_api_root
+    )
 
     return ConnectionData(
         connection_id=connection_id,
@@ -179,12 +181,21 @@ def _build_configured_catalog(
     streams_config: list[dict[str, Any]],
     source_id: str,
     headers: dict[str, str],
-    api_root: str,
+    public_api_root: str,
 ) -> dict[str, Any]:
     """Build a configured catalog from connection stream configuration.
 
     This creates a catalog in the Airbyte protocol format that can be used
     with connector commands.
+
+    Args:
+        streams_config: List of stream configuration dicts from the connection.
+        source_id: The source ID.
+        headers: HTTP headers for API requests.
+        public_api_root: The Public API root URL (e.g., CLOUD_API_ROOT).
+
+    Returns:
+        A configured catalog dict in Airbyte protocol format.
     """
     # For now, create a minimal catalog structure
     # A full implementation would fetch the source's discovered catalog

airbyte_ops_mcp/regression_tests/http_metrics.py

@@ -47,11 +47,21 @@ class HttpMetrics:
     flow_count: int
     duplicate_flow_count: int
     unique_urls: list[str]
+    cache_hits_count: int = 0
+
+    @property
+    def cache_hit_ratio(self) -> str:
+        """Calculate cache hit ratio as a percentage string."""
+        if self.flow_count == 0:
+            return "N/A"
+        return f"{(self.cache_hits_count / self.flow_count) * 100:.2f}%"
 
     @classmethod
     def empty(cls) -> HttpMetrics:
         """Create empty metrics when HTTP capture is unavailable."""
-        return cls(flow_count=0, duplicate_flow_count=0, unique_urls=[])
+        return cls(
+            flow_count=0, duplicate_flow_count=0, unique_urls=[], cache_hits_count=0
+        )
 
 
 @dataclass
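
A quick worked example of the new property and its zero-division guard (consistent with parse_http_dump below, which treats duplicate requests as cache-hit candidates):

metrics = HttpMetrics(
    flow_count=50,
    duplicate_flow_count=5,
    unique_urls=["https://api.example.com/v1/items"],
    cache_hits_count=5,
)
print(metrics.cache_hit_ratio)              # "10.00%"
print(HttpMetrics.empty().cache_hit_ratio)  # "N/A" (flow_count == 0)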
@@ -285,17 +295,22 @@ def parse_http_dump(dump_file_path: Path) -> HttpMetrics:
     unique_urls = list(set(all_urls))
     duplicate_count = len(all_urls) - len(unique_urls)
 
+    # Cache hits are interpreted as duplicate requests to the same URL
+    # (requests that could potentially be served from cache)
+    cache_hits = duplicate_count
+
     return HttpMetrics(
         flow_count=len(flows),
         duplicate_flow_count=duplicate_count,
         unique_urls=sorted(unique_urls),
+        cache_hits_count=cache_hits,
     )
 
 
 def compute_http_metrics_comparison(
     control_metrics: HttpMetrics,
     target_metrics: HttpMetrics,
-) -> dict[str, dict[str, int | str] | int]:
+) -> dict[str, dict[str, int | str] | int | str]:
     """Compute HTTP metrics comparison between control and target.
 
     This produces output in the same format as the legacy
@@ -312,10 +327,14 @@ def compute_http_metrics_comparison(
         "control": {
             "flow_count": control_metrics.flow_count,
             "duplicate_flow_count": control_metrics.duplicate_flow_count,
+            "cache_hits_count": control_metrics.cache_hits_count,
+            "cache_hit_ratio": control_metrics.cache_hit_ratio,
         },
         "target": {
             "flow_count": target_metrics.flow_count,
             "duplicate_flow_count": target_metrics.duplicate_flow_count,
+            "cache_hits_count": target_metrics.cache_hits_count,
+            "cache_hit_ratio": target_metrics.cache_hit_ratio,
         },
         "difference": target_metrics.flow_count - control_metrics.flow_count,
     }

airbyte_ops_mcp/regression_tests/models.py

@@ -250,10 +250,16 @@ class ExecutionResult:
         messages_by_type: dict[str, list[str]] = defaultdict(list)
         for message in self.airbyte_messages:
             type_name = message.type.value.lower()
-            messages_by_type[type_name].append(message.json())
+            messages_by_type[type_name].append(message.model_dump_json())
 
         for type_name, messages in messages_by_type.items():
             file_path = airbyte_messages_dir / f"{type_name}.jsonl"
             file_path.write_text("\n".join(messages))
 
+        # Save configured catalog (input) if available
+        if self.configured_catalog is not None:
+            catalog_path = output_dir / "configured_catalog.json"
+            catalog_path.write_text(self.configured_catalog.model_dump_json(indent=2))
+            self.logger.info(f"Saved configured catalog to {catalog_path}")
+
         self.logger.info(f"Artifacts saved to {output_dir}")
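
With these changes, a hedged sketch of the resulting artifact layout (assuming airbyte_messages_dir is a subdirectory of output_dir; message types are illustrative):

output_dir/
    airbyte_messages/
        record.jsonl             # one message per line via model_dump_json()
        state.jsonl
        log.jsonl
    configured_catalog.json      # input catalog, pretty-printed with indent=2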