airbyte-internal-ops 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. {airbyte_internal_ops-0.1.3.dist-info → airbyte_internal_ops-0.1.4.dist-info}/METADATA +8 -5
  2. {airbyte_internal_ops-0.1.3.dist-info → airbyte_internal_ops-0.1.4.dist-info}/RECORD +31 -11
  3. airbyte_ops_mcp/_legacy/airbyte_ci/connector_pipelines/airbyte_ci/connectors/test/steps/common.py +1 -1
  4. airbyte_ops_mcp/cli/cloud.py +309 -38
  5. airbyte_ops_mcp/cloud_admin/connection_config.py +131 -0
  6. airbyte_ops_mcp/live_tests/__init__.py +16 -0
  7. airbyte_ops_mcp/live_tests/_connection_retriever/__init__.py +35 -0
  8. airbyte_ops_mcp/live_tests/_connection_retriever/audit_logging.py +88 -0
  9. airbyte_ops_mcp/live_tests/_connection_retriever/consts.py +33 -0
  10. airbyte_ops_mcp/live_tests/_connection_retriever/db_access.py +82 -0
  11. airbyte_ops_mcp/live_tests/_connection_retriever/retrieval.py +391 -0
  12. airbyte_ops_mcp/live_tests/_connection_retriever/secrets_resolution.py +130 -0
  13. airbyte_ops_mcp/live_tests/config.py +190 -0
  14. airbyte_ops_mcp/live_tests/connection_fetcher.py +159 -2
  15. airbyte_ops_mcp/live_tests/connection_secret_retriever.py +173 -0
  16. airbyte_ops_mcp/live_tests/evaluation_modes.py +45 -0
  17. airbyte_ops_mcp/live_tests/http_metrics.py +81 -0
  18. airbyte_ops_mcp/live_tests/message_cache/__init__.py +15 -0
  19. airbyte_ops_mcp/live_tests/message_cache/duckdb_cache.py +415 -0
  20. airbyte_ops_mcp/live_tests/obfuscation.py +126 -0
  21. airbyte_ops_mcp/live_tests/regression/__init__.py +29 -0
  22. airbyte_ops_mcp/live_tests/regression/comparators.py +466 -0
  23. airbyte_ops_mcp/live_tests/schema_generation.py +154 -0
  24. airbyte_ops_mcp/live_tests/validation/__init__.py +43 -0
  25. airbyte_ops_mcp/live_tests/validation/catalog_validators.py +389 -0
  26. airbyte_ops_mcp/live_tests/validation/record_validators.py +227 -0
  27. airbyte_ops_mcp/mcp/_mcp_utils.py +3 -0
  28. airbyte_ops_mcp/mcp/live_tests.py +500 -0
  29. airbyte_ops_mcp/mcp/server.py +3 -0
  30. {airbyte_internal_ops-0.1.3.dist-info → airbyte_internal_ops-0.1.4.dist-info}/WHEEL +0 -0
  31. {airbyte_internal_ops-0.1.3.dist-info → airbyte_internal_ops-0.1.4.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,391 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+ """Core retrieval logic for vendored connection-retriever.
3
+
4
+ Vendored from: airbyte-platform-internal/tools/connection-retriever/src/connection_retriever/retrieval.py
5
+
6
+ This is a minimal subset focused on retrieving unmasked source config.
7
+ For testing candidate discovery, see issue #91.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+ import uuid
14
+ from dataclasses import dataclass
15
+ from typing import Any, Mapping
16
+
17
+ import requests
18
+ import sqlalchemy
19
+ from google.cloud import secretmanager
20
+
21
+ from airbyte_ops_mcp.live_tests._connection_retriever.audit_logging import (
22
+ audit,
23
+ )
24
+ from airbyte_ops_mcp.live_tests._connection_retriever.consts import (
25
+ CLOUD_REGISTRY_URL,
26
+ ConnectionObject,
27
+ )
28
+ from airbyte_ops_mcp.live_tests._connection_retriever.db_access import (
29
+ get_pool,
30
+ )
31
+ from airbyte_ops_mcp.live_tests._connection_retriever.secrets_resolution import (
32
+ get_resolved_config,
33
+ )
34
+
35
+ LOGGER = logging.getLogger(__name__)
36
+
37
+ # SQL Queries
38
# Every statement below uses named bind parameters (``:name``); the values are
# supplied by the callers through ``Connection.execute(..., parameters={...})``.

# Looks up the source/destination/catalog columns of one connection.
# Returns no row when the workspace's dataplane group is named 'EU'.
SELECT_ON_CONNECTION_NOT_EU = sqlalchemy.text(
    """
    SELECT
        source_id,
        destination_id,
        source_catalog_id,
        catalog
    FROM
        connection
    JOIN
        actor ON connection.source_id = actor.id
    JOIN
        workspace ON actor.workspace_id = workspace.id
    JOIN
        dataplane_group ON workspace.dataplane_group_id = dataplane_group.id
    WHERE
        connection.id = :connection_id
        AND dataplane_group.name != 'EU'
    """
)

# Yields a single boolean column ``is_eu`` for one connection: TRUE when the
# workspace's dataplane group is named 'EU', FALSE otherwise.
SELECT_ON_CONNECTION_DATAPLANE_GROUP_IS_EU = sqlalchemy.text(
    """
    SELECT
        CASE WHEN dataplane_group.name = 'EU' THEN TRUE ELSE FALSE END as is_eu
    FROM
        connection
    JOIN
        actor ON connection.source_id = actor.id
    JOIN
        workspace ON actor.workspace_id = workspace.id
    JOIN
        dataplane_group ON workspace.dataplane_group_id = dataplane_group.id
    WHERE
        connection.id = :connection_id
    """
)

# Fetches an actor's stored configuration together with the ids of its
# organization, workspace and actor definition.
SELECT_ON_ACTOR_WITH_ORGANIZATION = sqlalchemy.text(
    """
    SELECT
        organization_id,
        workspace_id,
        actor_definition_id,
        configuration
    FROM
        actor
    JOIN
        workspace ON workspace.id = actor.workspace_id
    WHERE
        actor.id = :actor_id
    """
)

# Lists every OAuth parameter row registered for an actor definition,
# oldest first.
SELECT_ON_OAUTH_PARAMETER = sqlalchemy.text(
    """
    SELECT
        organization_id,
        workspace_id,
        configuration
    FROM
        actor_oauth_parameter
    WHERE
        actor_definition_id = :actor_definition_id
    ORDER BY created_at ASC;
    """
)
105
+
106
+
107
@dataclass
class RetrievalMetadata:
    """Describes a single retrieval request so it can be audit-logged."""

    # Id of the connection the object is retrieved for.
    connection_id: str
    # Kind of connection object being requested.
    connection_object: ConnectionObject
    # Reason given for the retrieval.
    retrieval_reason: str
114
+
115
+
116
@dataclass
class TestingCandidate:
    """Holds a connection considered for testing and the objects retrieved for it.

    Only ``connection_id`` is required; every other field defaults to ``None``
    and can be filled in later through :meth:`update`.
    """

    connection_id: str
    connection_url: str | None = None
    stream_count: int | None = None
    last_attempt_duration_in_microseconds: int | None = None
    is_internal: bool | None = None
    streams_with_data: list[str] | None = None

    # ConnectionObject fields
    connection: str | None = None
    source_id: str | None = None
    destination_id: str | None = None

    destination_config: Mapping | None = None
    source_config: Mapping | None = None
    catalog: Mapping | None = None
    configured_catalog: Mapping | None = None
    state: list[Mapping] | None = None

    workspace_id: str | None = None
    destination_docker_image: str | None = None
    source_docker_image: str | None = None

    def update(self, **kwargs: Any) -> None:
        """Assign each keyword argument to the attribute of the same name.

        Attributes are set one by one in argument order, so values that come
        before an unknown name stay applied when the error is raised.

        Raises:
            AttributeError: If a keyword does not name an existing attribute.
        """
        for name, new_value in kwargs.items():
            if not hasattr(self, name):
                raise AttributeError(
                    f"{name} is not a valid field of {self.__class__.__name__}"
                )
            setattr(self, name, new_value)
151
+
152
+
153
class ConnectionNotFoundError(Exception):
    """Signals that no matching connection could be located."""
157
+
158
+
159
@audit
def get_connection(
    connection_id: str,
    db_conn: sqlalchemy.Connection,
) -> Mapping | None:
    """Load the source, destination and catalog columns of a connection.

    The underlying query only matches connections whose workspace dataplane
    group is not named 'EU', so such connections are reported as not found.

    NOTE(review): callers in this module pass a ``RetrievalMetadata`` as an
    extra leading argument; presumably the ``audit`` decorator consumes it.
    Confirm in ``audit_logging``.

    Args:
        connection_id: Id of the connection to look up.
        db_conn: Open database connection used to run the query.

    Returns:
        A mapping with the keys ``source_id``, ``destination_id``,
        ``source_catalog_id`` and ``catalog``.

    Raises:
        ValueError: If the query returns no row.
    """
    row = db_conn.execute(
        SELECT_ON_CONNECTION_NOT_EU,
        parameters={"connection_id": connection_id},
    ).first()
    if row is None:
        raise ValueError(f"Could not find connection {connection_id}.")

    # Column order mirrors the SELECT list of the query.
    column_names = ("source_id", "destination_id", "source_catalog_id", "catalog")
    return {name: row[position] for position, name in enumerate(column_names)}
176
+
177
+
178
def get_actor_config(
    actor_id: str,
    db_conn: sqlalchemy.Connection,
    secret_manager_client: secretmanager.SecretManagerServiceClient,
) -> Mapping | None:
    """Build the fully resolved configuration of an actor.

    Reads the stored actor configuration, looks up the connector spec and the
    applicable OAuth parameter override, then delegates secret resolution and
    merging to ``get_resolved_config``.

    Args:
        actor_id: Id of the actor whose configuration is wanted.
        db_conn: Open database connection used to run the queries.
        secret_manager_client: Client used to read secret values.

    Returns:
        The resolved configuration.

    Raises:
        ValueError: If no actor row exists for ``actor_id``.
    """
    row = db_conn.execute(
        SELECT_ON_ACTOR_WITH_ORGANIZATION,
        parameters={"actor_id": actor_id},
    ).first()
    if row is None:
        raise ValueError(f"Could not find actor configuration for actor {actor_id}.")

    organization_id, workspace_id, actor_definition_id, actor_configuration = row

    spec = get_spec(actor_definition_id)
    oauth_overrides = _get_oauth_parameters_overrides(
        db_conn=db_conn,
        actor_definition_id=actor_definition_id,
        actor_organization_id=organization_id,
        actor_workspace_id=workspace_id,
    )
    return get_resolved_config(
        secret_manager_client=secret_manager_client,
        actor_configuration=actor_configuration,
        actor_oauth_parameter=oauth_overrides,
        spec=spec,
    )
200
+
201
+
202
def _get_oauth_parameters_overrides(
    db_conn: sqlalchemy.Connection,
    actor_definition_id: str,
    actor_organization_id: str,
    actor_workspace_id: str,
) -> dict:
    """Pick the OAuth parameter configuration that applies to an actor.

    All OAuth parameter rows of the actor definition are scanned and the most
    specific one wins, in this order:

    1. Row matching both the actor's organization and workspace (returned
       as soon as it is seen)
    2. Row matching the actor's workspace
    3. Row matching the actor's organization with no workspace set
    4. Row with neither organization nor workspace set (the last one seen)

    Args:
        db_conn: Open database connection used to run the query.
        actor_definition_id: Actor definition whose OAuth rows are scanned.
        actor_organization_id: Organization id of the actor.
        actor_workspace_id: Workspace id of the actor.

    Returns:
        The selected configuration, or an empty dict when nothing applies.

    Raises:
        ValueError: If more than one organization-level or more than one
            workspace-level row is encountered.
    """
    rows = db_conn.execute(
        SELECT_ON_OAUTH_PARAMETER,
        parameters={"actor_definition_id": actor_definition_id},
    ).fetchall()
    if not rows:
        return {}

    org_level = None
    workspace_level = None
    fallback = None

    for row_org_id, row_workspace_id, row_configuration in rows:
        same_org = row_org_id == actor_organization_id
        same_workspace = row_workspace_id == actor_workspace_id

        if same_org and same_workspace:
            # Exact match: nothing can be more specific, stop scanning.
            return row_configuration

        if same_org and row_workspace_id is None:
            if org_level is not None:
                raise ValueError(
                    "Multiple oauth parameters overrides for this actor_definition_id "
                    "for this organization"
                )
            org_level = row_configuration
        elif same_workspace:
            if workspace_level is not None:
                raise ValueError(
                    "Multiple oauth parameters overrides for this actor_definition_id "
                    "for this workspace"
                )
            workspace_level = row_configuration
        elif row_org_id is None and row_workspace_id is None:
            fallback = row_configuration

    # Most specific remaining candidate first.
    for candidate in (workspace_level, org_level, fallback):
        if candidate is not None:
            return candidate
    return {}
265
+
266
+
267
@audit
def get_source_config(
    source_id: str,
    db_conn: sqlalchemy.Connection,
    secret_manager_client: secretmanager.SecretManagerServiceClient,
) -> Mapping | None:
    """Return the resolved configuration of a source actor.

    Thin audited wrapper around ``get_actor_config``.

    Args:
        source_id: Id of the source actor.
        db_conn: Open database connection used to run the queries.
        secret_manager_client: Client used to read secret values.
    """
    resolved_source_config = get_actor_config(source_id, db_conn, secret_manager_client)
    return resolved_source_config
275
+
276
+
277
def get_registry_entries(timeout: float = 30.0) -> list[dict]:
    """Download the cloud registry and return all of its connector entries.

    Args:
        timeout: Seconds to wait for the registry server before giving up.
            Without a timeout ``requests`` waits indefinitely, so a stalled
            registry endpoint would hang the whole retrieval.

    Returns:
        The registry's ``sources`` entries followed by its ``destinations``
        entries.

    Raises:
        requests.HTTPError: If the registry responds with an error status.
        requests.Timeout: If the registry does not answer within ``timeout``.
    """
    registry_response = requests.get(CLOUD_REGISTRY_URL, timeout=timeout)
    registry_response.raise_for_status()
    registry = registry_response.json()
    return registry["sources"] + registry["destinations"]
283
+
284
+
285
def get_spec(actor_definition_id: uuid.UUID) -> dict:
    """Look up the connector spec of an actor definition in the cloud registry.

    Args:
        actor_definition_id: Id compared, as a string, against each registry
            entry's ``sourceDefinitionId`` and ``destinationDefinitionId``.

    Returns:
        The ``spec`` of the first matching registry entry.

    Raises:
        ValueError: If no registry entry matches the id.
    """
    wanted_id = str(actor_definition_id)
    id_keys = ("sourceDefinitionId", "destinationDefinitionId")

    # The registry is fetched here, eagerly, before the lazy scan starts.
    matching_specs = (
        entry["spec"]
        for entry in get_registry_entries()
        if any(entry.get(id_key) == wanted_id for id_key in id_keys)
    )
    try:
        return next(matching_specs)
    except StopIteration as err:
        raise ValueError(
            f"Could not find spec for actor definition {actor_definition_id}."
        ) from err
301
+
302
+
303
def retrieve_objects(
    connection_objects: list[ConnectionObject],
    retrieval_reason: str,
    connection_id: str,
) -> list[TestingCandidate]:
    """Fetch the requested connection objects for one connection ID.

    Simplified variant that only supports lookup by ``connection_id``.
    For testing candidate discovery by docker image, see issue #91.

    Connections whose dataplane group is 'EU' are dropped from the result
    (with a warning) and nothing is retrieved for them.

    Args:
        connection_objects: ConnectionObject types to retrieve.
        retrieval_reason: Reason for retrieval (for audit logging).
        connection_id: The connection ID to retrieve objects for.

    Returns:
        A list holding the single populated TestingCandidate, or an empty
        list when the connection was dropped for EU data residency.

    Raises:
        ConnectionNotFoundError: If the connection does not exist.
    """
    connection_candidates = [TestingCandidate(connection_id=connection_id)]

    secret_manager_client = secretmanager.SecretManagerServiceClient()
    connection_pool = get_pool(secret_manager_client)

    retained: list[TestingCandidate] = []
    with connection_pool.connect() as db_conn:
        for candidate in connection_candidates:
            eu_row = db_conn.execute(
                SELECT_ON_CONNECTION_DATAPLANE_GROUP_IS_EU,
                parameters={"connection_id": candidate.connection_id},
            ).first()
            if eu_row is None:
                raise ConnectionNotFoundError(
                    f"Credentials were not found for connection ID {candidate.connection_id}."
                )
            if eu_row[0] is True:
                # EU-resident connection: skip it instead of retrieving anything.
                LOGGER.warning(
                    f"Credential retrieval not permitted; the data residency for "
                    f"connection ID {candidate.connection_id} is within the EU. "
                    f"Candidate will be removed from the list"
                )
                continue

            retrieved: dict[str, Any] = {}
            for connection_object in connection_objects:
                # Enum values use dashes, candidate attributes use underscores.
                field_name = connection_object.value.replace("-", "_")
                retrieved[field_name] = retrieve_object(
                    candidate.connection_id,
                    connection_object,
                    retrieval_reason,
                    db_conn,
                    secret_manager_client,
                )
            candidate.update(**retrieved)
            retained.append(candidate)

    return retained
359
+
360
+
361
def retrieve_object(
    connection_id: str,
    connection_object: ConnectionObject,
    retrieval_reason: str,
    db_conn: sqlalchemy.Connection,
    secret_manager_client: secretmanager.SecretManagerServiceClient,
) -> Mapping | list[Mapping] | str | None:
    """Retrieve one connection object for a connection.

    The connection row is always loaded first, even when the requested object
    type turns out to be unsupported.

    Args:
        connection_id: Id of the connection to read from.
        connection_object: Which object to retrieve.
        retrieval_reason: Reason recorded in the retrieval metadata.
        db_conn: Open database connection used to run the queries.
        secret_manager_client: Client used to read secret values.

    Returns:
        The source id, destination id, resolved source config or catalog,
        depending on ``connection_object``.

    Raises:
        NotImplementedError: If ``connection_object`` is not one of the
            supported types.
    """
    retrieval_metadata = RetrievalMetadata(
        connection_id, connection_object, retrieval_reason
    )
    connection = get_connection(retrieval_metadata, connection_id, db_conn)

    if connection_object == ConnectionObject.SOURCE_ID:
        return connection["source_id"]

    if connection_object == ConnectionObject.DESTINATION_ID:
        return connection["destination_id"]

    if connection_object == ConnectionObject.SOURCE_CONFIG:
        return get_source_config(
            retrieval_metadata,
            connection["source_id"],
            db_conn,
            secret_manager_client,
        )

    if connection_object == ConnectionObject.CONFIGURED_CATALOG:
        return connection["catalog"]

    raise NotImplementedError(
        f"Connection object {connection_object} not implemented in vendored version. "
        f"Only SOURCE_CONFIG, SOURCE_ID, DESTINATION_ID, and CONFIGURED_CATALOG are supported."
    )
@@ -0,0 +1,130 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+ """Secret resolution for vendored connection-retriever.
3
+
4
+ Vendored from: airbyte-platform-internal/tools/connection-retriever/src/connection_retriever/secrets_resolution.py
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any
10
+
11
+ import dpath
12
+ from google.cloud import secretmanager
13
+
14
+ from airbyte_ops_mcp.live_tests._connection_retriever.consts import (
15
+ GCP_PROJECT_NAME,
16
+ )
17
+
18
+
19
def get_secret_value(
    secret_manager_client: secretmanager.SecretManagerServiceClient, secret_id: str
) -> str:
    """Read the payload of a secret's first enabled version.

    Args:
        secret_manager_client: The secret manager client.
        secret_id: Resource name of the secret, used as the listing parent.

    Returns:
        The secret payload decoded as UTF-8.

    Raises:
        ValueError: If the secret has no enabled version.
    """
    listing = secret_manager_client.list_secret_versions(
        request={"parent": secret_id, "filter": "state:ENABLED"}
    )
    enabled_versions = listing.versions
    if len(enabled_versions) == 0:
        raise ValueError(f"No enabled version of secret {secret_id} found")

    # Only the first enabled version returned by the listing is used.
    version_name = enabled_versions[0].name
    secret = secret_manager_client.access_secret_version(name=version_name)
    return secret.payload.data.decode("UTF-8")
39
+
40
+
41
def is_secret(value: Any) -> bool:
    """Tell whether a config value is a secret reference.

    A secret reference is a dict whose ``_secret`` entry is present and not
    ``None``.

    Args:
        value: The value to check.

    Returns:
        True if the value is a secret reference, False otherwise.
    """
    if not isinstance(value, dict):
        return False
    return value.get("_secret") is not None
51
+
52
+
53
def resolve_secrets_in_config(
    secret_manager_client: secretmanager.SecretManagerServiceClient,
    connector_config: dict,
) -> dict:
    """Recursively replace secret references in a connector config.

    The input is left untouched: a new dict is built and returned, so
    plaintext secret values never end up in the object the caller passed in.
    Only nested dicts are traversed; other values are carried over as-is.

    Args:
        secret_manager_client: The secret manager client.
        connector_config: The connector config to resolve secrets in.

    Returns:
        A new dict mirroring ``connector_config`` with every secret reference
        replaced by the secret's value.
    """
    resolved: dict = {}
    for key, value in connector_config.items():
        if is_secret(value):
            secret_id = f"projects/{GCP_PROJECT_NAME}/secrets/{value['_secret']}"
            resolved[key] = get_secret_value(secret_manager_client, secret_id)
        elif isinstance(value, dict):
            resolved[key] = resolve_secrets_in_config(secret_manager_client, value)
        else:
            resolved[key] = value
    return resolved
75
+
76
+
77
def merge_dicts_non_destructive(a: dict, b: dict) -> dict:
    """Return a merged copy of two dicts where ``b`` wins on conflicts.

    When both sides hold a dict under the same key, those dicts are merged
    recursively; in every other case the value from ``b`` replaces the one
    from ``a``. Neither input is modified.
    """
    merged = a.copy()
    for key, incoming in b.items():
        existing = merged.get(key)
        both_are_dicts = isinstance(existing, dict) and isinstance(incoming, dict)
        merged[key] = (
            merge_dicts_non_destructive(existing, incoming)
            if both_are_dicts
            else incoming
        )
    return merged
86
+
87
+
88
def get_resolved_config(
    secret_manager_client: secretmanager.SecretManagerServiceClient,
    actor_configuration: dict,
    actor_oauth_parameter: dict,
    spec: dict,
) -> dict:
    """Resolve an actor's secrets and merge in OAuth parameters when applicable.

    Args:
        secret_manager_client: The secret manager client.
        actor_configuration: The actor configuration.
        actor_oauth_parameter: The actor oauth parameter.
        spec: The connector spec.

    Returns:
        The resolved configuration. OAuth parameters are merged on top of it
        only when the spec declares ``advanced_auth`` and the actor is deemed
        to use OAuth.
    """
    resolved_configuration = resolve_secrets_in_config(
        secret_manager_client, actor_configuration
    )

    # Connectors without OAuth support never get OAuth parameters merged.
    if "advanced_auth" not in spec:
        return resolved_configuration

    advanced_auth = spec["advanced_auth"]
    try:
        predicate_path = "/".join(advanced_auth["predicate_key"])
        configured_value = dpath.get(actor_configuration, predicate_path)
        is_using_oauth = configured_value == advanced_auth["predicate_value"]
    except KeyError:
        # When no predicate_key is defined but we have advanced_auth in spec
        # we can assume that the connector is only using OAuth.
        is_using_oauth = True

    if not is_using_oauth:
        return resolved_configuration

    resolved_oauth_parameter = resolve_secrets_in_config(
        secret_manager_client, actor_oauth_parameter
    )
    return merge_dicts_non_destructive(
        resolved_configuration, resolved_oauth_parameter
    )
@@ -0,0 +1,190 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+ """Configuration options for live tests.
3
+
4
+ This module provides configuration classes and enums for controlling
5
+ live test behavior, including connection filtering, stream selection,
6
+ and test modes.
7
+
8
+ Based on airbyte-ci implementation:
9
+ https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/commons/models.py
10
+ https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/commons/connection_objects_retrieval.py
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass, field
16
+ from enum import Enum
17
+ from pathlib import Path
18
+
19
+
20
class ConnectionSubset(Enum):
    """Selects the pool of connections that live tests may draw from.

    SANDBOXES: Only use Airbyte sandbox connections (safer, limited data)
    ALL: Use all available connections on Cloud (more coverage, real data)
    """

    SANDBOXES = "sandboxes"
    ALL = "all"

    @classmethod
    def from_string(
        cls,
        value: str,
    ) -> ConnectionSubset:
        """Convert a case-insensitive string into a ConnectionSubset member.

        Raises:
            ValueError: If the string matches no member value.
        """
        normalized = value.lower()
        for member in cls:
            if member.value == normalized:
                return member
        raise ValueError(
            f"Unknown connection subset: {value}. Must be 'sandboxes' or 'all'."
        )
44
+
45
+
46
class TargetOrControl(Enum):
    """Marks a connector version as either the target or the control."""

    TARGET = "target"
    CONTROL = "control"
51
+
52
+
53
class ActorType(Enum):
    """Kind of connector actor: source or destination."""

    SOURCE = "source"
    DESTINATION = "destination"
58
+
59
+
60
@dataclass
class LiveTestConfig:
    """Bundle of options that control a live test run.

    Groups connection filtering, stream selection, custom input paths and
    descriptive test metadata in one place. Construction fails when an
    explicit ``connection_id`` is combined with ``auto_select_connections``.
    """

    # Connection filtering
    connection_id: str | None = None
    connection_subset: ConnectionSubset = ConnectionSubset.SANDBOXES
    max_connections: int | None = None
    auto_select_connections: bool = False

    # Stream filtering
    selected_streams: set[str] | None = None

    # Custom paths for local testing
    custom_config_path: Path | None = None
    custom_catalog_path: Path | None = None
    custom_state_path: Path | None = None

    # Test behavior
    test_description: str | None = None
    retrieval_reason: str | None = None

    def __post_init__(self) -> None:
        """Reject mutually exclusive connection options.

        Raises:
            ValueError: If both ``connection_id`` and
                ``auto_select_connections`` are set.
        """
        if not self.connection_id or not self.auto_select_connections:
            return
        raise ValueError(
            "Cannot set both connection_id and auto_select_connections"
        )
92
+
93
+
94
@dataclass
class StreamFilter:
    """Include/exclude rules for choosing which streams to test.

    An unset or empty ``include_streams`` means "include everything"; an
    unset or empty ``exclude_streams`` means "exclude nothing".
    """

    include_streams: set[str] | None = None
    exclude_streams: set[str] | None = None

    def filter_streams(
        self,
        available_streams: set[str],
    ) -> set[str]:
        """Apply the include rule, then the exclude rule, to a set of streams.

        Always returns a new set; ``available_streams`` is not modified.
        """
        selected = (
            available_streams & self.include_streams
            if self.include_streams
            else available_streams.copy()
        )
        if self.exclude_streams:
            selected = selected - self.exclude_streams
        return selected

    def matches(
        self,
        stream_name: str,
    ) -> bool:
        """Return True when a single stream name passes both rules."""
        included = not self.include_streams or stream_name in self.include_streams
        excluded = bool(self.exclude_streams) and stream_name in self.exclude_streams
        return included and not excluded
131
+
132
+
133
@dataclass
class ConnectionCandidate:
    """A connection that may be picked for testing.

    Used by the auto-selection logic, which weighs candidates by the streams
    they cover and by how long their last sync took.
    """

    connection_id: str
    workspace_id: str | None = None
    # Each instance gets its own list.
    streams_with_data: list[str] = field(default_factory=list)
    last_sync_duration_seconds: float | None = None

    @property
    def stream_count(self) -> int:
        """Number of entries in ``streams_with_data``."""
        return len(self.streams_with_data)
149
+
150
+
151
def select_best_connection_candidates(
    candidates: list[ConnectionCandidate],
    max_connections: int | None = None,
) -> list[tuple[ConnectionCandidate, list[str]]]:
    """Select the best subset of connection candidates for testing.

    Candidates are visited from fastest to slowest last sync. Each stream is
    assigned to the first candidate that offers it, and candidates that add no
    new stream are dropped. The result is ordered by the number of assigned
    streams, largest first; ties keep the faster candidate first.

    Based on airbyte-ci implementation:
    https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/commons/connection_objects_retrieval.py#L201-L220

    Args:
        candidates: Candidates to choose from.
        max_connections: Upper bound on the number of returned entries.
            ``None`` or ``0`` means no limit.

    Returns:
        ``(candidate, streams_to_test)`` pairs for the selected candidates.
    """

    def _duration_key(candidate: ConnectionCandidate) -> float:
        # Only an unknown duration sorts last; a real 0.0 duration is the
        # fastest possible value and must not be mistaken for "unknown".
        duration = candidate.last_sync_duration_seconds
        return float("inf") if duration is None else duration

    tested_streams: set[str] = set()
    selected: list[tuple[ConnectionCandidate, list[str]]] = []

    for candidate in sorted(candidates, key=_duration_key):
        streams_to_test: list[str] = []
        for stream in candidate.streams_with_data:
            if stream not in tested_streams:
                streams_to_test.append(stream)
                tested_streams.add(stream)

        if streams_to_test:
            selected.append((candidate, streams_to_test))

    # Most streams first; the stable sort keeps faster candidates ahead on ties.
    selected.sort(key=lambda pair: len(pair[1]), reverse=True)

    if max_connections:
        selected = selected[:max_connections]

    return selected