airbyte-internal-ops 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/METADATA +1 -1
  2. {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/RECORD +13 -52
  3. airbyte_ops_mcp/cli/cloud.py +42 -3
  4. airbyte_ops_mcp/cloud_admin/api_client.py +473 -0
  5. airbyte_ops_mcp/cloud_admin/models.py +56 -0
  6. airbyte_ops_mcp/mcp/cloud_connector_versions.py +460 -0
  7. airbyte_ops_mcp/mcp/prerelease.py +6 -46
  8. airbyte_ops_mcp/regression_tests/ci_output.py +151 -71
  9. airbyte_ops_mcp/regression_tests/http_metrics.py +21 -2
  10. airbyte_ops_mcp/regression_tests/models.py +6 -0
  11. airbyte_ops_mcp/telemetry.py +162 -0
  12. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/.gitignore +0 -1
  13. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/README.md +0 -420
  14. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/__init__.py +0 -2
  15. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/__init__.py +0 -1
  16. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/__init__.py +0 -8
  17. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/base_backend.py +0 -16
  18. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/duckdb_backend.py +0 -87
  19. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/file_backend.py +0 -165
  20. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connection_objects_retrieval.py +0 -377
  21. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connector_runner.py +0 -247
  22. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/errors.py +0 -7
  23. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/evaluation_modes.py +0 -25
  24. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/hacks.py +0 -23
  25. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/json_schema_helper.py +0 -384
  26. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/mitm_addons.py +0 -37
  27. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/models.py +0 -595
  28. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/proxy.py +0 -207
  29. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/secret_access.py +0 -47
  30. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/segment_tracking.py +0 -45
  31. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/utils.py +0 -214
  32. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/conftest.py.disabled +0 -751
  33. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/consts.py +0 -4
  34. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/poetry.lock +0 -4480
  35. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/pytest.ini +0 -9
  36. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/__init__.py +0 -1
  37. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_check.py +0 -61
  38. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_discover.py +0 -117
  39. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_read.py +0 -627
  40. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_spec.py +0 -43
  41. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/report.py +0 -542
  42. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/stash_keys.py +0 -38
  43. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/__init__.py +0 -0
  44. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/private_details.html.j2 +0 -305
  45. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/report.html.j2 +0 -515
  46. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/utils.py +0 -187
  47. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/__init__.py +0 -0
  48. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_check.py +0 -61
  49. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_discover.py +0 -217
  50. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_read.py +0 -177
  51. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_spec.py +0 -631
  52. {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/WHEEL +0 -0
  53. {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/entry_points.txt +0 -0
@@ -1,165 +0,0 @@
1
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
- from __future__ import annotations
3
-
4
- import json
5
- import logging
6
- from collections.abc import Iterable
7
- from pathlib import Path
8
- from typing import Any, TextIO
9
-
10
- from airbyte_protocol.models import AirbyteMessage # type: ignore
11
- from airbyte_protocol.models import Type as AirbyteMessageType
12
- from cachetools import LRUCache, cached
13
- from live_tests.commons.backends.base_backend import BaseBackend
14
- from live_tests.commons.utils import sanitize_stream_name
15
-
16
-
17
class FileDescriptorLRUCache(LRUCache):
    """An LRU cache of open file objects that closes each file as it is evicted."""

    def popitem(self) -> tuple[Any, Any]:
        """Evict the least-recently-used entry and close its file object."""
        path, file_obj = super().popitem()
        file_obj.close()  # type: ignore  # Release the descriptor once evicted from the cache
        return path, file_obj
22
-
23
-
24
class FileBackend(BaseBackend):
    """Persist AirbyteMessages to JSONL files, one file per message type.

    RECORD messages are additionally duplicated into per-stream files (a full
    message variant and a data-only variant) under ``records_per_stream/``.
    """

    RELATIVE_CATALOGS_PATH = "catalog.jsonl"
    RELATIVE_CONNECTION_STATUS_PATH = "connection_status.jsonl"
    RELATIVE_RECORDS_PATH = "records.jsonl"
    RELATIVE_SPECS_PATH = "spec.jsonl"
    RELATIVE_STATES_PATH = "states.jsonl"
    RELATIVE_TRACES_PATH = "traces.jsonl"
    RELATIVE_LOGS_PATH = "logs.jsonl"
    RELATIVE_CONTROLS_PATH = "controls.jsonl"
    # Class-level (shared) cache bounding the number of concurrently open file
    # descriptors; entries are closed on eviction (see FileDescriptorLRUCache).
    CACHE = FileDescriptorLRUCache(maxsize=250)

    def __init__(self, output_directory: Path):
        self._output_directory = output_directory
        self.record_per_stream_directory = self._output_directory / "records_per_stream"
        self.record_per_stream_directory.mkdir(exist_ok=True, parents=True)
        # Populated lazily as RECORD messages are routed in _get_filepaths_and_messages.
        self.record_per_stream_paths: dict[str, Path] = {}
        self.record_per_stream_paths_data_only: dict[str, Path] = {}

    @property
    def jsonl_specs_path(self) -> Path:
        return (self._output_directory / self.RELATIVE_SPECS_PATH).resolve()

    @property
    def jsonl_catalogs_path(self) -> Path:
        return (self._output_directory / self.RELATIVE_CATALOGS_PATH).resolve()

    @property
    def jsonl_connection_status_path(self) -> Path:
        return (self._output_directory / self.RELATIVE_CONNECTION_STATUS_PATH).resolve()

    @property
    def jsonl_records_path(self) -> Path:
        return (self._output_directory / self.RELATIVE_RECORDS_PATH).resolve()

    @property
    def jsonl_states_path(self) -> Path:
        return (self._output_directory / self.RELATIVE_STATES_PATH).resolve()

    @property
    def jsonl_traces_path(self) -> Path:
        return (self._output_directory / self.RELATIVE_TRACES_PATH).resolve()

    @property
    def jsonl_logs_path(self) -> Path:
        return (self._output_directory / self.RELATIVE_LOGS_PATH).resolve()

    @property
    def jsonl_controls_path(self) -> Path:
        return (self._output_directory / self.RELATIVE_CONTROLS_PATH).resolve()

    @property
    def jsonl_files(self) -> Iterable[Path]:
        """All top-level JSONL output paths (excludes the per-stream files)."""
        return [
            self.jsonl_catalogs_path,
            self.jsonl_connection_status_path,
            self.jsonl_records_path,
            self.jsonl_specs_path,
            self.jsonl_states_path,
            self.jsonl_traces_path,
            self.jsonl_logs_path,
            self.jsonl_controls_path,
        ]

    def write(self, airbyte_messages: Iterable[AirbyteMessage]) -> None:
        """
        Write AirbyteMessages to the appropriate file.

        We use an LRU cache here to manage open file objects, in order to limit the number of concurrently open file
        descriptors. This mitigates the risk of hitting limits on the number of open file descriptors, particularly for
        connections with a high number of streams. The cache is designed to automatically close files upon eviction.
        """

        @cached(cache=self.CACHE)
        def _open_file(path: Path) -> TextIO:
            return open(path, "a")

        try:
            logging.info("Writing airbyte messages to disk")
            for _message in airbyte_messages:
                # Non-message items in the stream are silently skipped.
                if not isinstance(_message, AirbyteMessage):
                    continue
                filepaths, messages = self._get_filepaths_and_messages(_message)
                for filepath, message in zip(filepaths, messages, strict=False):
                    _open_file(self._output_directory / filepath).write(f"{message}\n")
            logging.info("Finished writing airbyte messages to disk")
        finally:
            for f in self.CACHE.values():
                f.close()
            # Fix: clear the shared cache after closing its file objects.
            # Previously the closed descriptors remained cached, so a later
            # write() call (on this or any other FileBackend instance) would
            # get a closed file back from _open_file and fail with
            # "I/O operation on closed file".
            self.CACHE.clear()

    def _get_filepaths_and_messages(
        self, message: AirbyteMessage
    ) -> tuple[tuple[str, ...], tuple[str, ...]]:
        """Route a message to its output file(s) and serialized payload(s).

        Returns:
            A pair of parallel tuples: the file paths to append to, and the
            serialized message text for each path.

        Raises:
            NotImplementedError: If the message type has no file mapping.
        """
        if message.type == AirbyteMessageType.CATALOG:
            return (self.RELATIVE_CATALOGS_PATH,), (message.catalog.json(),)

        elif message.type == AirbyteMessageType.CONNECTION_STATUS:
            return (self.RELATIVE_CONNECTION_STATUS_PATH,), (
                message.connectionStatus.json(),
            )

        elif message.type == AirbyteMessageType.RECORD:
            stream_name = message.record.stream
            stream_file_path = (
                self.record_per_stream_directory
                / f"{sanitize_stream_name(stream_name)}.jsonl"
            )
            stream_file_path_data_only = (
                self.record_per_stream_directory
                / f"{sanitize_stream_name(stream_name)}_data_only.jsonl"
            )
            self.record_per_stream_paths[stream_name] = stream_file_path
            self.record_per_stream_paths_data_only[stream_name] = (
                stream_file_path_data_only
            )
            # Records go to the global records file, the per-stream file, and a
            # data-only per-stream file; sort_keys keeps the output diffable.
            return (
                self.RELATIVE_RECORDS_PATH,
                str(stream_file_path),
                str(stream_file_path_data_only),
            ), (
                message.json(sort_keys=True),
                message.json(sort_keys=True),
                json.dumps(message.record.data, sort_keys=True),
            )

        elif message.type == AirbyteMessageType.SPEC:
            return (self.RELATIVE_SPECS_PATH,), (message.spec.json(),)

        elif message.type == AirbyteMessageType.STATE:
            return (self.RELATIVE_STATES_PATH,), (message.state.json(),)

        elif message.type == AirbyteMessageType.TRACE:
            return (self.RELATIVE_TRACES_PATH,), (message.trace.json(),)

        elif message.type == AirbyteMessageType.LOG:
            return (self.RELATIVE_LOGS_PATH,), (message.log.json(),)

        elif message.type == AirbyteMessageType.CONTROL:
            return (self.RELATIVE_CONTROLS_PATH,), (message.control.json(),)

        raise NotImplementedError(
            f"No handling for AirbyteMessage type {message.type} has been implemented. This is unexpected."
        )
@@ -1,377 +0,0 @@
1
- # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
- from __future__ import annotations
3
-
4
- import json
5
- import os
6
- import textwrap
7
- from pathlib import Path
8
- from typing import Dict, List, Optional, Set, Tuple
9
-
10
- import rich
11
- from connection_retriever import ConnectionObject, retrieve_objects # type: ignore
12
- from connection_retriever.retrieval import TestingCandidate, retrieve_testing_candidates
13
- from live_tests.commons import hacks
14
- from live_tests.commons.models import ConnectionSubset
15
- from live_tests.commons.utils import build_connection_url
16
-
17
- from .models import (
18
- AirbyteCatalog,
19
- Command,
20
- ConfiguredAirbyteCatalog,
21
- ConnectionObjects,
22
- SecretDict,
23
- )
24
-
25
- console = rich.get_console()
26
-
27
-
28
class InvalidConnectionError(Exception):
    """Raised when a retrieved connection cannot be used for the connector under test."""
30
-
31
-
32
def parse_config(config: dict | str | None) -> Optional[SecretDict]:
    """Wrap a raw connector config in a SecretDict.

    Accepts a JSON string or an already-parsed mapping; falsy input yields None.
    """
    if not config:
        return None
    raw = json.loads(config) if isinstance(config, str) else config
    return SecretDict(raw)
39
-
40
-
41
def parse_catalog(catalog: dict | str | None) -> Optional[AirbyteCatalog]:
    """Parse a raw catalog (JSON string or mapping) into an AirbyteCatalog.

    Falsy input yields None.
    """
    if not catalog:
        return None
    raw = json.loads(catalog) if isinstance(catalog, str) else catalog
    return AirbyteCatalog.parse_obj(raw)
48
-
49
-
50
def parse_configured_catalog(
    configured_catalog: dict | str | None, selected_streams: set[str] | None = None
) -> Optional[ConfiguredAirbyteCatalog]:
    """Parse and patch a configured catalog, optionally keeping only selected streams.

    Accepts a JSON string or an already-parsed mapping; falsy input yields None.
    """
    if not configured_catalog:
        return None
    raw = (
        json.loads(configured_catalog)
        if isinstance(configured_catalog, str)
        else configured_catalog
    )
    # Apply known workarounds to the raw catalog before model validation.
    catalog = ConfiguredAirbyteCatalog.parse_obj(hacks.patch_configured_catalog(raw))
    if not selected_streams:
        return catalog
    kept_streams = [
        configured_stream
        for configured_stream in catalog.streams
        if configured_stream.stream.name in selected_streams
    ]
    return ConfiguredAirbyteCatalog(streams=kept_streams)
68
-
69
-
70
def parse_state(state: dict | str | None) -> Optional[dict]:
    """Decode a state blob: JSON string -> dict, dict passes through, falsy -> None."""
    if not state:
        return None
    return json.loads(state) if isinstance(state, str) else state
77
-
78
-
79
def get_connector_config_from_path(config_path: Path) -> Optional[SecretDict]:
    """Load a connector config from a local file and wrap it as a SecretDict."""
    raw_text = config_path.read_text()
    return parse_config(raw_text)
81
-
82
-
83
def get_state_from_path(state_path: Path) -> Optional[dict]:
    """Read a state file from disk and decode it."""
    contents = state_path.read_text()
    return parse_state(contents)
85
-
86
-
87
def get_configured_catalog_from_path(
    path: Path, selected_streams: Optional[set[str]] = None
) -> Optional[ConfiguredAirbyteCatalog]:
    """Read a configured catalog file and parse it, optionally filtering streams."""
    raw_text = path.read_text()
    return parse_configured_catalog(raw_text, selected_streams)
91
-
92
-
93
# Maps each command to the connection objects it needs before it can run:
# SPEC needs nothing; CHECK and DISCOVER need only the source config; READ
# adds the configured catalog; READ_WITH_STATE additionally requires a state.
COMMAND_TO_REQUIRED_OBJECT_TYPES = {
    Command.SPEC: set(),
    Command.CHECK: {ConnectionObject.SOURCE_CONFIG},
    Command.DISCOVER: {ConnectionObject.SOURCE_CONFIG},
    Command.READ: {ConnectionObject.SOURCE_CONFIG, ConnectionObject.CONFIGURED_CATALOG},
    Command.READ_WITH_STATE: {
        ConnectionObject.SOURCE_CONFIG,
        ConnectionObject.CONFIGURED_CATALOG,
        ConnectionObject.STATE,
    },
}
104
-
105
-
106
def get_connection_objects(
    requested_objects: set[ConnectionObject],
    connection_id: Optional[str],
    custom_config_path: Optional[Path],
    custom_configured_catalog_path: Optional[Path],
    custom_state_path: Optional[Path],
    retrieval_reason: Optional[str],
    connector_image: Optional[str] = None,
    connector_version: Optional[str] = None,
    auto_select_connections: bool = False,
    selected_streams: Optional[set[str]] = None,
    connection_subset: ConnectionSubset = ConnectionSubset.SANDBOXES,
    max_connections: Optional[int] = None,
) -> List[ConnectionObjects]:
    """This function retrieves the connection objects values.

    It checks that the required objects are available and raises an error if they are not.
    If a connection_id is provided, it retrieves the connection objects from the connection.
    If custom objects are provided, it overrides the retrieved objects with them.

    Args:
        requested_objects (Set[ConnectionObject]): The set of requested connection objects.
        connection_id (Optional[str]): The connection id to retrieve the connection objects for.
        custom_config_path (Optional[Path]): The local path to the custom config to use.
        custom_configured_catalog_path (Optional[Path]): The local path to the custom catalog to use.
        custom_state_path (Optional[Path]): The local path to the custom state to use.
        retrieval_reason (Optional[str]): The reason to access the connection objects.
        connector_image (Optional[str]): The image name for the connector under test.
        connector_version (Optional[str]): The version for the connector under test.
        auto_select_connections (bool, optional): Whether to automatically select connections if no connection id is passed. Defaults to False.
        selected_streams (Optional[Set[str]]): The set of selected streams to use when auto selecting a connection.
        connection_subset (ConnectionSubset): The subset of connections to select from.
        max_connections (Optional[int]): The maximum number of connections to retrieve.

    Raises:
        ValueError: If mutually exclusive options are combined, if a retrieval
            reason is missing when passing a connection id, if no connection
            objects could be fetched, or if fetched connection IDs collide.

    Returns:
        List[ConnectionObjects]: List of connection objects.
    """
    if connection_id and auto_select_connections:
        raise ValueError(
            "Cannot set both `connection_id` and `auto_select_connections`."
        )
    if auto_select_connections and not connector_image:
        raise ValueError(
            "A connector image must be provided when using auto_select_connections."
        )

    # Custom objects, when provided, take precedence over retrieved ones.
    custom_config = (
        get_connector_config_from_path(custom_config_path)
        if custom_config_path
        else None
    )
    custom_configured_catalog = (
        get_configured_catalog_from_path(
            custom_configured_catalog_path, selected_streams
        )
        if custom_configured_catalog_path
        else None
    )
    custom_state = get_state_from_path(custom_state_path) if custom_state_path else None

    if connection_id:
        if not retrieval_reason:
            raise ValueError(
                "A retrieval reason is required to access the connection objects when passing a connection id."
            )

        connection_objects = _get_connection_objects_from_retrieved_objects(
            requested_objects,
            retrieval_reason=retrieval_reason,
            source_docker_repository=connector_image,
            source_docker_image_tag=connector_version,
            selected_streams=selected_streams,
            connection_id=connection_id,
            custom_config=custom_config,
            custom_configured_catalog=custom_configured_catalog,
            custom_state=custom_state,
            connection_subset=connection_subset,
            max_connections=max_connections,
        )

    elif auto_select_connections:
        connection_objects = _get_connection_objects_from_retrieved_objects(
            requested_objects,
            retrieval_reason=retrieval_reason,
            source_docker_repository=connector_image,
            source_docker_image_tag=connector_version,
            selected_streams=selected_streams,
            custom_config=custom_config,
            custom_configured_catalog=custom_configured_catalog,
            custom_state=custom_state,
            connection_subset=connection_subset,
            max_connections=max_connections,
        )

    else:
        # We don't make any requests to the connection-retriever; it is expected that config/catalog/state have been provided if needed for the commands being run.
        connection_objects = [
            ConnectionObjects(
                source_config=custom_config,
                destination_config=custom_config,
                catalog=None,
                configured_catalog=custom_configured_catalog,
                state=custom_state,
                workspace_id=None,
                source_id=None,
                destination_id=None,
                connection_id=None,
                source_docker_image=None,
            )
        ]
    if not connection_objects:
        raise ValueError("No connection objects could be fetched.")

    all_connection_ids = [
        connection_object.connection_id for connection_object in connection_objects
    ]
    # Explicit check rather than `assert`: assertions are stripped under -O.
    if len(set(all_connection_ids)) != len(all_connection_ids):
        raise ValueError("Connection IDs must be unique.")
    return connection_objects
230
-
231
-
232
- def _find_best_candidates_subset(
233
- candidates: List[TestingCandidate],
234
- ) -> List[Tuple[TestingCandidate, List[str]]]:
235
- """
236
- This function reduces the list of candidates to the best subset of candidates.
237
- The best subset is the one which maximizes the number of streams tested and minimizes the number of candidates.
238
- """
239
- candidates_sorted_by_duration = sorted(
240
- candidates, key=lambda x: x.last_attempt_duration_in_microseconds
241
- )
242
-
243
- tested_streams = set()
244
- candidates_and_streams_to_test = []
245
-
246
- for candidate in candidates_sorted_by_duration:
247
- candidate_streams_to_test = []
248
- for stream in candidate.streams_with_data:
249
- # The candidate is selected if one of its streams has not been tested yet
250
- if stream not in tested_streams:
251
- candidate_streams_to_test.append(stream)
252
- tested_streams.add(stream)
253
- if candidate_streams_to_test:
254
- candidates_and_streams_to_test.append(
255
- (candidate, candidate_streams_to_test)
256
- )
257
- return candidates_and_streams_to_test
258
-
259
-
260
def _get_connection_objects_from_retrieved_objects(
    requested_objects: Set[ConnectionObject],
    retrieval_reason: str,
    source_docker_repository: str,
    source_docker_image_tag: str,
    selected_streams: Optional[Set[str]],
    connection_id: Optional[str] = None,
    custom_config: Optional[Dict] = None,
    custom_configured_catalog: Optional[ConfiguredAirbyteCatalog] = None,
    custom_state: Optional[Dict] = None,
    connection_subset: ConnectionSubset = ConnectionSubset.SANDBOXES,
    max_connections: Optional[int] = None,
):
    """Fetch, validate, and assemble ConnectionObjects for the best testing candidates.

    Retrieves testing candidates for the given connector image, reduces them to
    a subset that covers the most streams with the fewest/fastest connections,
    then fetches the full connection objects for each and validates that the
    connection's docker image matches the connector under test. Any custom
    config/catalog/state overrides the retrieved equivalents.

    Raises:
        InvalidConnectionError: If no candidates exist for the image, or a
            retrieved connection's docker image/tag does not match.
    """
    console.log(
        textwrap.dedent(
            """
            Retrieving connection objects from the database.
            We will build a subset of candidates to test.
            This subset should minimize the number of candidates and sync duration while maximizing the number of streams tested.
            We patch configured catalogs to only test streams once.
            If the max_connections parameter is set, we will only keep the top connections with the most streams to test.
            """
        )
    )
    try:
        candidates = retrieve_testing_candidates(
            source_docker_repository=source_docker_repository,
            source_docker_image_tag=source_docker_image_tag,
            with_streams=selected_streams,
            connection_subset=connection_subset,
        )
    # NOTE(review): presumably retrieve_testing_candidates raises IndexError
    # when its result set is empty — confirm against connection_retriever.
    except IndexError:
        raise InvalidConnectionError(
            f"No candidates were found for the provided source docker image ({source_docker_repository}:{source_docker_image_tag})."
        )
    # If the connection_id is provided, we filter the candidates to only keep the ones with the same connection_id
    if connection_id:
        candidates = [
            candidate
            for candidate in candidates
            if candidate.connection_id == connection_id
        ]

    candidates_and_streams_to_test = _find_best_candidates_subset(candidates)
    # Keep the connections contributing the most streams first, so that a
    # max_connections cut-off drops the least valuable ones.
    candidates_and_streams_to_test = sorted(
        candidates_and_streams_to_test, key=lambda x: len(x[1]), reverse=True
    )
    if max_connections:
        candidates_and_streams_to_test = candidates_and_streams_to_test[
            :max_connections
        ]

    number_of_streams_tested = sum(
        [len(streams_to_test) for _, streams_to_test in candidates_and_streams_to_test]
    )
    console.log(
        f"Selected {len(candidates_and_streams_to_test)} candidates to test {number_of_streams_tested} streams."
    )

    all_connection_objects = []
    for candidate, streams_to_test in candidates_and_streams_to_test:
        retrieved_objects = retrieve_objects(
            requested_objects,
            retrieval_reason=retrieval_reason,
            source_docker_repository=source_docker_repository,
            source_docker_image_tag=source_docker_image_tag,
            connection_id=candidate.connection_id,
            connection_subset=connection_subset,
        )
        # Only the first retrieved record per connection is used.
        retrieved_objects = retrieved_objects[0]
        retrieved_source_config = parse_config(retrieved_objects.source_config)
        retrieved_destination_config = parse_config(
            retrieved_objects.destination_config
        )
        retrieved_catalog = parse_catalog(retrieved_objects.catalog)
        retrieved_configured_catalog = parse_configured_catalog(
            retrieved_objects.configured_catalog, selected_streams
        )
        retrieved_state = parse_state(retrieved_objects.state)

        retrieved_source_docker_image = retrieved_objects.source_docker_image
        connection_url = build_connection_url(
            retrieved_objects.workspace_id, retrieved_objects.connection_id
        )
        # Validate that the connection actually ran with the connector image
        # and tag under test before accepting its objects.
        if retrieved_source_docker_image is None:
            raise InvalidConnectionError(
                f"No docker image was found for connection ID {retrieved_objects.connection_id}. Please double check that the latest job run used version {source_docker_image_tag}. Connection URL: {connection_url}"
            )
        # NOTE(review): the split(":") checks assume the retrieved image is
        # always "repository:tag"; an untagged image would raise IndexError
        # here rather than InvalidConnectionError — confirm upstream format.
        elif retrieved_source_docker_image.split(":")[0] != source_docker_repository:
            raise InvalidConnectionError(
                f"The provided docker image ({source_docker_repository}) does not match the image for connection ID {retrieved_objects.connection_id}. Please double check that this connection is using the correct image. Connection URL: {connection_url}"
            )
        elif retrieved_source_docker_image.split(":")[1] != source_docker_image_tag:
            raise InvalidConnectionError(
                f"The provided docker image tag ({source_docker_image_tag}) does not match the image tag for connection ID {retrieved_objects.connection_id}. Please double check that this connection is using the correct image tag and the latest job ran using this version. Connection URL: {connection_url}"
            )

        # Custom objects (if any) win over the retrieved ones.
        all_connection_objects.append(
            ConnectionObjects(
                source_config=custom_config
                if custom_config
                else retrieved_source_config,
                destination_config=custom_config
                if custom_config
                else retrieved_destination_config,
                catalog=retrieved_catalog,
                configured_catalog=custom_configured_catalog
                if custom_configured_catalog
                else retrieved_configured_catalog,
                state=custom_state if custom_state else retrieved_state,
                workspace_id=retrieved_objects.workspace_id,
                source_id=retrieved_objects.source_id,
                destination_id=retrieved_objects.destination_id,
                source_docker_image=retrieved_source_docker_image,
                connection_id=retrieved_objects.connection_id,
            )
        )
    return all_connection_objects