airbyte-internal-ops 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/METADATA +1 -1
  2. {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/RECORD +13 -52
  3. airbyte_ops_mcp/cli/cloud.py +42 -3
  4. airbyte_ops_mcp/cloud_admin/api_client.py +473 -0
  5. airbyte_ops_mcp/cloud_admin/models.py +56 -0
  6. airbyte_ops_mcp/mcp/cloud_connector_versions.py +460 -0
  7. airbyte_ops_mcp/mcp/prerelease.py +6 -46
  8. airbyte_ops_mcp/regression_tests/ci_output.py +151 -71
  9. airbyte_ops_mcp/regression_tests/http_metrics.py +21 -2
  10. airbyte_ops_mcp/regression_tests/models.py +6 -0
  11. airbyte_ops_mcp/telemetry.py +162 -0
  12. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/.gitignore +0 -1
  13. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/README.md +0 -420
  14. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/__init__.py +0 -2
  15. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/__init__.py +0 -1
  16. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/__init__.py +0 -8
  17. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/base_backend.py +0 -16
  18. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/duckdb_backend.py +0 -87
  19. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/file_backend.py +0 -165
  20. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connection_objects_retrieval.py +0 -377
  21. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connector_runner.py +0 -247
  22. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/errors.py +0 -7
  23. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/evaluation_modes.py +0 -25
  24. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/hacks.py +0 -23
  25. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/json_schema_helper.py +0 -384
  26. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/mitm_addons.py +0 -37
  27. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/models.py +0 -595
  28. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/proxy.py +0 -207
  29. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/secret_access.py +0 -47
  30. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/segment_tracking.py +0 -45
  31. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/utils.py +0 -214
  32. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/conftest.py.disabled +0 -751
  33. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/consts.py +0 -4
  34. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/poetry.lock +0 -4480
  35. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/pytest.ini +0 -9
  36. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/__init__.py +0 -1
  37. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_check.py +0 -61
  38. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_discover.py +0 -117
  39. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_read.py +0 -627
  40. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_spec.py +0 -43
  41. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/report.py +0 -542
  42. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/stash_keys.py +0 -38
  43. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/__init__.py +0 -0
  44. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/private_details.html.j2 +0 -305
  45. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/report.html.j2 +0 -515
  46. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/utils.py +0 -187
  47. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/__init__.py +0 -0
  48. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_check.py +0 -61
  49. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_discover.py +0 -217
  50. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_read.py +0 -177
  51. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_spec.py +0 -631
  52. {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/WHEEL +0 -0
  53. {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/entry_points.txt +0 -0
@@ -1,37 +0,0 @@
1
- # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
- from __future__ import annotations
3
-
4
- from urllib.parse import parse_qs, urlencode, urlparse
5
-
6
- from mitmproxy import http
7
-
8
-
9
- class SortQueryParams:
10
- """This addon sorts query parameters in the request URL.
11
- It is useful for testing purposes, as it makes it easier to compare requests and get cache hits.
12
- """
13
-
14
- def request(self, flow: http.HTTPFlow) -> None:
15
- if url := flow.request.url:
16
- parsed_url = urlparse(url)
17
- # Get query parameters as dictionary
18
- query_params = parse_qs(parsed_url.query)
19
- # Sort query parameters alphabetically
20
- sorted_params = {
21
- key: query_params[key] for key in sorted(query_params.keys())
22
- }
23
- # Reconstruct the URL with sorted query parameters
24
- sorted_url = parsed_url._replace(
25
- query=urlencode(sorted_params, doseq=True)
26
- ).geturl()
27
-
28
- # Update the request URL
29
- flow.request.url = sorted_url
30
-
31
-
32
- # Disabling the addon.
33
- # It can alter the request URL when some connector URL are already encoded.
34
- # See discussion here https://github.com/airbytehq/airbyte-internal-issues/issues/9302#issuecomment-2311854334
35
-
36
- # addons = [SortQueryParams()]
37
- addons = []
@@ -1,595 +0,0 @@
1
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
-
3
- from __future__ import annotations
4
-
5
- import _collections_abc
6
- import json
7
- import logging
8
- import tempfile
9
- from collections import defaultdict
10
- from collections.abc import Iterable, Iterator, MutableMapping
11
- from dataclasses import dataclass, field
12
- from enum import Enum
13
- from functools import cache
14
- from pathlib import Path
15
- from typing import Any, Dict, List, Optional
16
-
17
- import dagger
18
- import requests
19
- from airbyte_protocol.models import (
20
- AirbyteCatalog, # type: ignore
21
- AirbyteMessage, # type: ignore
22
- AirbyteStateMessage, # type: ignore
23
- AirbyteStreamStatusTraceMessage, # type: ignore
24
- ConfiguredAirbyteCatalog, # type: ignore
25
- TraceType, # type: ignore
26
- )
27
- from airbyte_protocol.models import Type as AirbyteMessageType
28
- from genson import SchemaBuilder # type: ignore
29
- from live_tests.commons.backends import DuckDbBackend, FileBackend
30
- from live_tests.commons.secret_access import get_airbyte_api_key
31
- from live_tests.commons.utils import (
32
- get_connector_container,
33
- get_http_flows_from_mitm_dump,
34
- mitm_http_stream_to_har,
35
- sanitize_stream_name,
36
- sort_dict_keys,
37
- )
38
- from mitmproxy import http
39
- from pydantic import ValidationError
40
-
41
-
42
- class UserDict(_collections_abc.MutableMapping): # type: ignore
43
- # Start by filling-out the abstract methods
44
- def __init__(self, _dict: Optional[MutableMapping] = None, **kwargs: Any):
45
- self.data: MutableMapping = {}
46
- if _dict is not None:
47
- self.update(_dict)
48
- if kwargs:
49
- self.update(kwargs)
50
-
51
- def __len__(self) -> int:
52
- return len(self.data)
53
-
54
- def __getitem__(self, key: Any) -> Any:
55
- if key in self.data:
56
- return self.data[key]
57
- if hasattr(self.__class__, "__missing__"):
58
- return self.__class__.__missing__(self, key)
59
- raise KeyError(key)
60
-
61
- def __setitem__(self, key: Any, item: Any) -> None:
62
- self.data[key] = item
63
-
64
- def __delitem__(self, key: Any) -> None:
65
- del self.data[key]
66
-
67
- def __iter__(self) -> Iterator:
68
- return iter(self.data)
69
-
70
- # Modify __contains__ to work correctly when __missing__ is present
71
- def __contains__(self, key: Any) -> bool:
72
- return key in self.data
73
-
74
- # Now, add the methods in dicts but not in MutableMapping
75
- def __repr__(self) -> str:
76
- return repr(self.data)
77
-
78
- def __or__(self, other: UserDict | dict) -> UserDict:
79
- if isinstance(other, UserDict):
80
- return self.__class__(self.data | other.data) # type: ignore
81
- if isinstance(other, dict):
82
- return self.__class__(self.data | other) # type: ignore
83
- return NotImplemented
84
-
85
- def __ror__(self, other: UserDict | dict) -> UserDict:
86
- if isinstance(other, UserDict):
87
- return self.__class__(other.data | self.data) # type: ignore
88
- if isinstance(other, dict):
89
- return self.__class__(other | self.data) # type: ignore
90
- return NotImplemented
91
-
92
- def __ior__(self, other: UserDict | dict) -> UserDict:
93
- if isinstance(other, UserDict):
94
- self.data |= other.data # type: ignore
95
- else:
96
- self.data |= other # type: ignore
97
- return self
98
-
99
- def __copy__(self) -> UserDict:
100
- inst = self.__class__.__new__(self.__class__)
101
- inst.__dict__.update(self.__dict__)
102
- # Create a copy and avoid triggering descriptors
103
- inst.__dict__["data"] = self.__dict__["data"].copy()
104
- return inst
105
-
106
- def copy(self) -> UserDict:
107
- if self.__class__ is UserDict:
108
- return UserDict(self.data.copy()) # type: ignore
109
- import copy
110
-
111
- data = self.data
112
- try:
113
- self.data = {}
114
- c = copy.copy(self)
115
- finally:
116
- self.data = data
117
- c.update(self)
118
- return c
119
-
120
- @classmethod
121
- def fromkeys(cls, iterable: Iterable, value: Optional[Any] = None) -> UserDict:
122
- d = cls()
123
- for key in iterable:
124
- d[key] = value
125
- return d
126
-
127
-
128
- class SecretDict(UserDict):
129
- def __str__(self) -> str:
130
- return f"{self.__class__.__name__}(******)"
131
-
132
- def __repr__(self) -> str:
133
- return str(self)
134
-
135
-
136
- class Command(Enum):
137
- CHECK = "check"
138
- DISCOVER = "discover"
139
- READ = "read"
140
- READ_WITH_STATE = "read-with-state"
141
- SPEC = "spec"
142
-
143
- def needs_config(self) -> bool:
144
- return self in {
145
- Command.CHECK,
146
- Command.DISCOVER,
147
- Command.READ,
148
- Command.READ_WITH_STATE,
149
- }
150
-
151
- def needs_catalog(self) -> bool:
152
- return self in {Command.READ, Command.READ_WITH_STATE}
153
-
154
- def needs_state(self) -> bool:
155
- return self in {Command.READ_WITH_STATE}
156
-
157
-
158
- class TargetOrControl(Enum):
159
- TARGET = "target"
160
- CONTROL = "control"
161
-
162
-
163
- class ActorType(Enum):
164
- SOURCE = "source"
165
- DESTINATION = "destination"
166
-
167
-
168
- class ConnectionSubset(Enum):
169
- """Signals which connection pool to consider for this live test — just the Airbyte sandboxes, or all possible connections on Cloud."""
170
-
171
- SANDBOXES = "sandboxes"
172
- ALL = "all"
173
-
174
-
175
- @dataclass
176
- class ConnectorUnderTest:
177
- """Represents a connector being tested.
178
- In validation tests, there would be one connector under test.
179
- When running regression tests, there would be two connectors under test: the target and the control versions of the same connector.
180
- """
181
-
182
- # connector image, assuming it's in the format "airbyte/{actor_type}-{connector_name}:{version}"
183
- image_name: str
184
- container: dagger.Container
185
- target_or_control: TargetOrControl
186
-
187
- @property
188
- def name(self) -> str:
189
- return self.image_name.replace("airbyte/", "").split(":")[0]
190
-
191
- @property
192
- def name_without_type_prefix(self) -> str:
193
- return self.name.replace(f"{self.actor_type.value}-", "")
194
-
195
- @property
196
- def version(self) -> str:
197
- return self.image_name.replace("airbyte/", "").split(":")[1]
198
-
199
- @property
200
- def actor_type(self) -> ActorType:
201
- if "airbyte/destination-" in self.image_name:
202
- return ActorType.DESTINATION
203
- elif "airbyte/source-" in self.image_name:
204
- return ActorType.SOURCE
205
- else:
206
- raise ValueError(
207
- f"Can't infer the actor type. Connector image name {self.image_name} does not contain 'airbyte/source' or 'airbyte/destination'"
208
- )
209
-
210
- @classmethod
211
- async def from_image_name(
212
- cls: type[ConnectorUnderTest],
213
- dagger_client: dagger.Client,
214
- image_name: str,
215
- target_or_control: TargetOrControl,
216
- ) -> ConnectorUnderTest:
217
- container = await get_connector_container(dagger_client, image_name)
218
- return cls(image_name, container, target_or_control)
219
-
220
-
221
- @dataclass
222
- class ExecutionInputs:
223
- hashed_connection_id: str
224
- connector_under_test: ConnectorUnderTest
225
- actor_id: str
226
- global_output_dir: Path
227
- command: Command
228
- config: Optional[SecretDict] = None
229
- configured_catalog: Optional[ConfiguredAirbyteCatalog] = None
230
- state: Optional[dict] = None
231
- environment_variables: Optional[dict] = None
232
- duckdb_path: Optional[Path] = None
233
-
234
- def raise_if_missing_attr_for_command(self, attribute: str) -> None:
235
- if getattr(self, attribute) is None:
236
- raise ValueError(
237
- f"We need a {attribute} to run the {self.command.value} command"
238
- )
239
-
240
- def __post_init__(self) -> None:
241
- if self.command is Command.CHECK:
242
- self.raise_if_missing_attr_for_command("config")
243
- if self.command is Command.DISCOVER:
244
- self.raise_if_missing_attr_for_command("config")
245
- if self.command is Command.READ:
246
- self.raise_if_missing_attr_for_command("config")
247
- self.raise_if_missing_attr_for_command("configured_catalog")
248
- if self.command is Command.READ_WITH_STATE:
249
- self.raise_if_missing_attr_for_command("config")
250
- self.raise_if_missing_attr_for_command("configured_catalog")
251
- self.raise_if_missing_attr_for_command("state")
252
-
253
- @property
254
- def output_dir(self) -> Path:
255
- output_dir = (
256
- self.global_output_dir
257
- / f"command_execution_artifacts/{self.connector_under_test.name}/{self.command.value}/{self.connector_under_test.version}/{self.hashed_connection_id}"
258
- )
259
- output_dir.mkdir(parents=True, exist_ok=True)
260
- return output_dir
261
-
262
-
263
- @dataclass
264
- class ExecutionResult:
265
- hashed_connection_id: str
266
- actor_id: str
267
- configured_catalog: ConfiguredAirbyteCatalog
268
- connector_under_test: ConnectorUnderTest
269
- command: Command
270
- stdout_file_path: Path
271
- stderr_file_path: Path
272
- success: bool
273
- executed_container: Optional[dagger.Container]
274
- config: Optional[SecretDict]
275
- http_dump: Optional[dagger.File] = None
276
- http_flows: list[http.HTTPFlow] = field(default_factory=list)
277
- stream_schemas: Optional[dict[str, Any]] = None
278
- backend: Optional[FileBackend] = None
279
-
280
- HTTP_DUMP_FILE_NAME = "http_dump.mitm"
281
- HAR_FILE_NAME = "http_dump.har"
282
-
283
- @property
284
- def logger(self) -> logging.Logger:
285
- return logging.getLogger(
286
- f"{self.connector_under_test.target_or_control.value}-{self.command.value}"
287
- )
288
-
289
- @property
290
- def airbyte_messages(self) -> Iterable[AirbyteMessage]:
291
- return self.parse_airbyte_messages_from_command_output(self.stdout_file_path)
292
-
293
- @property
294
- def duckdb_schema(self) -> Iterable[str]:
295
- return (
296
- self.connector_under_test.target_or_control.value,
297
- self.command.value,
298
- self.hashed_connection_id,
299
- )
300
-
301
- @property
302
- def configured_streams(self) -> List[str]:
303
- return [stream.stream.name for stream in self.configured_catalog.streams]
304
-
305
- @property
306
- def primary_keys_per_stream(self) -> Dict[str, List[str]]:
307
- return {
308
- stream.stream.name: stream.primary_key[0] if stream.primary_key else None
309
- for stream in self.configured_catalog.streams
310
- }
311
-
312
- @classmethod
313
- async def load(
314
- cls: type[ExecutionResult],
315
- connector_under_test: ConnectorUnderTest,
316
- hashed_connection_id: str,
317
- actor_id: str,
318
- configured_catalog: ConfiguredAirbyteCatalog,
319
- command: Command,
320
- stdout_file_path: Path,
321
- stderr_file_path: Path,
322
- success: bool,
323
- executed_container: Optional[dagger.Container],
324
- config: Optional[SecretDict] = None,
325
- http_dump: Optional[dagger.File] = None,
326
- ) -> ExecutionResult:
327
- execution_result = cls(
328
- hashed_connection_id,
329
- actor_id,
330
- configured_catalog,
331
- connector_under_test,
332
- command,
333
- stdout_file_path,
334
- stderr_file_path,
335
- success,
336
- executed_container,
337
- config,
338
- http_dump,
339
- )
340
- await execution_result.load_http_flows()
341
- return execution_result
342
-
343
- async def load_http_flows(self) -> None:
344
- if not self.http_dump:
345
- return
346
- with tempfile.NamedTemporaryFile() as temp_file:
347
- await self.http_dump.export(temp_file.name)
348
- self.http_flows = get_http_flows_from_mitm_dump(Path(temp_file.name))
349
-
350
- def parse_airbyte_messages_from_command_output(
351
- self, command_output_path: Path, log_validation_errors: bool = False
352
- ) -> Iterable[AirbyteMessage]:
353
- with open(command_output_path) as command_output:
354
- for line in command_output:
355
- try:
356
- yield AirbyteMessage.parse_raw(line)
357
- except ValidationError as e:
358
- if log_validation_errors:
359
- self.logger.warn(f"Error parsing AirbyteMessage: {e}")
360
-
361
- def get_records(self) -> Iterable[AirbyteMessage]:
362
- self.logger.info(
363
- f"Reading all records for command {self.command.value} on {self.connector_under_test.target_or_control.value} version."
364
- )
365
- for message in self.airbyte_messages:
366
- if message.type is AirbyteMessageType.RECORD:
367
- yield message
368
-
369
- def generate_stream_schemas(self) -> dict[str, Any]:
370
- self.logger.info("Generating stream schemas")
371
- stream_builders: dict[str, SchemaBuilder] = {}
372
- for record in self.get_records():
373
- stream = record.record.stream
374
- if stream not in stream_builders:
375
- stream_schema_builder = SchemaBuilder()
376
- stream_schema_builder.add_schema({"type": "object", "properties": {}})
377
- stream_builders[stream] = stream_schema_builder
378
- stream_builders[stream].add_object(
379
- self.get_obfuscated_types(record.record.data)
380
- )
381
- self.logger.info("Stream schemas generated")
382
- return {
383
- stream: sort_dict_keys(stream_builders[stream].to_schema())
384
- for stream in stream_builders
385
- }
386
-
387
- @staticmethod
388
- def get_obfuscated_types(data: dict[str, Any]) -> dict[str, Any]:
389
- """
390
- Convert obfuscated records into a record whose values have the same type as the original values.
391
- """
392
- types = {}
393
- for k, v in data.items():
394
- if v.startswith("string_"):
395
- types[k] = "a"
396
- elif v.startswith("integer_"):
397
- types[k] = 0
398
- elif v.startswith("number_"):
399
- types[k] = 0.1
400
- elif v.startswith("boolean_"):
401
- types[k] = True
402
- elif v.startswith("null_"):
403
- types[k] = None
404
- elif v.startswith("array_"):
405
- types[k] = []
406
- elif v.startswith("object_"):
407
- types[k] = {}
408
- else:
409
- types[k] = v
410
-
411
- return types
412
-
413
- def get_records_per_stream(self, stream: str) -> Iterator[AirbyteMessage]:
414
- assert self.backend is not None, "Backend must be set to get records per stream"
415
- self.logger.info(f"Reading records for stream {stream}")
416
- if stream not in self.backend.record_per_stream_paths:
417
- self.logger.warning(f"No records found for stream {stream}")
418
- yield from []
419
- else:
420
- for message in self.parse_airbyte_messages_from_command_output(
421
- self.backend.record_per_stream_paths[stream], log_validation_errors=True
422
- ):
423
- if message.type is AirbyteMessageType.RECORD:
424
- yield message
425
-
426
- def get_states_per_stream(
427
- self, stream: str
428
- ) -> Dict[str, List[AirbyteStateMessage]]:
429
- self.logger.info(f"Reading state messages for stream {stream}")
430
- states = defaultdict(list)
431
- for message in self.airbyte_messages:
432
- if message.type is AirbyteMessageType.STATE:
433
- states[message.state.stream.stream_descriptor.name].append(
434
- message.state
435
- )
436
- return states
437
-
438
- def get_status_messages_per_stream(
439
- self, stream: str
440
- ) -> Dict[str, List[AirbyteStreamStatusTraceMessage]]:
441
- self.logger.info(f"Reading status messages for stream {stream}")
442
- statuses = defaultdict(list)
443
- for message in self.airbyte_messages:
444
- if (
445
- message.type is AirbyteMessageType.TRACE
446
- and message.trace.type == TraceType.STREAM_STATUS
447
- ):
448
- statuses[message.trace.stream_status.stream_descriptor.name].append(
449
- message.trace.stream_status
450
- )
451
- return statuses
452
-
453
- @cache
454
- def get_message_count_per_type(self) -> dict[AirbyteMessageType, int]:
455
- message_count: dict[AirbyteMessageType, int] = defaultdict(int)
456
- for message in self.airbyte_messages:
457
- message_count[message.type] += 1
458
- return message_count
459
-
460
- async def save_http_dump(self, output_dir: Path) -> None:
461
- if self.http_dump:
462
- self.logger.info(
463
- "An http dump was captured during the execution of the command, saving it."
464
- )
465
- http_dump_file_path = (output_dir / self.HTTP_DUMP_FILE_NAME).resolve()
466
- await self.http_dump.export(str(http_dump_file_path))
467
- self.logger.info(f"Http dump saved to {http_dump_file_path}")
468
-
469
- # Define where the har file will be saved
470
- har_file_path = (output_dir / self.HAR_FILE_NAME).resolve()
471
- # Convert the mitmproxy dump file to a har file
472
- mitm_http_stream_to_har(http_dump_file_path, har_file_path)
473
- self.logger.info(f"Har file saved to {har_file_path}")
474
- else:
475
- self.logger.warning("No http dump to save")
476
-
477
- def save_airbyte_messages(
478
- self, output_dir: Path, duckdb_path: Optional[Path] = None
479
- ) -> None:
480
- self.logger.info("Saving Airbyte messages to disk")
481
- airbyte_messages_dir = output_dir / "airbyte_messages"
482
- airbyte_messages_dir.mkdir(parents=True, exist_ok=True)
483
- if duckdb_path:
484
- self.backend = DuckDbBackend(
485
- airbyte_messages_dir, duckdb_path, self.duckdb_schema
486
- )
487
- else:
488
- self.backend = FileBackend(airbyte_messages_dir)
489
- self.backend.write(self.airbyte_messages)
490
- self.logger.info("Airbyte messages saved")
491
-
492
- def save_stream_schemas(self, output_dir: Path) -> None:
493
- self.stream_schemas = self.generate_stream_schemas()
494
- stream_schemas_dir = output_dir / "stream_schemas"
495
- stream_schemas_dir.mkdir(parents=True, exist_ok=True)
496
- for stream_name, stream_schema in self.stream_schemas.items():
497
- (
498
- stream_schemas_dir / f"{sanitize_stream_name(stream_name)}.json"
499
- ).write_text(json.dumps(stream_schema, sort_keys=True))
500
- self.logger.info("Stream schemas saved to disk")
501
-
502
- async def save_artifacts(
503
- self, output_dir: Path, duckdb_path: Optional[Path] = None
504
- ) -> None:
505
- self.logger.info("Saving artifacts to disk")
506
- self.save_airbyte_messages(output_dir, duckdb_path)
507
- self.update_configuration()
508
- await self.save_http_dump(output_dir)
509
- self.save_stream_schemas(output_dir)
510
- self.logger.info("All artifacts saved to disk")
511
-
512
- def get_updated_configuration(
513
- self, control_message_path: Path
514
- ) -> Optional[dict[str, Any]]:
515
- """Iterate through the control messages to find CONNECTOR_CONFIG message and return the last updated configuration."""
516
- if not control_message_path.exists():
517
- return None
518
- updated_config = None
519
- for line in control_message_path.read_text().splitlines():
520
- if line.strip():
521
- connector_config = json.loads(line.strip()).get("connectorConfig", {})
522
- if connector_config:
523
- updated_config = connector_config
524
- return updated_config
525
-
526
- def update_configuration(self) -> None:
527
- """This function checks if a configuration has to be updated by reading the control messages file.
528
- If a configuration has to be updated, it updates the configuration on the actor using the Airbyte API.
529
- """
530
- assert self.backend is not None, (
531
- "Backend must be set to update configuration in order to find the control messages path"
532
- )
533
- updated_configuration = self.get_updated_configuration(
534
- self.backend.jsonl_controls_path
535
- )
536
- if updated_configuration is None:
537
- return
538
-
539
- self.logger.warning(
540
- f"Updating configuration for {self.connector_under_test.name}, actor {self.actor_id}"
541
- )
542
- url = f"https://api.airbyte.com/v1/{self.connector_under_test.actor_type.value}s/{self.actor_id}"
543
-
544
- payload = {
545
- "configuration": {
546
- **updated_configuration,
547
- f"{self.connector_under_test.actor_type.value}Type": self.connector_under_test.name_without_type_prefix,
548
- }
549
- }
550
- headers = {
551
- "accept": "application/json",
552
- "content-type": "application/json",
553
- "authorization": f"Bearer {get_airbyte_api_key()}",
554
- }
555
-
556
- response = requests.patch(url, json=payload, headers=headers)
557
- try:
558
- response.raise_for_status()
559
- except requests.HTTPError as e:
560
- self.logger.error(
561
- f"Failed to update {self.connector_under_test.name} configuration on actor {self.actor_id}: {e}"
562
- )
563
- self.logger.error(f"Response: {response.text}")
564
- self.logger.info(
565
- f"Updated configuration for {self.connector_under_test.name}, actor {self.actor_id}"
566
- )
567
-
568
- def __hash__(self):
569
- return hash(self.connector_under_test.version)
570
-
571
-
572
- @dataclass(kw_only=True)
573
- class ConnectionObjects:
574
- source_config: Optional[SecretDict]
575
- destination_config: Optional[SecretDict]
576
- configured_catalog: Optional[ConfiguredAirbyteCatalog]
577
- catalog: Optional[AirbyteCatalog]
578
- state: Optional[dict]
579
- workspace_id: Optional[str]
580
- source_id: Optional[str]
581
- destination_id: Optional[str]
582
- source_docker_image: Optional[str]
583
- connection_id: Optional[str]
584
-
585
- @property
586
- def url(self) -> Optional[str]:
587
- if not self.workspace_id or not self.connection_id:
588
- return None
589
- return f"https://cloud.airbyte.com/workspaces/{self.workspace_id}/connections/{self.connection_id}"
590
-
591
- @property
592
- def hashed_connection_id(self) -> Optional[str]:
593
- if not self.connection_id:
594
- return None
595
- return self.connection_id[:8]