airbyte-internal-ops 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/METADATA +1 -1
- {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/RECORD +13 -52
- airbyte_ops_mcp/cli/cloud.py +42 -3
- airbyte_ops_mcp/cloud_admin/api_client.py +473 -0
- airbyte_ops_mcp/cloud_admin/models.py +56 -0
- airbyte_ops_mcp/mcp/cloud_connector_versions.py +460 -0
- airbyte_ops_mcp/mcp/prerelease.py +6 -46
- airbyte_ops_mcp/regression_tests/ci_output.py +151 -71
- airbyte_ops_mcp/regression_tests/http_metrics.py +21 -2
- airbyte_ops_mcp/regression_tests/models.py +6 -0
- airbyte_ops_mcp/telemetry.py +162 -0
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/.gitignore +0 -1
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/README.md +0 -420
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/__init__.py +0 -2
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/__init__.py +0 -1
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/__init__.py +0 -8
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/base_backend.py +0 -16
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/duckdb_backend.py +0 -87
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/file_backend.py +0 -165
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connection_objects_retrieval.py +0 -377
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connector_runner.py +0 -247
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/errors.py +0 -7
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/evaluation_modes.py +0 -25
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/hacks.py +0 -23
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/json_schema_helper.py +0 -384
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/mitm_addons.py +0 -37
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/models.py +0 -595
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/proxy.py +0 -207
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/secret_access.py +0 -47
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/segment_tracking.py +0 -45
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/utils.py +0 -214
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/conftest.py.disabled +0 -751
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/consts.py +0 -4
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/poetry.lock +0 -4480
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/pytest.ini +0 -9
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/__init__.py +0 -1
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_check.py +0 -61
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_discover.py +0 -117
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_read.py +0 -627
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_spec.py +0 -43
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/report.py +0 -542
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/stash_keys.py +0 -38
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/__init__.py +0 -0
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/private_details.html.j2 +0 -305
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/report.html.j2 +0 -515
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/utils.py +0 -187
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/__init__.py +0 -0
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_check.py +0 -61
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_discover.py +0 -217
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_read.py +0 -177
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_spec.py +0 -631
- {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/WHEEL +0 -0
- {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
|
2
|
-
from __future__ import annotations
|
|
3
|
-
|
|
4
|
-
from urllib.parse import parse_qs, urlencode, urlparse
|
|
5
|
-
|
|
6
|
-
from mitmproxy import http
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class SortQueryParams:
    """mitmproxy addon that rewrites each request URL with its query parameters sorted by key.

    It is useful for testing purposes, as it makes it easier to compare
    requests and get cache hits.

    NOTE: the query string is decoded by ``parse_qs`` and re-encoded by
    ``urlencode``, so the escaping of an already-encoded URL may change.
    """

    def request(self, flow: http.HTTPFlow) -> None:
        """Replace ``flow.request.url`` with the same URL whose query keys are sorted.

        Repeated keys keep the relative order of their values. Nothing is
        done when the request URL is empty.
        """
        if url := flow.request.url:
            parsed_url = urlparse(url)
            # keep_blank_values=True: parse_qs drops parameters with an empty
            # value by default, which would silently remove e.g. "flag=" from
            # the request instead of merely reordering it.
            query_params = parse_qs(parsed_url.query, keep_blank_values=True)
            # Sort query parameters alphabetically by key
            sorted_params = {key: query_params[key] for key in sorted(query_params)}
            # Reconstruct the URL with sorted query parameters and update the request
            flow.request.url = parsed_url._replace(
                query=urlencode(sorted_params, doseq=True)
            ).geturl()
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
# Disabling the addon.
|
|
33
|
-
# It can alter the request URL when some connector URLs are already encoded.
|
|
34
|
-
# See discussion here https://github.com/airbytehq/airbyte-internal-issues/issues/9302#issuecomment-2311854334
|
|
35
|
-
|
|
36
|
-
# addons = [SortQueryParams()]
|
|
37
|
-
addons = []
|
|
@@ -1,595 +0,0 @@
|
|
|
1
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import _collections_abc
|
|
6
|
-
import json
|
|
7
|
-
import logging
|
|
8
|
-
import tempfile
|
|
9
|
-
from collections import defaultdict
|
|
10
|
-
from collections.abc import Iterable, Iterator, MutableMapping
|
|
11
|
-
from dataclasses import dataclass, field
|
|
12
|
-
from enum import Enum
|
|
13
|
-
from functools import cache
|
|
14
|
-
from pathlib import Path
|
|
15
|
-
from typing import Any, Dict, List, Optional
|
|
16
|
-
|
|
17
|
-
import dagger
|
|
18
|
-
import requests
|
|
19
|
-
from airbyte_protocol.models import (
|
|
20
|
-
AirbyteCatalog, # type: ignore
|
|
21
|
-
AirbyteMessage, # type: ignore
|
|
22
|
-
AirbyteStateMessage, # type: ignore
|
|
23
|
-
AirbyteStreamStatusTraceMessage, # type: ignore
|
|
24
|
-
ConfiguredAirbyteCatalog, # type: ignore
|
|
25
|
-
TraceType, # type: ignore
|
|
26
|
-
)
|
|
27
|
-
from airbyte_protocol.models import Type as AirbyteMessageType
|
|
28
|
-
from genson import SchemaBuilder # type: ignore
|
|
29
|
-
from live_tests.commons.backends import DuckDbBackend, FileBackend
|
|
30
|
-
from live_tests.commons.secret_access import get_airbyte_api_key
|
|
31
|
-
from live_tests.commons.utils import (
|
|
32
|
-
get_connector_container,
|
|
33
|
-
get_http_flows_from_mitm_dump,
|
|
34
|
-
mitm_http_stream_to_har,
|
|
35
|
-
sanitize_stream_name,
|
|
36
|
-
sort_dict_keys,
|
|
37
|
-
)
|
|
38
|
-
from mitmproxy import http
|
|
39
|
-
from pydantic import ValidationError
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
class UserDict(_collections_abc.MutableMapping):  # type: ignore
    """Mutable mapping that stores its items in the plain ``data`` attribute.

    Lookups of absent keys are routed to ``__missing__`` when the class
    defines it; otherwise ``KeyError`` is raised.
    """

    # Abstract MutableMapping methods first.
    def __init__(self, _dict: Optional[MutableMapping] = None, **kwargs: Any):
        """Start empty, then load ``_dict`` (if given) followed by ``kwargs`` (if any)."""
        self.data: MutableMapping = {}
        if _dict is not None:
            self.update(_dict)
        if kwargs:
            self.update(kwargs)

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, key: Any) -> Any:
        store = self.data
        if key in store:
            return store[key]
        owner = self.__class__
        if not hasattr(owner, "__missing__"):
            raise KeyError(key)
        return owner.__missing__(self, key)

    def __setitem__(self, key: Any, item: Any) -> None:
        self.data[key] = item

    def __delitem__(self, key: Any) -> None:
        del self.data[key]

    def __iter__(self) -> Iterator:
        return iter(self.data)

    # Membership only looks at stored keys, so __missing__ never makes a key "present".
    def __contains__(self, key: Any) -> bool:
        return key in self.data

    # Methods that dict has but MutableMapping does not provide.
    def __repr__(self) -> str:
        return repr(self.data)

    def __or__(self, other: UserDict | dict) -> UserDict:
        """Return a new instance of this class holding ``self`` merged with ``other``."""
        if isinstance(other, UserDict):
            right = other.data
        elif isinstance(other, dict):
            right = other
        else:
            return NotImplemented
        return self.__class__(self.data | right)  # type: ignore

    def __ror__(self, other: UserDict | dict) -> UserDict:
        """Return a new instance of this class holding ``other`` merged with ``self``."""
        if isinstance(other, UserDict):
            left = other.data
        elif isinstance(other, dict):
            left = other
        else:
            return NotImplemented
        return self.__class__(left | self.data)  # type: ignore

    def __ior__(self, other: UserDict | dict) -> UserDict:
        """Merge ``other`` into this mapping in place and return ``self``."""
        self.data |= other.data if isinstance(other, UserDict) else other  # type: ignore
        return self

    def __copy__(self) -> UserDict:
        """Shallow copy: same instance attributes, with an independent copy of ``data``."""
        owner = self.__class__
        clone = owner.__new__(owner)
        clone.__dict__.update(self.__dict__)
        # Write through __dict__ so that descriptors are not triggered.
        clone.__dict__["data"] = self.__dict__["data"].copy()
        return clone

    def copy(self) -> UserDict:
        """Return a shallow copy, preserving the subclass and its extra attributes."""
        if self.__class__ is UserDict:
            return UserDict(self.data.copy())  # type: ignore
        from copy import copy as shallow_copy

        # Temporarily swap in an empty store so copy.copy() duplicates the
        # instance without its items; the items are added back via update().
        saved = self.data
        try:
            self.data = {}
            duplicate = shallow_copy(self)
        finally:
            self.data = saved
        duplicate.update(self)
        return duplicate

    @classmethod
    def fromkeys(cls, iterable: Iterable, value: Optional[Any] = None) -> UserDict:
        """Build an instance mapping every key of ``iterable`` to ``value``."""
        built = cls()
        for key in iterable:
            built[key] = value
        return built
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
class SecretDict(UserDict):
    """Mapping whose ``str()`` and ``repr()`` show a mask instead of the stored values."""

    def __str__(self) -> str:
        # Only the class name is revealed; the contents are replaced by asterisks.
        return "{}(******)".format(self.__class__.__name__)

    def __repr__(self) -> str:
        # Same masked text as str().
        return str(self)
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
class Command(Enum):
    """Connector commands, with helpers reporting which inputs each one requires."""

    CHECK = "check"
    DISCOVER = "discover"
    READ = "read"
    READ_WITH_STATE = "read-with-state"
    SPEC = "spec"

    def needs_config(self) -> bool:
        """Return True for check, discover, read and read-with-state."""
        return self in (
            Command.CHECK,
            Command.DISCOVER,
            Command.READ,
            Command.READ_WITH_STATE,
        )

    def needs_catalog(self) -> bool:
        """Return True for read and read-with-state."""
        return self in (Command.READ, Command.READ_WITH_STATE)

    def needs_state(self) -> bool:
        """Return True for read-with-state only."""
        return self in (Command.READ_WITH_STATE,)
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
class TargetOrControl(Enum):
    """Role of a connector under test: the target version or the control version."""

    TARGET = "target"
    CONTROL = "control"
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
class ActorType(Enum):
    """Kind of actor a connector is: a source or a destination."""

    SOURCE = "source"
    DESTINATION = "destination"
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
class ConnectionSubset(Enum):
    """Connection pool to consider for a live test.

    Either just the Airbyte sandboxes, or all possible connections on Cloud.
    """

    SANDBOXES = "sandboxes"
    ALL = "all"
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
@dataclass
class ConnectorUnderTest:
    """Represents a connector being tested.

    In validation tests, there would be one connector under test.
    When running regression tests, there would be two connectors under test:
    the target and the control versions of the same connector.
    """

    # connector image, assuming it's in the format "airbyte/{actor_type}-{connector_name}:{version}"
    image_name: str
    container: dagger.Container
    target_or_control: TargetOrControl

    @property
    def name(self) -> str:
        """Image name without the ``airbyte/`` part and without the ``:version`` tag."""
        return self.image_name.replace("airbyte/", "").split(":")[0]

    @property
    def name_without_type_prefix(self) -> str:
        """Connector name with the ``source-`` / ``destination-`` marker removed.

        Raises:
            ValueError: if the actor type cannot be inferred from the image name.
        """
        # count=1: strip only the leading type marker. An unbounded replace
        # would also eat later occurrences, e.g. "source-my-source-db" -> "my-db".
        return self.name.replace(f"{self.actor_type.value}-", "", 1)

    @property
    def version(self) -> str:
        """Text that follows the first ``:`` of the image name (the image tag).

        Raises:
            IndexError: if the image name contains no ``:``.
        """
        return self.image_name.replace("airbyte/", "").split(":")[1]

    @property
    def actor_type(self) -> ActorType:
        """Infer the actor type from the image name.

        Raises:
            ValueError: if the image name contains neither ``airbyte/source-``
                nor ``airbyte/destination-``.
        """
        if "airbyte/destination-" in self.image_name:
            return ActorType.DESTINATION
        if "airbyte/source-" in self.image_name:
            return ActorType.SOURCE
        raise ValueError(
            f"Can't infer the actor type. Connector image name {self.image_name} does not contain 'airbyte/source' or 'airbyte/destination'"
        )

    @classmethod
    async def from_image_name(
        cls: type[ConnectorUnderTest],
        dagger_client: dagger.Client,
        image_name: str,
        target_or_control: TargetOrControl,
    ) -> ConnectorUnderTest:
        """Alternate constructor: obtain the container via ``get_connector_container`` and wrap it."""
        container = await get_connector_container(dagger_client, image_name)
        return cls(image_name, container, target_or_control)
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
@dataclass
class ExecutionInputs:
    """Everything needed to run one command of a connector under test.

    Construction fails with ``ValueError`` when an input required by the
    selected command is ``None``.
    """

    hashed_connection_id: str
    connector_under_test: ConnectorUnderTest
    actor_id: str
    global_output_dir: Path
    command: Command
    config: Optional[SecretDict] = None
    configured_catalog: Optional[ConfiguredAirbyteCatalog] = None
    state: Optional[dict] = None
    environment_variables: Optional[dict] = None
    duckdb_path: Optional[Path] = None

    def raise_if_missing_attr_for_command(self, attribute: str) -> None:
        """Raise ``ValueError`` when the attribute named ``attribute`` is ``None``."""
        if getattr(self, attribute) is not None:
            return
        raise ValueError(
            f"We need a {attribute} to run the {self.command.value} command"
        )

    def __post_init__(self) -> None:
        """Validate that the inputs required by ``self.command`` were provided."""
        # Attributes each command requires, listed in the order they are checked.
        required_by_command = {
            Command.CHECK: ("config",),
            Command.DISCOVER: ("config",),
            Command.READ: ("config", "configured_catalog"),
            Command.READ_WITH_STATE: ("config", "configured_catalog", "state"),
        }
        for attribute in required_by_command.get(self.command, ()):
            self.raise_if_missing_attr_for_command(attribute)

    @property
    def output_dir(self) -> Path:
        """Directory for this execution's artifacts; created on access if missing."""
        artifacts_dir = (
            self.global_output_dir
            / f"command_execution_artifacts/{self.connector_under_test.name}/{self.command.value}/{self.connector_under_test.version}/{self.hashed_connection_id}"
        )
        artifacts_dir.mkdir(parents=True, exist_ok=True)
        return artifacts_dir
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
@dataclass
class ExecutionResult:
    """Result of running one command of a connector under test.

    Holds the paths of the captured stdout/stderr, the optional HTTP dump, and
    helpers to parse the emitted Airbyte messages and to persist artifacts
    (messages, HTTP dump, HAR file, inferred stream schemas) to disk.
    """

    hashed_connection_id: str
    actor_id: str
    configured_catalog: ConfiguredAirbyteCatalog
    connector_under_test: ConnectorUnderTest
    command: Command
    stdout_file_path: Path
    stderr_file_path: Path
    success: bool
    executed_container: Optional[dagger.Container]
    config: Optional[SecretDict]
    http_dump: Optional[dagger.File] = None
    http_flows: list[http.HTTPFlow] = field(default_factory=list)
    stream_schemas: Optional[dict[str, Any]] = None
    backend: Optional[FileBackend] = None

    # File names used by save_http_dump() inside the output directory.
    HTTP_DUMP_FILE_NAME = "http_dump.mitm"
    HAR_FILE_NAME = "http_dump.har"

    @property
    def logger(self) -> logging.Logger:
        """Logger named ``<target|control>-<command>``."""
        return logging.getLogger(
            f"{self.connector_under_test.target_or_control.value}-{self.command.value}"
        )

    @property
    def airbyte_messages(self) -> Iterable[AirbyteMessage]:
        """Lazily parse the messages written to the stdout file (re-read on every access)."""
        return self.parse_airbyte_messages_from_command_output(self.stdout_file_path)

    @property
    def duckdb_schema(self) -> Iterable[str]:
        """Tuple ``(target_or_control, command, hashed_connection_id)`` passed to ``DuckDbBackend``."""
        return (
            self.connector_under_test.target_or_control.value,
            self.command.value,
            self.hashed_connection_id,
        )

    @property
    def configured_streams(self) -> List[str]:
        """Names of the streams in the configured catalog."""
        return [stream.stream.name for stream in self.configured_catalog.streams]

    @property
    def primary_keys_per_stream(self) -> Dict[str, Optional[List[str]]]:
        """Map each configured stream name to the first entry of its primary key, or None if it has none."""
        return {
            stream.stream.name: stream.primary_key[0] if stream.primary_key else None
            for stream in self.configured_catalog.streams
        }

    @classmethod
    async def load(
        cls: type[ExecutionResult],
        connector_under_test: ConnectorUnderTest,
        hashed_connection_id: str,
        actor_id: str,
        configured_catalog: ConfiguredAirbyteCatalog,
        command: Command,
        stdout_file_path: Path,
        stderr_file_path: Path,
        success: bool,
        executed_container: Optional[dagger.Container],
        config: Optional[SecretDict] = None,
        http_dump: Optional[dagger.File] = None,
    ) -> ExecutionResult:
        """Alternate constructor that also loads the HTTP flows from ``http_dump``."""
        execution_result = cls(
            hashed_connection_id=hashed_connection_id,
            actor_id=actor_id,
            configured_catalog=configured_catalog,
            connector_under_test=connector_under_test,
            command=command,
            stdout_file_path=stdout_file_path,
            stderr_file_path=stderr_file_path,
            success=success,
            executed_container=executed_container,
            config=config,
            http_dump=http_dump,
        )
        await execution_result.load_http_flows()
        return execution_result

    async def load_http_flows(self) -> None:
        """Export the HTTP dump to a temporary file and parse it into ``self.http_flows``.

        Does nothing when no HTTP dump was captured.
        """
        if not self.http_dump:
            return
        with tempfile.NamedTemporaryFile() as temp_file:
            await self.http_dump.export(temp_file.name)
            self.http_flows = get_http_flows_from_mitm_dump(Path(temp_file.name))

    def parse_airbyte_messages_from_command_output(
        self, command_output_path: Path, log_validation_errors: bool = False
    ) -> Iterable[AirbyteMessage]:
        """Yield one ``AirbyteMessage`` per parseable line of ``command_output_path``.

        Lines that fail validation are skipped; they are logged as warnings
        only when ``log_validation_errors`` is True.
        """
        with open(command_output_path) as command_output:
            for line in command_output:
                try:
                    yield AirbyteMessage.parse_raw(line)
                except ValidationError as e:
                    if log_validation_errors:
                        # Logger.warn is a deprecated alias of Logger.warning.
                        self.logger.warning(f"Error parsing AirbyteMessage: {e}")

    def get_records(self) -> Iterable[AirbyteMessage]:
        """Yield every RECORD message found in the command output."""
        self.logger.info(
            f"Reading all records for command {self.command.value} on {self.connector_under_test.target_or_control.value} version."
        )
        for message in self.airbyte_messages:
            if message.type is AirbyteMessageType.RECORD:
                yield message

    def generate_stream_schemas(self) -> dict[str, Any]:
        """Infer one JSON schema per stream from the records, with sorted keys."""
        self.logger.info("Generating stream schemas")
        stream_builders: dict[str, SchemaBuilder] = {}
        for record in self.get_records():
            stream = record.record.stream
            if stream not in stream_builders:
                # Seed every builder with an empty object schema.
                stream_schema_builder = SchemaBuilder()
                stream_schema_builder.add_schema({"type": "object", "properties": {}})
                stream_builders[stream] = stream_schema_builder
            stream_builders[stream].add_object(
                self.get_obfuscated_types(record.record.data)
            )
        self.logger.info("Stream schemas generated")
        return {
            stream: sort_dict_keys(builder.to_schema())
            for stream, builder in stream_builders.items()
        }

    @staticmethod
    def get_obfuscated_types(data: dict[str, Any]) -> dict[str, Any]:
        """
        Convert obfuscated records into a record whose values have the same type as the original values.

        String values starting with ``string_``, ``integer_``, ``number_``,
        ``boolean_``, ``null_``, ``array_`` or ``object_`` are replaced by a
        sample value of that type. Any other value, including non-string
        values, is kept unchanged.
        """
        types = {}
        for k, v in data.items():
            if not isinstance(v, str):
                # Not an obfuscation marker; calling startswith() on it would raise.
                types[k] = v
            elif v.startswith("string_"):
                types[k] = "a"
            elif v.startswith("integer_"):
                types[k] = 0
            elif v.startswith("number_"):
                types[k] = 0.1
            elif v.startswith("boolean_"):
                types[k] = True
            elif v.startswith("null_"):
                types[k] = None
            elif v.startswith("array_"):
                types[k] = []
            elif v.startswith("object_"):
                types[k] = {}
            else:
                types[k] = v

        return types

    def get_records_per_stream(self, stream: str) -> Iterator[AirbyteMessage]:
        """Yield the RECORD messages stored by the backend for ``stream``.

        Yields nothing (and logs a warning) when the backend has no file for
        the stream. Requires ``self.backend`` to be set.
        """
        assert self.backend is not None, "Backend must be set to get records per stream"
        self.logger.info(f"Reading records for stream {stream}")
        if stream not in self.backend.record_per_stream_paths:
            self.logger.warning(f"No records found for stream {stream}")
            return
        for message in self.parse_airbyte_messages_from_command_output(
            self.backend.record_per_stream_paths[stream], log_validation_errors=True
        ):
            if message.type is AirbyteMessageType.RECORD:
                yield message

    def get_states_per_stream(
        self, stream: str
    ) -> Dict[str, List[AirbyteStateMessage]]:
        """Group all STATE messages by stream descriptor name.

        NOTE: ``stream`` is only used in the log line; the result covers every stream.
        """
        self.logger.info(f"Reading state messages for stream {stream}")
        states = defaultdict(list)
        for message in self.airbyte_messages:
            if message.type is AirbyteMessageType.STATE:
                states[message.state.stream.stream_descriptor.name].append(
                    message.state
                )
        return states

    def get_status_messages_per_stream(
        self, stream: str
    ) -> Dict[str, List[AirbyteStreamStatusTraceMessage]]:
        """Group all STREAM_STATUS trace messages by stream descriptor name.

        NOTE: ``stream`` is only used in the log line; the result covers every stream.
        """
        self.logger.info(f"Reading stream status messages for stream {stream}")
        statuses = defaultdict(list)
        for message in self.airbyte_messages:
            if (
                message.type is AirbyteMessageType.TRACE
                and message.trace.type == TraceType.STREAM_STATUS
            ):
                statuses[message.trace.stream_status.stream_descriptor.name].append(
                    message.trace.stream_status
                )
        return statuses

    # NOTE(review): functools.cache on a method keys on ``self`` (hashed via
    # __hash__ below) and keeps each instance referenced by the cache.
    @cache
    def get_message_count_per_type(self) -> dict[AirbyteMessageType, int]:
        """Count the messages of the command output by message type (memoized)."""
        message_count: dict[AirbyteMessageType, int] = defaultdict(int)
        for message in self.airbyte_messages:
            message_count[message.type] += 1
        return message_count

    async def save_http_dump(self, output_dir: Path) -> None:
        """Export the HTTP dump to ``output_dir`` and convert it to a HAR file next to it.

        Logs a warning and does nothing else when no dump was captured.
        """
        if self.http_dump:
            self.logger.info(
                "An http dump was captured during the execution of the command, saving it."
            )
            http_dump_file_path = (output_dir / self.HTTP_DUMP_FILE_NAME).resolve()
            await self.http_dump.export(str(http_dump_file_path))
            self.logger.info(f"Http dump saved to {http_dump_file_path}")

            # Define where the har file will be saved
            har_file_path = (output_dir / self.HAR_FILE_NAME).resolve()
            # Convert the mitmproxy dump file to a har file
            mitm_http_stream_to_har(http_dump_file_path, har_file_path)
            self.logger.info(f"Har file saved to {har_file_path}")
        else:
            self.logger.warning("No http dump to save")

    def save_airbyte_messages(
        self, output_dir: Path, duckdb_path: Optional[Path] = None
    ) -> None:
        """Write the Airbyte messages under ``output_dir / "airbyte_messages"``.

        Uses a ``DuckDbBackend`` when ``duckdb_path`` is given, a ``FileBackend``
        otherwise, and stores the backend on ``self.backend``.
        """
        self.logger.info("Saving Airbyte messages to disk")
        airbyte_messages_dir = output_dir / "airbyte_messages"
        airbyte_messages_dir.mkdir(parents=True, exist_ok=True)
        if duckdb_path:
            self.backend = DuckDbBackend(
                airbyte_messages_dir, duckdb_path, self.duckdb_schema
            )
        else:
            self.backend = FileBackend(airbyte_messages_dir)
        self.backend.write(self.airbyte_messages)
        self.logger.info("Airbyte messages saved")

    def save_stream_schemas(self, output_dir: Path) -> None:
        """Generate the stream schemas and write one JSON file per stream under ``stream_schemas``."""
        self.stream_schemas = self.generate_stream_schemas()
        stream_schemas_dir = output_dir / "stream_schemas"
        stream_schemas_dir.mkdir(parents=True, exist_ok=True)
        for stream_name, stream_schema in self.stream_schemas.items():
            (
                stream_schemas_dir / f"{sanitize_stream_name(stream_name)}.json"
            ).write_text(json.dumps(stream_schema, sort_keys=True))
        self.logger.info("Stream schemas saved to disk")

    async def save_artifacts(
        self, output_dir: Path, duckdb_path: Optional[Path] = None
    ) -> None:
        """Persist all artifacts of this execution.

        Order matters: the messages are saved first because
        ``update_configuration`` needs the backend they create.
        """
        self.logger.info("Saving artifacts to disk")
        self.save_airbyte_messages(output_dir, duckdb_path)
        self.update_configuration()
        await self.save_http_dump(output_dir)
        self.save_stream_schemas(output_dir)
        self.logger.info("All artifacts saved to disk")

    def get_updated_configuration(
        self, control_message_path: Path
    ) -> Optional[dict[str, Any]]:
        """Iterate through the control messages to find CONNECTOR_CONFIG message and return the last updated configuration.

        Returns None when the file does not exist or no line carries a
        non-empty ``connectorConfig``.
        """
        if not control_message_path.exists():
            return None
        updated_config = None
        for line in control_message_path.read_text().splitlines():
            if line.strip():
                connector_config = json.loads(line.strip()).get("connectorConfig", {})
                if connector_config:
                    # Later lines override earlier ones.
                    updated_config = connector_config
        return updated_config

    def update_configuration(self) -> None:
        """This function checks if a configuration has to be updated by reading the control messages file.
        If a configuration has to be updated, it updates the configuration on the actor using the Airbyte API.

        HTTP errors are logged and not re-raised.
        """
        assert self.backend is not None, (
            "Backend must be set to update configuration in order to find the control messages path"
        )
        updated_configuration = self.get_updated_configuration(
            self.backend.jsonl_controls_path
        )
        if updated_configuration is None:
            return

        self.logger.warning(
            f"Updating configuration for {self.connector_under_test.name}, actor {self.actor_id}"
        )
        url = f"https://api.airbyte.com/v1/{self.connector_under_test.actor_type.value}s/{self.actor_id}"

        payload = {
            "configuration": {
                **updated_configuration,
                f"{self.connector_under_test.actor_type.value}Type": self.connector_under_test.name_without_type_prefix,
            }
        }
        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "authorization": f"Bearer {get_airbyte_api_key()}",
        }

        response = requests.patch(url, json=payload, headers=headers)
        try:
            response.raise_for_status()
        except requests.HTTPError as e:
            self.logger.error(
                f"Failed to update {self.connector_under_test.name} configuration on actor {self.actor_id}: {e}"
            )
            self.logger.error(f"Response: {response.text}")
        else:
            # Only report success when the request actually succeeded.
            self.logger.info(
                f"Updated configuration for {self.connector_under_test.name}, actor {self.actor_id}"
            )

    def __hash__(self):
        # Hash on the connector version only; needed so instances can be used
        # as keys by the @cache on get_message_count_per_type.
        return hash(self.connector_under_test.version)
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
@dataclass(kw_only=True)
|
|
573
|
-
class ConnectionObjects:
|
|
574
|
-
source_config: Optional[SecretDict]
|
|
575
|
-
destination_config: Optional[SecretDict]
|
|
576
|
-
configured_catalog: Optional[ConfiguredAirbyteCatalog]
|
|
577
|
-
catalog: Optional[AirbyteCatalog]
|
|
578
|
-
state: Optional[dict]
|
|
579
|
-
workspace_id: Optional[str]
|
|
580
|
-
source_id: Optional[str]
|
|
581
|
-
destination_id: Optional[str]
|
|
582
|
-
source_docker_image: Optional[str]
|
|
583
|
-
connection_id: Optional[str]
|
|
584
|
-
|
|
585
|
-
@property
|
|
586
|
-
def url(self) -> Optional[str]:
|
|
587
|
-
if not self.workspace_id or not self.connection_id:
|
|
588
|
-
return None
|
|
589
|
-
return f"https://cloud.airbyte.com/workspaces/{self.workspace_id}/connections/{self.connection_id}"
|
|
590
|
-
|
|
591
|
-
@property
|
|
592
|
-
def hashed_connection_id(self) -> Optional[str]:
|
|
593
|
-
if not self.connection_id:
|
|
594
|
-
return None
|
|
595
|
-
return self.connection_id[:8]
|