airbyte-internal-ops 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/METADATA +1 -1
- {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/RECORD +13 -52
- airbyte_ops_mcp/cli/cloud.py +42 -3
- airbyte_ops_mcp/cloud_admin/api_client.py +473 -0
- airbyte_ops_mcp/cloud_admin/models.py +56 -0
- airbyte_ops_mcp/mcp/cloud_connector_versions.py +460 -0
- airbyte_ops_mcp/mcp/prerelease.py +6 -46
- airbyte_ops_mcp/regression_tests/ci_output.py +151 -71
- airbyte_ops_mcp/regression_tests/http_metrics.py +21 -2
- airbyte_ops_mcp/regression_tests/models.py +6 -0
- airbyte_ops_mcp/telemetry.py +162 -0
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/.gitignore +0 -1
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/README.md +0 -420
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/__init__.py +0 -2
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/__init__.py +0 -1
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/__init__.py +0 -8
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/base_backend.py +0 -16
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/duckdb_backend.py +0 -87
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/file_backend.py +0 -165
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connection_objects_retrieval.py +0 -377
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connector_runner.py +0 -247
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/errors.py +0 -7
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/evaluation_modes.py +0 -25
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/hacks.py +0 -23
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/json_schema_helper.py +0 -384
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/mitm_addons.py +0 -37
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/models.py +0 -595
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/proxy.py +0 -207
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/secret_access.py +0 -47
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/segment_tracking.py +0 -45
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/utils.py +0 -214
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/conftest.py.disabled +0 -751
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/consts.py +0 -4
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/poetry.lock +0 -4480
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/pytest.ini +0 -9
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/__init__.py +0 -1
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_check.py +0 -61
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_discover.py +0 -117
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_read.py +0 -627
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_spec.py +0 -43
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/report.py +0 -542
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/stash_keys.py +0 -38
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/__init__.py +0 -0
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/private_details.html.j2 +0 -305
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/report.html.j2 +0 -515
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/utils.py +0 -187
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/__init__.py +0 -0
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_check.py +0 -61
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_discover.py +0 -217
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_read.py +0 -177
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_spec.py +0 -631
- {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/WHEEL +0 -0
- {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/entry_points.txt +0 -0
airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connector_runner.py
@@ -1,247 +0,0 @@
-#
-# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-#
-
-from __future__ import annotations
-
-import datetime
-import json
-import logging
-import os
-import subprocess
-import uuid
-from pathlib import Path
-
-import anyio
-import asyncer
-import dagger
-from live_tests.commons import errors
-from live_tests.commons.models import Command, ExecutionInputs, ExecutionResult
-from live_tests.commons.proxy import Proxy
-
-
-class ConnectorRunner:
-    DATA_DIR = "/airbyte/data"
-    IN_CONTAINER_CONFIG_PATH = f"{DATA_DIR}/config.json"
-    IN_CONTAINER_CONFIGURED_CATALOG_PATH = f"{DATA_DIR}/catalog.json"
-    IN_CONTAINER_STATE_PATH = f"{DATA_DIR}/state.json"
-    IN_CONTAINER_OUTPUT_PATH = f"{DATA_DIR}/output.txt"
-    IN_CONTAINER_OBFUSCATOR_PATH = "/user/local/bin/record_obfuscator.py"
-
-    def __init__(
-        self,
-        dagger_client: dagger.Client,
-        execution_inputs: ExecutionInputs,
-        is_airbyte_ci: bool,
-        http_proxy: Proxy | None = None,
-    ):
-        self.connector_under_test = execution_inputs.connector_under_test
-        self.command = execution_inputs.command
-        self.output_dir = execution_inputs.output_dir
-        self.config = execution_inputs.config
-        self.configured_catalog = execution_inputs.configured_catalog
-        self.state = execution_inputs.state
-        self.duckdb_path = execution_inputs.duckdb_path
-        self.actor_id = execution_inputs.actor_id
-        self.hashed_connection_id = execution_inputs.hashed_connection_id
-        self.environment_variables = (
-            execution_inputs.environment_variables
-            if execution_inputs.environment_variables
-            else {}
-        )
-
-        self.full_command: list[str] = self._get_full_command(execution_inputs.command)
-        self.completion_event = anyio.Event()
-        self.http_proxy = http_proxy
-        self.logger = logging.getLogger(
-            f"{self.connector_under_test.name}-{self.connector_under_test.version}"
-        )
-        self.dagger_client = dagger_client
-        if is_airbyte_ci:
-            self.host_obfuscator_path = "/tmp/record_obfuscator.py"
-        else:
-            repo_root = Path(
-                subprocess.check_output(["git", "rev-parse", "--show-toplevel"])
-                .strip()
-                .decode()
-            )
-            self.host_obfuscator_path = f"{repo_root}/tools/bin/record_obfuscator.py"
-
-    @property
-    def _connector_under_test_container(self) -> dagger.Container:
-        return self.connector_under_test.container
-
-    @property
-    def stdout_file_path(self) -> Path:
-        return (self.output_dir / "stdout.log").resolve()
-
-    @property
-    def stderr_file_path(self) -> Path:
-        return (self.output_dir / "stderr.log").resolve()
-
-    def _get_full_command(self, command: Command) -> list[str]:
-        """Returns a list with a full Airbyte command invocation and all it's arguments and options."""
-        if command is Command.SPEC:
-            return ["spec"]
-        elif command is Command.CHECK:
-            return ["check", "--config", self.IN_CONTAINER_CONFIG_PATH]
-        elif command is Command.DISCOVER:
-            return ["discover", "--config", self.IN_CONTAINER_CONFIG_PATH]
-        elif command is Command.READ:
-            return [
-                "read",
-                "--config",
-                self.IN_CONTAINER_CONFIG_PATH,
-                "--catalog",
-                self.IN_CONTAINER_CONFIGURED_CATALOG_PATH,
-            ]
-        elif command is Command.READ_WITH_STATE:
-            return [
-                "read",
-                "--config",
-                self.IN_CONTAINER_CONFIG_PATH,
-                "--catalog",
-                self.IN_CONTAINER_CONFIGURED_CATALOG_PATH,
-                "--state",
-                self.IN_CONTAINER_STATE_PATH,
-            ]
-        else:
-            raise NotImplementedError(
-                f"The connector runner does not support the {command} command"
-            )
-
-    async def get_container_env_variable_value(self, name: str) -> str | None:
-        return await self._connector_under_test_container.env_variable(name)
-
-    async def get_container_label(self, label: str) -> str | None:
-        return await self._connector_under_test_container.label(label)
-
-    async def get_container_entrypoint(self) -> str:
-        entrypoint = await self._connector_under_test_container.entrypoint()
-        assert entrypoint, "The connector container has no entrypoint"
-        return " ".join(entrypoint)
-
-    async def run(self) -> ExecutionResult:
-        async with asyncer.create_task_group() as task_group:
-            soon_result = task_group.soonify(self._run)()
-            task_group.soonify(self._log_progress)()
-        return soon_result.value
-
-    async def _run(
-        self,
-    ) -> ExecutionResult:
-        container = self._connector_under_test_container
-        current_user = (await container.with_exec(["whoami"]).stdout()).strip()
-        container = container.with_user(current_user)
-        container = container.with_exec(["mkdir", "-p", self.DATA_DIR])
-        # Do not cache downstream dagger layers
-        container = container.with_env_variable("CACHEBUSTER", str(uuid.uuid4()))
-
-        # When running locally, it's likely that record_obfuscator is within the user's home directory, so we expand it.
-        expanded_host_executable_path = os.path.expanduser(self.host_obfuscator_path)
-        container = container.with_file(
-            self.IN_CONTAINER_OBFUSCATOR_PATH,
-            self.dagger_client.host().file(expanded_host_executable_path),
-        )
-
-        for env_var_name, env_var_value in self.environment_variables.items():
-            container = container.with_env_variable(env_var_name, env_var_value)
-        if self.config:
-            container = container.with_new_file(
-                self.IN_CONTAINER_CONFIG_PATH,
-                contents=json.dumps(dict(self.config)),
-                owner=current_user,
-            )
-        if self.state:
-            container = container.with_new_file(
-                self.IN_CONTAINER_STATE_PATH,
-                contents=json.dumps(self.state),
-                owner=current_user,
-            )
-        if self.configured_catalog:
-            container = container.with_new_file(
-                self.IN_CONTAINER_CONFIGURED_CATALOG_PATH,
-                contents=self.configured_catalog.json(),
-                owner=current_user,
-            )
-        if self.http_proxy:
-            container = await self.http_proxy.bind_container(container)
-
-        self.logger.info(f"⏳ Start running {self.command.value} command")
-
-        try:
-            entrypoint = await container.entrypoint()
-            assert entrypoint, "The connector container has no entrypoint"
-            airbyte_command = entrypoint + self.full_command
-
-            container = container.with_exec(
-                [
-                    "sh",
-                    "-c",
-                    " ".join(airbyte_command)
-                    + f"| {self.IN_CONTAINER_OBFUSCATOR_PATH} > {self.IN_CONTAINER_OUTPUT_PATH} 2>&1 | tee -a {self.IN_CONTAINER_OUTPUT_PATH}",
-                ]
-            )
-            executed_container = await container.sync()
-            # We exporting to disk as we can't read .stdout() or await file.contents() as it might blow up the memory
-            stdout_exported = await executed_container.file(
-                self.IN_CONTAINER_OUTPUT_PATH
-            ).export(str(self.stdout_file_path))
-            if not stdout_exported:
-                raise errors.ExportError(
-                    f"Failed to export {self.IN_CONTAINER_OUTPUT_PATH}"
-                )
-
-            stderr = await executed_container.stderr()
-            self.stderr_file_path.write_text(stderr)
-            success = True
-        except dagger.ExecError as e:
-            self.stderr_file_path.write_text(e.stderr)
-            self.stdout_file_path.write_text(e.stdout)
-            executed_container = None
-            success = False
-
-        self.completion_event.set()
-        if not success:
-            self.logger.error(f"❌ Failed to run {self.command.value} command")
-        else:
-            self.logger.info(f"⌛ Finished running {self.command.value} command")
-
-        execution_result = await ExecutionResult.load(
-            command=self.command,
-            connector_under_test=self.connector_under_test,
-            actor_id=self.actor_id,
-            hashed_connection_id=self.hashed_connection_id,
-            configured_catalog=self.configured_catalog,
-            stdout_file_path=self.stdout_file_path,
-            stderr_file_path=self.stderr_file_path,
-            success=success,
-            http_dump=await self.http_proxy.retrieve_http_dump()
-            if self.http_proxy
-            else None,
-            executed_container=executed_container,
-            config=self.config,
-        )
-        await execution_result.save_artifacts(self.output_dir, self.duckdb_path)
-        return execution_result
-
-    async def _log_progress(self) -> None:
-        start_time = datetime.datetime.utcnow()
-        message = f"⏳ Still running {self.command.value} command"
-        while not self.completion_event.is_set():
-            duration = datetime.datetime.utcnow() - start_time
-            elapsed_seconds = duration.total_seconds()
-            if elapsed_seconds > 10 and round(elapsed_seconds) % 10 == 0:
-                self.logger.info(
-                    f"{message} (duration: {self.format_duration(duration)})"
-                )
-            await anyio.sleep(1)
-
-    @staticmethod
-    def format_duration(time_delta: datetime.timedelta) -> str:
-        total_seconds = time_delta.total_seconds()
-        if total_seconds < 60:
-            return f"{total_seconds:.2f}s"
-        minutes = int(total_seconds // 60)
-        seconds = int(total_seconds % 60)
-        return f"{minutes:02d}mn{seconds:02d}s"
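
The deleted runner builds each Airbyte protocol invocation as an argv suffix appended to the container entrypoint, then pipes the output through record_obfuscator. A minimal runnable sketch of that command mapping follows; the Command enum here is a stand-in for live_tests.commons.models.Command, which this diff does not show, and its member values are assumed.

    # Sketch only: Command is a stand-in for live_tests.commons.models.Command.
    from enum import Enum

    class Command(Enum):
        SPEC = "spec"
        CHECK = "check"
        DISCOVER = "discover"
        READ = "read"
        READ_WITH_STATE = "read_with_state"  # assumed member value

    DATA_DIR = "/airbyte/data"
    CONFIG = f"{DATA_DIR}/config.json"
    CATALOG = f"{DATA_DIR}/catalog.json"
    STATE = f"{DATA_DIR}/state.json"

    def get_full_command(command: Command) -> list[str]:
        """Build the argv suffix appended to the connector container entrypoint."""
        if command is Command.SPEC:
            return ["spec"]
        if command is Command.CHECK:
            return ["check", "--config", CONFIG]
        if command is Command.DISCOVER:
            return ["discover", "--config", CONFIG]
        if command is Command.READ:
            return ["read", "--config", CONFIG, "--catalog", CATALOG]
        if command is Command.READ_WITH_STATE:
            return ["read", "--config", CONFIG, "--catalog", CATALOG, "--state", STATE]
        raise NotImplementedError(f"Unsupported command: {command}")

    assert get_full_command(Command.CHECK) == ["check", "--config", "/airbyte/data/config.json"]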
airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/evaluation_modes.py
@@ -1,25 +0,0 @@
-# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-from __future__ import annotations
-
-from enum import Enum
-
-
-class TestEvaluationMode(Enum):
-    """
-    Tests may be run in "diagnostic" mode or "strict" mode.
-
-    When run in "diagnostic" mode, `AssertionError`s won't fail the test, but we will continue to surface
-    any errors to the test report.
-
-    In "strict" mode, tests pass/fail as usual.
-
-    In live tests, diagnostic mode is used for tests that don't affect the overall functionality of the
-    connector but that test an ideal state of the connector. Currently this is applicable to validation
-    tests only.
-
-    The diagnostic mode can be made available to a test using the @pytest.mark.allow_diagnostic_mode decorator,
-    and passing in the --test-evaluation-mode=diagnostic flag.
-    """
-
-    DIAGNOSTIC = "diagnostic"
-    STRICT = "strict"
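
The docstring above describes the two evaluation modes; the actual pytest wiring lived in the deleted conftest.py.disabled, which this diff does not render. A minimal sketch of the intended semantics, using a hypothetical evaluate helper that is not part of the package:

    # Sketch only: evaluate() is a hypothetical helper illustrating the modes.
    from enum import Enum

    class TestEvaluationMode(Enum):
        DIAGNOSTIC = "diagnostic"
        STRICT = "strict"

    def evaluate(assertion_passed: bool, mode: TestEvaluationMode) -> tuple[bool, str]:
        """Return (fails_test, report_note) for a single assertion outcome."""
        if assertion_passed:
            return False, "ok"
        if mode is TestEvaluationMode.DIAGNOSTIC:
            # Surfaced in the test report, but the test still passes.
            return False, "assertion failed (diagnostic, non-fatal)"
        return True, "assertion failed"

    assert evaluate(False, TestEvaluationMode.DIAGNOSTIC)[0] is False
    assert evaluate(False, TestEvaluationMode.STRICT)[0] is True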
airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/hacks.py
@@ -1,23 +0,0 @@
-# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
-
-import copy
-
-import rich
-
-console = rich.get_console()
-
-
-def patch_configured_catalog(configured_catalog: dict) -> dict:
-    """
-    The configured catalog extracted from the platform can be incompatible with the airbyte-protocol.
-    This leads to validation error when we serialize the configured catalog into a ConfiguredAirbyteCatalog object.
-    This functions is a best effort to patch the configured catalog to make it compatible with the airbyte-protocol.
-    """
-    patched_catalog = copy.deepcopy(configured_catalog)
-    for stream in patched_catalog["streams"]:
-        if stream.get("destination_sync_mode") == "overwrite_dedup":
-            stream["destination_sync_mode"] = "overwrite"
-            console.log(
-                f"Stream {stream['stream']['name']} destination_sync_mode has been patched from 'overwrite_dedup' to 'overwrite' to guarantee compatibility with the airbyte-protocol."
-            )
-    return patched_catalog
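
A worked example of the patch above, inlining the same logic on an illustrative catalog (stream names are made up): the unsupported "overwrite_dedup" mode is downgraded to "overwrite" while other modes pass through unchanged.

    import copy

    catalog = {
        "streams": [
            {"stream": {"name": "users"}, "destination_sync_mode": "overwrite_dedup"},
            {"stream": {"name": "orders"}, "destination_sync_mode": "append"},
        ]
    }

    patched = copy.deepcopy(catalog)
    for stream in patched["streams"]:
        if stream.get("destination_sync_mode") == "overwrite_dedup":
            stream["destination_sync_mode"] = "overwrite"

    assert patched["streams"][0]["destination_sync_mode"] == "overwrite"
    assert patched["streams"][1]["destination_sync_mode"] == "append"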
airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/json_schema_helper.py
@@ -1,384 +0,0 @@
-#
-# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-#
-from __future__ import annotations
-
-from enum import Enum
-from functools import reduce, total_ordering
-from typing import Any, Dict, List, Mapping, Optional, Set, Union
-
-import dpath.util
-import pendulum
-from jsonref import JsonRef
-
-
-class CatalogField:
-    """Field class to represent cursor/pk fields.
-    It eases the read of values from records according to schema definition.
-    """
-
-    def __init__(self, schema: Mapping[str, Any], path: List[str]):
-        self.schema = schema
-        self.path = path
-        self.formats = self._detect_formats()
-
-    def _detect_formats(self) -> Set[str]:
-        """Extract set of formats/types for this field"""
-        format_ = []
-        try:
-            format_ = self.schema.get("format", self.schema["type"])
-            if not isinstance(format_, List):
-                format_ = [format_]
-        except KeyError:
-            pass
-        return set(format_)
-
-    def _parse_value(self, value: Any) -> Any:
-        """Do actual parsing of the serialized value"""
-        if self.formats.intersection({"datetime", "date-time", "date"}):
-            if value is None and "null" not in self.formats:
-                raise ValueError(
-                    f"Invalid field format. Value: {value}. Format: {self.formats}"
-                )
-            # handle beautiful MySQL datetime, i.e. NULL datetime
-            if value.startswith("0000-00-00"):
-                value = value.replace("0000-00-00", "0001-01-01")
-            return pendulum.parse(value)
-        return value
-
-    def parse(
-        self, record: Mapping[str, Any], path: Optional[List[Union[int, str]]] = None
-    ) -> Any:
-        """Extract field value from the record and cast it to native type"""
-        path = path or self.path
-        value = reduce(lambda data, key: data[key], path, record)
-        return self._parse_value(value)
-
-
-@total_ordering
-class ComparableType(Enum):
-    NULL = 0
-    BOOLEAN = 1
-    INTEGER = 2
-    NUMBER = 3
-    STRING = 4
-    OBJECT = 5
-
-    def __lt__(self, other: Any) -> bool:
-        if self.__class__ is other.__class__:
-            return self.value < other.value  # type: ignore
-        else:
-            return NotImplemented
-
-
-class JsonSchemaHelper:
-    """Helper class to simplify schema validation and read of records according to their schema."""
-
-    def __init__(self, schema):
-        self._schema = schema
-
-    def get_ref(self, path: str) -> Any:
-        """Resolve reference
-
-        :param path: reference (#/definitions/SomeClass, etc)
-        :return: part of schema that is definition of the reference
-        :raises KeyError: in case path can't be followed
-        """
-        node = self._schema
-        for segment in path.split("/")[1:]:
-            node = node[segment]
-        return node
-
-    def get_property(self, path: List[str]) -> Mapping[str, Any]:
-        """Get any part of schema according to provided path, resolves $refs if necessary
-
-        schema = {
-            "properties": {
-                "field1": {
-                    "properties": {
-                        "nested_field": {
-                            <inner_object>
-                        }
-                    }
-                },
-                "field2": ...
-            }
-        }
-
-        helper = JsonSchemaHelper(schema)
-        helper.get_property(["field1", "nested_field"]) == <inner_object>
-
-        :param path: list of fields in the order of navigation
-        :return: discovered part of schema
-        :raises KeyError: in case path can't be followed
-        """
-        node = self._schema
-        for segment in path:
-            if "$ref" in node:
-                node = self.get_ref(node["$ref"])
-            node = node["properties"][segment]
-        return node
-
-    def field(self, path: List[str]) -> CatalogField:
-        """Get schema property and wrap it into CatalogField.
-
-        CatalogField is a helper to ease the read of values from records according to schema definition.
-
-        :param path: list of fields in the order of navigation
-        :return: discovered part of schema wrapped in CatalogField
-        :raises KeyError: in case path can't be followed
-        """
-        return CatalogField(schema=self.get_property(path), path=path)
-
-    def get_node(self, path: List[Union[str, int]]) -> Any:
-        """Return part of schema by specified path
-
-        :param path: list of fields in the order of navigation
-        """
-
-        node = self._schema
-        for segment in path:
-            if "$ref" in node:
-                node = self.get_ref(node["$ref"])
-            node = node[segment]
-        return node
-
-    def get_parent_path(self, path: str, separator="/") -> Any:
-        """
-        Returns the parent path of the supplied path
-        """
-        absolute_path = f"{separator}{path}" if not path.startswith(separator) else path
-        parent_path, _ = absolute_path.rsplit(sep=separator, maxsplit=1)
-        return parent_path
-
-    def get_parent(self, path: str, separator="/") -> Any:
-        """
-        Returns the parent dict of a given path within the `obj` dict
-        """
-        parent_path = self.get_parent_path(path, separator=separator)
-        if parent_path == "":
-            return self._schema
-        return dpath.util.get(self._schema, parent_path, separator=separator)
-
-    def find_nodes(self, keys: List[str]) -> List[List[Union[str, int]]]:
-        """Find all paths that lead to nodes with the specified keys.
-
-        :param keys: list of keys
-        :return: list of json object paths
-        """
-        variant_paths = []
-
-        def traverse_schema(_schema: Union[Dict[str, Any], List], path=None):
-            path = path or []
-            if path and path[-1] in keys:
-                variant_paths.append(path)
-            if isinstance(_schema, dict):
-                for item in _schema:
-                    traverse_schema(_schema[item], [*path, item])
-            elif isinstance(_schema, list):
-                for i, item in enumerate(_schema):
-                    traverse_schema(_schema[i], [*path, i])
-
-        traverse_schema(self._schema)
-        return variant_paths
-
-
-def get_object_structure(obj: dict) -> List[str]:
-    """
-    Traverse through object structure and compose a list of property keys including nested one.
-    This list reflects object's structure with list of all obj property key
-    paths. In case if object is nested inside array we assume that it has same
-    structure as first element.
-    :param obj: data object to get its structure
-    :returns list of object property keys paths
-    """
-    paths = []
-
-    def _traverse_obj_and_get_path(obj, path=""):
-        if path:
-            paths.append(path)
-        if isinstance(obj, dict):
-            return {
-                k: _traverse_obj_and_get_path(v, path + "/" + k) for k, v in obj.items()
-            }
-        elif isinstance(obj, list) and len(obj) > 0:
-            return [_traverse_obj_and_get_path(obj[0], path + "/[]")]
-
-    _traverse_obj_and_get_path(obj)
-
-    return paths
-
-
-def get_expected_schema_structure(
-    schema: dict, annotate_one_of: bool = False
-) -> List[str]:
-    """
-    Traverse through json schema and compose list of property keys that object expected to have.
-    :param annotate_one_of: Generate one_of index in path
-    :param schema: jsonschema to get expected paths
-    :returns list of object property keys paths
-    """
-    paths = []
-    if "$ref" in schema:
-        """
-        JsonRef doesnt work correctly with schemas that has refenreces in root e.g.
-        {
-            "$ref": "#/definitions/ref"
-            "definitions": {
-                "ref": ...
-            }
-        }
-        Considering this schema already processed by resolver so it should
-        contain only references to definitions section, replace root reference
-        manually before processing it with JsonRef library.
-        """
-        ref = schema["$ref"].split("/")[-1]
-        schema.update(schema["definitions"][ref])
-        schema.pop("$ref")
-    # Resolve all references to simplify schema processing.
-    schema = JsonRef.replace_refs(schema)
-
-    def _scan_schema(subschema, path=""):
-        if "oneOf" in subschema or "anyOf" in subschema:
-            if annotate_one_of:
-                return [
-                    _scan_schema({"type": "object", **s}, path + f"({num})")
-                    for num, s in enumerate(
-                        subschema.get("oneOf") or subschema.get("anyOf")
-                    )
-                ]
-            return [
-                _scan_schema({"type": "object", **s}, path)
-                for s in subschema.get("oneOf") or subschema.get("anyOf")
-            ]
-        schema_type = subschema.get("type", ["object", "null"])
-        if not isinstance(schema_type, list):
-            schema_type = [schema_type]
-        if "object" in schema_type:
-            props = subschema.get("properties")
-            if not props:
-                # Handle objects with arbitrary properties:
-                # {"type": "object", "additionalProperties": {"type": "string"}}
-                if path:
-                    paths.append(path)
-                return
-            return {k: _scan_schema(v, path + "/" + k) for k, v in props.items()}
-        elif "array" in schema_type:
-            items = subschema.get("items", {})
-            return [_scan_schema(items, path + "/[]")]
-        paths.append(path)
-
-    _scan_schema(schema)
-    return paths
-
-
-def flatten_tuples(to_flatten):
-    """Flatten a tuple of tuples into a single tuple."""
-    types = set()
-
-    if not isinstance(to_flatten, tuple):
-        to_flatten = (to_flatten,)
-    for thing in to_flatten:
-        if isinstance(thing, tuple):
-            types.update(flatten_tuples(thing))
-        else:
-            types.add(thing)
-    return tuple(types)
-
-
-def get_paths_in_connector_config(schema: dict) -> List[str]:
-    """
-    Traverse through the provided schema's values and extract the path_in_connector_config paths
-    :param properties: jsonschema containing values which may have path_in_connector_config attributes
-    :returns list of path_in_connector_config paths
-    """
-    return [
-        "/" + "/".join(value["path_in_connector_config"]) for value in schema.values()
-    ]
-
-
-def conforms_to_schema(record: Mapping[str, Any], schema: Mapping[str, Any]) -> bool:
-    """
-    Return true iff the record conforms to the supplied schema.
-
-    The record conforms to the supplied schema iff:
-    - All columns in the record are in the schema.
-    - For every column in the record, that column's type is equal to or narrower than the same column's
-      type in the schema.
-    """
-    schema_columns = set(schema.get("properties", {}).keys())
-    record_columns = set(record.keys())
-
-    if not record_columns.issubset(schema_columns):
-        return False
-
-    for column, definition in schema.get("properties", {}).items():
-        expected_type = definition.get("type")
-        value = record.get(column)
-
-        if value is not None:
-            if isinstance(expected_type, list):
-                return any(_is_equal_or_narrower_type(value, e) for e in expected_type)
-            elif expected_type == "object":
-                return isinstance(value, dict)
-            elif expected_type == "array":
-                if not isinstance(value, list):
-                    return False
-                array_type = definition.get("items", {}).get("type")
-                if not all(_is_equal_or_narrower_type(v, array_type) for v in value):
-                    return False
-            elif not _is_equal_or_narrower_type(value, expected_type):
-                return False
-
-    return True
-
-
-def _is_equal_or_narrower_type(value: Any, expected_type: str) -> bool:
-    if isinstance(value, list):
-        # We do not compare lists directly; the individual items are compared.
-        # If we hit this condition, it means that the expected type is not
-        # compatible with the inferred type.
-        return False
-
-    inferred_type = ComparableType(_get_inferred_type(value))
-
-    if inferred_type is None:
-        return False
-
-    return ComparableType(inferred_type) <= ComparableType(
-        _get_comparable_type(expected_type)
-    )
-
-
-def _get_inferred_type(value: Any) -> Optional[ComparableType]:
-    if value is None:
-        return ComparableType.NULL
-    if isinstance(value, bool):
-        return ComparableType.BOOLEAN
-    if isinstance(value, int):
-        return ComparableType.INTEGER
-    if isinstance(value, float):
-        return ComparableType.NUMBER
-    if isinstance(value, str):
-        return ComparableType.STRING
-    if isinstance(value, dict):
-        return ComparableType.OBJECT
-    else:
-        return None
-
-
-def _get_comparable_type(value: Any) -> Optional[ComparableType]:
-    if value == "null":
-        return ComparableType.NULL
-    if value == "boolean":
-        return ComparableType.BOOLEAN
-    if value == "integer":
-        return ComparableType.INTEGER
-    if value == "number":
-        return ComparableType.NUMBER
-    if value == "string":
-        return ComparableType.STRING
-    if value == "object":
-        return ComparableType.OBJECT
-    else:
-        return None
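
The conforms_to_schema check above rests on the ComparableType ordering: a record value conforms when its inferred type is equal to or narrower than the declared schema type. A self-contained sketch of that rule, restating the enum purely for illustration:

    from enum import Enum
    from functools import total_ordering

    @total_ordering
    class ComparableType(Enum):
        NULL = 0
        BOOLEAN = 1
        INTEGER = 2
        NUMBER = 3
        STRING = 4
        OBJECT = 5

        def __lt__(self, other):
            if self.__class__ is other.__class__:
                return self.value < other.value
            return NotImplemented

    # An integer value conforms to a "number" column (equal or narrower)...
    assert ComparableType.INTEGER <= ComparableType.NUMBER
    # ...but a string value does not conform to an "integer" column (wider).
    assert not ComparableType.STRING <= ComparableType.INTEGER

So, for example, the record {"id": 42} conforms to a schema declaring "id" as {"type": "number"}, while {"id": "42"} does not conform to {"type": "integer"}.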