airbyte-internal-ops 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/METADATA +1 -1
  2. {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/RECORD +13 -52
  3. airbyte_ops_mcp/cli/cloud.py +42 -3
  4. airbyte_ops_mcp/cloud_admin/api_client.py +473 -0
  5. airbyte_ops_mcp/cloud_admin/models.py +56 -0
  6. airbyte_ops_mcp/mcp/cloud_connector_versions.py +460 -0
  7. airbyte_ops_mcp/mcp/prerelease.py +6 -46
  8. airbyte_ops_mcp/regression_tests/ci_output.py +151 -71
  9. airbyte_ops_mcp/regression_tests/http_metrics.py +21 -2
  10. airbyte_ops_mcp/regression_tests/models.py +6 -0
  11. airbyte_ops_mcp/telemetry.py +162 -0
  12. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/.gitignore +0 -1
  13. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/README.md +0 -420
  14. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/__init__.py +0 -2
  15. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/__init__.py +0 -1
  16. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/__init__.py +0 -8
  17. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/base_backend.py +0 -16
  18. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/duckdb_backend.py +0 -87
  19. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/file_backend.py +0 -165
  20. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connection_objects_retrieval.py +0 -377
  21. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connector_runner.py +0 -247
  22. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/errors.py +0 -7
  23. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/evaluation_modes.py +0 -25
  24. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/hacks.py +0 -23
  25. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/json_schema_helper.py +0 -384
  26. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/mitm_addons.py +0 -37
  27. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/models.py +0 -595
  28. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/proxy.py +0 -207
  29. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/secret_access.py +0 -47
  30. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/segment_tracking.py +0 -45
  31. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/utils.py +0 -214
  32. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/conftest.py.disabled +0 -751
  33. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/consts.py +0 -4
  34. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/poetry.lock +0 -4480
  35. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/pytest.ini +0 -9
  36. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/__init__.py +0 -1
  37. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_check.py +0 -61
  38. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_discover.py +0 -117
  39. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_read.py +0 -627
  40. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_spec.py +0 -43
  41. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/report.py +0 -542
  42. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/stash_keys.py +0 -38
  43. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/__init__.py +0 -0
  44. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/private_details.html.j2 +0 -305
  45. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/report.html.j2 +0 -515
  46. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/utils.py +0 -187
  47. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/__init__.py +0 -0
  48. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_check.py +0 -61
  49. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_discover.py +0 -217
  50. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_read.py +0 -177
  51. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_spec.py +0 -631
  52. {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/WHEEL +0 -0
  53. {airbyte_internal_ops-0.4.1.dist-info → airbyte_internal_ops-0.5.0.dist-info}/entry_points.txt +0 -0
airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connector_runner.py
@@ -1,247 +0,0 @@
- #
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- #
-
- from __future__ import annotations
-
- import datetime
- import json
- import logging
- import os
- import subprocess
- import uuid
- from pathlib import Path
-
- import anyio
- import asyncer
- import dagger
- from live_tests.commons import errors
- from live_tests.commons.models import Command, ExecutionInputs, ExecutionResult
- from live_tests.commons.proxy import Proxy
-
-
- class ConnectorRunner:
-     DATA_DIR = "/airbyte/data"
-     IN_CONTAINER_CONFIG_PATH = f"{DATA_DIR}/config.json"
-     IN_CONTAINER_CONFIGURED_CATALOG_PATH = f"{DATA_DIR}/catalog.json"
-     IN_CONTAINER_STATE_PATH = f"{DATA_DIR}/state.json"
-     IN_CONTAINER_OUTPUT_PATH = f"{DATA_DIR}/output.txt"
-     IN_CONTAINER_OBFUSCATOR_PATH = "/user/local/bin/record_obfuscator.py"
-
-     def __init__(
-         self,
-         dagger_client: dagger.Client,
-         execution_inputs: ExecutionInputs,
-         is_airbyte_ci: bool,
-         http_proxy: Proxy | None = None,
-     ):
-         self.connector_under_test = execution_inputs.connector_under_test
-         self.command = execution_inputs.command
-         self.output_dir = execution_inputs.output_dir
-         self.config = execution_inputs.config
-         self.configured_catalog = execution_inputs.configured_catalog
-         self.state = execution_inputs.state
-         self.duckdb_path = execution_inputs.duckdb_path
-         self.actor_id = execution_inputs.actor_id
-         self.hashed_connection_id = execution_inputs.hashed_connection_id
-         self.environment_variables = (
-             execution_inputs.environment_variables
-             if execution_inputs.environment_variables
-             else {}
-         )
-
-         self.full_command: list[str] = self._get_full_command(execution_inputs.command)
-         self.completion_event = anyio.Event()
-         self.http_proxy = http_proxy
-         self.logger = logging.getLogger(
-             f"{self.connector_under_test.name}-{self.connector_under_test.version}"
-         )
-         self.dagger_client = dagger_client
-         if is_airbyte_ci:
-             self.host_obfuscator_path = "/tmp/record_obfuscator.py"
-         else:
-             repo_root = Path(
-                 subprocess.check_output(["git", "rev-parse", "--show-toplevel"])
-                 .strip()
-                 .decode()
-             )
-             self.host_obfuscator_path = f"{repo_root}/tools/bin/record_obfuscator.py"
-
-     @property
-     def _connector_under_test_container(self) -> dagger.Container:
-         return self.connector_under_test.container
-
-     @property
-     def stdout_file_path(self) -> Path:
-         return (self.output_dir / "stdout.log").resolve()
-
-     @property
-     def stderr_file_path(self) -> Path:
-         return (self.output_dir / "stderr.log").resolve()
-
-     def _get_full_command(self, command: Command) -> list[str]:
-         """Returns a list with a full Airbyte command invocation and all its arguments and options."""
-         if command is Command.SPEC:
-             return ["spec"]
-         elif command is Command.CHECK:
-             return ["check", "--config", self.IN_CONTAINER_CONFIG_PATH]
-         elif command is Command.DISCOVER:
-             return ["discover", "--config", self.IN_CONTAINER_CONFIG_PATH]
-         elif command is Command.READ:
-             return [
-                 "read",
-                 "--config",
-                 self.IN_CONTAINER_CONFIG_PATH,
-                 "--catalog",
-                 self.IN_CONTAINER_CONFIGURED_CATALOG_PATH,
-             ]
-         elif command is Command.READ_WITH_STATE:
-             return [
-                 "read",
-                 "--config",
-                 self.IN_CONTAINER_CONFIG_PATH,
-                 "--catalog",
-                 self.IN_CONTAINER_CONFIGURED_CATALOG_PATH,
-                 "--state",
-                 self.IN_CONTAINER_STATE_PATH,
-             ]
-         else:
-             raise NotImplementedError(
-                 f"The connector runner does not support the {command} command"
-             )
-
-     async def get_container_env_variable_value(self, name: str) -> str | None:
-         return await self._connector_under_test_container.env_variable(name)
-
-     async def get_container_label(self, label: str) -> str | None:
-         return await self._connector_under_test_container.label(label)
-
-     async def get_container_entrypoint(self) -> str:
-         entrypoint = await self._connector_under_test_container.entrypoint()
-         assert entrypoint, "The connector container has no entrypoint"
-         return " ".join(entrypoint)
-
-     async def run(self) -> ExecutionResult:
-         async with asyncer.create_task_group() as task_group:
-             soon_result = task_group.soonify(self._run)()
-             task_group.soonify(self._log_progress)()
-         return soon_result.value
-
-     async def _run(
-         self,
-     ) -> ExecutionResult:
-         container = self._connector_under_test_container
-         current_user = (await container.with_exec(["whoami"]).stdout()).strip()
-         container = container.with_user(current_user)
-         container = container.with_exec(["mkdir", "-p", self.DATA_DIR])
-         # Do not cache downstream dagger layers
-         container = container.with_env_variable("CACHEBUSTER", str(uuid.uuid4()))
-
-         # When running locally, it's likely that record_obfuscator is within the user's home directory, so we expand it.
-         expanded_host_executable_path = os.path.expanduser(self.host_obfuscator_path)
-         container = container.with_file(
-             self.IN_CONTAINER_OBFUSCATOR_PATH,
-             self.dagger_client.host().file(expanded_host_executable_path),
-         )
-
-         for env_var_name, env_var_value in self.environment_variables.items():
-             container = container.with_env_variable(env_var_name, env_var_value)
-         if self.config:
-             container = container.with_new_file(
-                 self.IN_CONTAINER_CONFIG_PATH,
-                 contents=json.dumps(dict(self.config)),
-                 owner=current_user,
-             )
-         if self.state:
-             container = container.with_new_file(
-                 self.IN_CONTAINER_STATE_PATH,
-                 contents=json.dumps(self.state),
-                 owner=current_user,
-             )
-         if self.configured_catalog:
-             container = container.with_new_file(
-                 self.IN_CONTAINER_CONFIGURED_CATALOG_PATH,
-                 contents=self.configured_catalog.json(),
-                 owner=current_user,
-             )
-         if self.http_proxy:
-             container = await self.http_proxy.bind_container(container)
-
-         self.logger.info(f"⏳ Start running {self.command.value} command")
-
-         try:
-             entrypoint = await container.entrypoint()
-             assert entrypoint, "The connector container has no entrypoint"
-             airbyte_command = entrypoint + self.full_command
-
-             container = container.with_exec(
-                 [
-                     "sh",
-                     "-c",
-                     " ".join(airbyte_command)
-                     + f"| {self.IN_CONTAINER_OBFUSCATOR_PATH} > {self.IN_CONTAINER_OUTPUT_PATH} 2>&1 | tee -a {self.IN_CONTAINER_OUTPUT_PATH}",
-                 ]
-             )
-             executed_container = await container.sync()
-             # We export to disk because reading .stdout() or awaiting file.contents() might blow up the memory.
-             stdout_exported = await executed_container.file(
-                 self.IN_CONTAINER_OUTPUT_PATH
-             ).export(str(self.stdout_file_path))
-             if not stdout_exported:
-                 raise errors.ExportError(
-                     f"Failed to export {self.IN_CONTAINER_OUTPUT_PATH}"
-                 )
-
-             stderr = await executed_container.stderr()
-             self.stderr_file_path.write_text(stderr)
-             success = True
-         except dagger.ExecError as e:
-             self.stderr_file_path.write_text(e.stderr)
-             self.stdout_file_path.write_text(e.stdout)
-             executed_container = None
-             success = False
-
-         self.completion_event.set()
-         if not success:
-             self.logger.error(f"❌ Failed to run {self.command.value} command")
-         else:
-             self.logger.info(f"⌛ Finished running {self.command.value} command")
-
-         execution_result = await ExecutionResult.load(
-             command=self.command,
-             connector_under_test=self.connector_under_test,
-             actor_id=self.actor_id,
-             hashed_connection_id=self.hashed_connection_id,
-             configured_catalog=self.configured_catalog,
-             stdout_file_path=self.stdout_file_path,
-             stderr_file_path=self.stderr_file_path,
-             success=success,
-             http_dump=await self.http_proxy.retrieve_http_dump()
-             if self.http_proxy
-             else None,
-             executed_container=executed_container,
-             config=self.config,
-         )
-         await execution_result.save_artifacts(self.output_dir, self.duckdb_path)
-         return execution_result
-
-     async def _log_progress(self) -> None:
-         start_time = datetime.datetime.utcnow()
-         message = f"⏳ Still running {self.command.value} command"
-         while not self.completion_event.is_set():
-             duration = datetime.datetime.utcnow() - start_time
-             elapsed_seconds = duration.total_seconds()
-             if elapsed_seconds > 10 and round(elapsed_seconds) % 10 == 0:
-                 self.logger.info(
-                     f"{message} (duration: {self.format_duration(duration)})"
-                 )
-             await anyio.sleep(1)
-
-     @staticmethod
-     def format_duration(time_delta: datetime.timedelta) -> str:
-         total_seconds = time_delta.total_seconds()
-         if total_seconds < 60:
-             return f"{total_seconds:.2f}s"
-         minutes = int(total_seconds // 60)
-         seconds = int(total_seconds % 60)
-         return f"{minutes:02d}mn{seconds:02d}s"
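For context, a minimal standalone sketch (not part of either wheel) of how the removed ConnectorRunner assembled the in-container shell invocation in _run: the container entrypoint plus the arguments from _get_full_command, piped through record_obfuscator and tee'd into the output file. build_shell_command is a hypothetical helper name; the paths mirror the class constants above, including the "/user/local" spelling as it appears in the source.

from __future__ import annotations

DATA_DIR = "/airbyte/data"
IN_CONTAINER_CONFIG_PATH = f"{DATA_DIR}/config.json"
IN_CONTAINER_OUTPUT_PATH = f"{DATA_DIR}/output.txt"
OBFUSCATOR = "/user/local/bin/record_obfuscator.py"  # path copied as-is from the source


def build_shell_command(entrypoint: list[str], full_command: list[str]) -> list[str]:
    """Compose the argv that _run passed to container.with_exec(...)."""
    airbyte_command = entrypoint + full_command
    return [
        "sh",
        "-c",
        # Pipeline mirrored verbatim from the source above.
        " ".join(airbyte_command)
        + f"| {OBFUSCATOR} > {IN_CONTAINER_OUTPUT_PATH} 2>&1 | tee -a {IN_CONTAINER_OUTPUT_PATH}",
    ]


print(build_shell_command(["python", "main.py"], ["check", "--config", IN_CONTAINER_CONFIG_PATH]))
# ['sh', '-c', 'python main.py check --config /airbyte/data/config.json| /user/local/bin/record_obfuscator.py > /airbyte/data/output.txt 2>&1 | tee -a /airbyte/data/output.txt']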
airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/errors.py
@@ -1,7 +0,0 @@
- # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
- from __future__ import annotations
-
-
- class ExportError(Exception):
-     def __init__(self, message: str):
-         super().__init__(message)
airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/evaluation_modes.py
@@ -1,25 +0,0 @@
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- from __future__ import annotations
-
- from enum import Enum
-
-
- class TestEvaluationMode(Enum):
-     """
-     Tests may be run in "diagnostic" mode or "strict" mode.
-
-     When run in "diagnostic" mode, `AssertionError`s won't fail the test, but we will continue to surface
-     any errors to the test report.
-
-     In "strict" mode, tests pass/fail as usual.
-
-     In live tests, diagnostic mode is used for tests that don't affect the overall functionality of the
-     connector but that test an ideal state of the connector. Currently this is applicable to validation
-     tests only.
-
-     Diagnostic mode can be made available to a test with the @pytest.mark.allow_diagnostic_mode decorator,
-     combined with the --test-evaluation-mode=diagnostic flag.
-     """
-
-     DIAGNOSTIC = "diagnostic"
-     STRICT = "strict"
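A hedged sketch (not from the package) of how a validation test might have consumed this enum, per the docstring above. Only the marker name and the flag value come from the source; the test_evaluation_mode fixture is hypothetical wiring, shown for shape only.

import pytest

from live_tests.commons.evaluation_modes import TestEvaluationMode


@pytest.mark.allow_diagnostic_mode  # marker named in the docstring above
def test_validation_ideal_state(test_evaluation_mode: TestEvaluationMode) -> None:
    # Collect assertion failures instead of letting them propagate immediately.
    errors: list[AssertionError] = []
    try:
        assert True, "ideal-state expectation goes here"
    except AssertionError as e:
        errors.append(e)
    if test_evaluation_mode is TestEvaluationMode.STRICT and errors:
        raise errors[0]  # strict mode: tests pass/fail as usual
    for e in errors:
        print(f"[diagnostic] {e}")  # surfaced to the report without failing the test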
airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/hacks.py
@@ -1,23 +0,0 @@
- # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
-
- import copy
-
- import rich
-
- console = rich.get_console()
-
-
- def patch_configured_catalog(configured_catalog: dict) -> dict:
-     """
-     The configured catalog extracted from the platform can be incompatible with the airbyte-protocol.
-     This leads to validation errors when we serialize the configured catalog into a ConfiguredAirbyteCatalog object.
-     This function is a best-effort attempt to patch the configured catalog to make it compatible with the airbyte-protocol.
-     """
-     patched_catalog = copy.deepcopy(configured_catalog)
-     for stream in patched_catalog["streams"]:
-         if stream.get("destination_sync_mode") == "overwrite_dedup":
-             stream["destination_sync_mode"] = "overwrite"
-             console.log(
-                 f"Stream {stream['stream']['name']} destination_sync_mode has been patched from 'overwrite_dedup' to 'overwrite' to guarantee compatibility with the airbyte-protocol."
-             )
-     return patched_catalog
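A usage sketch for the helper above, grounded in the code as shown: a stream using the platform-only 'overwrite_dedup' mode is rewritten to 'overwrite', and the deepcopy leaves the input catalog untouched.

from live_tests.commons.hacks import patch_configured_catalog  # import path assumed from this diff

catalog = {
    "streams": [
        {
            "stream": {"name": "users"},
            "sync_mode": "full_refresh",
            "destination_sync_mode": "overwrite_dedup",
        }
    ]
}
patched = patch_configured_catalog(catalog)
assert patched["streams"][0]["destination_sync_mode"] == "overwrite"
# The original input is untouched thanks to copy.deepcopy:
assert catalog["streams"][0]["destination_sync_mode"] == "overwrite_dedup"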
airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/json_schema_helper.py
@@ -1,384 +0,0 @@
- #
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- #
- from __future__ import annotations
-
- from enum import Enum
- from functools import reduce, total_ordering
- from typing import Any, Dict, List, Mapping, Optional, Set, Union
-
- import dpath.util
- import pendulum
- from jsonref import JsonRef
-
-
- class CatalogField:
-     """Field class to represent cursor/pk fields.
-     It eases the read of values from records according to schema definition.
-     """
-
-     def __init__(self, schema: Mapping[str, Any], path: List[str]):
-         self.schema = schema
-         self.path = path
-         self.formats = self._detect_formats()
-
-     def _detect_formats(self) -> Set[str]:
-         """Extract set of formats/types for this field"""
-         format_ = []
-         try:
-             format_ = self.schema.get("format", self.schema["type"])
-             if not isinstance(format_, List):
-                 format_ = [format_]
-         except KeyError:
-             pass
-         return set(format_)
-
-     def _parse_value(self, value: Any) -> Any:
-         """Do actual parsing of the serialized value"""
-         if self.formats.intersection({"datetime", "date-time", "date"}):
-             if value is None and "null" not in self.formats:
-                 raise ValueError(
-                     f"Invalid field format. Value: {value}. Format: {self.formats}"
-                 )
-             # handle beautiful MySQL datetime, i.e. NULL datetime
-             if value.startswith("0000-00-00"):
-                 value = value.replace("0000-00-00", "0001-01-01")
-             return pendulum.parse(value)
-         return value
-
-     def parse(
-         self, record: Mapping[str, Any], path: Optional[List[Union[int, str]]] = None
-     ) -> Any:
-         """Extract field value from the record and cast it to native type"""
-         path = path or self.path
-         value = reduce(lambda data, key: data[key], path, record)
-         return self._parse_value(value)
-
-
- @total_ordering
- class ComparableType(Enum):
-     NULL = 0
-     BOOLEAN = 1
-     INTEGER = 2
-     NUMBER = 3
-     STRING = 4
-     OBJECT = 5
-
-     def __lt__(self, other: Any) -> bool:
-         if self.__class__ is other.__class__:
-             return self.value < other.value  # type: ignore
-         else:
-             return NotImplemented
-
-
- class JsonSchemaHelper:
-     """Helper class to simplify schema validation and read of records according to their schema."""
-
-     def __init__(self, schema):
-         self._schema = schema
-
-     def get_ref(self, path: str) -> Any:
-         """Resolve reference
-
-         :param path: reference (#/definitions/SomeClass, etc)
-         :return: part of schema that is definition of the reference
-         :raises KeyError: in case path can't be followed
-         """
-         node = self._schema
-         for segment in path.split("/")[1:]:
-             node = node[segment]
-         return node
-
-     def get_property(self, path: List[str]) -> Mapping[str, Any]:
-         """Get any part of schema according to provided path, resolves $refs if necessary
-
-         schema = {
-             "properties": {
-                 "field1": {
-                     "properties": {
-                         "nested_field": {
-                             <inner_object>
-                         }
-                     }
-                 },
-                 "field2": ...
-             }
-         }
-
-         helper = JsonSchemaHelper(schema)
-         helper.get_property(["field1", "nested_field"]) == <inner_object>
-
-         :param path: list of fields in the order of navigation
-         :return: discovered part of schema
-         :raises KeyError: in case path can't be followed
-         """
-         node = self._schema
-         for segment in path:
-             if "$ref" in node:
-                 node = self.get_ref(node["$ref"])
-             node = node["properties"][segment]
-         return node
-
-     def field(self, path: List[str]) -> CatalogField:
-         """Get schema property and wrap it into CatalogField.
-
-         CatalogField is a helper to ease the read of values from records according to schema definition.
-
-         :param path: list of fields in the order of navigation
-         :return: discovered part of schema wrapped in CatalogField
-         :raises KeyError: in case path can't be followed
-         """
-         return CatalogField(schema=self.get_property(path), path=path)
-
-     def get_node(self, path: List[Union[str, int]]) -> Any:
-         """Return part of schema by specified path
-
-         :param path: list of fields in the order of navigation
-         """
-
-         node = self._schema
-         for segment in path:
-             if "$ref" in node:
-                 node = self.get_ref(node["$ref"])
-             node = node[segment]
-         return node
-
-     def get_parent_path(self, path: str, separator="/") -> Any:
-         """
-         Returns the parent path of the supplied path
-         """
-         absolute_path = f"{separator}{path}" if not path.startswith(separator) else path
-         parent_path, _ = absolute_path.rsplit(sep=separator, maxsplit=1)
-         return parent_path
-
-     def get_parent(self, path: str, separator="/") -> Any:
-         """
-         Returns the parent dict of a given path within the schema dict
-         """
-         parent_path = self.get_parent_path(path, separator=separator)
-         if parent_path == "":
-             return self._schema
-         return dpath.util.get(self._schema, parent_path, separator=separator)
-
-     def find_nodes(self, keys: List[str]) -> List[List[Union[str, int]]]:
-         """Find all paths that lead to nodes with the specified keys.
-
-         :param keys: list of keys
-         :return: list of json object paths
-         """
-         variant_paths = []
-
-         def traverse_schema(_schema: Union[Dict[str, Any], List], path=None):
-             path = path or []
-             if path and path[-1] in keys:
-                 variant_paths.append(path)
-             if isinstance(_schema, dict):
-                 for item in _schema:
-                     traverse_schema(_schema[item], [*path, item])
-             elif isinstance(_schema, list):
-                 for i, item in enumerate(_schema):
-                     traverse_schema(_schema[i], [*path, i])
-
-         traverse_schema(self._schema)
-         return variant_paths
-
-
- def get_object_structure(obj: dict) -> List[str]:
-     """
-     Traverse through the object structure and compose a list of property keys, including nested ones.
-     This list reflects the object's structure as a list of all property key
-     paths. If an object is nested inside an array, we assume it has the same
-     structure as the first element.
-     :param obj: data object to get its structure
-     :returns list of object property keys paths
-     """
-     paths = []
-
-     def _traverse_obj_and_get_path(obj, path=""):
-         if path:
-             paths.append(path)
-         if isinstance(obj, dict):
-             return {
-                 k: _traverse_obj_and_get_path(v, path + "/" + k) for k, v in obj.items()
-             }
-         elif isinstance(obj, list) and len(obj) > 0:
-             return [_traverse_obj_and_get_path(obj[0], path + "/[]")]
-
-     _traverse_obj_and_get_path(obj)
-
-     return paths
-
-
- def get_expected_schema_structure(
-     schema: dict, annotate_one_of: bool = False
- ) -> List[str]:
-     """
-     Traverse through json schema and compose list of property keys that object expected to have.
-     :param annotate_one_of: Generate one_of index in path
-     :param schema: jsonschema to get expected paths
-     :returns list of object property keys paths
-     """
-     paths = []
-     if "$ref" in schema:
-         """
-         JsonRef doesn't work correctly with schemas that have references at the root, e.g.
-         {
-             "$ref": "#/definitions/ref"
-             "definitions": {
-                 "ref": ...
-             }
-         }
-         Since this schema has already been processed by the resolver, it should
-         contain only references to the definitions section, so replace the root
-         reference manually before processing it with the JsonRef library.
-         """
-         ref = schema["$ref"].split("/")[-1]
-         schema.update(schema["definitions"][ref])
-         schema.pop("$ref")
-     # Resolve all references to simplify schema processing.
-     schema = JsonRef.replace_refs(schema)
-
-     def _scan_schema(subschema, path=""):
-         if "oneOf" in subschema or "anyOf" in subschema:
-             if annotate_one_of:
-                 return [
-                     _scan_schema({"type": "object", **s}, path + f"({num})")
-                     for num, s in enumerate(
-                         subschema.get("oneOf") or subschema.get("anyOf")
-                     )
-                 ]
-             return [
-                 _scan_schema({"type": "object", **s}, path)
-                 for s in subschema.get("oneOf") or subschema.get("anyOf")
-             ]
-         schema_type = subschema.get("type", ["object", "null"])
-         if not isinstance(schema_type, list):
-             schema_type = [schema_type]
-         if "object" in schema_type:
-             props = subschema.get("properties")
-             if not props:
-                 # Handle objects with arbitrary properties:
-                 # {"type": "object", "additionalProperties": {"type": "string"}}
-                 if path:
-                     paths.append(path)
-                 return
-             return {k: _scan_schema(v, path + "/" + k) for k, v in props.items()}
-         elif "array" in schema_type:
-             items = subschema.get("items", {})
-             return [_scan_schema(items, path + "/[]")]
-         paths.append(path)
-
-     _scan_schema(schema)
-     return paths
-
-
- def flatten_tuples(to_flatten):
-     """Flatten a tuple of tuples into a single tuple."""
-     types = set()
-
-     if not isinstance(to_flatten, tuple):
-         to_flatten = (to_flatten,)
-     for thing in to_flatten:
-         if isinstance(thing, tuple):
-             types.update(flatten_tuples(thing))
-         else:
-             types.add(thing)
-     return tuple(types)
-
-
- def get_paths_in_connector_config(schema: dict) -> List[str]:
-     """
-     Traverse through the provided schema's values and extract the path_in_connector_config paths
-     :param schema: jsonschema containing values which may have path_in_connector_config attributes
-     :returns list of path_in_connector_config paths
-     """
-     return [
-         "/" + "/".join(value["path_in_connector_config"]) for value in schema.values()
-     ]
-
-
- def conforms_to_schema(record: Mapping[str, Any], schema: Mapping[str, Any]) -> bool:
-     """
-     Return true iff the record conforms to the supplied schema.
-
-     The record conforms to the supplied schema iff:
-     - All columns in the record are in the schema.
-     - For every column in the record, that column's type is equal to or narrower than the same column's
-       type in the schema.
-     """
-     schema_columns = set(schema.get("properties", {}).keys())
-     record_columns = set(record.keys())
-
-     if not record_columns.issubset(schema_columns):
-         return False
-
-     for column, definition in schema.get("properties", {}).items():
-         expected_type = definition.get("type")
-         value = record.get(column)
-
-         if value is not None:
-             if isinstance(expected_type, list):
-                 return any(_is_equal_or_narrower_type(value, e) for e in expected_type)
-             elif expected_type == "object":
-                 return isinstance(value, dict)
-             elif expected_type == "array":
-                 if not isinstance(value, list):
-                     return False
-                 array_type = definition.get("items", {}).get("type")
-                 if not all(_is_equal_or_narrower_type(v, array_type) for v in value):
-                     return False
-             elif not _is_equal_or_narrower_type(value, expected_type):
-                 return False
-
-     return True
-
-
- def _is_equal_or_narrower_type(value: Any, expected_type: str) -> bool:
-     if isinstance(value, list):
-         # We do not compare lists directly; the individual items are compared.
-         # If we hit this condition, it means that the expected type is not
-         # compatible with the inferred type.
-         return False
-
-     inferred_type = _get_inferred_type(value)  # may be None for unknown types
-
-     if inferred_type is None:
-         return False
-
-     return inferred_type <= ComparableType(
-         _get_comparable_type(expected_type)
-     )
-
-
- def _get_inferred_type(value: Any) -> Optional[ComparableType]:
-     if value is None:
-         return ComparableType.NULL
-     if isinstance(value, bool):
-         return ComparableType.BOOLEAN
-     if isinstance(value, int):
-         return ComparableType.INTEGER
-     if isinstance(value, float):
-         return ComparableType.NUMBER
-     if isinstance(value, str):
-         return ComparableType.STRING
-     if isinstance(value, dict):
-         return ComparableType.OBJECT
-     else:
-         return None
-
-
- def _get_comparable_type(value: Any) -> Optional[ComparableType]:
-     if value == "null":
-         return ComparableType.NULL
-     if value == "boolean":
-         return ComparableType.BOOLEAN
-     if value == "integer":
-         return ComparableType.INTEGER
-     if value == "number":
-         return ComparableType.NUMBER
-     if value == "string":
-         return ComparableType.STRING
-     if value == "object":
-         return ComparableType.OBJECT
-     else:
-         return None
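An illustrative check (not from the package) of the narrowing rule that conforms_to_schema implements via the ComparableType ordering above: a column's inferred type must be equal to or narrower than its declared type. The import path is assumed from this diff, mirroring the other live_tests.commons imports.

from live_tests.commons.json_schema_helper import conforms_to_schema

schema = {
    "properties": {
        "id": {"type": "integer"},
        "score": {"type": "number"},
        "tags": {"type": "array", "items": {"type": "string"}},
    }
}

assert conforms_to_schema({"id": 1, "score": 2, "tags": ["a"]}, schema)  # INTEGER <= NUMBER, so an int fits "number"
assert not conforms_to_schema({"id": "1"}, schema)  # STRING is wider than INTEGER
assert not conforms_to_schema({"unknown": 1}, schema)  # column not present in the schema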