airbyte-cdk 6.45.9__py3-none-any.whl → 6.46.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/__init__.py +9 -1
- airbyte_cdk/cli/airbyte_cdk/__init__.py +86 -0
- airbyte_cdk/cli/airbyte_cdk/_connector.py +179 -0
- airbyte_cdk/cli/airbyte_cdk/_image.py +95 -0
- airbyte_cdk/cli/airbyte_cdk/_manifest.py +24 -0
- airbyte_cdk/cli/airbyte_cdk/_secrets.py +150 -0
- airbyte_cdk/cli/airbyte_cdk/_util.py +43 -0
- airbyte_cdk/cli/airbyte_cdk/_version.py +13 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +10 -0
- airbyte_cdk/models/connector_metadata.py +97 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +108 -79
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +122 -45
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +87 -82
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +9 -0
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +2 -2
- airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py +462 -0
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +2 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +24 -24
- airbyte_cdk/sources/file_based/file_record_data.py +1 -1
- airbyte_cdk/test/standard_tests/connector_base.py +51 -25
- airbyte_cdk/test/standard_tests/declarative_sources.py +3 -1
- airbyte_cdk/test/standard_tests/test_resources.py +69 -0
- airbyte_cdk/test/standard_tests/util.py +79 -0
- airbyte_cdk/utils/docker.py +337 -0
- airbyte_cdk/utils/docker_image_templates.py +101 -0
- {airbyte_cdk-6.45.9.dist-info → airbyte_cdk-6.46.0.dist-info}/METADATA +6 -1
- {airbyte_cdk-6.45.9.dist-info → airbyte_cdk-6.46.0.dist-info}/RECORD +31 -18
- {airbyte_cdk-6.45.9.dist-info → airbyte_cdk-6.46.0.dist-info}/entry_points.txt +1 -0
- {airbyte_cdk-6.45.9.dist-info → airbyte_cdk-6.46.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.45.9.dist-info → airbyte_cdk-6.46.0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.45.9.dist-info → airbyte_cdk-6.46.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,462 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import copy
|
6
|
+
import hashlib
|
7
|
+
import json
|
8
|
+
from collections import defaultdict
|
9
|
+
from itertools import chain
|
10
|
+
from typing import Any, Callable, DefaultDict, Dict, Iterable, List, Optional, Tuple
|
11
|
+
|
12
|
+
from airbyte_cdk.sources.declarative.parsers.custom_exceptions import ManifestNormalizationException
|
13
|
+
|
14
|
+
# Type definitions for better readability
|
15
|
+
ManifestType = Dict[str, Any]
|
16
|
+
DefinitionsType = Dict[str, Any]
|
17
|
+
DuplicateOccurancesType = List[Tuple[List[str], Dict[str, Any], Dict[str, Any]]]
|
18
|
+
DuplicatesType = DefaultDict[str, DuplicateOccurancesType]
|
19
|
+
|
20
|
+
# Configuration constants
|
21
|
+
N_OCCURANCES = 2
|
22
|
+
|
23
|
+
DEF_TAG = "definitions"
|
24
|
+
LINKABLE_TAG = "linkable"
|
25
|
+
LINKED_TAG = "linked"
|
26
|
+
PROPERTIES_TAG = "properties"
|
27
|
+
SCHEMA_LOADER_TAG = "schema_loader"
|
28
|
+
SCHEMA_TAG = "schema"
|
29
|
+
SCHEMAS_TAG = "schemas"
|
30
|
+
STREAMS_TAG = "streams"
|
31
|
+
|
32
|
+
|
33
|
+
def _get_linkable_schema_tags(schema: DefinitionsType) -> List[str]:
|
34
|
+
"""
|
35
|
+
Extracts linkable tags from schema definitions.
|
36
|
+
This function identifies properties within a schema's definitions that are marked as linkable.
|
37
|
+
It traverses through each definition in the schema, examines its properties, and collects
|
38
|
+
the keys of properties that contain the LINKABLE_TAG.
|
39
|
+
|
40
|
+
Args:
|
41
|
+
schema (DefinitionsType): The schema definition dictionary to process
|
42
|
+
|
43
|
+
Returns:
|
44
|
+
List[str]: A deduplicated list of property keys that are marked as linkable
|
45
|
+
"""
|
46
|
+
|
47
|
+
# the linkable scope: ['definitions.*']
|
48
|
+
schema_definitions = schema.get(DEF_TAG, {})
|
49
|
+
|
50
|
+
linkable_tags: List[str] = []
|
51
|
+
# Extract linkable keys from properties
|
52
|
+
|
53
|
+
extract_linkable_keys: Callable[[Dict[str, Dict[str, Any]]], List[str]] = lambda properties: [
|
54
|
+
key for key, value in properties.items() if LINKABLE_TAG in value.keys()
|
55
|
+
]
|
56
|
+
|
57
|
+
# Process each root value to get its linkable keys
|
58
|
+
process_root: Callable[[Dict[str, Any]], List[str]] = lambda root_value: extract_linkable_keys(
|
59
|
+
root_value.get(PROPERTIES_TAG, {})
|
60
|
+
)
|
61
|
+
|
62
|
+
# Map the process_root function over all schema values and flatten the results
|
63
|
+
all_linkable_tags = chain.from_iterable(map(process_root, schema_definitions.values()))
|
64
|
+
|
65
|
+
# Add all found linkable tags to the tags list
|
66
|
+
linkable_tags.extend(all_linkable_tags)
|
67
|
+
|
68
|
+
# return unique tags only
|
69
|
+
return list(set(linkable_tags))
|
70
|
+
|
71
|
+
|
72
|
+
class ManifestNormalizer:
    """
    This class is responsible for normalizing the manifest by applying processing such as:
      - removing duplicated definitions
      - replacing them with references.

    To extend the functionality, use the `normalize()` method to include any additional processing steps.
    """

    def __init__(
        self,
        resolved_manifest: ManifestType,
        declarative_schema: DefinitionsType,
    ) -> None:
        """
        Args:
            resolved_manifest: The manifest with all `$ref`s already resolved.
            declarative_schema: The declarative component schema, used to find
                which component tags are marked as `linkable`.
        """
        self._resolved_manifest = resolved_manifest
        self._declarative_schema = declarative_schema
        # Work on a deep copy so the original manifest stays untouched and can
        # be returned as-is when normalization fails.
        self._normalized_manifest: ManifestType = copy.deepcopy(self._resolved_manifest)
        # get the tags marked as `linkable` in the component schema
        self._linkable_tags = _get_linkable_schema_tags(self._declarative_schema)

    def to_json_str(self) -> str:
        """Serialize the normalized manifest to a pretty-printed JSON string."""
        return json.dumps(self._normalized_manifest, indent=2)

    def normalize(self) -> ManifestType:
        """
        Normalizes the manifest by deduplicating and resolving schema references.

        This method processes the manifest in two steps:
        1. Deduplicates elements within the manifest
        2. Resolves and references schemas

        Returns:
            ManifestType: The normalized manifest if processing succeeds,
            or the original resolved manifest if normalization fails.

        Raises:
            ManifestNormalizationException: Caught internally and handled by returning the original manifest.
        """
        try:
            self._deduplicate_minifest()
            self._reference_schemas()

            return self._normalized_manifest
        except ManifestNormalizationException:
            # if any error occurs, we just return the original manifest.
            # TODO: enable debug logging
            return self._resolved_manifest

    def _get_manifest_streams(self) -> Iterable[Dict[str, Any]]:
        """
        Yield the stream definitions from the manifest.

        Returns:
            An Iterable of streams; empty when the manifest has no `streams` tag.
        """
        # `.get` with a default covers both the present and the absent case,
        # replacing the original explicit membership check.
        yield from self._normalized_manifest.get(STREAMS_TAG, [])

    # NOTE(review): "minifest" is a typo for "manifest"; the name is kept
    # unchanged so that any external callers of this private method keep working.
    def _deduplicate_minifest(self) -> None:
        """
        Find commonalities in the input JSON structure and refactor it to avoid redundancy.

        Raises:
            ManifestNormalizationException: Wrapping any error raised while deduplicating.
        """
        try:
            # prepare the `definitions` tag
            self._prepare_definitions()
            # replace duplicates with references, if any
            self._handle_duplicates(self._collect_duplicates())
        except Exception as e:
            raise ManifestNormalizationException(str(e))

    def _prepare_definitions(self) -> None:
        """
        Clean the definitions in the manifest by removing unnecessary properties.
        This function modifies the manifest in place.
        """
        # Ensure `definitions` and `definitions.linked` exist.
        definitions = self._normalized_manifest.setdefault(DEF_TAG, {})
        definitions.setdefault(LINKED_TAG, {})

        # remove everything from the definitions tag except `linked`; iterate a
        # snapshot of the keys because the dict is mutated during the loop
        for key in list(definitions.keys()):
            if key != LINKED_TAG:
                definitions.pop(key, None)

    def _extract_stream_schema(self, stream: Dict[str, Any]) -> None:
        """
        Extract the schema from the stream and add it to the `schemas` tag,
        keyed by the stream name. Modifies the manifest in place.
        """
        stream_name = stream["name"]
        # copy the value of the SCHEMA_TAG to the SCHEMAS_TAG with the stream name as key
        schema = stream.get(SCHEMA_LOADER_TAG, {}).get(SCHEMA_TAG)
        schemas = self._normalized_manifest.setdefault(SCHEMAS_TAG, {})
        # only the first schema seen for a stream name is kept
        if stream_name not in schemas:
            schemas[stream_name] = schema

    def _reference_schemas(self) -> None:
        """
        Replace each stream's inline schema with a reference to the shared
        `schemas` section. Modifies the manifest in place.
        """
        # NOTE(review): extraction only runs when a `schemas` tag already
        # exists in the manifest — presumably intentional; confirm upstream.
        if SCHEMAS_TAG in self._normalized_manifest:
            for stream in self._get_manifest_streams():
                self._extract_stream_schema(stream)
                self._set_stream_schema_ref(stream)

    def _set_stream_schema_ref(self, stream: Dict[str, Any]) -> None:
        """
        Point the stream's schema loader at the shared `schemas` entry for the
        stream. Modifies the manifest in place.
        """
        stream_name = stream["name"]
        schemas = self._normalized_manifest.get(SCHEMAS_TAG, {})
        if stream_name in schemas:
            stream[SCHEMA_LOADER_TAG][SCHEMA_TAG] = self._create_schema_ref(stream_name)

    def _replace_duplicates_with_refs(self, duplicates: DuplicatesType) -> None:
        """
        Process duplicate objects and replace them with references.

        Args:
            duplicates: The duplicates dictionary collected from the given manifest.
        """
        for occurrences in duplicates.values():
            type_key, key, sample_value = self._get_occurance_samples(occurrences)
            is_linked_def = self._is_linked_definition(type_key, key)

            # Add to definitions if not there already
            if not is_linked_def:
                self._add_to_linked_definitions(type_key, key, sample_value)

            # Replace occurrences with references
            for _, parent_obj, value in occurrences:
                if is_linked_def:
                    # only occurrences that exactly match the already-linked
                    # value are replaced; mismatching values are left alone
                    if value == self._get_linked_definition_value(type_key, key):
                        parent_obj[key] = self._create_linked_definition_ref(type_key, key)
                else:
                    parent_obj[key] = self._create_linked_definition_ref(type_key, key)

    def _handle_duplicates(self, duplicates: DuplicatesType) -> None:
        """
        Process the duplicates and replace them with references.

        Args:
            duplicates: The duplicates dictionary collected from the given manifest.
        """
        if duplicates:
            self._replace_duplicates_with_refs(duplicates)

    def _add_duplicate(
        self,
        duplicates: DuplicatesType,
        current_path: List[str],
        obj: Dict[str, Any],
        value: Any,
        key: Optional[str] = None,
    ) -> None:
        """
        Adds a duplicate record of an observed object by computing a unique hash for the provided value.

        This function computes a hash for the given value (or a dictionary composed of the key and value
        if a key is provided) and appends a tuple containing the current path, the original object, and
        the value to the duplicates dictionary under the corresponding hash, so equal values land in the
        same bucket.

        Parameters:
            duplicates (DuplicatesType): The dictionary to store duplicate records.
            current_path (List[str]): The list of keys or indices representing the current location in the object hierarchy.
            obj (Dict): The original dictionary object where the duplicate is observed.
            value (Any): The value to be hashed and used for identifying duplicates.
            key (Optional[str]): An optional key that, if provided, wraps the value in a dictionary before hashing.
        """
        # create hash for each duplicate observed
        value_to_hash = {key: value} if key is not None else value
        duplicates[self._hash_object(value_to_hash)].append((current_path, obj, value))

    def _add_to_linked_definitions(
        self,
        type_key: str,
        key: str,
        value: Any,
    ) -> None:
        """
        Add a value to `definitions.linked.<type_key>.<key>`.

        An existing entry is never overwritten — the first value registered wins.

        Args:
            type_key: The component type bucket to use
            key: The key to use
            value: The value to add
        """
        linked = self._normalized_manifest[DEF_TAG][LINKED_TAG]
        linked.setdefault(type_key, {})
        if key not in linked[type_key]:
            linked[type_key][key] = value

    def _collect_duplicates(self) -> DuplicatesType:
        """
        Traverse the JSON object and collect all potential duplicate values and objects.

        Returns:
            duplicates: A dictionary of duplicate objects, keyed by value hash.

        Raises:
            ManifestNormalizationException: Wrapping any error raised while collecting.
        """

        def _collect(obj: Dict[str, Any], path: Optional[List[str]] = None) -> None:
            """
            The closure to recursively collect duplicates in the JSON object.

            Args:
                obj: The current object being analyzed.
                path: The current path in the object hierarchy.
            """
            if not isinstance(obj, dict):
                return

            path = [] if path is None else path
            for key, value in obj.items():
                # do not collect duplicates from `definitions` tag
                if key == DEF_TAG:
                    continue

                current_path = path + [key]

                if isinstance(value, dict):
                    # First process nested dictionaries
                    _collect(value, current_path)
                    # Process allowed-only component tags
                    if key in self._linkable_tags:
                        self._add_duplicate(duplicates, current_path, obj, value)
                # handle primitive types
                elif isinstance(value, (str, int, float, bool)):
                    # Process allowed-only field tags
                    if key in self._linkable_tags:
                        self._add_duplicate(duplicates, current_path, obj, value, key)
                # handle list cases
                elif isinstance(value, list):
                    for index, item in enumerate(value):
                        _collect(item, current_path + [str(index)])

        duplicates: DuplicatesType = defaultdict(list)
        try:
            # nothing can be linked when the schema declares no linkable tags
            if self._linkable_tags:
                _collect(self._normalized_manifest)
                # clean non-duplicates and sort based on the count of occurrences
                return self._clean_and_sort_duplicates(duplicates)
            return duplicates
        except Exception as e:
            raise ManifestNormalizationException(str(e))

    def _clean_and_sort_duplicates(self, duplicates: DuplicatesType) -> DuplicatesType:
        """
        Clean non-duplicates and sort the duplicates by their occurrences.

        Args:
            duplicates: The duplicates dictionary to sort

        Returns:
            A sorted duplicates dictionary; entries seen fewer than N_OCCURANCES
            times are dropped, and more frequent entries come first.
        """
        # clean non-duplicates
        real_duplicates = {
            key: occurrences
            for key, occurrences in duplicates.items()
            if len(occurrences) >= N_OCCURANCES
        }

        # sort the duplicates by their occurrences, more frequent ones go first
        ordered = sorted(real_duplicates.items(), key=lambda item: len(item[1]), reverse=True)

        return defaultdict(list, ordered)

    def _hash_object(self, obj: Dict[str, Any]) -> str:
        """
        Create a unique hash for a dictionary object.

        MD5 is used purely as a content fingerprint here, not for security.

        Args:
            obj: The dictionary to hash

        Returns:
            A hashed string
        """
        # Sort keys to ensure consistent hash for same content
        return hashlib.md5(json.dumps(obj, sort_keys=True).encode()).hexdigest()

    def _is_linked_definition(self, type_key: str, key: str) -> bool:
        """
        Check if the key already exists under `definitions.linked.<type_key>`.

        Args:
            type_key: The component type bucket to check
            key: The key to check

        Returns:
            True if the key exists in the linked definitions, False otherwise
        """
        linked = self._normalized_manifest[DEF_TAG][LINKED_TAG]
        return type_key in linked and key in linked[type_key]

    def _get_linked_definition_value(self, type_key: str, key: str) -> Any:
        """
        Get the value of a linked definition by its key.

        Args:
            type_key: The component type bucket to check
            key: The key to check

        Returns:
            The value of the linked definition

        Raises:
            ManifestNormalizationException: When `type_key` exists but `key` does not.
        """
        linked = self._normalized_manifest[DEF_TAG][LINKED_TAG]
        if type_key in linked:
            if key in linked[type_key]:
                return linked[type_key][key]
            raise ManifestNormalizationException(
                f"Key {key} not found in linked definitions. Please check the manifest."
            )
        # NOTE(review): implicitly returns None when `type_key` is absent —
        # preserved from the original implementation for compatibility.
        return None

    def _get_occurance_samples(self, occurrences: DuplicateOccurancesType) -> Tuple[str, str, Any]:
        """
        Get the key from the occurrences list.

        Args:
            occurrences: The occurrences list

        Returns:
            The (type, key, value) triple sampled from the first occurrence;
            the key is the component's name, i.e. the last part of its path.
        """
        # Take the value from the first occurrence, as they are the same
        path, parent_obj, value = occurrences[0]
        # NOTE(review): assumes the parent object always carries a "type" field
        return parent_obj["type"], path[-1], value

    def _create_linked_definition_ref(self, type_key: str, key: str) -> Dict[str, str]:
        """
        Create a reference object for the linked definitions using the specified key.

        Args:
            type_key: The component type bucket of the reference
            key: The reference key to use

        Returns:
            A reference object in the proper format
        """
        return {"$ref": f"#/{DEF_TAG}/{LINKED_TAG}/{type_key}/{key}"}

    def _create_schema_ref(self, key: str) -> Dict[str, str]:
        """
        Create a reference object for stream schema using the specified key.

        Args:
            key: The reference key to use

        Returns:
            A reference object in the proper format
        """
        return {"$ref": f"#/{SCHEMAS_TAG}/{key}"}
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import re
|
6
|
-
from typing import Any, Mapping, Set, Tuple, Union
|
6
|
+
from typing import Any, Dict, Mapping, Set, Tuple, Union
|
7
7
|
|
8
8
|
from airbyte_cdk.sources.declarative.parsers.custom_exceptions import (
|
9
9
|
CircularReferenceException,
|
@@ -99,7 +99,7 @@ class ManifestReferenceResolver:
|
|
99
99
|
until we find a key with the given path, or until there is nothing to traverse.
|
100
100
|
"""
|
101
101
|
|
102
|
-
def preprocess_manifest(self, manifest: Mapping[str, Any]) ->
|
102
|
+
def preprocess_manifest(self, manifest: Mapping[str, Any]) -> Dict[str, Any]:
|
103
103
|
"""
|
104
104
|
:param manifest: incoming manifest that could have references to previously defined components
|
105
105
|
:return:
|
@@ -2194,11 +2194,20 @@ class ModelToComponentFactory:
|
|
2194
2194
|
|
2195
2195
|
api_budget = self._api_budget
|
2196
2196
|
|
2197
|
+
# Removes QueryProperties components from the interpolated mappings because it has been designed
|
2198
|
+
# to be used by the SimpleRetriever and will be resolved from the provider from the slice directly
|
2199
|
+
# instead of through jinja interpolation
|
2200
|
+
request_parameters: Optional[Union[str, Mapping[str, str]]]
|
2201
|
+
if isinstance(model.request_parameters, Mapping):
|
2202
|
+
request_parameters = self._remove_query_properties(model.request_parameters)
|
2203
|
+
else:
|
2204
|
+
request_parameters = model.request_parameters
|
2205
|
+
|
2197
2206
|
request_options_provider = InterpolatedRequestOptionsProvider(
|
2198
2207
|
request_body_data=model.request_body_data,
|
2199
2208
|
request_body_json=model.request_body_json,
|
2200
2209
|
request_headers=model.request_headers,
|
2201
|
-
request_parameters=
|
2210
|
+
request_parameters=request_parameters,
|
2202
2211
|
query_properties_key=query_properties_key,
|
2203
2212
|
config=config,
|
2204
2213
|
parameters=model.parameters or {},
|
@@ -2818,6 +2827,10 @@ class ModelToComponentFactory:
|
|
2818
2827
|
else None
|
2819
2828
|
)
|
2820
2829
|
|
2830
|
+
if model.transform_before_filtering is None:
|
2831
|
+
# default to False if not set
|
2832
|
+
model.transform_before_filtering = False
|
2833
|
+
|
2821
2834
|
assert model.transform_before_filtering is not None # for mypy
|
2822
2835
|
|
2823
2836
|
transform_before_filtering = model.transform_before_filtering
|
@@ -2832,6 +2845,10 @@ class ModelToComponentFactory:
|
|
2832
2845
|
)
|
2833
2846
|
transform_before_filtering = True
|
2834
2847
|
|
2848
|
+
if model.schema_normalization is None:
|
2849
|
+
# default to no schema normalization if not set
|
2850
|
+
model.schema_normalization = SchemaNormalizationModel.None_
|
2851
|
+
|
2835
2852
|
schema_normalization = (
|
2836
2853
|
TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
|
2837
2854
|
if isinstance(model.schema_normalization, SchemaNormalizationModel)
|
@@ -2938,16 +2955,9 @@ class ModelToComponentFactory:
|
|
2938
2955
|
# When translating JSON schema into Pydantic models, enforcing types for arrays containing both
|
2939
2956
|
# concrete string complex object definitions like QueryProperties would get resolved to Union[str, Any].
|
2940
2957
|
# This adds the extra validation that we couldn't get for free in Pydantic model generation
|
2941
|
-
if (
|
2942
|
-
isinstance(request_parameter, Mapping)
|
2943
|
-
and request_parameter.get("type") == "QueryProperties"
|
2944
|
-
):
|
2958
|
+
if isinstance(request_parameter, QueryPropertiesModel):
|
2945
2959
|
query_properties_key = key
|
2946
2960
|
query_properties_definitions.append(request_parameter)
|
2947
|
-
elif not isinstance(request_parameter, str):
|
2948
|
-
raise ValueError(
|
2949
|
-
f"Each element of request_parameters should be of type str or QueryProperties, but received {request_parameter.get('type')}"
|
2950
|
-
)
|
2951
2961
|
|
2952
2962
|
if len(query_properties_definitions) > 1:
|
2953
2963
|
raise ValueError(
|
@@ -2955,17 +2965,8 @@ class ModelToComponentFactory:
|
|
2955
2965
|
)
|
2956
2966
|
|
2957
2967
|
if len(query_properties_definitions) == 1:
|
2958
|
-
query_properties = self.
|
2959
|
-
|
2960
|
-
component_definition=query_properties_definitions[0],
|
2961
|
-
config=config,
|
2962
|
-
)
|
2963
|
-
|
2964
|
-
# Removes QueryProperties components from the interpolated mappings because it will be resolved in
|
2965
|
-
# the provider from the slice directly instead of through jinja interpolation
|
2966
|
-
if isinstance(model.requester.request_parameters, Mapping):
|
2967
|
-
model.requester.request_parameters = self._remove_query_properties(
|
2968
|
-
model.requester.request_parameters
|
2968
|
+
query_properties = self._create_component_from_model(
|
2969
|
+
model=query_properties_definitions[0], config=config
|
2969
2970
|
)
|
2970
2971
|
|
2971
2972
|
requester = self._create_component_from_model(
|
@@ -3088,13 +3089,12 @@ class ModelToComponentFactory:
|
|
3088
3089
|
|
3089
3090
|
@staticmethod
|
3090
3091
|
def _remove_query_properties(
|
3091
|
-
request_parameters: Mapping[str, Union[
|
3092
|
-
) -> Mapping[str,
|
3092
|
+
request_parameters: Mapping[str, Union[str, QueryPropertiesModel]],
|
3093
|
+
) -> Mapping[str, str]:
|
3093
3094
|
return {
|
3094
3095
|
parameter_field: request_parameter
|
3095
3096
|
for parameter_field, request_parameter in request_parameters.items()
|
3096
|
-
if not isinstance(request_parameter,
|
3097
|
-
or not request_parameter.get("type") == "QueryProperties"
|
3097
|
+
if not isinstance(request_parameter, QueryPropertiesModel)
|
3098
3098
|
}
|
3099
3099
|
|
3100
3100
|
def create_state_delegating_stream(
|
@@ -4,7 +4,9 @@
|
|
4
4
|
from __future__ import annotations
|
5
5
|
|
6
6
|
import abc
|
7
|
+
import importlib
|
7
8
|
import inspect
|
9
|
+
import os
|
8
10
|
import sys
|
9
11
|
from collections.abc import Callable
|
10
12
|
from pathlib import Path
|
@@ -22,17 +24,61 @@ from airbyte_cdk.test.standard_tests._job_runner import IConnector, run_test_job
|
|
22
24
|
from airbyte_cdk.test.standard_tests.models import (
|
23
25
|
ConnectorTestScenario,
|
24
26
|
)
|
25
|
-
|
26
|
-
ACCEPTANCE_TEST_CONFIG
|
27
|
-
|
27
|
+
from airbyte_cdk.test.standard_tests.test_resources import (
|
28
|
+
ACCEPTANCE_TEST_CONFIG,
|
29
|
+
find_connector_root,
|
30
|
+
)
|
28
31
|
|
29
32
|
|
30
33
|
class ConnectorTestSuiteBase(abc.ABC):
|
31
34
|
"""Base class for connector test suites."""
|
32
35
|
|
33
|
-
connector: type[IConnector] | Callable[[], IConnector] | None
|
36
|
+
connector: type[IConnector] | Callable[[], IConnector] | None # type: ignore [reportRedeclaration]
|
34
37
|
"""The connector class or a factory function that returns an scenario of IConnector."""
|
35
38
|
|
39
|
+
@classproperty # type: ignore [no-redef]
|
40
|
+
def connector(cls) -> type[IConnector] | Callable[[], IConnector] | None:
|
41
|
+
"""Get the connector class for the test suite.
|
42
|
+
|
43
|
+
This assumes a python connector and should be overridden by subclasses to provide the
|
44
|
+
specific connector class to be tested.
|
45
|
+
"""
|
46
|
+
connector_root = cls.get_connector_root_dir()
|
47
|
+
connector_name = connector_root.absolute().name
|
48
|
+
|
49
|
+
expected_module_name = connector_name.replace("-", "_").lower()
|
50
|
+
expected_class_name = connector_name.replace("-", "_").title().replace("_", "")
|
51
|
+
|
52
|
+
# dynamically import and get the connector class: <expected_module_name>.<expected_class_name>
|
53
|
+
|
54
|
+
cwd_snapshot = Path().absolute()
|
55
|
+
os.chdir(connector_root)
|
56
|
+
|
57
|
+
# Dynamically import the module
|
58
|
+
try:
|
59
|
+
module = importlib.import_module(expected_module_name)
|
60
|
+
except ModuleNotFoundError as e:
|
61
|
+
raise ImportError(f"Could not import module '{expected_module_name}'.") from e
|
62
|
+
finally:
|
63
|
+
# Change back to the original working directory
|
64
|
+
os.chdir(cwd_snapshot)
|
65
|
+
|
66
|
+
# Dynamically get the class from the module
|
67
|
+
try:
|
68
|
+
return cast(type[IConnector], getattr(module, expected_class_name))
|
69
|
+
except AttributeError as e:
|
70
|
+
# We did not find it based on our expectations, so let's check if we can find it
|
71
|
+
# with a case-insensitive match.
|
72
|
+
matching_class_name = next(
|
73
|
+
(name for name in dir(module) if name.lower() == expected_class_name.lower()),
|
74
|
+
None,
|
75
|
+
)
|
76
|
+
if not matching_class_name:
|
77
|
+
raise ImportError(
|
78
|
+
f"Module '{expected_module_name}' does not have a class named '{expected_class_name}'."
|
79
|
+
) from e
|
80
|
+
return cast(type[IConnector], getattr(module, matching_class_name))
|
81
|
+
|
36
82
|
@classmethod
|
37
83
|
def get_test_class_dir(cls) -> Path:
|
38
84
|
"""Get the file path that contains the class."""
|
@@ -81,27 +127,7 @@ class ConnectorTestSuiteBase(abc.ABC):
|
|
81
127
|
@classmethod
|
82
128
|
def get_connector_root_dir(cls) -> Path:
|
83
129
|
"""Get the root directory of the connector."""
|
84
|
-
|
85
|
-
if (parent / MANIFEST_YAML).exists():
|
86
|
-
return parent
|
87
|
-
if (parent / ACCEPTANCE_TEST_CONFIG).exists():
|
88
|
-
return parent
|
89
|
-
if parent.name == "airbyte_cdk":
|
90
|
-
break
|
91
|
-
# If we reach here, we didn't find the manifest file in any parent directory
|
92
|
-
# Check if the manifest file exists in the current directory
|
93
|
-
for parent in Path.cwd().parents:
|
94
|
-
if (parent / MANIFEST_YAML).exists():
|
95
|
-
return parent
|
96
|
-
if (parent / ACCEPTANCE_TEST_CONFIG).exists():
|
97
|
-
return parent
|
98
|
-
if parent.name == "airbyte_cdk":
|
99
|
-
break
|
100
|
-
|
101
|
-
raise FileNotFoundError(
|
102
|
-
"Could not find connector root directory relative to "
|
103
|
-
f"'{str(cls.get_test_class_dir())}' or '{str(Path.cwd())}'."
|
104
|
-
)
|
130
|
+
return find_connector_root([cls.get_test_class_dir(), Path.cwd()])
|
105
131
|
|
106
132
|
@classproperty
|
107
133
|
def acceptance_test_config_path(cls) -> Path:
|