airbyte-cdk 6.56.6__py3-none-any.whl → 6.56.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py +110 -53
- airbyte_cdk/test/entrypoint_wrapper.py +3 -0
- {airbyte_cdk-6.56.6.dist-info → airbyte_cdk-6.56.7.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.56.6.dist-info → airbyte_cdk-6.56.7.dist-info}/RECORD +8 -8
- {airbyte_cdk-6.56.6.dist-info → airbyte_cdk-6.56.7.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.56.6.dist-info → airbyte_cdk-6.56.7.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.56.6.dist-info → airbyte_cdk-6.56.7.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.56.6.dist-info → airbyte_cdk-6.56.7.dist-info}/entry_points.txt +0 -0
@@ -108,8 +108,7 @@ class ManifestNormalizer:
|
|
108
108
|
ManifestNormalizationException: Caught internally and handled by returning the original manifest.
|
109
109
|
"""
|
110
110
|
try:
|
111
|
-
self.
|
112
|
-
self._reference_schemas()
|
111
|
+
self._deduplicate_manifest()
|
113
112
|
|
114
113
|
return self._normalized_manifest
|
115
114
|
except ManifestNormalizationException:
|
@@ -131,7 +130,7 @@ class ManifestNormalizer:
|
|
131
130
|
|
132
131
|
yield from []
|
133
132
|
|
134
|
-
def
|
133
|
+
def _deduplicate_manifest(self) -> None:
|
135
134
|
"""
|
136
135
|
Find commonalities in the input JSON structure and refactor it to avoid redundancy.
|
137
136
|
"""
|
@@ -141,9 +140,117 @@ class ManifestNormalizer:
|
|
141
140
|
self._prepare_definitions()
|
142
141
|
# replace duplicates with references, if any
|
143
142
|
self._handle_duplicates(self._collect_duplicates())
|
143
|
+
# replace parent streams with $refs
|
144
|
+
self._replace_parent_streams_with_refs()
|
145
|
+
# clean dangling fields after resolving $refs
|
146
|
+
self._clean_dangling_fields()
|
144
147
|
except Exception as e:
|
145
148
|
raise ManifestNormalizationException(str(e))
|
146
149
|
|
150
|
+
def _replace_parent_streams_with_refs(self) -> None:
|
151
|
+
"""
|
152
|
+
For each stream in the manifest, if it has a retriever.partition_router with parent_stream_configs,
|
153
|
+
replace any 'stream' fields in those configs that are dicts and deeply equal to another stream object
|
154
|
+
with a $ref to the correct stream index.
|
155
|
+
"""
|
156
|
+
streams = self._normalized_manifest.get(STREAMS_TAG, [])
|
157
|
+
|
158
|
+
# Build a hash-to-index mapping for O(1) lookups
|
159
|
+
stream_hash_to_index = {}
|
160
|
+
for idx, stream in enumerate(streams):
|
161
|
+
stream_hash = self._hash_object(stream)
|
162
|
+
stream_hash_to_index[stream_hash] = idx
|
163
|
+
|
164
|
+
for idx, stream in enumerate(streams):
|
165
|
+
retriever = stream.get("retriever")
|
166
|
+
if not retriever:
|
167
|
+
continue
|
168
|
+
partition_router = retriever.get("partition_router")
|
169
|
+
routers = (
|
170
|
+
partition_router
|
171
|
+
if isinstance(partition_router, list)
|
172
|
+
else [partition_router]
|
173
|
+
if partition_router
|
174
|
+
else []
|
175
|
+
)
|
176
|
+
for router in routers:
|
177
|
+
if not isinstance(router, dict):
|
178
|
+
continue
|
179
|
+
if router.get("type") != "SubstreamPartitionRouter":
|
180
|
+
continue
|
181
|
+
parent_stream_configs = router.get("parent_stream_configs", [])
|
182
|
+
for parent_config in parent_stream_configs:
|
183
|
+
if not isinstance(parent_config, dict):
|
184
|
+
continue
|
185
|
+
stream_ref = parent_config.get("stream")
|
186
|
+
# Only replace if it's a dict and matches any stream in the manifest
|
187
|
+
if stream_ref is not None and isinstance(stream_ref, dict):
|
188
|
+
stream_ref_hash = self._hash_object(stream_ref)
|
189
|
+
if stream_ref_hash in stream_hash_to_index:
|
190
|
+
parent_config["stream"] = {
|
191
|
+
"$ref": f"#/streams/{stream_hash_to_index[stream_ref_hash]}"
|
192
|
+
}
|
193
|
+
|
194
|
+
def _clean_dangling_fields(self) -> None:
|
195
|
+
"""
|
196
|
+
Clean the manifest by removing unused definitions and schemas.
|
197
|
+
This method removes any definitions or schemas that are not referenced by any $ref in the manifest.
|
198
|
+
"""
|
199
|
+
|
200
|
+
def find_all_refs(obj: Dict[str, Any], refs: List[str]) -> None:
|
201
|
+
"""
|
202
|
+
Recursively find all $ref paths in the object.
|
203
|
+
|
204
|
+
Args:
|
205
|
+
obj: The object to search through
|
206
|
+
refs: List to store found reference paths
|
207
|
+
"""
|
208
|
+
if not isinstance(obj, dict):
|
209
|
+
return
|
210
|
+
|
211
|
+
for key, value in obj.items():
|
212
|
+
if key == "$ref" and isinstance(value, str):
|
213
|
+
# Remove the leading #/ from the ref path
|
214
|
+
refs.append(value[2:])
|
215
|
+
elif isinstance(value, dict):
|
216
|
+
find_all_refs(value, refs)
|
217
|
+
elif isinstance(value, list):
|
218
|
+
for item in value:
|
219
|
+
if isinstance(item, dict):
|
220
|
+
find_all_refs(item, refs)
|
221
|
+
|
222
|
+
def clean_section(section: Dict[str, Any], section_path: str) -> None:
|
223
|
+
"""
|
224
|
+
Clean a section by removing unreferenced fields.
|
225
|
+
|
226
|
+
Args:
|
227
|
+
section: The section to clean
|
228
|
+
section_path: The path to this section in the manifest
|
229
|
+
"""
|
230
|
+
for key in list(section.keys()):
|
231
|
+
current_path = f"{section_path}/{key}"
|
232
|
+
# Check if this path is referenced or is a parent of a referenced path
|
233
|
+
if not any(ref.startswith(current_path) for ref in all_refs):
|
234
|
+
del section[key]
|
235
|
+
|
236
|
+
# Find all references in the manifest
|
237
|
+
all_refs: List[str] = []
|
238
|
+
find_all_refs(self._normalized_manifest, all_refs)
|
239
|
+
|
240
|
+
# Clean definitions
|
241
|
+
if DEF_TAG in self._normalized_manifest:
|
242
|
+
clean_section(self._normalized_manifest[DEF_TAG], DEF_TAG)
|
243
|
+
# Remove empty definitions section
|
244
|
+
if not self._normalized_manifest[DEF_TAG]:
|
245
|
+
del self._normalized_manifest[DEF_TAG]
|
246
|
+
|
247
|
+
# Clean schemas
|
248
|
+
if SCHEMAS_TAG in self._normalized_manifest:
|
249
|
+
clean_section(self._normalized_manifest[SCHEMAS_TAG], SCHEMAS_TAG)
|
250
|
+
# Remove empty schemas section
|
251
|
+
if not self._normalized_manifest[SCHEMAS_TAG]:
|
252
|
+
del self._normalized_manifest[SCHEMAS_TAG]
|
253
|
+
|
147
254
|
def _prepare_definitions(self) -> None:
|
148
255
|
"""
|
149
256
|
Clean the definitions in the manifest by removing unnecessary properties.
|
@@ -163,43 +270,6 @@ class ManifestNormalizer:
|
|
163
270
|
if key != LINKED_TAG:
|
164
271
|
self._normalized_manifest[DEF_TAG].pop(key, None)
|
165
272
|
|
166
|
-
def _extract_stream_schema(self, stream: Dict[str, Any]) -> None:
|
167
|
-
"""
|
168
|
-
Extract the schema from the stream and add it to the `schemas` tag.
|
169
|
-
"""
|
170
|
-
|
171
|
-
stream_name = stream["name"]
|
172
|
-
# copy the value of the SCHEMA_TAG to the SCHEMAS_TAG with the stream name as key
|
173
|
-
schema = stream.get(SCHEMA_LOADER_TAG, {}).get(SCHEMA_TAG)
|
174
|
-
if not SCHEMAS_TAG in self._normalized_manifest.keys():
|
175
|
-
self._normalized_manifest[SCHEMAS_TAG] = {}
|
176
|
-
# add stream schema to the SCHEMAS_TAG
|
177
|
-
if not stream_name in self._normalized_manifest[SCHEMAS_TAG].keys():
|
178
|
-
# add the schema to the SCHEMAS_TAG with the stream name as key
|
179
|
-
self._normalized_manifest[SCHEMAS_TAG][stream_name] = schema
|
180
|
-
|
181
|
-
def _reference_schemas(self) -> None:
|
182
|
-
"""
|
183
|
-
Set the schema reference for the given stream in the manifest.
|
184
|
-
This function modifies the manifest in place.
|
185
|
-
"""
|
186
|
-
|
187
|
-
# reference the stream schema for the stream to where it's stored
|
188
|
-
if SCHEMAS_TAG in self._normalized_manifest.keys():
|
189
|
-
for stream in self._get_manifest_streams():
|
190
|
-
self._extract_stream_schema(stream)
|
191
|
-
self._set_stream_schema_ref(stream)
|
192
|
-
|
193
|
-
def _set_stream_schema_ref(self, stream: Dict[str, Any]) -> None:
|
194
|
-
"""
|
195
|
-
Set the schema reference for the given stream in the manifest.
|
196
|
-
This function modifies the manifest in place.
|
197
|
-
"""
|
198
|
-
stream_name = stream["name"]
|
199
|
-
if SCHEMAS_TAG in self._normalized_manifest.keys():
|
200
|
-
if stream_name in self._normalized_manifest[SCHEMAS_TAG]:
|
201
|
-
stream[SCHEMA_LOADER_TAG][SCHEMA_TAG] = self._create_schema_ref(stream_name)
|
202
|
-
|
203
273
|
def _replace_duplicates_with_refs(self, duplicates: DuplicatesType) -> None:
|
204
274
|
"""
|
205
275
|
Process duplicate objects and replace them with references.
|
@@ -447,16 +517,3 @@ class ManifestNormalizer:
|
|
447
517
|
"""
|
448
518
|
|
449
519
|
return {"$ref": f"#/{DEF_TAG}/{LINKED_TAG}/{type_key}/{key}"}
|
450
|
-
|
451
|
-
def _create_schema_ref(self, key: str) -> Dict[str, str]:
|
452
|
-
"""
|
453
|
-
Create a reference object for stream schema using the specified key.
|
454
|
-
|
455
|
-
Args:
|
456
|
-
key: The reference key to use
|
457
|
-
|
458
|
-
Returns:
|
459
|
-
A reference object in the proper format
|
460
|
-
"""
|
461
|
-
|
462
|
-
return {"$ref": f"#/{SCHEMAS_TAG}/{key}"}
|
@@ -373,6 +373,7 @@ def read(
|
|
373
373
|
expecting_exception: bool | None = None, # Deprecated, use `expected_outcome` instead.
|
374
374
|
*,
|
375
375
|
expected_outcome: ExpectedOutcome | None = None,
|
376
|
+
debug: bool = False,
|
376
377
|
) -> EntrypointOutput:
|
377
378
|
"""
|
378
379
|
config and state must be json serializable
|
@@ -394,6 +395,8 @@ def read(
|
|
394
395
|
"--catalog",
|
395
396
|
catalog_file,
|
396
397
|
]
|
398
|
+
if debug:
|
399
|
+
args.append("--debug")
|
397
400
|
if state is not None:
|
398
401
|
args.extend(
|
399
402
|
[
|
@@ -138,7 +138,7 @@ airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQ
|
|
138
138
|
airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=nlVvHC511NUyDEEIRBkoeDTAvLqKNp-hRy8D19z8tdk,5941
|
139
139
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=wnRUP0Xeru9Rbu5OexXSDN9QWDo8YU4tT9M2LDVOgGA,802
|
140
140
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=2UdpCz3yi7ISZTyqkQXSSy3dMxeyOWqV7OlAS5b9GVg,11568
|
141
|
-
airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py,sha256=
|
141
|
+
airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py,sha256=EtKjS9c94yNp3AwQC8KUCQaAYW5T3zvFYxoWYjc_buI,19729
|
142
142
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=pJmg78vqE5VfUrF_KJnWjucQ4k9IWFULeAxHCowrHXE,6806
|
143
143
|
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=d1GXdgEwolqQSfkMvREvIesUhMgMOMlxoXj4ctPCBwI,176855
|
144
144
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=TBC9AkGaUqHm2IKHMPN6punBIcY5tWGULowcLoAVkfw,1109
|
@@ -379,7 +379,7 @@ airbyte_cdk/sql/shared/sql_processor.py,sha256=1CwfC3fp9dWnHBpKtly7vGduf9ho_Mahi
|
|
379
379
|
airbyte_cdk/sql/types.py,sha256=XEIhRAo_ASd0kVLBkdLf5bHiRhNple-IJrC9TibcDdY,5880
|
380
380
|
airbyte_cdk/test/__init__.py,sha256=f_XdkOg4_63QT2k3BbKY34209lppwgw-svzfZstQEq4,199
|
381
381
|
airbyte_cdk/test/catalog_builder.py,sha256=-y05Cz1x0Dlk6oE9LSKhCozssV2gYBNtMdV5YYOPOtk,3015
|
382
|
-
airbyte_cdk/test/entrypoint_wrapper.py,sha256=
|
382
|
+
airbyte_cdk/test/entrypoint_wrapper.py,sha256=GVddh4-mDdPWDd-Qua85aAUAsOdf6uhdf1zkPL926Gg,16275
|
383
383
|
airbyte_cdk/test/mock_http/__init__.py,sha256=jE5kC6CQ0OXkTqKhciDnNVZHesBFVIA2YvkdFGwva7k,322
|
384
384
|
airbyte_cdk/test/mock_http/matcher.py,sha256=4Qj8UnJKZIs-eodshryce3SN1Ayc8GZpBETmP6hTEyc,1446
|
385
385
|
airbyte_cdk/test/mock_http/mocker.py,sha256=XgsjMtVoeMpRELPyALgrkHFauH9H5irxrz1Kcxh2yFY,8013
|
@@ -423,9 +423,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
|
|
423
423
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
424
424
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
425
425
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
426
|
-
airbyte_cdk-6.56.
|
427
|
-
airbyte_cdk-6.56.
|
428
|
-
airbyte_cdk-6.56.
|
429
|
-
airbyte_cdk-6.56.
|
430
|
-
airbyte_cdk-6.56.
|
431
|
-
airbyte_cdk-6.56.
|
426
|
+
airbyte_cdk-6.56.7.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
427
|
+
airbyte_cdk-6.56.7.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
428
|
+
airbyte_cdk-6.56.7.dist-info/METADATA,sha256=KqCnSx1P31HADnmA2gRYE6bTkOp0_55QBtNd_Zs-QBk,6392
|
429
|
+
airbyte_cdk-6.56.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
430
|
+
airbyte_cdk-6.56.7.dist-info/entry_points.txt,sha256=AKWbEkHfpzzk9nF9tqBUaw1MbvTM4mGtEzmZQm0ZWvM,139
|
431
|
+
airbyte_cdk-6.56.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|