airbyte-cdk 6.56.6__py3-none-any.whl → 6.56.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -108,8 +108,7 @@ class ManifestNormalizer:
108
108
  ManifestNormalizationException: Caught internally and handled by returning the original manifest.
109
109
  """
110
110
  try:
111
- self._deduplicate_minifest()
112
- self._reference_schemas()
111
+ self._deduplicate_manifest()
113
112
 
114
113
  return self._normalized_manifest
115
114
  except ManifestNormalizationException:
@@ -131,7 +130,7 @@ class ManifestNormalizer:
131
130
 
132
131
  yield from []
133
132
 
134
- def _deduplicate_minifest(self) -> None:
133
+ def _deduplicate_manifest(self) -> None:
135
134
  """
136
135
  Find commonalities in the input JSON structure and refactor it to avoid redundancy.
137
136
  """
@@ -141,9 +140,117 @@ class ManifestNormalizer:
141
140
  self._prepare_definitions()
142
141
  # replace duplicates with references, if any
143
142
  self._handle_duplicates(self._collect_duplicates())
143
+ # replace parent streams with $refs
144
+ self._replace_parent_streams_with_refs()
145
+ # clean dangling fields after resolving $refs
146
+ self._clean_dangling_fields()
144
147
  except Exception as e:
145
148
  raise ManifestNormalizationException(str(e))
146
149
 
150
+ def _replace_parent_streams_with_refs(self) -> None:
151
+ """
152
+ For each stream in the manifest, if it has a retriever.partition_router with parent_stream_configs,
153
+ replace any 'stream' fields in those configs that are dicts and deeply equal to another stream object
154
+ with a $ref to the correct stream index.
155
+ """
156
+ streams = self._normalized_manifest.get(STREAMS_TAG, [])
157
+
158
+ # Build a hash-to-index mapping for O(1) lookups
159
+ stream_hash_to_index = {}
160
+ for idx, stream in enumerate(streams):
161
+ stream_hash = self._hash_object(stream)
162
+ stream_hash_to_index[stream_hash] = idx
163
+
164
+ for idx, stream in enumerate(streams):
165
+ retriever = stream.get("retriever")
166
+ if not retriever:
167
+ continue
168
+ partition_router = retriever.get("partition_router")
169
+ routers = (
170
+ partition_router
171
+ if isinstance(partition_router, list)
172
+ else [partition_router]
173
+ if partition_router
174
+ else []
175
+ )
176
+ for router in routers:
177
+ if not isinstance(router, dict):
178
+ continue
179
+ if router.get("type") != "SubstreamPartitionRouter":
180
+ continue
181
+ parent_stream_configs = router.get("parent_stream_configs", [])
182
+ for parent_config in parent_stream_configs:
183
+ if not isinstance(parent_config, dict):
184
+ continue
185
+ stream_ref = parent_config.get("stream")
186
+ # Only replace if it's a dict and matches any stream in the manifest
187
+ if stream_ref is not None and isinstance(stream_ref, dict):
188
+ stream_ref_hash = self._hash_object(stream_ref)
189
+ if stream_ref_hash in stream_hash_to_index:
190
+ parent_config["stream"] = {
191
+ "$ref": f"#/streams/{stream_hash_to_index[stream_ref_hash]}"
192
+ }
193
+
194
+ def _clean_dangling_fields(self) -> None:
195
+ """
196
+ Clean the manifest by removing unused definitions and schemas.
197
+ This method removes any definitions or schemas that are not referenced by any $ref in the manifest.
198
+ """
199
+
200
+ def find_all_refs(obj: Dict[str, Any], refs: List[str]) -> None:
201
+ """
202
+ Recursively find all $ref paths in the object.
203
+
204
+ Args:
205
+ obj: The object to search through
206
+ refs: List to store found reference paths
207
+ """
208
+ if not isinstance(obj, dict):
209
+ return
210
+
211
+ for key, value in obj.items():
212
+ if key == "$ref" and isinstance(value, str):
213
+ # Remove the leading #/ from the ref path
214
+ refs.append(value[2:])
215
+ elif isinstance(value, dict):
216
+ find_all_refs(value, refs)
217
+ elif isinstance(value, list):
218
+ for item in value:
219
+ if isinstance(item, dict):
220
+ find_all_refs(item, refs)
221
+
222
+ def clean_section(section: Dict[str, Any], section_path: str) -> None:
223
+ """
224
+ Clean a section by removing unreferenced fields.
225
+
226
+ Args:
227
+ section: The section to clean
228
+ section_path: The path to this section in the manifest
229
+ """
230
+ for key in list(section.keys()):
231
+ current_path = f"{section_path}/{key}"
232
+ # Check if this path is referenced or is a parent of a referenced path
233
+ if not any(ref.startswith(current_path) for ref in all_refs):
234
+ del section[key]
235
+
236
+ # Find all references in the manifest
237
+ all_refs: List[str] = []
238
+ find_all_refs(self._normalized_manifest, all_refs)
239
+
240
+ # Clean definitions
241
+ if DEF_TAG in self._normalized_manifest:
242
+ clean_section(self._normalized_manifest[DEF_TAG], DEF_TAG)
243
+ # Remove empty definitions section
244
+ if not self._normalized_manifest[DEF_TAG]:
245
+ del self._normalized_manifest[DEF_TAG]
246
+
247
+ # Clean schemas
248
+ if SCHEMAS_TAG in self._normalized_manifest:
249
+ clean_section(self._normalized_manifest[SCHEMAS_TAG], SCHEMAS_TAG)
250
+ # Remove empty schemas section
251
+ if not self._normalized_manifest[SCHEMAS_TAG]:
252
+ del self._normalized_manifest[SCHEMAS_TAG]
253
+
147
254
  def _prepare_definitions(self) -> None:
148
255
  """
149
256
  Clean the definitions in the manifest by removing unnecessary properties.
@@ -163,43 +270,6 @@ class ManifestNormalizer:
163
270
  if key != LINKED_TAG:
164
271
  self._normalized_manifest[DEF_TAG].pop(key, None)
165
272
 
166
- def _extract_stream_schema(self, stream: Dict[str, Any]) -> None:
167
- """
168
- Extract the schema from the stream and add it to the `schemas` tag.
169
- """
170
-
171
- stream_name = stream["name"]
172
- # copy the value of the SCHEMA_TAG to the SCHEMAS_TAG with the stream name as key
173
- schema = stream.get(SCHEMA_LOADER_TAG, {}).get(SCHEMA_TAG)
174
- if not SCHEMAS_TAG in self._normalized_manifest.keys():
175
- self._normalized_manifest[SCHEMAS_TAG] = {}
176
- # add stream schema to the SCHEMAS_TAG
177
- if not stream_name in self._normalized_manifest[SCHEMAS_TAG].keys():
178
- # add the schema to the SCHEMAS_TAG with the stream name as key
179
- self._normalized_manifest[SCHEMAS_TAG][stream_name] = schema
180
-
181
- def _reference_schemas(self) -> None:
182
- """
183
- Set the schema reference for the given stream in the manifest.
184
- This function modifies the manifest in place.
185
- """
186
-
187
- # reference the stream schema for the stream to where it's stored
188
- if SCHEMAS_TAG in self._normalized_manifest.keys():
189
- for stream in self._get_manifest_streams():
190
- self._extract_stream_schema(stream)
191
- self._set_stream_schema_ref(stream)
192
-
193
- def _set_stream_schema_ref(self, stream: Dict[str, Any]) -> None:
194
- """
195
- Set the schema reference for the given stream in the manifest.
196
- This function modifies the manifest in place.
197
- """
198
- stream_name = stream["name"]
199
- if SCHEMAS_TAG in self._normalized_manifest.keys():
200
- if stream_name in self._normalized_manifest[SCHEMAS_TAG]:
201
- stream[SCHEMA_LOADER_TAG][SCHEMA_TAG] = self._create_schema_ref(stream_name)
202
-
203
273
  def _replace_duplicates_with_refs(self, duplicates: DuplicatesType) -> None:
204
274
  """
205
275
  Process duplicate objects and replace them with references.
@@ -447,16 +517,3 @@ class ManifestNormalizer:
447
517
  """
448
518
 
449
519
  return {"$ref": f"#/{DEF_TAG}/{LINKED_TAG}/{type_key}/{key}"}
450
-
451
- def _create_schema_ref(self, key: str) -> Dict[str, str]:
452
- """
453
- Create a reference object for stream schema using the specified key.
454
-
455
- Args:
456
- key: The reference key to use
457
-
458
- Returns:
459
- A reference object in the proper format
460
- """
461
-
462
- return {"$ref": f"#/{SCHEMAS_TAG}/{key}"}
@@ -373,6 +373,7 @@ def read(
373
373
  expecting_exception: bool | None = None, # Deprecated, use `expected_outcome` instead.
374
374
  *,
375
375
  expected_outcome: ExpectedOutcome | None = None,
376
+ debug: bool = False,
376
377
  ) -> EntrypointOutput:
377
378
  """
378
379
  config and state must be json serializable
@@ -394,6 +395,8 @@ def read(
394
395
  "--catalog",
395
396
  catalog_file,
396
397
  ]
398
+ if debug:
399
+ args.append("--debug")
397
400
  if state is not None:
398
401
  args.extend(
399
402
  [
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.56.6
3
+ Version: 6.56.7
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -138,7 +138,7 @@ airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQ
138
138
  airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=nlVvHC511NUyDEEIRBkoeDTAvLqKNp-hRy8D19z8tdk,5941
139
139
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=wnRUP0Xeru9Rbu5OexXSDN9QWDo8YU4tT9M2LDVOgGA,802
140
140
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=2UdpCz3yi7ISZTyqkQXSSy3dMxeyOWqV7OlAS5b9GVg,11568
141
- airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py,sha256=laBy7ebjA-PiNwc-50U4FHvMqS_mmHvnabxgFs4CjGw,17069
141
+ airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py,sha256=EtKjS9c94yNp3AwQC8KUCQaAYW5T3zvFYxoWYjc_buI,19729
142
142
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=pJmg78vqE5VfUrF_KJnWjucQ4k9IWFULeAxHCowrHXE,6806
143
143
  airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=d1GXdgEwolqQSfkMvREvIesUhMgMOMlxoXj4ctPCBwI,176855
144
144
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=TBC9AkGaUqHm2IKHMPN6punBIcY5tWGULowcLoAVkfw,1109
@@ -379,7 +379,7 @@ airbyte_cdk/sql/shared/sql_processor.py,sha256=1CwfC3fp9dWnHBpKtly7vGduf9ho_Mahi
379
379
  airbyte_cdk/sql/types.py,sha256=XEIhRAo_ASd0kVLBkdLf5bHiRhNple-IJrC9TibcDdY,5880
380
380
  airbyte_cdk/test/__init__.py,sha256=f_XdkOg4_63QT2k3BbKY34209lppwgw-svzfZstQEq4,199
381
381
  airbyte_cdk/test/catalog_builder.py,sha256=-y05Cz1x0Dlk6oE9LSKhCozssV2gYBNtMdV5YYOPOtk,3015
382
- airbyte_cdk/test/entrypoint_wrapper.py,sha256=Lj1E00iTen1PZKLQ63mLCK5cLKFLmm65lqahNJDXxK4,16197
382
+ airbyte_cdk/test/entrypoint_wrapper.py,sha256=GVddh4-mDdPWDd-Qua85aAUAsOdf6uhdf1zkPL926Gg,16275
383
383
  airbyte_cdk/test/mock_http/__init__.py,sha256=jE5kC6CQ0OXkTqKhciDnNVZHesBFVIA2YvkdFGwva7k,322
384
384
  airbyte_cdk/test/mock_http/matcher.py,sha256=4Qj8UnJKZIs-eodshryce3SN1Ayc8GZpBETmP6hTEyc,1446
385
385
  airbyte_cdk/test/mock_http/mocker.py,sha256=XgsjMtVoeMpRELPyALgrkHFauH9H5irxrz1Kcxh2yFY,8013
@@ -423,9 +423,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
423
423
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
424
424
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
425
425
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
426
- airbyte_cdk-6.56.6.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
427
- airbyte_cdk-6.56.6.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
428
- airbyte_cdk-6.56.6.dist-info/METADATA,sha256=UIEu7OLuNVAhPHDB7oKrCBEvNdXnGz6-oVgzQor4n5o,6392
429
- airbyte_cdk-6.56.6.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
430
- airbyte_cdk-6.56.6.dist-info/entry_points.txt,sha256=AKWbEkHfpzzk9nF9tqBUaw1MbvTM4mGtEzmZQm0ZWvM,139
431
- airbyte_cdk-6.56.6.dist-info/RECORD,,
426
+ airbyte_cdk-6.56.7.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
427
+ airbyte_cdk-6.56.7.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
428
+ airbyte_cdk-6.56.7.dist-info/METADATA,sha256=KqCnSx1P31HADnmA2gRYE6bTkOp0_55QBtNd_Zs-QBk,6392
429
+ airbyte_cdk-6.56.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
430
+ airbyte_cdk-6.56.7.dist-info/entry_points.txt,sha256=AKWbEkHfpzzk9nF9tqBUaw1MbvTM4mGtEzmZQm0ZWvM,139
431
+ airbyte_cdk-6.56.7.dist-info/RECORD,,