airbyte-cdk 6.45.5__py3-none-any.whl → 6.45.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py +2 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +106 -101
- {airbyte_cdk-6.45.5.dist-info → airbyte_cdk-6.45.6.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.45.5.dist-info → airbyte_cdk-6.45.6.dist-info}/RECORD +8 -8
- {airbyte_cdk-6.45.5.dist-info → airbyte_cdk-6.45.6.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.45.5.dist-info → airbyte_cdk-6.45.6.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.45.5.dist-info → airbyte_cdk-6.45.6.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.45.5.dist-info → airbyte_cdk-6.45.6.dist-info}/entry_points.txt +0 -0
@@ -52,8 +52,10 @@ class PropertyChunking:
|
|
52
52
|
chunk_size = 0
|
53
53
|
for property_field in property_fields:
|
54
54
|
# If property_limit_type is not defined, we default to property_count which is just an incrementing count
|
55
|
+
# todo: Add ability to specify parameter delimiter representation and take into account in property_field_size
|
55
56
|
property_field_size = (
|
56
57
|
len(property_field)
|
58
|
+
+ 1 # The +1 represents the extra character for the delimiter in between properties
|
57
59
|
if self.property_limit_type == PropertyLimitType.characters
|
58
60
|
else 1
|
59
61
|
)
|
@@ -10,6 +10,7 @@ from itertools import islice
|
|
10
10
|
from typing import (
|
11
11
|
Any,
|
12
12
|
Callable,
|
13
|
+
Dict,
|
13
14
|
Iterable,
|
14
15
|
List,
|
15
16
|
Mapping,
|
@@ -367,14 +368,65 @@ class SimpleRetriever(Retriever):
|
|
367
368
|
{"next_page_token": initial_token} if initial_token is not None else None
|
368
369
|
)
|
369
370
|
while not pagination_complete:
|
370
|
-
|
371
|
+
property_chunks: List[List[str]] = (
|
372
|
+
list(
|
373
|
+
self.additional_query_properties.get_request_property_chunks(
|
374
|
+
stream_slice=stream_slice
|
375
|
+
)
|
376
|
+
)
|
377
|
+
if self.additional_query_properties
|
378
|
+
else [
|
379
|
+
[]
|
380
|
+
] # A single empty property chunk represents the case where property chunking is not configured
|
381
|
+
)
|
371
382
|
|
383
|
+
merged_records: MutableMapping[str, Any] = defaultdict(dict)
|
372
384
|
last_page_size = 0
|
373
385
|
last_record: Optional[Record] = None
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
386
|
+
response: Optional[requests.Response] = None
|
387
|
+
for properties in property_chunks:
|
388
|
+
if len(properties) > 0:
|
389
|
+
stream_slice = StreamSlice(
|
390
|
+
partition=stream_slice.partition or {},
|
391
|
+
cursor_slice=stream_slice.cursor_slice or {},
|
392
|
+
extra_fields={"query_properties": properties},
|
393
|
+
)
|
394
|
+
|
395
|
+
response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
|
396
|
+
for current_record in records_generator_fn(response):
|
397
|
+
if (
|
398
|
+
current_record
|
399
|
+
and self.additional_query_properties
|
400
|
+
and self.additional_query_properties.property_chunking
|
401
|
+
):
|
402
|
+
merge_key = (
|
403
|
+
self.additional_query_properties.property_chunking.get_merge_key(
|
404
|
+
current_record
|
405
|
+
)
|
406
|
+
)
|
407
|
+
if merge_key:
|
408
|
+
_deep_merge(merged_records[merge_key], current_record)
|
409
|
+
else:
|
410
|
+
# We should still emit records even if the record did not have a merge key
|
411
|
+
last_page_size += 1
|
412
|
+
last_record = current_record
|
413
|
+
yield current_record
|
414
|
+
else:
|
415
|
+
last_page_size += 1
|
416
|
+
last_record = current_record
|
417
|
+
yield current_record
|
418
|
+
|
419
|
+
if (
|
420
|
+
self.additional_query_properties
|
421
|
+
and self.additional_query_properties.property_chunking
|
422
|
+
):
|
423
|
+
for merged_record in merged_records.values():
|
424
|
+
record = Record(
|
425
|
+
data=merged_record, stream_name=self.name, associated_slice=stream_slice
|
426
|
+
)
|
427
|
+
last_page_size += 1
|
428
|
+
last_record = record
|
429
|
+
yield record
|
378
430
|
|
379
431
|
if not response:
|
380
432
|
pagination_complete = True
|
@@ -449,110 +501,43 @@ class SimpleRetriever(Retriever):
|
|
449
501
|
:param stream_slice: The stream slice to read data for
|
450
502
|
:return: The records read from the API source
|
451
503
|
"""
|
452
|
-
|
453
|
-
property_chunks = (
|
454
|
-
list(
|
455
|
-
self.additional_query_properties.get_request_property_chunks(
|
456
|
-
stream_slice=stream_slice
|
457
|
-
)
|
458
|
-
)
|
459
|
-
if self.additional_query_properties
|
460
|
-
else []
|
461
|
-
)
|
462
|
-
records_without_merge_key = []
|
463
|
-
merged_records: MutableMapping[str, Any] = defaultdict(dict)
|
464
|
-
|
465
504
|
_slice = stream_slice or StreamSlice(partition={}, cursor_slice={}) # None-check
|
505
|
+
|
466
506
|
most_recent_record_from_slice = None
|
507
|
+
record_generator = partial(
|
508
|
+
self._parse_records,
|
509
|
+
stream_slice=stream_slice,
|
510
|
+
stream_state=self.state or {},
|
511
|
+
records_schema=records_schema,
|
512
|
+
)
|
467
513
|
|
468
|
-
if self.
|
469
|
-
|
470
|
-
_slice = StreamSlice(
|
471
|
-
partition=_slice.partition or {},
|
472
|
-
cursor_slice=_slice.cursor_slice or {},
|
473
|
-
extra_fields={"query_properties": properties},
|
474
|
-
) # None-check
|
475
|
-
|
476
|
-
record_generator = partial(
|
477
|
-
self._parse_records,
|
478
|
-
stream_slice=_slice,
|
479
|
-
stream_state=self.state or {},
|
480
|
-
records_schema=records_schema,
|
481
|
-
)
|
514
|
+
if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
|
515
|
+
stream_state = self.state
|
482
516
|
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
517
|
+
# Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
|
518
|
+
# fetch more records. The platform deletes stream state for full refresh streams before starting a
|
519
|
+
# new job, so we don't need to worry about this value existing for the initial attempt
|
520
|
+
if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
|
521
|
+
return
|
487
522
|
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
)
|
523
|
+
yield from self._read_single_page(record_generator, stream_state, _slice)
|
524
|
+
else:
|
525
|
+
for stream_data in self._read_pages(record_generator, self.state, _slice):
|
526
|
+
current_record = self._extract_record(stream_data, _slice)
|
527
|
+
if self.cursor and current_record:
|
528
|
+
self.cursor.observe(_slice, current_record)
|
529
|
+
|
530
|
+
# Latest record read, not necessarily within slice boundaries.
|
531
|
+
# TODO Remove once all custom components implement `observe` method.
|
532
|
+
# https://github.com/airbytehq/airbyte-internal-issues/issues/6955
|
533
|
+
most_recent_record_from_slice = self._get_most_recent_record(
|
534
|
+
most_recent_record_from_slice, current_record, _slice
|
535
|
+
)
|
536
|
+
yield stream_data
|
494
537
|
|
495
|
-
if current_record and self.additional_query_properties.property_chunking:
|
496
|
-
merge_key = (
|
497
|
-
self.additional_query_properties.property_chunking.get_merge_key(
|
498
|
-
current_record
|
499
|
-
)
|
500
|
-
)
|
501
|
-
if merge_key:
|
502
|
-
merged_records[merge_key].update(current_record)
|
503
|
-
else:
|
504
|
-
# We should still emit records even if the record did not have a merge key
|
505
|
-
records_without_merge_key.append(current_record)
|
506
|
-
else:
|
507
|
-
yield stream_data
|
508
538
|
if self.cursor:
|
509
539
|
self.cursor.close_slice(_slice, most_recent_record_from_slice)
|
510
|
-
|
511
|
-
if len(merged_records) > 0:
|
512
|
-
yield from [
|
513
|
-
Record(data=merged_record, stream_name=self.name, associated_slice=stream_slice)
|
514
|
-
for merged_record in merged_records.values()
|
515
|
-
]
|
516
|
-
if len(records_without_merge_key) > 0:
|
517
|
-
yield from records_without_merge_key
|
518
|
-
else:
|
519
|
-
_slice = stream_slice or StreamSlice(partition={}, cursor_slice={}) # None-check
|
520
|
-
|
521
|
-
most_recent_record_from_slice = None
|
522
|
-
record_generator = partial(
|
523
|
-
self._parse_records,
|
524
|
-
stream_slice=stream_slice,
|
525
|
-
stream_state=self.state or {},
|
526
|
-
records_schema=records_schema,
|
527
|
-
)
|
528
|
-
|
529
|
-
if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
|
530
|
-
stream_state = self.state
|
531
|
-
|
532
|
-
# Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
|
533
|
-
# fetch more records. The platform deletes stream state for full refresh streams before starting a
|
534
|
-
# new job, so we don't need to worry about this value existing for the initial attempt
|
535
|
-
if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
|
536
|
-
return
|
537
|
-
|
538
|
-
yield from self._read_single_page(record_generator, stream_state, _slice)
|
539
|
-
else:
|
540
|
-
for stream_data in self._read_pages(record_generator, self.state, _slice):
|
541
|
-
current_record = self._extract_record(stream_data, _slice)
|
542
|
-
if self.cursor and current_record:
|
543
|
-
self.cursor.observe(_slice, current_record)
|
544
|
-
|
545
|
-
# Latest record read, not necessarily within slice boundaries.
|
546
|
-
# TODO Remove once all custom components implement `observe` method.
|
547
|
-
# https://github.com/airbytehq/airbyte-internal-issues/issues/6955
|
548
|
-
most_recent_record_from_slice = self._get_most_recent_record(
|
549
|
-
most_recent_record_from_slice, current_record, _slice
|
550
|
-
)
|
551
|
-
yield stream_data
|
552
|
-
|
553
|
-
if self.cursor:
|
554
|
-
self.cursor.close_slice(_slice, most_recent_record_from_slice)
|
555
|
-
return
|
540
|
+
return
|
556
541
|
|
557
542
|
def _get_most_recent_record(
|
558
543
|
self,
|
@@ -639,6 +624,26 @@ class SimpleRetriever(Retriever):
|
|
639
624
|
return json.dumps(to_serialize, indent=None, separators=(",", ":"), sort_keys=True)
|
640
625
|
|
641
626
|
|
627
|
+
def _deep_merge(
|
628
|
+
target: MutableMapping[str, Any], source: Union[Record, MutableMapping[str, Any]]
|
629
|
+
) -> None:
|
630
|
+
"""
|
631
|
+
Recursively merge two dictionaries, combining nested dictionaries instead of overwriting them.
|
632
|
+
|
633
|
+
:param target: The dictionary to merge into (modified in place)
|
634
|
+
:param source: The dictionary to merge from
|
635
|
+
"""
|
636
|
+
for key, value in source.items():
|
637
|
+
if (
|
638
|
+
key in target
|
639
|
+
and isinstance(target[key], MutableMapping)
|
640
|
+
and isinstance(value, MutableMapping)
|
641
|
+
):
|
642
|
+
_deep_merge(target[key], value)
|
643
|
+
else:
|
644
|
+
target[key] = value
|
645
|
+
|
646
|
+
|
642
647
|
@dataclass
|
643
648
|
class SimpleRetrieverTestReadDecorator(SimpleRetriever):
|
644
649
|
"""
|
@@ -158,7 +158,7 @@ airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_stra
|
|
158
158
|
airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py,sha256=LoKXdUbSgHEtSwtA8DFrnX6SpQbRVVwreY8NguTKTcI,2229
|
159
159
|
airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py,sha256=sHwHVuN6djuRBF7zQb-HmINV0By4wE5j_i6TjmIPMzQ,494
|
160
160
|
airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py,sha256=3h9Ae6TNGagh9sMYWdG5KoEFWDlqUWZ5fkswTPreveM,1616
|
161
|
-
airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py,sha256=
|
161
|
+
airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py,sha256=IQkNMWPEXxCJ7zPfGa6Ri17YVVdwuAnUA2Ats7TMIRU,2972
|
162
162
|
airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py,sha256=2VWhgphAFKmHJhzp-UoSP9_QR3eYOLPT0nzMDyglBV4,2650
|
163
163
|
airbyte_cdk/sources/declarative/requesters/query_properties/strategies/__init__.py,sha256=ojiPj9eVU7SuNpF3RZwhZWW21IYLQYEoxpzg1rCdvNM,350
|
164
164
|
airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py,sha256=np4uTwSpQvXxubIzVbwSDX0Xf3EgVn8kkhs6zYLOdAQ,1081
|
@@ -180,7 +180,7 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
|
|
180
180
|
airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=nQepwG_RfW53sgwvK5dLPqfCx0VjsQ83nYoPjBMAaLM,527
|
181
181
|
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=6oZtnCHm9NdDvjTSrVwPQOXGSdETSIR7eWH2vFjM7jI,4855
|
182
182
|
airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
|
183
|
-
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=
|
183
|
+
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=O7qpM71L1_ATIbEKa8y658jdiSJSPw0KmuGKgnaruQU,31008
|
184
184
|
airbyte_cdk/sources/declarative/schema/__init__.py,sha256=xU45UvM5O4c1PSM13UHpCdh5hpW3HXy9vRRGEiAC1rg,795
|
185
185
|
airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=UnbzlExmwoQiVV8zDg4lhAEaqA_0pRfwbMRe8yqOuWk,1834
|
186
186
|
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=J8Q_iJYhcSQLWyt0bTZCbDAGpxt9G8FCc6Q9jtGsNzw,10703
|
@@ -375,9 +375,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
|
|
375
375
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
376
376
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
377
377
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
378
|
-
airbyte_cdk-6.45.
|
379
|
-
airbyte_cdk-6.45.
|
380
|
-
airbyte_cdk-6.45.
|
381
|
-
airbyte_cdk-6.45.
|
382
|
-
airbyte_cdk-6.45.
|
383
|
-
airbyte_cdk-6.45.
|
378
|
+
airbyte_cdk-6.45.6.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
379
|
+
airbyte_cdk-6.45.6.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
380
|
+
airbyte_cdk-6.45.6.dist-info/METADATA,sha256=RsYTgGOclj0X4Azn5WPXWgSyZTmN8ooXVijCnoIkEeg,6113
|
381
|
+
airbyte_cdk-6.45.6.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
382
|
+
airbyte_cdk-6.45.6.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
383
|
+
airbyte_cdk-6.45.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|