airbyte-cdk 6.45.5__py3-none-any.whl → 6.45.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,8 +52,10 @@ class PropertyChunking:
52
52
  chunk_size = 0
53
53
  for property_field in property_fields:
54
54
  # If property_limit_type is not defined, we default to property_count which is just an incrementing count
55
+ # todo: Add ability to specify parameter delimiter representation and take into account in property_field_size
55
56
  property_field_size = (
56
57
  len(property_field)
58
+ + 1 # The +1 represents the extra character for the delimiter in between properties
57
59
  if self.property_limit_type == PropertyLimitType.characters
58
60
  else 1
59
61
  )
@@ -10,6 +10,7 @@ from itertools import islice
10
10
  from typing import (
11
11
  Any,
12
12
  Callable,
13
+ Dict,
13
14
  Iterable,
14
15
  List,
15
16
  Mapping,
@@ -367,14 +368,65 @@ class SimpleRetriever(Retriever):
367
368
  {"next_page_token": initial_token} if initial_token is not None else None
368
369
  )
369
370
  while not pagination_complete:
370
- response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
371
+ property_chunks: List[List[str]] = (
372
+ list(
373
+ self.additional_query_properties.get_request_property_chunks(
374
+ stream_slice=stream_slice
375
+ )
376
+ )
377
+ if self.additional_query_properties
378
+ else [
379
+ []
380
+ ] # A single empty property chunk represents the case where property chunking is not configured
381
+ )
371
382
 
383
+ merged_records: MutableMapping[str, Any] = defaultdict(dict)
372
384
  last_page_size = 0
373
385
  last_record: Optional[Record] = None
374
- for record in records_generator_fn(response):
375
- last_page_size += 1
376
- last_record = record
377
- yield record
386
+ response: Optional[requests.Response] = None
387
+ for properties in property_chunks:
388
+ if len(properties) > 0:
389
+ stream_slice = StreamSlice(
390
+ partition=stream_slice.partition or {},
391
+ cursor_slice=stream_slice.cursor_slice or {},
392
+ extra_fields={"query_properties": properties},
393
+ )
394
+
395
+ response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
396
+ for current_record in records_generator_fn(response):
397
+ if (
398
+ current_record
399
+ and self.additional_query_properties
400
+ and self.additional_query_properties.property_chunking
401
+ ):
402
+ merge_key = (
403
+ self.additional_query_properties.property_chunking.get_merge_key(
404
+ current_record
405
+ )
406
+ )
407
+ if merge_key:
408
+ _deep_merge(merged_records[merge_key], current_record)
409
+ else:
410
+ # We should still emit records even if the record did not have a merge key
411
+ last_page_size += 1
412
+ last_record = current_record
413
+ yield current_record
414
+ else:
415
+ last_page_size += 1
416
+ last_record = current_record
417
+ yield current_record
418
+
419
+ if (
420
+ self.additional_query_properties
421
+ and self.additional_query_properties.property_chunking
422
+ ):
423
+ for merged_record in merged_records.values():
424
+ record = Record(
425
+ data=merged_record, stream_name=self.name, associated_slice=stream_slice
426
+ )
427
+ last_page_size += 1
428
+ last_record = record
429
+ yield record
378
430
 
379
431
  if not response:
380
432
  pagination_complete = True
@@ -449,110 +501,43 @@ class SimpleRetriever(Retriever):
449
501
  :param stream_slice: The stream slice to read data for
450
502
  :return: The records read from the API source
451
503
  """
452
-
453
- property_chunks = (
454
- list(
455
- self.additional_query_properties.get_request_property_chunks(
456
- stream_slice=stream_slice
457
- )
458
- )
459
- if self.additional_query_properties
460
- else []
461
- )
462
- records_without_merge_key = []
463
- merged_records: MutableMapping[str, Any] = defaultdict(dict)
464
-
465
504
  _slice = stream_slice or StreamSlice(partition={}, cursor_slice={}) # None-check
505
+
466
506
  most_recent_record_from_slice = None
507
+ record_generator = partial(
508
+ self._parse_records,
509
+ stream_slice=stream_slice,
510
+ stream_state=self.state or {},
511
+ records_schema=records_schema,
512
+ )
467
513
 
468
- if self.additional_query_properties:
469
- for properties in property_chunks:
470
- _slice = StreamSlice(
471
- partition=_slice.partition or {},
472
- cursor_slice=_slice.cursor_slice or {},
473
- extra_fields={"query_properties": properties},
474
- ) # None-check
475
-
476
- record_generator = partial(
477
- self._parse_records,
478
- stream_slice=_slice,
479
- stream_state=self.state or {},
480
- records_schema=records_schema,
481
- )
514
+ if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
515
+ stream_state = self.state
482
516
 
483
- for stream_data in self._read_pages(record_generator, self.state, _slice):
484
- current_record = self._extract_record(stream_data, _slice)
485
- if self.cursor and current_record:
486
- self.cursor.observe(_slice, current_record)
517
+ # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
518
+ # fetch more records. The platform deletes stream state for full refresh streams before starting a
519
+ # new job, so we don't need to worry about this value existing for the initial attempt
520
+ if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
521
+ return
487
522
 
488
- # Latest record read, not necessarily within slice boundaries.
489
- # TODO Remove once all custom components implement `observe` method.
490
- # https://github.com/airbytehq/airbyte-internal-issues/issues/6955
491
- most_recent_record_from_slice = self._get_most_recent_record(
492
- most_recent_record_from_slice, current_record, _slice
493
- )
523
+ yield from self._read_single_page(record_generator, stream_state, _slice)
524
+ else:
525
+ for stream_data in self._read_pages(record_generator, self.state, _slice):
526
+ current_record = self._extract_record(stream_data, _slice)
527
+ if self.cursor and current_record:
528
+ self.cursor.observe(_slice, current_record)
529
+
530
+ # Latest record read, not necessarily within slice boundaries.
531
+ # TODO Remove once all custom components implement `observe` method.
532
+ # https://github.com/airbytehq/airbyte-internal-issues/issues/6955
533
+ most_recent_record_from_slice = self._get_most_recent_record(
534
+ most_recent_record_from_slice, current_record, _slice
535
+ )
536
+ yield stream_data
494
537
 
495
- if current_record and self.additional_query_properties.property_chunking:
496
- merge_key = (
497
- self.additional_query_properties.property_chunking.get_merge_key(
498
- current_record
499
- )
500
- )
501
- if merge_key:
502
- merged_records[merge_key].update(current_record)
503
- else:
504
- # We should still emit records even if the record did not have a merge key
505
- records_without_merge_key.append(current_record)
506
- else:
507
- yield stream_data
508
538
  if self.cursor:
509
539
  self.cursor.close_slice(_slice, most_recent_record_from_slice)
510
-
511
- if len(merged_records) > 0:
512
- yield from [
513
- Record(data=merged_record, stream_name=self.name, associated_slice=stream_slice)
514
- for merged_record in merged_records.values()
515
- ]
516
- if len(records_without_merge_key) > 0:
517
- yield from records_without_merge_key
518
- else:
519
- _slice = stream_slice or StreamSlice(partition={}, cursor_slice={}) # None-check
520
-
521
- most_recent_record_from_slice = None
522
- record_generator = partial(
523
- self._parse_records,
524
- stream_slice=stream_slice,
525
- stream_state=self.state or {},
526
- records_schema=records_schema,
527
- )
528
-
529
- if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
530
- stream_state = self.state
531
-
532
- # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
533
- # fetch more records. The platform deletes stream state for full refresh streams before starting a
534
- # new job, so we don't need to worry about this value existing for the initial attempt
535
- if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
536
- return
537
-
538
- yield from self._read_single_page(record_generator, stream_state, _slice)
539
- else:
540
- for stream_data in self._read_pages(record_generator, self.state, _slice):
541
- current_record = self._extract_record(stream_data, _slice)
542
- if self.cursor and current_record:
543
- self.cursor.observe(_slice, current_record)
544
-
545
- # Latest record read, not necessarily within slice boundaries.
546
- # TODO Remove once all custom components implement `observe` method.
547
- # https://github.com/airbytehq/airbyte-internal-issues/issues/6955
548
- most_recent_record_from_slice = self._get_most_recent_record(
549
- most_recent_record_from_slice, current_record, _slice
550
- )
551
- yield stream_data
552
-
553
- if self.cursor:
554
- self.cursor.close_slice(_slice, most_recent_record_from_slice)
555
- return
540
+ return
556
541
 
557
542
  def _get_most_recent_record(
558
543
  self,
@@ -639,6 +624,26 @@ class SimpleRetriever(Retriever):
639
624
  return json.dumps(to_serialize, indent=None, separators=(",", ":"), sort_keys=True)
640
625
 
641
626
 
627
+ def _deep_merge(
628
+ target: MutableMapping[str, Any], source: Union[Record, MutableMapping[str, Any]]
629
+ ) -> None:
630
+ """
631
+ Recursively merge two dictionaries, combining nested dictionaries instead of overwriting them.
632
+
633
+ :param target: The dictionary to merge into (modified in place)
634
+ :param source: The dictionary to merge from
635
+ """
636
+ for key, value in source.items():
637
+ if (
638
+ key in target
639
+ and isinstance(target[key], MutableMapping)
640
+ and isinstance(value, MutableMapping)
641
+ ):
642
+ _deep_merge(target[key], value)
643
+ else:
644
+ target[key] = value
645
+
646
+
642
647
  @dataclass
643
648
  class SimpleRetrieverTestReadDecorator(SimpleRetriever):
644
649
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.45.5
3
+ Version: 6.45.6
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -158,7 +158,7 @@ airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_stra
158
158
  airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py,sha256=LoKXdUbSgHEtSwtA8DFrnX6SpQbRVVwreY8NguTKTcI,2229
159
159
  airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py,sha256=sHwHVuN6djuRBF7zQb-HmINV0By4wE5j_i6TjmIPMzQ,494
160
160
  airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py,sha256=3h9Ae6TNGagh9sMYWdG5KoEFWDlqUWZ5fkswTPreveM,1616
161
- airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py,sha256=YmUeeY3ZpsuK2VTF3SkdVuJcplI1I4UfhgzOrggifag,2748
161
+ airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py,sha256=IQkNMWPEXxCJ7zPfGa6Ri17YVVdwuAnUA2Ats7TMIRU,2972
162
162
  airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py,sha256=2VWhgphAFKmHJhzp-UoSP9_QR3eYOLPT0nzMDyglBV4,2650
163
163
  airbyte_cdk/sources/declarative/requesters/query_properties/strategies/__init__.py,sha256=ojiPj9eVU7SuNpF3RZwhZWW21IYLQYEoxpzg1rCdvNM,350
164
164
  airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py,sha256=np4uTwSpQvXxubIzVbwSDX0Xf3EgVn8kkhs6zYLOdAQ,1081
@@ -180,7 +180,7 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
180
180
  airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=nQepwG_RfW53sgwvK5dLPqfCx0VjsQ83nYoPjBMAaLM,527
181
181
  airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=6oZtnCHm9NdDvjTSrVwPQOXGSdETSIR7eWH2vFjM7jI,4855
182
182
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
183
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=cI3UEWSIuGNzzIlf8I_7Vf_3fX_tQwIwPrnmrY7MEh4,31146
183
+ airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=O7qpM71L1_ATIbEKa8y658jdiSJSPw0KmuGKgnaruQU,31008
184
184
  airbyte_cdk/sources/declarative/schema/__init__.py,sha256=xU45UvM5O4c1PSM13UHpCdh5hpW3HXy9vRRGEiAC1rg,795
185
185
  airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=UnbzlExmwoQiVV8zDg4lhAEaqA_0pRfwbMRe8yqOuWk,1834
186
186
  airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=J8Q_iJYhcSQLWyt0bTZCbDAGpxt9G8FCc6Q9jtGsNzw,10703
@@ -375,9 +375,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
375
375
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
376
376
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
377
377
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
378
- airbyte_cdk-6.45.5.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
379
- airbyte_cdk-6.45.5.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
380
- airbyte_cdk-6.45.5.dist-info/METADATA,sha256=Yx9K_oEn6WPdgAKJx7ObyeMQRTXpF8JYps2DLKjkdwM,6113
381
- airbyte_cdk-6.45.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
382
- airbyte_cdk-6.45.5.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
383
- airbyte_cdk-6.45.5.dist-info/RECORD,,
378
+ airbyte_cdk-6.45.6.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
379
+ airbyte_cdk-6.45.6.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
380
+ airbyte_cdk-6.45.6.dist-info/METADATA,sha256=RsYTgGOclj0X4Azn5WPXWgSyZTmN8ooXVijCnoIkEeg,6113
381
+ airbyte_cdk-6.45.6.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
382
+ airbyte_cdk-6.45.6.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
383
+ airbyte_cdk-6.45.6.dist-info/RECORD,,