airbyte-cdk 0.67.0__py3-none-any.whl → 0.67.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. airbyte_cdk/sources/abstract_source.py +30 -69
  2. airbyte_cdk/sources/connector_state_manager.py +12 -26
  3. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +552 -524
  4. airbyte_cdk/sources/file_based/config/csv_format.py +2 -0
  5. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +32 -14
  6. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +3 -19
  7. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -3
  8. airbyte_cdk/sources/streams/__init__.py +2 -2
  9. airbyte_cdk/sources/streams/concurrent/adapters.py +3 -19
  10. airbyte_cdk/sources/streams/concurrent/cursor.py +1 -3
  11. airbyte_cdk/sources/streams/core.py +36 -34
  12. {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/METADATA +3 -2
  13. {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/RECORD +31 -31
  14. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +2 -1
  15. unit_tests/sources/file_based/config/test_csv_format.py +6 -1
  16. unit_tests/sources/file_based/file_types/test_parquet_parser.py +51 -6
  17. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +139 -199
  18. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +91 -133
  19. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +2 -13
  20. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +2 -2
  21. unit_tests/sources/file_based/test_scenarios.py +2 -2
  22. unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +9 -9
  23. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +5 -5
  24. unit_tests/sources/streams/concurrent/test_adapters.py +2 -13
  25. unit_tests/sources/streams/test_stream_read.py +221 -11
  26. unit_tests/sources/test_abstract_source.py +142 -130
  27. unit_tests/sources/test_connector_state_manager.py +3 -124
  28. unit_tests/sources/test_source.py +18 -14
  29. {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/LICENSE.txt +0 -0
  30. {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/WHEEL +0 -0
  31. {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/top_level.txt +0 -0
@@ -54,14 +54,12 @@ class MockSource(AbstractSource):
54
54
  self,
55
55
  check_lambda: Callable[[], Tuple[bool, Optional[Any]]] = None,
56
56
  streams: List[Stream] = None,
57
- per_stream: bool = True,
58
57
  message_repository: MessageRepository = None,
59
58
  exception_on_missing_stream: bool = True,
60
59
  stop_sync_on_stream_failure: bool = False,
61
60
  ):
62
61
  self._streams = streams
63
62
  self.check_lambda = check_lambda
64
- self.per_stream = per_stream
65
63
  self.exception_on_missing_stream = exception_on_missing_stream
66
64
  self._message_repository = message_repository
67
65
  self._stop_sync_on_stream_failure = stop_sync_on_stream_failure
@@ -286,7 +284,7 @@ def test_read_stream_emits_repository_message_before_record(mocker, message_repo
286
284
  stream = MockStream(name="my_stream")
287
285
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
288
286
  mocker.patch.object(MockStream, "read_records", side_effect=[[{"a record": "a value"}, {"another record": "another value"}]])
289
- message_repository.consume_queue.side_effect = [[message for message in [MESSAGE_FROM_REPOSITORY]], []]
287
+ message_repository.consume_queue.side_effect = [[message for message in [MESSAGE_FROM_REPOSITORY]], [], []]
290
288
 
291
289
  source = MockSource(streams=[stream], message_repository=message_repository)
292
290
 
@@ -357,19 +355,16 @@ def _as_stream_status(stream: str, status: AirbyteStreamStatus) -> AirbyteMessag
357
355
  return AirbyteMessage(type=MessageType.TRACE, trace=trace_message)
358
356
 
359
357
 
360
- def _as_state(state_data: Dict[str, Any], stream_name: str = "", per_stream_state: Dict[str, Any] = None):
361
- if per_stream_state:
362
- return AirbyteMessage(
363
- type=Type.STATE,
364
- state=AirbyteStateMessage(
365
- type=AirbyteStateType.STREAM,
366
- stream=AirbyteStreamState(
367
- stream_descriptor=StreamDescriptor(name=stream_name), stream_state=AirbyteStateBlob.parse_obj(per_stream_state)
368
- ),
369
- data=state_data,
358
+ def _as_state(stream_name: str = "", per_stream_state: Dict[str, Any] = None):
359
+ return AirbyteMessage(
360
+ type=Type.STATE,
361
+ state=AirbyteStateMessage(
362
+ type=AirbyteStateType.STREAM,
363
+ stream=AirbyteStreamState(
364
+ stream_descriptor=StreamDescriptor(name=stream_name), stream_state=AirbyteStateBlob.parse_obj(per_stream_state)
370
365
  ),
371
- )
372
- return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=state_data))
366
+ ),
367
+ )
373
368
 
374
369
 
375
370
  def _as_error_trace(
@@ -410,8 +405,8 @@ def _fix_emitted_at(messages: List[AirbyteMessage]) -> List[AirbyteMessage]:
410
405
  def test_valid_full_refresh_read_no_slices(mocker):
411
406
  """Tests that running a full refresh sync on streams which don't specify slices produces the expected AirbyteMessages"""
412
407
  stream_output = [{"k1": "v1"}, {"k2": "v2"}]
413
- s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
414
- s2 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s2")
408
+ s1 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
409
+ s2 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s2")
415
410
 
416
411
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
417
412
 
@@ -428,10 +423,12 @@ def test_valid_full_refresh_read_no_slices(mocker):
428
423
  _as_stream_status("s1", AirbyteStreamStatus.STARTED),
429
424
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
430
425
  *_as_records("s1", stream_output),
426
+ _as_state("s1", {"__ab_full_refresh_state_message": True}),
431
427
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
432
428
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
433
429
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
434
430
  *_as_records("s2", stream_output),
431
+ _as_state("s2", {"__ab_full_refresh_state_message": True}),
435
432
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
436
433
  ]
437
434
  )
@@ -445,11 +442,11 @@ def test_valid_full_refresh_read_with_slices(mocker):
445
442
  slices = [{"1": "1"}, {"2": "2"}]
446
443
  # When attempting to sync a slice, just output that slice as a record
447
444
  s1 = MockStream(
448
- [({"sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
445
+ [({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
449
446
  name="s1",
450
447
  )
451
448
  s2 = MockStream(
452
- [({"sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
449
+ [({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
453
450
  name="s2",
454
451
  )
455
452
 
@@ -469,10 +466,12 @@ def test_valid_full_refresh_read_with_slices(mocker):
469
466
  _as_stream_status("s1", AirbyteStreamStatus.STARTED),
470
467
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
471
468
  *_as_records("s1", slices),
469
+ _as_state("s1", {"__ab_full_refresh_state_message": True}),
472
470
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
473
471
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
474
472
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
475
473
  *_as_records("s2", slices),
474
+ _as_state("s2", {"__ab_full_refresh_state_message": True}),
476
475
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
477
476
  ]
478
477
  )
@@ -482,6 +481,73 @@ def test_valid_full_refresh_read_with_slices(mocker):
482
481
  assert expected == messages
483
482
 
484
483
 
484
+ def test_full_refresh_does_not_use_incoming_state(mocker):
485
+ """Tests that running a full refresh sync does not use an incoming state message from the platform"""
486
+ slices = [{"1": "1"}, {"2": "2"}]
487
+ # When attempting to sync a slice, just output that slice as a record
488
+ s1 = MockStream(
489
+ [({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
490
+ name="s1",
491
+ )
492
+ s2 = MockStream(
493
+ [({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
494
+ name="s2",
495
+ )
496
+
497
+ def stream_slices_side_effect(stream_state: Mapping[str, Any], **kwargs) -> List[Mapping[str, Any]]:
498
+ if stream_state:
499
+ return slices[1:]
500
+ else:
501
+ return slices
502
+
503
+ mocker.patch.object(MockStream, "get_json_schema", return_value={})
504
+ mocker.patch.object(MockStream, "stream_slices", side_effect=stream_slices_side_effect)
505
+
506
+ state = [
507
+ AirbyteStateMessage(
508
+ type=AirbyteStateType.STREAM,
509
+ stream=AirbyteStreamState(
510
+ stream_descriptor=StreamDescriptor(name="s1"),
511
+ stream_state=AirbyteStateBlob.parse_obj({"created_at": "2024-01-31"}),
512
+ ),
513
+ ),
514
+ AirbyteStateMessage(
515
+ type=AirbyteStateType.STREAM,
516
+ stream=AirbyteStreamState(
517
+ stream_descriptor=StreamDescriptor(name="s2"),
518
+ stream_state=AirbyteStateBlob.parse_obj({"__ab_full_refresh_state_message": True}),
519
+ ),
520
+ ),
521
+ ]
522
+
523
+ src = MockSource(streams=[s1, s2])
524
+ catalog = ConfiguredAirbyteCatalog(
525
+ streams=[
526
+ _configured_stream(s1, SyncMode.full_refresh),
527
+ _configured_stream(s2, SyncMode.full_refresh),
528
+ ]
529
+ )
530
+
531
+ expected = _fix_emitted_at(
532
+ [
533
+ _as_stream_status("s1", AirbyteStreamStatus.STARTED),
534
+ _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
535
+ *_as_records("s1", slices),
536
+ _as_state("s1", {"__ab_full_refresh_state_message": True}),
537
+ _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
538
+ _as_stream_status("s2", AirbyteStreamStatus.STARTED),
539
+ _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
540
+ *_as_records("s2", slices),
541
+ _as_state("s2", {"__ab_full_refresh_state_message": True}),
542
+ _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
543
+ ]
544
+ )
545
+
546
+ messages = _fix_emitted_at(list(src.read(logger, {}, catalog, state)))
547
+
548
+ assert messages == expected
549
+
550
+
485
551
  @pytest.mark.parametrize(
486
552
  "slices",
487
553
  [[{"1": "1"}, {"2": "2"}], [{"date": datetime.date(year=2023, month=1, day=1)}, {"date": datetime.date(year=2023, month=1, day=1)}]],
@@ -491,7 +557,7 @@ def test_read_full_refresh_with_slices_sends_slice_messages(mocker, slices):
491
557
  debug_logger = logging.getLogger("airbyte.debug")
492
558
  debug_logger.setLevel(logging.DEBUG)
493
559
  stream = MockStream(
494
- [({"sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
560
+ [({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
495
561
  name="s1",
496
562
  )
497
563
 
@@ -544,14 +610,7 @@ class TestIncrementalRead:
544
610
  pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
545
611
  ],
546
612
  )
547
- @pytest.mark.parametrize(
548
- "per_stream_enabled",
549
- [
550
- pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
551
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
552
- ],
553
- )
554
- def test_with_state_attribute(self, mocker, use_legacy, per_stream_enabled):
613
+ def test_with_state_attribute(self, mocker, use_legacy):
555
614
  """Test correct state passing for the streams that have a state attribute"""
556
615
  stream_output = [{"k1": "v1"}, {"k2": "v2"}]
557
616
  old_state = {"cursor": "old_value"}
@@ -589,7 +648,7 @@ class TestIncrementalRead:
589
648
  return_value=new_state_from_connector,
590
649
  )
591
650
  mocker.patch.object(MockStreamWithState, "get_json_schema", return_value={})
592
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
651
+ src = MockSource(streams=[stream_1, stream_2])
593
652
  catalog = ConfiguredAirbyteCatalog(
594
653
  streams=[
595
654
  _configured_stream(stream_1, SyncMode.incremental),
@@ -603,17 +662,13 @@ class TestIncrementalRead:
603
662
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
604
663
  _as_record("s1", stream_output[0]),
605
664
  _as_record("s1", stream_output[1]),
606
- _as_state({"s1": new_state_from_connector}, "s1", new_state_from_connector)
607
- if per_stream_enabled
608
- else _as_state({"s1": new_state_from_connector}),
665
+ _as_state("s1", new_state_from_connector),
609
666
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
610
667
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
611
668
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
612
669
  _as_record("s2", stream_output[0]),
613
670
  _as_record("s2", stream_output[1]),
614
- _as_state({"s1": new_state_from_connector, "s2": new_state_from_connector}, "s2", new_state_from_connector)
615
- if per_stream_enabled
616
- else _as_state({"s1": new_state_from_connector, "s2": new_state_from_connector}),
671
+ _as_state("s2", new_state_from_connector),
617
672
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
618
673
  ]
619
674
  )
@@ -633,14 +688,7 @@ class TestIncrementalRead:
633
688
  pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
634
689
  ],
635
690
  )
636
- @pytest.mark.parametrize(
637
- "per_stream_enabled",
638
- [
639
- pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
640
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
641
- ],
642
- )
643
- def test_with_checkpoint_interval(self, mocker, use_legacy, per_stream_enabled):
691
+ def test_with_checkpoint_interval(self, mocker, use_legacy):
644
692
  """Tests that an incremental read which doesn't specify a checkpoint interval outputs a STATE message
645
693
  after reading N records within a stream.
646
694
  """
@@ -670,7 +718,7 @@ class TestIncrementalRead:
670
718
  return_value=1,
671
719
  )
672
720
 
673
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
721
+ src = MockSource(streams=[stream_1, stream_2])
674
722
  catalog = ConfiguredAirbyteCatalog(
675
723
  streams=[
676
724
  _configured_stream(stream_1, SyncMode.incremental),
@@ -683,18 +731,18 @@ class TestIncrementalRead:
683
731
  _as_stream_status("s1", AirbyteStreamStatus.STARTED),
684
732
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
685
733
  _as_record("s1", stream_output[0]),
686
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
734
+ _as_state("s1", state),
687
735
  _as_record("s1", stream_output[1]),
688
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
689
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
736
+ _as_state("s1", state),
737
+ _as_state("s1", state),
690
738
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
691
739
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
692
740
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
693
741
  _as_record("s2", stream_output[0]),
694
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
742
+ _as_state("s2", state),
695
743
  _as_record("s2", stream_output[1]),
696
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
697
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
744
+ _as_state("s2", state),
745
+ _as_state("s2", state),
698
746
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
699
747
  ]
700
748
  )
@@ -709,14 +757,7 @@ class TestIncrementalRead:
709
757
  pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
710
758
  ],
711
759
  )
712
- @pytest.mark.parametrize(
713
- "per_stream_enabled",
714
- [
715
- pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
716
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
717
- ],
718
- )
719
- def test_with_no_interval(self, mocker, use_legacy, per_stream_enabled):
760
+ def test_with_no_interval(self, mocker, use_legacy):
720
761
  """Tests that an incremental read which doesn't specify a checkpoint interval outputs
721
762
  a STATE message only after fully reading the stream and does not output any STATE messages during syncing the stream.
722
763
  """
@@ -739,7 +780,7 @@ class TestIncrementalRead:
739
780
  mocker.patch.object(MockStream, "supports_incremental", return_value=True)
740
781
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
741
782
 
742
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
783
+ src = MockSource(streams=[stream_1, stream_2])
743
784
  catalog = ConfiguredAirbyteCatalog(
744
785
  streams=[
745
786
  _configured_stream(stream_1, SyncMode.incremental),
@@ -752,12 +793,12 @@ class TestIncrementalRead:
752
793
  _as_stream_status("s1", AirbyteStreamStatus.STARTED),
753
794
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
754
795
  *_as_records("s1", stream_output),
755
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
796
+ _as_state("s1", state),
756
797
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
757
798
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
758
799
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
759
800
  *_as_records("s2", stream_output),
760
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
801
+ _as_state("s2", state),
761
802
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
762
803
  ]
763
804
  )
@@ -773,14 +814,7 @@ class TestIncrementalRead:
773
814
  pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
774
815
  ],
775
816
  )
776
- @pytest.mark.parametrize(
777
- "per_stream_enabled",
778
- [
779
- pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
780
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
781
- ],
782
- )
783
- def test_with_slices(self, mocker, use_legacy, per_stream_enabled):
817
+ def test_with_slices(self, mocker, use_legacy):
784
818
  """Tests that an incremental read which uses slices outputs each record in the slice followed by a STATE message, for each slice"""
785
819
  if use_legacy:
786
820
  input_state = defaultdict(dict)
@@ -823,7 +857,7 @@ class TestIncrementalRead:
823
857
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
824
858
  mocker.patch.object(MockStream, "stream_slices", return_value=slices)
825
859
 
826
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
860
+ src = MockSource(streams=[stream_1, stream_2])
827
861
  catalog = ConfiguredAirbyteCatalog(
828
862
  streams=[
829
863
  _configured_stream(stream_1, SyncMode.incremental),
@@ -837,19 +871,19 @@ class TestIncrementalRead:
837
871
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
838
872
  # stream 1 slice 1
839
873
  *_as_records("s1", stream_output),
840
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
874
+ _as_state("s1", state),
841
875
  # stream 1 slice 2
842
876
  *_as_records("s1", stream_output),
843
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
877
+ _as_state("s1", state),
844
878
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
845
879
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
846
880
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
847
881
  # stream 2 slice 1
848
882
  *_as_records("s2", stream_output),
849
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
883
+ _as_state("s2", state),
850
884
  # stream 2 slice 2
851
885
  *_as_records("s2", stream_output),
852
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
886
+ _as_state("s2", state),
853
887
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
854
888
  ]
855
889
  )
@@ -865,15 +899,8 @@ class TestIncrementalRead:
865
899
  pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
866
900
  ],
867
901
  )
868
- @pytest.mark.parametrize(
869
- "per_stream_enabled",
870
- [
871
- pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
872
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
873
- ],
874
- )
875
902
  @pytest.mark.parametrize("slices", [pytest.param([], id="test_slices_as_list"), pytest.param(iter([]), id="test_slices_as_iterator")])
876
- def test_no_slices(self, mocker, use_legacy, per_stream_enabled, slices):
903
+ def test_no_slices(self, mocker, use_legacy, slices):
877
904
  """
878
905
  Tests that an incremental read returns at least one state messages even if no records were read:
879
906
  1. outputs a state message after reading the entire stream
@@ -926,7 +953,7 @@ class TestIncrementalRead:
926
953
  return_value=2,
927
954
  )
928
955
 
929
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
956
+ src = MockSource(streams=[stream_1, stream_2])
930
957
  catalog = ConfiguredAirbyteCatalog(
931
958
  streams=[
932
959
  _configured_stream(stream_1, SyncMode.incremental),
@@ -937,10 +964,10 @@ class TestIncrementalRead:
937
964
  expected = _fix_emitted_at(
938
965
  [
939
966
  _as_stream_status("s1", AirbyteStreamStatus.STARTED),
940
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
967
+ _as_state("s1", state),
941
968
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
942
969
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
943
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
970
+ _as_state("s2", state),
944
971
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
945
972
  ]
946
973
  )
@@ -956,14 +983,7 @@ class TestIncrementalRead:
956
983
  pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
957
984
  ],
958
985
  )
959
- @pytest.mark.parametrize(
960
- "per_stream_enabled",
961
- [
962
- pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
963
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
964
- ],
965
- )
966
- def test_with_slices_and_interval(self, mocker, use_legacy, per_stream_enabled):
986
+ def test_with_slices_and_interval(self, mocker, use_legacy):
967
987
  """
968
988
  Tests that an incremental read which uses slices and a checkpoint interval:
969
989
  1. outputs all records
@@ -1016,7 +1036,7 @@ class TestIncrementalRead:
1016
1036
  return_value=2,
1017
1037
  )
1018
1038
 
1019
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
1039
+ src = MockSource(streams=[stream_1, stream_2])
1020
1040
  catalog = ConfiguredAirbyteCatalog(
1021
1041
  streams=[
1022
1042
  _configured_stream(stream_1, SyncMode.incremental),
@@ -1031,32 +1051,32 @@ class TestIncrementalRead:
1031
1051
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
1032
1052
  _as_record("s1", stream_output[0]),
1033
1053
  _as_record("s1", stream_output[1]),
1034
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1054
+ _as_state("s1", state),
1035
1055
  _as_record("s1", stream_output[2]),
1036
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1056
+ _as_state("s1", state),
1037
1057
  # stream 1 slice 2
1038
1058
  _as_record("s1", stream_output[0]),
1039
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1059
+ _as_state("s1", state),
1040
1060
  _as_record("s1", stream_output[1]),
1041
1061
  _as_record("s1", stream_output[2]),
1042
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1043
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1062
+ _as_state("s1", state),
1063
+ _as_state("s1", state),
1044
1064
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1045
1065
  # stream 2 slice 1
1046
1066
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
1047
1067
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
1048
1068
  _as_record("s2", stream_output[0]),
1049
1069
  _as_record("s2", stream_output[1]),
1050
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1070
+ _as_state("s2", state),
1051
1071
  _as_record("s2", stream_output[2]),
1052
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1072
+ _as_state("s2", state),
1053
1073
  # stream 2 slice 2
1054
1074
  _as_record("s2", stream_output[0]),
1055
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1075
+ _as_state("s2", state),
1056
1076
  _as_record("s2", stream_output[1]),
1057
1077
  _as_record("s2", stream_output[2]),
1058
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1059
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1078
+ _as_state("s2", state),
1079
+ _as_state("s2", state),
1060
1080
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
1061
1081
  ]
1062
1082
  )
@@ -1065,13 +1085,7 @@ class TestIncrementalRead:
1065
1085
 
1066
1086
  assert messages == expected
1067
1087
 
1068
- @pytest.mark.parametrize(
1069
- "per_stream_enabled",
1070
- [
1071
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
1072
- ],
1073
- )
1074
- def test_emit_non_records(self, mocker, per_stream_enabled):
1088
+ def test_emit_non_records(self, mocker):
1075
1089
  """
1076
1090
  Tests that an incremental read which uses slices and a checkpoint interval:
1077
1091
  1. outputs all records
@@ -1129,7 +1143,7 @@ class TestIncrementalRead:
1129
1143
  return_value=2,
1130
1144
  )
1131
1145
 
1132
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
1146
+ src = MockSource(streams=[stream_1, stream_2])
1133
1147
  catalog = ConfiguredAirbyteCatalog(
1134
1148
  streams=[
1135
1149
  _configured_stream(stream_1, SyncMode.incremental),
@@ -1145,17 +1159,17 @@ class TestIncrementalRead:
1145
1159
  stream_data_to_airbyte_message("s1", stream_output[0]),
1146
1160
  stream_data_to_airbyte_message("s1", stream_output[1]),
1147
1161
  stream_data_to_airbyte_message("s1", stream_output[2]),
1148
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1162
+ _as_state("s1", state),
1149
1163
  stream_data_to_airbyte_message("s1", stream_output[3]),
1150
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1164
+ _as_state("s1", state),
1151
1165
  # stream 1 slice 2
1152
1166
  stream_data_to_airbyte_message("s1", stream_output[0]),
1153
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1167
+ _as_state("s1", state),
1154
1168
  stream_data_to_airbyte_message("s1", stream_output[1]),
1155
1169
  stream_data_to_airbyte_message("s1", stream_output[2]),
1156
1170
  stream_data_to_airbyte_message("s1", stream_output[3]),
1157
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1158
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1171
+ _as_state("s1", state),
1172
+ _as_state("s1", state),
1159
1173
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1160
1174
  # stream 2 slice 1
1161
1175
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
@@ -1163,17 +1177,17 @@ class TestIncrementalRead:
1163
1177
  stream_data_to_airbyte_message("s2", stream_output[0]),
1164
1178
  stream_data_to_airbyte_message("s2", stream_output[1]),
1165
1179
  stream_data_to_airbyte_message("s2", stream_output[2]),
1166
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1180
+ _as_state("s2", state),
1167
1181
  stream_data_to_airbyte_message("s2", stream_output[3]),
1168
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1182
+ _as_state("s2", state),
1169
1183
  # stream 2 slice 2
1170
1184
  stream_data_to_airbyte_message("s2", stream_output[0]),
1171
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1185
+ _as_state("s2", state),
1172
1186
  stream_data_to_airbyte_message("s2", stream_output[1]),
1173
1187
  stream_data_to_airbyte_message("s2", stream_output[2]),
1174
1188
  stream_data_to_airbyte_message("s2", stream_output[3]),
1175
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1176
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1189
+ _as_state("s2", state),
1190
+ _as_state("s2", state),
1177
1191
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
1178
1192
  ]
1179
1193
  )
@@ -1200,14 +1214,12 @@ def test_checkpoint_state_from_stream_instance():
1200
1214
 
1201
1215
  # The stream_state passed to checkpoint_state() should be ignored since stream implements state function
1202
1216
  teams_stream.state = {"updated_at": "2022-09-11"}
1203
- actual_message = teams_stream._checkpoint_state({"ignored": "state"}, state_manager, True)
1204
- assert actual_message == _as_state({"teams": {"updated_at": "2022-09-11"}}, "teams", {"updated_at": "2022-09-11"})
1217
+ actual_message = teams_stream._checkpoint_state({"ignored": "state"}, state_manager)
1218
+ assert actual_message == _as_state("teams", {"updated_at": "2022-09-11"})
1205
1219
 
1206
1220
  # The stream_state passed to checkpoint_state() should be used since the stream does not implement state function
1207
- actual_message = managers_stream._checkpoint_state({"updated": "expected_here"}, state_manager, True)
1208
- assert actual_message == _as_state(
1209
- {"teams": {"updated_at": "2022-09-11"}, "managers": {"updated": "expected_here"}}, "managers", {"updated": "expected_here"}
1210
- )
1221
+ actual_message = managers_stream._checkpoint_state({"updated": "expected_here"}, state_manager)
1222
+ assert actual_message == _as_state("managers", {"updated": "expected_here"})
1211
1223
 
1212
1224
 
1213
1225
  @pytest.mark.parametrize(
@@ -1382,9 +1394,9 @@ def test_continue_sync_with_failed_streams_with_override_false(mocker):
1382
1394
  the sync when one stream fails with an error.
1383
1395
  """
1384
1396
  stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1385
- s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1397
+ s1 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1386
1398
  s2 = StreamRaisesException(AirbyteTracedException(message="I was born only to crash like Icarus"))
1387
- s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1399
+ s3 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1388
1400
 
1389
1401
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
1390
1402
  mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})