airbyte-cdk 0.67.1__py3-none-any.whl → 0.67.2__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (28) hide show
  1. airbyte_cdk/sources/abstract_source.py +30 -69
  2. airbyte_cdk/sources/connector_state_manager.py +12 -26
  3. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +32 -14
  4. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +3 -19
  5. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -3
  6. airbyte_cdk/sources/streams/__init__.py +2 -2
  7. airbyte_cdk/sources/streams/concurrent/adapters.py +3 -19
  8. airbyte_cdk/sources/streams/concurrent/cursor.py +1 -3
  9. airbyte_cdk/sources/streams/core.py +36 -34
  10. {airbyte_cdk-0.67.1.dist-info → airbyte_cdk-0.67.2.dist-info}/METADATA +1 -1
  11. {airbyte_cdk-0.67.1.dist-info → airbyte_cdk-0.67.2.dist-info}/RECORD +28 -28
  12. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +2 -1
  13. unit_tests/sources/file_based/file_types/test_parquet_parser.py +51 -6
  14. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +139 -199
  15. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +91 -133
  16. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +2 -13
  17. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +2 -2
  18. unit_tests/sources/file_based/test_scenarios.py +2 -2
  19. unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +9 -9
  20. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +5 -5
  21. unit_tests/sources/streams/concurrent/test_adapters.py +2 -13
  22. unit_tests/sources/streams/test_stream_read.py +221 -11
  23. unit_tests/sources/test_abstract_source.py +142 -130
  24. unit_tests/sources/test_connector_state_manager.py +3 -124
  25. unit_tests/sources/test_source.py +18 -14
  26. {airbyte_cdk-0.67.1.dist-info → airbyte_cdk-0.67.2.dist-info}/LICENSE.txt +0 -0
  27. {airbyte_cdk-0.67.1.dist-info → airbyte_cdk-0.67.2.dist-info}/WHEEL +0 -0
  28. {airbyte_cdk-0.67.1.dist-info → airbyte_cdk-0.67.2.dist-info}/top_level.txt +0 -0
@@ -54,14 +54,12 @@ class MockSource(AbstractSource):
54
54
  self,
55
55
  check_lambda: Callable[[], Tuple[bool, Optional[Any]]] = None,
56
56
  streams: List[Stream] = None,
57
- per_stream: bool = True,
58
57
  message_repository: MessageRepository = None,
59
58
  exception_on_missing_stream: bool = True,
60
59
  stop_sync_on_stream_failure: bool = False,
61
60
  ):
62
61
  self._streams = streams
63
62
  self.check_lambda = check_lambda
64
- self.per_stream = per_stream
65
63
  self.exception_on_missing_stream = exception_on_missing_stream
66
64
  self._message_repository = message_repository
67
65
  self._stop_sync_on_stream_failure = stop_sync_on_stream_failure
@@ -286,7 +284,7 @@ def test_read_stream_emits_repository_message_before_record(mocker, message_repo
286
284
  stream = MockStream(name="my_stream")
287
285
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
288
286
  mocker.patch.object(MockStream, "read_records", side_effect=[[{"a record": "a value"}, {"another record": "another value"}]])
289
- message_repository.consume_queue.side_effect = [[message for message in [MESSAGE_FROM_REPOSITORY]], []]
287
+ message_repository.consume_queue.side_effect = [[message for message in [MESSAGE_FROM_REPOSITORY]], [], []]
290
288
 
291
289
  source = MockSource(streams=[stream], message_repository=message_repository)
292
290
 
@@ -357,19 +355,16 @@ def _as_stream_status(stream: str, status: AirbyteStreamStatus) -> AirbyteMessag
357
355
  return AirbyteMessage(type=MessageType.TRACE, trace=trace_message)
358
356
 
359
357
 
360
- def _as_state(state_data: Dict[str, Any], stream_name: str = "", per_stream_state: Dict[str, Any] = None):
361
- if per_stream_state:
362
- return AirbyteMessage(
363
- type=Type.STATE,
364
- state=AirbyteStateMessage(
365
- type=AirbyteStateType.STREAM,
366
- stream=AirbyteStreamState(
367
- stream_descriptor=StreamDescriptor(name=stream_name), stream_state=AirbyteStateBlob.parse_obj(per_stream_state)
368
- ),
369
- data=state_data,
358
+ def _as_state(stream_name: str = "", per_stream_state: Dict[str, Any] = None):
359
+ return AirbyteMessage(
360
+ type=Type.STATE,
361
+ state=AirbyteStateMessage(
362
+ type=AirbyteStateType.STREAM,
363
+ stream=AirbyteStreamState(
364
+ stream_descriptor=StreamDescriptor(name=stream_name), stream_state=AirbyteStateBlob.parse_obj(per_stream_state)
370
365
  ),
371
- )
372
- return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=state_data))
366
+ ),
367
+ )
373
368
 
374
369
 
375
370
  def _as_error_trace(
@@ -410,8 +405,8 @@ def _fix_emitted_at(messages: List[AirbyteMessage]) -> List[AirbyteMessage]:
410
405
  def test_valid_full_refresh_read_no_slices(mocker):
411
406
  """Tests that running a full refresh sync on streams which don't specify slices produces the expected AirbyteMessages"""
412
407
  stream_output = [{"k1": "v1"}, {"k2": "v2"}]
413
- s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
414
- s2 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s2")
408
+ s1 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
409
+ s2 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s2")
415
410
 
416
411
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
417
412
 
@@ -428,10 +423,12 @@ def test_valid_full_refresh_read_no_slices(mocker):
428
423
  _as_stream_status("s1", AirbyteStreamStatus.STARTED),
429
424
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
430
425
  *_as_records("s1", stream_output),
426
+ _as_state("s1", {"__ab_full_refresh_state_message": True}),
431
427
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
432
428
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
433
429
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
434
430
  *_as_records("s2", stream_output),
431
+ _as_state("s2", {"__ab_full_refresh_state_message": True}),
435
432
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
436
433
  ]
437
434
  )
@@ -445,11 +442,11 @@ def test_valid_full_refresh_read_with_slices(mocker):
445
442
  slices = [{"1": "1"}, {"2": "2"}]
446
443
  # When attempting to sync a slice, just output that slice as a record
447
444
  s1 = MockStream(
448
- [({"sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
445
+ [({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
449
446
  name="s1",
450
447
  )
451
448
  s2 = MockStream(
452
- [({"sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
449
+ [({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
453
450
  name="s2",
454
451
  )
455
452
 
@@ -469,10 +466,12 @@ def test_valid_full_refresh_read_with_slices(mocker):
469
466
  _as_stream_status("s1", AirbyteStreamStatus.STARTED),
470
467
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
471
468
  *_as_records("s1", slices),
469
+ _as_state("s1", {"__ab_full_refresh_state_message": True}),
472
470
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
473
471
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
474
472
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
475
473
  *_as_records("s2", slices),
474
+ _as_state("s2", {"__ab_full_refresh_state_message": True}),
476
475
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
477
476
  ]
478
477
  )
@@ -482,6 +481,73 @@ def test_valid_full_refresh_read_with_slices(mocker):
482
481
  assert expected == messages
483
482
 
484
483
 
484
+ def test_full_refresh_does_not_use_incoming_state(mocker):
485
+ """Tests that running a full refresh sync does not use an incoming state message from the platform"""
486
+ slices = [{"1": "1"}, {"2": "2"}]
487
+ # When attempting to sync a slice, just output that slice as a record
488
+ s1 = MockStream(
489
+ [({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
490
+ name="s1",
491
+ )
492
+ s2 = MockStream(
493
+ [({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
494
+ name="s2",
495
+ )
496
+
497
+ def stream_slices_side_effect(stream_state: Mapping[str, Any], **kwargs) -> List[Mapping[str, Any]]:
498
+ if stream_state:
499
+ return slices[1:]
500
+ else:
501
+ return slices
502
+
503
+ mocker.patch.object(MockStream, "get_json_schema", return_value={})
504
+ mocker.patch.object(MockStream, "stream_slices", side_effect=stream_slices_side_effect)
505
+
506
+ state = [
507
+ AirbyteStateMessage(
508
+ type=AirbyteStateType.STREAM,
509
+ stream=AirbyteStreamState(
510
+ stream_descriptor=StreamDescriptor(name="s1"),
511
+ stream_state=AirbyteStateBlob.parse_obj({"created_at": "2024-01-31"}),
512
+ ),
513
+ ),
514
+ AirbyteStateMessage(
515
+ type=AirbyteStateType.STREAM,
516
+ stream=AirbyteStreamState(
517
+ stream_descriptor=StreamDescriptor(name="s2"),
518
+ stream_state=AirbyteStateBlob.parse_obj({"__ab_full_refresh_state_message": True}),
519
+ ),
520
+ ),
521
+ ]
522
+
523
+ src = MockSource(streams=[s1, s2])
524
+ catalog = ConfiguredAirbyteCatalog(
525
+ streams=[
526
+ _configured_stream(s1, SyncMode.full_refresh),
527
+ _configured_stream(s2, SyncMode.full_refresh),
528
+ ]
529
+ )
530
+
531
+ expected = _fix_emitted_at(
532
+ [
533
+ _as_stream_status("s1", AirbyteStreamStatus.STARTED),
534
+ _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
535
+ *_as_records("s1", slices),
536
+ _as_state("s1", {"__ab_full_refresh_state_message": True}),
537
+ _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
538
+ _as_stream_status("s2", AirbyteStreamStatus.STARTED),
539
+ _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
540
+ *_as_records("s2", slices),
541
+ _as_state("s2", {"__ab_full_refresh_state_message": True}),
542
+ _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
543
+ ]
544
+ )
545
+
546
+ messages = _fix_emitted_at(list(src.read(logger, {}, catalog, state)))
547
+
548
+ assert messages == expected
549
+
550
+
485
551
  @pytest.mark.parametrize(
486
552
  "slices",
487
553
  [[{"1": "1"}, {"2": "2"}], [{"date": datetime.date(year=2023, month=1, day=1)}, {"date": datetime.date(year=2023, month=1, day=1)}]],
@@ -491,7 +557,7 @@ def test_read_full_refresh_with_slices_sends_slice_messages(mocker, slices):
491
557
  debug_logger = logging.getLogger("airbyte.debug")
492
558
  debug_logger.setLevel(logging.DEBUG)
493
559
  stream = MockStream(
494
- [({"sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
560
+ [({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
495
561
  name="s1",
496
562
  )
497
563
 
@@ -544,14 +610,7 @@ class TestIncrementalRead:
544
610
  pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
545
611
  ],
546
612
  )
547
- @pytest.mark.parametrize(
548
- "per_stream_enabled",
549
- [
550
- pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
551
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
552
- ],
553
- )
554
- def test_with_state_attribute(self, mocker, use_legacy, per_stream_enabled):
613
+ def test_with_state_attribute(self, mocker, use_legacy):
555
614
  """Test correct state passing for the streams that have a state attribute"""
556
615
  stream_output = [{"k1": "v1"}, {"k2": "v2"}]
557
616
  old_state = {"cursor": "old_value"}
@@ -589,7 +648,7 @@ class TestIncrementalRead:
589
648
  return_value=new_state_from_connector,
590
649
  )
591
650
  mocker.patch.object(MockStreamWithState, "get_json_schema", return_value={})
592
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
651
+ src = MockSource(streams=[stream_1, stream_2])
593
652
  catalog = ConfiguredAirbyteCatalog(
594
653
  streams=[
595
654
  _configured_stream(stream_1, SyncMode.incremental),
@@ -603,17 +662,13 @@ class TestIncrementalRead:
603
662
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
604
663
  _as_record("s1", stream_output[0]),
605
664
  _as_record("s1", stream_output[1]),
606
- _as_state({"s1": new_state_from_connector}, "s1", new_state_from_connector)
607
- if per_stream_enabled
608
- else _as_state({"s1": new_state_from_connector}),
665
+ _as_state("s1", new_state_from_connector),
609
666
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
610
667
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
611
668
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
612
669
  _as_record("s2", stream_output[0]),
613
670
  _as_record("s2", stream_output[1]),
614
- _as_state({"s1": new_state_from_connector, "s2": new_state_from_connector}, "s2", new_state_from_connector)
615
- if per_stream_enabled
616
- else _as_state({"s1": new_state_from_connector, "s2": new_state_from_connector}),
671
+ _as_state("s2", new_state_from_connector),
617
672
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
618
673
  ]
619
674
  )
@@ -633,14 +688,7 @@ class TestIncrementalRead:
633
688
  pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
634
689
  ],
635
690
  )
636
- @pytest.mark.parametrize(
637
- "per_stream_enabled",
638
- [
639
- pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
640
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
641
- ],
642
- )
643
- def test_with_checkpoint_interval(self, mocker, use_legacy, per_stream_enabled):
691
+ def test_with_checkpoint_interval(self, mocker, use_legacy):
644
692
  """Tests that an incremental read which doesn't specify a checkpoint interval outputs a STATE message
645
693
  after reading N records within a stream.
646
694
  """
@@ -670,7 +718,7 @@ class TestIncrementalRead:
670
718
  return_value=1,
671
719
  )
672
720
 
673
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
721
+ src = MockSource(streams=[stream_1, stream_2])
674
722
  catalog = ConfiguredAirbyteCatalog(
675
723
  streams=[
676
724
  _configured_stream(stream_1, SyncMode.incremental),
@@ -683,18 +731,18 @@ class TestIncrementalRead:
683
731
  _as_stream_status("s1", AirbyteStreamStatus.STARTED),
684
732
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
685
733
  _as_record("s1", stream_output[0]),
686
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
734
+ _as_state("s1", state),
687
735
  _as_record("s1", stream_output[1]),
688
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
689
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
736
+ _as_state("s1", state),
737
+ _as_state("s1", state),
690
738
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
691
739
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
692
740
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
693
741
  _as_record("s2", stream_output[0]),
694
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
742
+ _as_state("s2", state),
695
743
  _as_record("s2", stream_output[1]),
696
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
697
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
744
+ _as_state("s2", state),
745
+ _as_state("s2", state),
698
746
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
699
747
  ]
700
748
  )
@@ -709,14 +757,7 @@ class TestIncrementalRead:
709
757
  pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
710
758
  ],
711
759
  )
712
- @pytest.mark.parametrize(
713
- "per_stream_enabled",
714
- [
715
- pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
716
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
717
- ],
718
- )
719
- def test_with_no_interval(self, mocker, use_legacy, per_stream_enabled):
760
+ def test_with_no_interval(self, mocker, use_legacy):
720
761
  """Tests that an incremental read which doesn't specify a checkpoint interval outputs
721
762
  a STATE message only after fully reading the stream and does not output any STATE messages during syncing the stream.
722
763
  """
@@ -739,7 +780,7 @@ class TestIncrementalRead:
739
780
  mocker.patch.object(MockStream, "supports_incremental", return_value=True)
740
781
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
741
782
 
742
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
783
+ src = MockSource(streams=[stream_1, stream_2])
743
784
  catalog = ConfiguredAirbyteCatalog(
744
785
  streams=[
745
786
  _configured_stream(stream_1, SyncMode.incremental),
@@ -752,12 +793,12 @@ class TestIncrementalRead:
752
793
  _as_stream_status("s1", AirbyteStreamStatus.STARTED),
753
794
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
754
795
  *_as_records("s1", stream_output),
755
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
796
+ _as_state("s1", state),
756
797
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
757
798
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
758
799
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
759
800
  *_as_records("s2", stream_output),
760
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
801
+ _as_state("s2", state),
761
802
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
762
803
  ]
763
804
  )
@@ -773,14 +814,7 @@ class TestIncrementalRead:
773
814
  pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
774
815
  ],
775
816
  )
776
- @pytest.mark.parametrize(
777
- "per_stream_enabled",
778
- [
779
- pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
780
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
781
- ],
782
- )
783
- def test_with_slices(self, mocker, use_legacy, per_stream_enabled):
817
+ def test_with_slices(self, mocker, use_legacy):
784
818
  """Tests that an incremental read which uses slices outputs each record in the slice followed by a STATE message, for each slice"""
785
819
  if use_legacy:
786
820
  input_state = defaultdict(dict)
@@ -823,7 +857,7 @@ class TestIncrementalRead:
823
857
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
824
858
  mocker.patch.object(MockStream, "stream_slices", return_value=slices)
825
859
 
826
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
860
+ src = MockSource(streams=[stream_1, stream_2])
827
861
  catalog = ConfiguredAirbyteCatalog(
828
862
  streams=[
829
863
  _configured_stream(stream_1, SyncMode.incremental),
@@ -837,19 +871,19 @@ class TestIncrementalRead:
837
871
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
838
872
  # stream 1 slice 1
839
873
  *_as_records("s1", stream_output),
840
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
874
+ _as_state("s1", state),
841
875
  # stream 1 slice 2
842
876
  *_as_records("s1", stream_output),
843
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
877
+ _as_state("s1", state),
844
878
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
845
879
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
846
880
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
847
881
  # stream 2 slice 1
848
882
  *_as_records("s2", stream_output),
849
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
883
+ _as_state("s2", state),
850
884
  # stream 2 slice 2
851
885
  *_as_records("s2", stream_output),
852
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
886
+ _as_state("s2", state),
853
887
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
854
888
  ]
855
889
  )
@@ -865,15 +899,8 @@ class TestIncrementalRead:
865
899
  pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
866
900
  ],
867
901
  )
868
- @pytest.mark.parametrize(
869
- "per_stream_enabled",
870
- [
871
- pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
872
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
873
- ],
874
- )
875
902
  @pytest.mark.parametrize("slices", [pytest.param([], id="test_slices_as_list"), pytest.param(iter([]), id="test_slices_as_iterator")])
876
- def test_no_slices(self, mocker, use_legacy, per_stream_enabled, slices):
903
+ def test_no_slices(self, mocker, use_legacy, slices):
877
904
  """
878
905
  Tests that an incremental read returns at least one state messages even if no records were read:
879
906
  1. outputs a state message after reading the entire stream
@@ -926,7 +953,7 @@ class TestIncrementalRead:
926
953
  return_value=2,
927
954
  )
928
955
 
929
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
956
+ src = MockSource(streams=[stream_1, stream_2])
930
957
  catalog = ConfiguredAirbyteCatalog(
931
958
  streams=[
932
959
  _configured_stream(stream_1, SyncMode.incremental),
@@ -937,10 +964,10 @@ class TestIncrementalRead:
937
964
  expected = _fix_emitted_at(
938
965
  [
939
966
  _as_stream_status("s1", AirbyteStreamStatus.STARTED),
940
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
967
+ _as_state("s1", state),
941
968
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
942
969
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
943
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
970
+ _as_state("s2", state),
944
971
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
945
972
  ]
946
973
  )
@@ -956,14 +983,7 @@ class TestIncrementalRead:
956
983
  pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
957
984
  ],
958
985
  )
959
- @pytest.mark.parametrize(
960
- "per_stream_enabled",
961
- [
962
- pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
963
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
964
- ],
965
- )
966
- def test_with_slices_and_interval(self, mocker, use_legacy, per_stream_enabled):
986
+ def test_with_slices_and_interval(self, mocker, use_legacy):
967
987
  """
968
988
  Tests that an incremental read which uses slices and a checkpoint interval:
969
989
  1. outputs all records
@@ -1016,7 +1036,7 @@ class TestIncrementalRead:
1016
1036
  return_value=2,
1017
1037
  )
1018
1038
 
1019
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
1039
+ src = MockSource(streams=[stream_1, stream_2])
1020
1040
  catalog = ConfiguredAirbyteCatalog(
1021
1041
  streams=[
1022
1042
  _configured_stream(stream_1, SyncMode.incremental),
@@ -1031,32 +1051,32 @@ class TestIncrementalRead:
1031
1051
  _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
1032
1052
  _as_record("s1", stream_output[0]),
1033
1053
  _as_record("s1", stream_output[1]),
1034
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1054
+ _as_state("s1", state),
1035
1055
  _as_record("s1", stream_output[2]),
1036
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1056
+ _as_state("s1", state),
1037
1057
  # stream 1 slice 2
1038
1058
  _as_record("s1", stream_output[0]),
1039
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1059
+ _as_state("s1", state),
1040
1060
  _as_record("s1", stream_output[1]),
1041
1061
  _as_record("s1", stream_output[2]),
1042
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1043
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1062
+ _as_state("s1", state),
1063
+ _as_state("s1", state),
1044
1064
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1045
1065
  # stream 2 slice 1
1046
1066
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
1047
1067
  _as_stream_status("s2", AirbyteStreamStatus.RUNNING),
1048
1068
  _as_record("s2", stream_output[0]),
1049
1069
  _as_record("s2", stream_output[1]),
1050
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1070
+ _as_state("s2", state),
1051
1071
  _as_record("s2", stream_output[2]),
1052
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1072
+ _as_state("s2", state),
1053
1073
  # stream 2 slice 2
1054
1074
  _as_record("s2", stream_output[0]),
1055
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1075
+ _as_state("s2", state),
1056
1076
  _as_record("s2", stream_output[1]),
1057
1077
  _as_record("s2", stream_output[2]),
1058
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1059
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1078
+ _as_state("s2", state),
1079
+ _as_state("s2", state),
1060
1080
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
1061
1081
  ]
1062
1082
  )
@@ -1065,13 +1085,7 @@ class TestIncrementalRead:
1065
1085
 
1066
1086
  assert messages == expected
1067
1087
 
1068
- @pytest.mark.parametrize(
1069
- "per_stream_enabled",
1070
- [
1071
- pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
1072
- ],
1073
- )
1074
- def test_emit_non_records(self, mocker, per_stream_enabled):
1088
+ def test_emit_non_records(self, mocker):
1075
1089
  """
1076
1090
  Tests that an incremental read which uses slices and a checkpoint interval:
1077
1091
  1. outputs all records
@@ -1129,7 +1143,7 @@ class TestIncrementalRead:
1129
1143
  return_value=2,
1130
1144
  )
1131
1145
 
1132
- src = MockSource(streams=[stream_1, stream_2], per_stream=per_stream_enabled)
1146
+ src = MockSource(streams=[stream_1, stream_2])
1133
1147
  catalog = ConfiguredAirbyteCatalog(
1134
1148
  streams=[
1135
1149
  _configured_stream(stream_1, SyncMode.incremental),
@@ -1145,17 +1159,17 @@ class TestIncrementalRead:
1145
1159
  stream_data_to_airbyte_message("s1", stream_output[0]),
1146
1160
  stream_data_to_airbyte_message("s1", stream_output[1]),
1147
1161
  stream_data_to_airbyte_message("s1", stream_output[2]),
1148
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1162
+ _as_state("s1", state),
1149
1163
  stream_data_to_airbyte_message("s1", stream_output[3]),
1150
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1164
+ _as_state("s1", state),
1151
1165
  # stream 1 slice 2
1152
1166
  stream_data_to_airbyte_message("s1", stream_output[0]),
1153
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1167
+ _as_state("s1", state),
1154
1168
  stream_data_to_airbyte_message("s1", stream_output[1]),
1155
1169
  stream_data_to_airbyte_message("s1", stream_output[2]),
1156
1170
  stream_data_to_airbyte_message("s1", stream_output[3]),
1157
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1158
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1171
+ _as_state("s1", state),
1172
+ _as_state("s1", state),
1159
1173
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1160
1174
  # stream 2 slice 1
1161
1175
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
@@ -1163,17 +1177,17 @@ class TestIncrementalRead:
1163
1177
  stream_data_to_airbyte_message("s2", stream_output[0]),
1164
1178
  stream_data_to_airbyte_message("s2", stream_output[1]),
1165
1179
  stream_data_to_airbyte_message("s2", stream_output[2]),
1166
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1180
+ _as_state("s2", state),
1167
1181
  stream_data_to_airbyte_message("s2", stream_output[3]),
1168
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1182
+ _as_state("s2", state),
1169
1183
  # stream 2 slice 2
1170
1184
  stream_data_to_airbyte_message("s2", stream_output[0]),
1171
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1185
+ _as_state("s2", state),
1172
1186
  stream_data_to_airbyte_message("s2", stream_output[1]),
1173
1187
  stream_data_to_airbyte_message("s2", stream_output[2]),
1174
1188
  stream_data_to_airbyte_message("s2", stream_output[3]),
1175
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1176
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1189
+ _as_state("s2", state),
1190
+ _as_state("s2", state),
1177
1191
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
1178
1192
  ]
1179
1193
  )
@@ -1200,14 +1214,12 @@ def test_checkpoint_state_from_stream_instance():
1200
1214
 
1201
1215
  # The stream_state passed to checkpoint_state() should be ignored since stream implements state function
1202
1216
  teams_stream.state = {"updated_at": "2022-09-11"}
1203
- actual_message = teams_stream._checkpoint_state({"ignored": "state"}, state_manager, True)
1204
- assert actual_message == _as_state({"teams": {"updated_at": "2022-09-11"}}, "teams", {"updated_at": "2022-09-11"})
1217
+ actual_message = teams_stream._checkpoint_state({"ignored": "state"}, state_manager)
1218
+ assert actual_message == _as_state("teams", {"updated_at": "2022-09-11"})
1205
1219
 
1206
1220
  # The stream_state passed to checkpoint_state() should be used since the stream does not implement state function
1207
- actual_message = managers_stream._checkpoint_state({"updated": "expected_here"}, state_manager, True)
1208
- assert actual_message == _as_state(
1209
- {"teams": {"updated_at": "2022-09-11"}, "managers": {"updated": "expected_here"}}, "managers", {"updated": "expected_here"}
1210
- )
1221
+ actual_message = managers_stream._checkpoint_state({"updated": "expected_here"}, state_manager)
1222
+ assert actual_message == _as_state("managers", {"updated": "expected_here"})
1211
1223
 
1212
1224
 
1213
1225
  @pytest.mark.parametrize(
@@ -1382,9 +1394,9 @@ def test_continue_sync_with_failed_streams_with_override_false(mocker):
1382
1394
  the sync when one stream fails with an error.
1383
1395
  """
1384
1396
  stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1385
- s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1397
+ s1 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1386
1398
  s2 = StreamRaisesException(AirbyteTracedException(message="I was born only to crash like Icarus"))
1387
- s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1399
+ s3 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1388
1400
 
1389
1401
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
1390
1402
  mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})