airbyte-cdk 0.67.0__py3-none-any.whl → 0.67.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/abstract_source.py +30 -69
- airbyte_cdk/sources/connector_state_manager.py +12 -26
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +552 -524
- airbyte_cdk/sources/file_based/config/csv_format.py +2 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +32 -14
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +3 -19
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -3
- airbyte_cdk/sources/streams/__init__.py +2 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +3 -19
- airbyte_cdk/sources/streams/concurrent/cursor.py +1 -3
- airbyte_cdk/sources/streams/core.py +36 -34
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/METADATA +3 -2
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/RECORD +31 -31
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +2 -1
- unit_tests/sources/file_based/config/test_csv_format.py +6 -1
- unit_tests/sources/file_based/file_types/test_parquet_parser.py +51 -6
- unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +139 -199
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +91 -133
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +2 -13
- unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +2 -2
- unit_tests/sources/file_based/test_scenarios.py +2 -2
- unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +9 -9
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +5 -5
- unit_tests/sources/streams/concurrent/test_adapters.py +2 -13
- unit_tests/sources/streams/test_stream_read.py +221 -11
- unit_tests/sources/test_abstract_source.py +142 -130
- unit_tests/sources/test_connector_state_manager.py +3 -124
- unit_tests/sources/test_source.py +18 -14
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/top_level.txt +0 -0
@@ -54,14 +54,12 @@ class MockSource(AbstractSource):
|
|
54
54
|
self,
|
55
55
|
check_lambda: Callable[[], Tuple[bool, Optional[Any]]] = None,
|
56
56
|
streams: List[Stream] = None,
|
57
|
-
per_stream: bool = True,
|
58
57
|
message_repository: MessageRepository = None,
|
59
58
|
exception_on_missing_stream: bool = True,
|
60
59
|
stop_sync_on_stream_failure: bool = False,
|
61
60
|
):
|
62
61
|
self._streams = streams
|
63
62
|
self.check_lambda = check_lambda
|
64
|
-
self.per_stream = per_stream
|
65
63
|
self.exception_on_missing_stream = exception_on_missing_stream
|
66
64
|
self._message_repository = message_repository
|
67
65
|
self._stop_sync_on_stream_failure = stop_sync_on_stream_failure
|
@@ -286,7 +284,7 @@ def test_read_stream_emits_repository_message_before_record(mocker, message_repo
|
|
286
284
|
stream = MockStream(name="my_stream")
|
287
285
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
288
286
|
mocker.patch.object(MockStream, "read_records", side_effect=[[{"a record": "a value"}, {"another record": "another value"}]])
|
289
|
-
message_repository.consume_queue.side_effect = [[message for message in [MESSAGE_FROM_REPOSITORY]], []]
|
287
|
+
message_repository.consume_queue.side_effect = [[message for message in [MESSAGE_FROM_REPOSITORY]], [], []]
|
290
288
|
|
291
289
|
source = MockSource(streams=[stream], message_repository=message_repository)
|
292
290
|
|
@@ -357,19 +355,16 @@ def _as_stream_status(stream: str, status: AirbyteStreamStatus) -> AirbyteMessag
|
|
357
355
|
return AirbyteMessage(type=MessageType.TRACE, trace=trace_message)
|
358
356
|
|
359
357
|
|
360
|
-
def _as_state(
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
stream_descriptor=StreamDescriptor(name=stream_name), stream_state=AirbyteStateBlob.parse_obj(per_stream_state)
|
368
|
-
),
|
369
|
-
data=state_data,
|
358
|
+
def _as_state(stream_name: str = "", per_stream_state: Dict[str, Any] = None):
|
359
|
+
return AirbyteMessage(
|
360
|
+
type=Type.STATE,
|
361
|
+
state=AirbyteStateMessage(
|
362
|
+
type=AirbyteStateType.STREAM,
|
363
|
+
stream=AirbyteStreamState(
|
364
|
+
stream_descriptor=StreamDescriptor(name=stream_name), stream_state=AirbyteStateBlob.parse_obj(per_stream_state)
|
370
365
|
),
|
371
|
-
)
|
372
|
-
|
366
|
+
),
|
367
|
+
)
|
373
368
|
|
374
369
|
|
375
370
|
def _as_error_trace(
|
@@ -410,8 +405,8 @@ def _fix_emitted_at(messages: List[AirbyteMessage]) -> List[AirbyteMessage]:
|
|
410
405
|
def test_valid_full_refresh_read_no_slices(mocker):
|
411
406
|
"""Tests that running a full refresh sync on streams which don't specify slices produces the expected AirbyteMessages"""
|
412
407
|
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
413
|
-
s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
414
|
-
s2 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s2")
|
408
|
+
s1 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
409
|
+
s2 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s2")
|
415
410
|
|
416
411
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
417
412
|
|
@@ -428,10 +423,12 @@ def test_valid_full_refresh_read_no_slices(mocker):
|
|
428
423
|
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
429
424
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
430
425
|
*_as_records("s1", stream_output),
|
426
|
+
_as_state("s1", {"__ab_full_refresh_state_message": True}),
|
431
427
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
432
428
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
433
429
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
434
430
|
*_as_records("s2", stream_output),
|
431
|
+
_as_state("s2", {"__ab_full_refresh_state_message": True}),
|
435
432
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
436
433
|
]
|
437
434
|
)
|
@@ -445,11 +442,11 @@ def test_valid_full_refresh_read_with_slices(mocker):
|
|
445
442
|
slices = [{"1": "1"}, {"2": "2"}]
|
446
443
|
# When attempting to sync a slice, just output that slice as a record
|
447
444
|
s1 = MockStream(
|
448
|
-
[({"sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
445
|
+
[({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
449
446
|
name="s1",
|
450
447
|
)
|
451
448
|
s2 = MockStream(
|
452
|
-
[({"sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
449
|
+
[({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
453
450
|
name="s2",
|
454
451
|
)
|
455
452
|
|
@@ -469,10 +466,12 @@ def test_valid_full_refresh_read_with_slices(mocker):
|
|
469
466
|
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
470
467
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
471
468
|
*_as_records("s1", slices),
|
469
|
+
_as_state("s1", {"__ab_full_refresh_state_message": True}),
|
472
470
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
473
471
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
474
472
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
475
473
|
*_as_records("s2", slices),
|
474
|
+
_as_state("s2", {"__ab_full_refresh_state_message": True}),
|
476
475
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
477
476
|
]
|
478
477
|
)
|
@@ -482,6 +481,73 @@ def test_valid_full_refresh_read_with_slices(mocker):
|
|
482
481
|
assert expected == messages
|
483
482
|
|
484
483
|
|
484
|
+
def test_full_refresh_does_not_use_incoming_state(mocker):
|
485
|
+
"""Tests that running a full refresh sync does not use an incoming state message from the platform"""
|
486
|
+
slices = [{"1": "1"}, {"2": "2"}]
|
487
|
+
# When attempting to sync a slice, just output that slice as a record
|
488
|
+
s1 = MockStream(
|
489
|
+
[({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
490
|
+
name="s1",
|
491
|
+
)
|
492
|
+
s2 = MockStream(
|
493
|
+
[({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
494
|
+
name="s2",
|
495
|
+
)
|
496
|
+
|
497
|
+
def stream_slices_side_effect(stream_state: Mapping[str, Any], **kwargs) -> List[Mapping[str, Any]]:
|
498
|
+
if stream_state:
|
499
|
+
return slices[1:]
|
500
|
+
else:
|
501
|
+
return slices
|
502
|
+
|
503
|
+
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
504
|
+
mocker.patch.object(MockStream, "stream_slices", side_effect=stream_slices_side_effect)
|
505
|
+
|
506
|
+
state = [
|
507
|
+
AirbyteStateMessage(
|
508
|
+
type=AirbyteStateType.STREAM,
|
509
|
+
stream=AirbyteStreamState(
|
510
|
+
stream_descriptor=StreamDescriptor(name="s1"),
|
511
|
+
stream_state=AirbyteStateBlob.parse_obj({"created_at": "2024-01-31"}),
|
512
|
+
),
|
513
|
+
),
|
514
|
+
AirbyteStateMessage(
|
515
|
+
type=AirbyteStateType.STREAM,
|
516
|
+
stream=AirbyteStreamState(
|
517
|
+
stream_descriptor=StreamDescriptor(name="s2"),
|
518
|
+
stream_state=AirbyteStateBlob.parse_obj({"__ab_full_refresh_state_message": True}),
|
519
|
+
),
|
520
|
+
),
|
521
|
+
]
|
522
|
+
|
523
|
+
src = MockSource(streams=[s1, s2])
|
524
|
+
catalog = ConfiguredAirbyteCatalog(
|
525
|
+
streams=[
|
526
|
+
_configured_stream(s1, SyncMode.full_refresh),
|
527
|
+
_configured_stream(s2, SyncMode.full_refresh),
|
528
|
+
]
|
529
|
+
)
|
530
|
+
|
531
|
+
expected = _fix_emitted_at(
|
532
|
+
[
|
533
|
+
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
534
|
+
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
535
|
+
*_as_records("s1", slices),
|
536
|
+
_as_state("s1", {"__ab_full_refresh_state_message": True}),
|
537
|
+
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
538
|
+
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
539
|
+
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
540
|
+
*_as_records("s2", slices),
|
541
|
+
_as_state("s2", {"__ab_full_refresh_state_message": True}),
|
542
|
+
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
543
|
+
]
|
544
|
+
)
|
545
|
+
|
546
|
+
messages = _fix_emitted_at(list(src.read(logger, {}, catalog, state)))
|
547
|
+
|
548
|
+
assert messages == expected
|
549
|
+
|
550
|
+
|
485
551
|
@pytest.mark.parametrize(
|
486
552
|
"slices",
|
487
553
|
[[{"1": "1"}, {"2": "2"}], [{"date": datetime.date(year=2023, month=1, day=1)}, {"date": datetime.date(year=2023, month=1, day=1)}]],
|
@@ -491,7 +557,7 @@ def test_read_full_refresh_with_slices_sends_slice_messages(mocker, slices):
|
|
491
557
|
debug_logger = logging.getLogger("airbyte.debug")
|
492
558
|
debug_logger.setLevel(logging.DEBUG)
|
493
559
|
stream = MockStream(
|
494
|
-
[({"sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
560
|
+
[({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
495
561
|
name="s1",
|
496
562
|
)
|
497
563
|
|
@@ -544,14 +610,7 @@ class TestIncrementalRead:
|
|
544
610
|
pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
|
545
611
|
],
|
546
612
|
)
|
547
|
-
|
548
|
-
"per_stream_enabled",
|
549
|
-
[
|
550
|
-
pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
|
551
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
552
|
-
],
|
553
|
-
)
|
554
|
-
def test_with_state_attribute(self, mocker, use_legacy, per_stream_enabled):
|
613
|
+
def test_with_state_attribute(self, mocker, use_legacy):
|
555
614
|
"""Test correct state passing for the streams that have a state attribute"""
|
556
615
|
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
557
616
|
old_state = {"cursor": "old_value"}
|
@@ -589,7 +648,7 @@ class TestIncrementalRead:
|
|
589
648
|
return_value=new_state_from_connector,
|
590
649
|
)
|
591
650
|
mocker.patch.object(MockStreamWithState, "get_json_schema", return_value={})
|
592
|
-
src = MockSource(streams=[stream_1, stream_2]
|
651
|
+
src = MockSource(streams=[stream_1, stream_2])
|
593
652
|
catalog = ConfiguredAirbyteCatalog(
|
594
653
|
streams=[
|
595
654
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -603,17 +662,13 @@ class TestIncrementalRead:
|
|
603
662
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
604
663
|
_as_record("s1", stream_output[0]),
|
605
664
|
_as_record("s1", stream_output[1]),
|
606
|
-
_as_state(
|
607
|
-
if per_stream_enabled
|
608
|
-
else _as_state({"s1": new_state_from_connector}),
|
665
|
+
_as_state("s1", new_state_from_connector),
|
609
666
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
610
667
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
611
668
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
612
669
|
_as_record("s2", stream_output[0]),
|
613
670
|
_as_record("s2", stream_output[1]),
|
614
|
-
_as_state(
|
615
|
-
if per_stream_enabled
|
616
|
-
else _as_state({"s1": new_state_from_connector, "s2": new_state_from_connector}),
|
671
|
+
_as_state("s2", new_state_from_connector),
|
617
672
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
618
673
|
]
|
619
674
|
)
|
@@ -633,14 +688,7 @@ class TestIncrementalRead:
|
|
633
688
|
pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
|
634
689
|
],
|
635
690
|
)
|
636
|
-
|
637
|
-
"per_stream_enabled",
|
638
|
-
[
|
639
|
-
pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
|
640
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
641
|
-
],
|
642
|
-
)
|
643
|
-
def test_with_checkpoint_interval(self, mocker, use_legacy, per_stream_enabled):
|
691
|
+
def test_with_checkpoint_interval(self, mocker, use_legacy):
|
644
692
|
"""Tests that an incremental read which specifies a checkpoint interval outputs a STATE message
|
645
693
|
after reading N records within a stream.
|
646
694
|
"""
|
@@ -670,7 +718,7 @@ class TestIncrementalRead:
|
|
670
718
|
return_value=1,
|
671
719
|
)
|
672
720
|
|
673
|
-
src = MockSource(streams=[stream_1, stream_2]
|
721
|
+
src = MockSource(streams=[stream_1, stream_2])
|
674
722
|
catalog = ConfiguredAirbyteCatalog(
|
675
723
|
streams=[
|
676
724
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -683,18 +731,18 @@ class TestIncrementalRead:
|
|
683
731
|
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
684
732
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
685
733
|
_as_record("s1", stream_output[0]),
|
686
|
-
_as_state(
|
734
|
+
_as_state("s1", state),
|
687
735
|
_as_record("s1", stream_output[1]),
|
688
|
-
_as_state(
|
689
|
-
_as_state(
|
736
|
+
_as_state("s1", state),
|
737
|
+
_as_state("s1", state),
|
690
738
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
691
739
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
692
740
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
693
741
|
_as_record("s2", stream_output[0]),
|
694
|
-
_as_state(
|
742
|
+
_as_state("s2", state),
|
695
743
|
_as_record("s2", stream_output[1]),
|
696
|
-
_as_state(
|
697
|
-
_as_state(
|
744
|
+
_as_state("s2", state),
|
745
|
+
_as_state("s2", state),
|
698
746
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
699
747
|
]
|
700
748
|
)
|
@@ -709,14 +757,7 @@ class TestIncrementalRead:
|
|
709
757
|
pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
|
710
758
|
],
|
711
759
|
)
|
712
|
-
|
713
|
-
"per_stream_enabled",
|
714
|
-
[
|
715
|
-
pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
|
716
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
717
|
-
],
|
718
|
-
)
|
719
|
-
def test_with_no_interval(self, mocker, use_legacy, per_stream_enabled):
|
760
|
+
def test_with_no_interval(self, mocker, use_legacy):
|
720
761
|
"""Tests that an incremental read which doesn't specify a checkpoint interval outputs
|
721
762
|
a STATE message only after fully reading the stream and does not output any STATE messages during syncing the stream.
|
722
763
|
"""
|
@@ -739,7 +780,7 @@ class TestIncrementalRead:
|
|
739
780
|
mocker.patch.object(MockStream, "supports_incremental", return_value=True)
|
740
781
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
741
782
|
|
742
|
-
src = MockSource(streams=[stream_1, stream_2]
|
783
|
+
src = MockSource(streams=[stream_1, stream_2])
|
743
784
|
catalog = ConfiguredAirbyteCatalog(
|
744
785
|
streams=[
|
745
786
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -752,12 +793,12 @@ class TestIncrementalRead:
|
|
752
793
|
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
753
794
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
754
795
|
*_as_records("s1", stream_output),
|
755
|
-
_as_state(
|
796
|
+
_as_state("s1", state),
|
756
797
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
757
798
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
758
799
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
759
800
|
*_as_records("s2", stream_output),
|
760
|
-
_as_state(
|
801
|
+
_as_state("s2", state),
|
761
802
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
762
803
|
]
|
763
804
|
)
|
@@ -773,14 +814,7 @@ class TestIncrementalRead:
|
|
773
814
|
pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
|
774
815
|
],
|
775
816
|
)
|
776
|
-
|
777
|
-
"per_stream_enabled",
|
778
|
-
[
|
779
|
-
pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
|
780
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
781
|
-
],
|
782
|
-
)
|
783
|
-
def test_with_slices(self, mocker, use_legacy, per_stream_enabled):
|
817
|
+
def test_with_slices(self, mocker, use_legacy):
|
784
818
|
"""Tests that an incremental read which uses slices outputs each record in the slice followed by a STATE message, for each slice"""
|
785
819
|
if use_legacy:
|
786
820
|
input_state = defaultdict(dict)
|
@@ -823,7 +857,7 @@ class TestIncrementalRead:
|
|
823
857
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
824
858
|
mocker.patch.object(MockStream, "stream_slices", return_value=slices)
|
825
859
|
|
826
|
-
src = MockSource(streams=[stream_1, stream_2]
|
860
|
+
src = MockSource(streams=[stream_1, stream_2])
|
827
861
|
catalog = ConfiguredAirbyteCatalog(
|
828
862
|
streams=[
|
829
863
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -837,19 +871,19 @@ class TestIncrementalRead:
|
|
837
871
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
838
872
|
# stream 1 slice 1
|
839
873
|
*_as_records("s1", stream_output),
|
840
|
-
_as_state(
|
874
|
+
_as_state("s1", state),
|
841
875
|
# stream 1 slice 2
|
842
876
|
*_as_records("s1", stream_output),
|
843
|
-
_as_state(
|
877
|
+
_as_state("s1", state),
|
844
878
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
845
879
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
846
880
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
847
881
|
# stream 2 slice 1
|
848
882
|
*_as_records("s2", stream_output),
|
849
|
-
_as_state(
|
883
|
+
_as_state("s2", state),
|
850
884
|
# stream 2 slice 2
|
851
885
|
*_as_records("s2", stream_output),
|
852
|
-
_as_state(
|
886
|
+
_as_state("s2", state),
|
853
887
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
854
888
|
]
|
855
889
|
)
|
@@ -865,15 +899,8 @@ class TestIncrementalRead:
|
|
865
899
|
pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
|
866
900
|
],
|
867
901
|
)
|
868
|
-
@pytest.mark.parametrize(
|
869
|
-
"per_stream_enabled",
|
870
|
-
[
|
871
|
-
pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
|
872
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
873
|
-
],
|
874
|
-
)
|
875
902
|
@pytest.mark.parametrize("slices", [pytest.param([], id="test_slices_as_list"), pytest.param(iter([]), id="test_slices_as_iterator")])
|
876
|
-
def test_no_slices(self, mocker, use_legacy,
|
903
|
+
def test_no_slices(self, mocker, use_legacy, slices):
|
877
904
|
"""
|
878
905
|
Tests that an incremental read returns at least one state message even if no records were read:
|
879
906
|
1. outputs a state message after reading the entire stream
|
@@ -926,7 +953,7 @@ class TestIncrementalRead:
|
|
926
953
|
return_value=2,
|
927
954
|
)
|
928
955
|
|
929
|
-
src = MockSource(streams=[stream_1, stream_2]
|
956
|
+
src = MockSource(streams=[stream_1, stream_2])
|
930
957
|
catalog = ConfiguredAirbyteCatalog(
|
931
958
|
streams=[
|
932
959
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -937,10 +964,10 @@ class TestIncrementalRead:
|
|
937
964
|
expected = _fix_emitted_at(
|
938
965
|
[
|
939
966
|
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
940
|
-
_as_state(
|
967
|
+
_as_state("s1", state),
|
941
968
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
942
969
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
943
|
-
_as_state(
|
970
|
+
_as_state("s2", state),
|
944
971
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
945
972
|
]
|
946
973
|
)
|
@@ -956,14 +983,7 @@ class TestIncrementalRead:
|
|
956
983
|
pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
|
957
984
|
],
|
958
985
|
)
|
959
|
-
|
960
|
-
"per_stream_enabled",
|
961
|
-
[
|
962
|
-
pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
|
963
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
964
|
-
],
|
965
|
-
)
|
966
|
-
def test_with_slices_and_interval(self, mocker, use_legacy, per_stream_enabled):
|
986
|
+
def test_with_slices_and_interval(self, mocker, use_legacy):
|
967
987
|
"""
|
968
988
|
Tests that an incremental read which uses slices and a checkpoint interval:
|
969
989
|
1. outputs all records
|
@@ -1016,7 +1036,7 @@ class TestIncrementalRead:
|
|
1016
1036
|
return_value=2,
|
1017
1037
|
)
|
1018
1038
|
|
1019
|
-
src = MockSource(streams=[stream_1, stream_2]
|
1039
|
+
src = MockSource(streams=[stream_1, stream_2])
|
1020
1040
|
catalog = ConfiguredAirbyteCatalog(
|
1021
1041
|
streams=[
|
1022
1042
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -1031,32 +1051,32 @@ class TestIncrementalRead:
|
|
1031
1051
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
1032
1052
|
_as_record("s1", stream_output[0]),
|
1033
1053
|
_as_record("s1", stream_output[1]),
|
1034
|
-
_as_state(
|
1054
|
+
_as_state("s1", state),
|
1035
1055
|
_as_record("s1", stream_output[2]),
|
1036
|
-
_as_state(
|
1056
|
+
_as_state("s1", state),
|
1037
1057
|
# stream 1 slice 2
|
1038
1058
|
_as_record("s1", stream_output[0]),
|
1039
|
-
_as_state(
|
1059
|
+
_as_state("s1", state),
|
1040
1060
|
_as_record("s1", stream_output[1]),
|
1041
1061
|
_as_record("s1", stream_output[2]),
|
1042
|
-
_as_state(
|
1043
|
-
_as_state(
|
1062
|
+
_as_state("s1", state),
|
1063
|
+
_as_state("s1", state),
|
1044
1064
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1045
1065
|
# stream 2 slice 1
|
1046
1066
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
1047
1067
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
1048
1068
|
_as_record("s2", stream_output[0]),
|
1049
1069
|
_as_record("s2", stream_output[1]),
|
1050
|
-
_as_state(
|
1070
|
+
_as_state("s2", state),
|
1051
1071
|
_as_record("s2", stream_output[2]),
|
1052
|
-
_as_state(
|
1072
|
+
_as_state("s2", state),
|
1053
1073
|
# stream 2 slice 2
|
1054
1074
|
_as_record("s2", stream_output[0]),
|
1055
|
-
_as_state(
|
1075
|
+
_as_state("s2", state),
|
1056
1076
|
_as_record("s2", stream_output[1]),
|
1057
1077
|
_as_record("s2", stream_output[2]),
|
1058
|
-
_as_state(
|
1059
|
-
_as_state(
|
1078
|
+
_as_state("s2", state),
|
1079
|
+
_as_state("s2", state),
|
1060
1080
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
1061
1081
|
]
|
1062
1082
|
)
|
@@ -1065,13 +1085,7 @@ class TestIncrementalRead:
|
|
1065
1085
|
|
1066
1086
|
assert messages == expected
|
1067
1087
|
|
1068
|
-
|
1069
|
-
"per_stream_enabled",
|
1070
|
-
[
|
1071
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
1072
|
-
],
|
1073
|
-
)
|
1074
|
-
def test_emit_non_records(self, mocker, per_stream_enabled):
|
1088
|
+
def test_emit_non_records(self, mocker):
|
1075
1089
|
"""
|
1076
1090
|
Tests that an incremental read which uses slices and a checkpoint interval:
|
1077
1091
|
1. outputs all records
|
@@ -1129,7 +1143,7 @@ class TestIncrementalRead:
|
|
1129
1143
|
return_value=2,
|
1130
1144
|
)
|
1131
1145
|
|
1132
|
-
src = MockSource(streams=[stream_1, stream_2]
|
1146
|
+
src = MockSource(streams=[stream_1, stream_2])
|
1133
1147
|
catalog = ConfiguredAirbyteCatalog(
|
1134
1148
|
streams=[
|
1135
1149
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -1145,17 +1159,17 @@ class TestIncrementalRead:
|
|
1145
1159
|
stream_data_to_airbyte_message("s1", stream_output[0]),
|
1146
1160
|
stream_data_to_airbyte_message("s1", stream_output[1]),
|
1147
1161
|
stream_data_to_airbyte_message("s1", stream_output[2]),
|
1148
|
-
_as_state(
|
1162
|
+
_as_state("s1", state),
|
1149
1163
|
stream_data_to_airbyte_message("s1", stream_output[3]),
|
1150
|
-
_as_state(
|
1164
|
+
_as_state("s1", state),
|
1151
1165
|
# stream 1 slice 2
|
1152
1166
|
stream_data_to_airbyte_message("s1", stream_output[0]),
|
1153
|
-
_as_state(
|
1167
|
+
_as_state("s1", state),
|
1154
1168
|
stream_data_to_airbyte_message("s1", stream_output[1]),
|
1155
1169
|
stream_data_to_airbyte_message("s1", stream_output[2]),
|
1156
1170
|
stream_data_to_airbyte_message("s1", stream_output[3]),
|
1157
|
-
_as_state(
|
1158
|
-
_as_state(
|
1171
|
+
_as_state("s1", state),
|
1172
|
+
_as_state("s1", state),
|
1159
1173
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1160
1174
|
# stream 2 slice 1
|
1161
1175
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
@@ -1163,17 +1177,17 @@ class TestIncrementalRead:
|
|
1163
1177
|
stream_data_to_airbyte_message("s2", stream_output[0]),
|
1164
1178
|
stream_data_to_airbyte_message("s2", stream_output[1]),
|
1165
1179
|
stream_data_to_airbyte_message("s2", stream_output[2]),
|
1166
|
-
_as_state(
|
1180
|
+
_as_state("s2", state),
|
1167
1181
|
stream_data_to_airbyte_message("s2", stream_output[3]),
|
1168
|
-
_as_state(
|
1182
|
+
_as_state("s2", state),
|
1169
1183
|
# stream 2 slice 2
|
1170
1184
|
stream_data_to_airbyte_message("s2", stream_output[0]),
|
1171
|
-
_as_state(
|
1185
|
+
_as_state("s2", state),
|
1172
1186
|
stream_data_to_airbyte_message("s2", stream_output[1]),
|
1173
1187
|
stream_data_to_airbyte_message("s2", stream_output[2]),
|
1174
1188
|
stream_data_to_airbyte_message("s2", stream_output[3]),
|
1175
|
-
_as_state(
|
1176
|
-
_as_state(
|
1189
|
+
_as_state("s2", state),
|
1190
|
+
_as_state("s2", state),
|
1177
1191
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
1178
1192
|
]
|
1179
1193
|
)
|
@@ -1200,14 +1214,12 @@ def test_checkpoint_state_from_stream_instance():
|
|
1200
1214
|
|
1201
1215
|
# The stream_state passed to checkpoint_state() should be ignored since stream implements state function
|
1202
1216
|
teams_stream.state = {"updated_at": "2022-09-11"}
|
1203
|
-
actual_message = teams_stream._checkpoint_state({"ignored": "state"}, state_manager
|
1204
|
-
assert actual_message == _as_state(
|
1217
|
+
actual_message = teams_stream._checkpoint_state({"ignored": "state"}, state_manager)
|
1218
|
+
assert actual_message == _as_state("teams", {"updated_at": "2022-09-11"})
|
1205
1219
|
|
1206
1220
|
# The stream_state passed to checkpoint_state() should be used since the stream does not implement state function
|
1207
|
-
actual_message = managers_stream._checkpoint_state({"updated": "expected_here"}, state_manager
|
1208
|
-
assert actual_message == _as_state(
|
1209
|
-
{"teams": {"updated_at": "2022-09-11"}, "managers": {"updated": "expected_here"}}, "managers", {"updated": "expected_here"}
|
1210
|
-
)
|
1221
|
+
actual_message = managers_stream._checkpoint_state({"updated": "expected_here"}, state_manager)
|
1222
|
+
assert actual_message == _as_state("managers", {"updated": "expected_here"})
|
1211
1223
|
|
1212
1224
|
|
1213
1225
|
@pytest.mark.parametrize(
|
@@ -1382,9 +1394,9 @@ def test_continue_sync_with_failed_streams_with_override_false(mocker):
|
|
1382
1394
|
the sync when one stream fails with an error.
|
1383
1395
|
"""
|
1384
1396
|
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
1385
|
-
s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
1397
|
+
s1 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
1386
1398
|
s2 = StreamRaisesException(AirbyteTracedException(message="I was born only to crash like Icarus"))
|
1387
|
-
s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
|
1399
|
+
s3 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
|
1388
1400
|
|
1389
1401
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
1390
1402
|
mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
|