airbyte_cdk-0.67.0-py3-none-any.whl → airbyte_cdk-0.67.2-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/sources/abstract_source.py +30 -69
- airbyte_cdk/sources/connector_state_manager.py +12 -26
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +552 -524
- airbyte_cdk/sources/file_based/config/csv_format.py +2 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +32 -14
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +3 -19
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -3
- airbyte_cdk/sources/streams/__init__.py +2 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +3 -19
- airbyte_cdk/sources/streams/concurrent/cursor.py +1 -3
- airbyte_cdk/sources/streams/core.py +36 -34
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/METADATA +3 -2
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/RECORD +31 -31
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +2 -1
- unit_tests/sources/file_based/config/test_csv_format.py +6 -1
- unit_tests/sources/file_based/file_types/test_parquet_parser.py +51 -6
- unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +139 -199
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +91 -133
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +2 -13
- unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +2 -2
- unit_tests/sources/file_based/test_scenarios.py +2 -2
- unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +9 -9
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +5 -5
- unit_tests/sources/streams/concurrent/test_adapters.py +2 -13
- unit_tests/sources/streams/test_stream_read.py +221 -11
- unit_tests/sources/test_abstract_source.py +142 -130
- unit_tests/sources/test_connector_state_manager.py +3 -124
- unit_tests/sources/test_source.py +18 -14
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/top_level.txt +0 -0
@@ -54,14 +54,12 @@ class MockSource(AbstractSource):
|
|
54
54
|
self,
|
55
55
|
check_lambda: Callable[[], Tuple[bool, Optional[Any]]] = None,
|
56
56
|
streams: List[Stream] = None,
|
57
|
-
per_stream: bool = True,
|
58
57
|
message_repository: MessageRepository = None,
|
59
58
|
exception_on_missing_stream: bool = True,
|
60
59
|
stop_sync_on_stream_failure: bool = False,
|
61
60
|
):
|
62
61
|
self._streams = streams
|
63
62
|
self.check_lambda = check_lambda
|
64
|
-
self.per_stream = per_stream
|
65
63
|
self.exception_on_missing_stream = exception_on_missing_stream
|
66
64
|
self._message_repository = message_repository
|
67
65
|
self._stop_sync_on_stream_failure = stop_sync_on_stream_failure
|
@@ -286,7 +284,7 @@ def test_read_stream_emits_repository_message_before_record(mocker, message_repo
|
|
286
284
|
stream = MockStream(name="my_stream")
|
287
285
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
288
286
|
mocker.patch.object(MockStream, "read_records", side_effect=[[{"a record": "a value"}, {"another record": "another value"}]])
|
289
|
-
message_repository.consume_queue.side_effect = [[message for message in [MESSAGE_FROM_REPOSITORY]], []]
|
287
|
+
message_repository.consume_queue.side_effect = [[message for message in [MESSAGE_FROM_REPOSITORY]], [], []]
|
290
288
|
|
291
289
|
source = MockSource(streams=[stream], message_repository=message_repository)
|
292
290
|
|
@@ -357,19 +355,16 @@ def _as_stream_status(stream: str, status: AirbyteStreamStatus) -> AirbyteMessag
|
|
357
355
|
return AirbyteMessage(type=MessageType.TRACE, trace=trace_message)
|
358
356
|
|
359
357
|
|
360
|
-
def _as_state(
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
stream_descriptor=StreamDescriptor(name=stream_name), stream_state=AirbyteStateBlob.parse_obj(per_stream_state)
|
368
|
-
),
|
369
|
-
data=state_data,
|
358
|
+
def _as_state(stream_name: str = "", per_stream_state: Dict[str, Any] = None):
|
359
|
+
return AirbyteMessage(
|
360
|
+
type=Type.STATE,
|
361
|
+
state=AirbyteStateMessage(
|
362
|
+
type=AirbyteStateType.STREAM,
|
363
|
+
stream=AirbyteStreamState(
|
364
|
+
stream_descriptor=StreamDescriptor(name=stream_name), stream_state=AirbyteStateBlob.parse_obj(per_stream_state)
|
370
365
|
),
|
371
|
-
)
|
372
|
-
|
366
|
+
),
|
367
|
+
)
|
373
368
|
|
374
369
|
|
375
370
|
def _as_error_trace(
|
@@ -410,8 +405,8 @@ def _fix_emitted_at(messages: List[AirbyteMessage]) -> List[AirbyteMessage]:
|
|
410
405
|
def test_valid_full_refresh_read_no_slices(mocker):
|
411
406
|
"""Tests that running a full refresh sync on streams which don't specify slices produces the expected AirbyteMessages"""
|
412
407
|
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
413
|
-
s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
414
|
-
s2 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s2")
|
408
|
+
s1 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
409
|
+
s2 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s2")
|
415
410
|
|
416
411
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
417
412
|
|
@@ -428,10 +423,12 @@ def test_valid_full_refresh_read_no_slices(mocker):
|
|
428
423
|
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
429
424
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
430
425
|
*_as_records("s1", stream_output),
|
426
|
+
_as_state("s1", {"__ab_full_refresh_state_message": True}),
|
431
427
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
432
428
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
433
429
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
434
430
|
*_as_records("s2", stream_output),
|
431
|
+
_as_state("s2", {"__ab_full_refresh_state_message": True}),
|
435
432
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
436
433
|
]
|
437
434
|
)
|
@@ -445,11 +442,11 @@ def test_valid_full_refresh_read_with_slices(mocker):
|
|
445
442
|
slices = [{"1": "1"}, {"2": "2"}]
|
446
443
|
# When attempting to sync a slice, just output that slice as a record
|
447
444
|
s1 = MockStream(
|
448
|
-
[({"sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
445
|
+
[({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
449
446
|
name="s1",
|
450
447
|
)
|
451
448
|
s2 = MockStream(
|
452
|
-
[({"sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
449
|
+
[({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
453
450
|
name="s2",
|
454
451
|
)
|
455
452
|
|
@@ -469,10 +466,12 @@ def test_valid_full_refresh_read_with_slices(mocker):
|
|
469
466
|
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
470
467
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
471
468
|
*_as_records("s1", slices),
|
469
|
+
_as_state("s1", {"__ab_full_refresh_state_message": True}),
|
472
470
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
473
471
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
474
472
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
475
473
|
*_as_records("s2", slices),
|
474
|
+
_as_state("s2", {"__ab_full_refresh_state_message": True}),
|
476
475
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
477
476
|
]
|
478
477
|
)
|
@@ -482,6 +481,73 @@ def test_valid_full_refresh_read_with_slices(mocker):
|
|
482
481
|
assert expected == messages
|
483
482
|
|
484
483
|
|
484
|
+
def test_full_refresh_does_not_use_incoming_state(mocker):
|
485
|
+
"""Tests that running a full refresh sync does not use an incoming state message from the platform"""
|
486
|
+
slices = [{"1": "1"}, {"2": "2"}]
|
487
|
+
# When attempting to sync a slice, just output that slice as a record
|
488
|
+
s1 = MockStream(
|
489
|
+
[({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
490
|
+
name="s1",
|
491
|
+
)
|
492
|
+
s2 = MockStream(
|
493
|
+
[({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
494
|
+
name="s2",
|
495
|
+
)
|
496
|
+
|
497
|
+
def stream_slices_side_effect(stream_state: Mapping[str, Any], **kwargs) -> List[Mapping[str, Any]]:
|
498
|
+
if stream_state:
|
499
|
+
return slices[1:]
|
500
|
+
else:
|
501
|
+
return slices
|
502
|
+
|
503
|
+
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
504
|
+
mocker.patch.object(MockStream, "stream_slices", side_effect=stream_slices_side_effect)
|
505
|
+
|
506
|
+
state = [
|
507
|
+
AirbyteStateMessage(
|
508
|
+
type=AirbyteStateType.STREAM,
|
509
|
+
stream=AirbyteStreamState(
|
510
|
+
stream_descriptor=StreamDescriptor(name="s1"),
|
511
|
+
stream_state=AirbyteStateBlob.parse_obj({"created_at": "2024-01-31"}),
|
512
|
+
),
|
513
|
+
),
|
514
|
+
AirbyteStateMessage(
|
515
|
+
type=AirbyteStateType.STREAM,
|
516
|
+
stream=AirbyteStreamState(
|
517
|
+
stream_descriptor=StreamDescriptor(name="s2"),
|
518
|
+
stream_state=AirbyteStateBlob.parse_obj({"__ab_full_refresh_state_message": True}),
|
519
|
+
),
|
520
|
+
),
|
521
|
+
]
|
522
|
+
|
523
|
+
src = MockSource(streams=[s1, s2])
|
524
|
+
catalog = ConfiguredAirbyteCatalog(
|
525
|
+
streams=[
|
526
|
+
_configured_stream(s1, SyncMode.full_refresh),
|
527
|
+
_configured_stream(s2, SyncMode.full_refresh),
|
528
|
+
]
|
529
|
+
)
|
530
|
+
|
531
|
+
expected = _fix_emitted_at(
|
532
|
+
[
|
533
|
+
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
534
|
+
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
535
|
+
*_as_records("s1", slices),
|
536
|
+
_as_state("s1", {"__ab_full_refresh_state_message": True}),
|
537
|
+
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
538
|
+
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
539
|
+
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
540
|
+
*_as_records("s2", slices),
|
541
|
+
_as_state("s2", {"__ab_full_refresh_state_message": True}),
|
542
|
+
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
543
|
+
]
|
544
|
+
)
|
545
|
+
|
546
|
+
messages = _fix_emitted_at(list(src.read(logger, {}, catalog, state)))
|
547
|
+
|
548
|
+
assert messages == expected
|
549
|
+
|
550
|
+
|
485
551
|
@pytest.mark.parametrize(
|
486
552
|
"slices",
|
487
553
|
[[{"1": "1"}, {"2": "2"}], [{"date": datetime.date(year=2023, month=1, day=1)}, {"date": datetime.date(year=2023, month=1, day=1)}]],
|
@@ -491,7 +557,7 @@ def test_read_full_refresh_with_slices_sends_slice_messages(mocker, slices):
|
|
491
557
|
debug_logger = logging.getLogger("airbyte.debug")
|
492
558
|
debug_logger.setLevel(logging.DEBUG)
|
493
559
|
stream = MockStream(
|
494
|
-
[({"sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
560
|
+
[({"stream_state": {}, "sync_mode": SyncMode.full_refresh, "stream_slice": s}, [s]) for s in slices],
|
495
561
|
name="s1",
|
496
562
|
)
|
497
563
|
|
@@ -544,14 +610,7 @@ class TestIncrementalRead:
|
|
544
610
|
pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
|
545
611
|
],
|
546
612
|
)
|
547
|
-
|
548
|
-
"per_stream_enabled",
|
549
|
-
[
|
550
|
-
pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
|
551
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
552
|
-
],
|
553
|
-
)
|
554
|
-
def test_with_state_attribute(self, mocker, use_legacy, per_stream_enabled):
|
613
|
+
def test_with_state_attribute(self, mocker, use_legacy):
|
555
614
|
"""Test correct state passing for the streams that have a state attribute"""
|
556
615
|
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
557
616
|
old_state = {"cursor": "old_value"}
|
@@ -589,7 +648,7 @@ class TestIncrementalRead:
|
|
589
648
|
return_value=new_state_from_connector,
|
590
649
|
)
|
591
650
|
mocker.patch.object(MockStreamWithState, "get_json_schema", return_value={})
|
592
|
-
src = MockSource(streams=[stream_1, stream_2]
|
651
|
+
src = MockSource(streams=[stream_1, stream_2])
|
593
652
|
catalog = ConfiguredAirbyteCatalog(
|
594
653
|
streams=[
|
595
654
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -603,17 +662,13 @@ class TestIncrementalRead:
|
|
603
662
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
604
663
|
_as_record("s1", stream_output[0]),
|
605
664
|
_as_record("s1", stream_output[1]),
|
606
|
-
_as_state(
|
607
|
-
if per_stream_enabled
|
608
|
-
else _as_state({"s1": new_state_from_connector}),
|
665
|
+
_as_state("s1", new_state_from_connector),
|
609
666
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
610
667
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
611
668
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
612
669
|
_as_record("s2", stream_output[0]),
|
613
670
|
_as_record("s2", stream_output[1]),
|
614
|
-
_as_state(
|
615
|
-
if per_stream_enabled
|
616
|
-
else _as_state({"s1": new_state_from_connector, "s2": new_state_from_connector}),
|
671
|
+
_as_state("s2", new_state_from_connector),
|
617
672
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
618
673
|
]
|
619
674
|
)
|
@@ -633,14 +688,7 @@ class TestIncrementalRead:
|
|
633
688
|
pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
|
634
689
|
],
|
635
690
|
)
|
636
|
-
|
637
|
-
"per_stream_enabled",
|
638
|
-
[
|
639
|
-
pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
|
640
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
641
|
-
],
|
642
|
-
)
|
643
|
-
def test_with_checkpoint_interval(self, mocker, use_legacy, per_stream_enabled):
|
691
|
+
def test_with_checkpoint_interval(self, mocker, use_legacy):
|
644
692
|
"""Tests that an incremental read which specifies a checkpoint interval outputs a STATE message
|
645
693
|
after reading N records within a stream.
|
646
694
|
"""
|
@@ -670,7 +718,7 @@ class TestIncrementalRead:
|
|
670
718
|
return_value=1,
|
671
719
|
)
|
672
720
|
|
673
|
-
src = MockSource(streams=[stream_1, stream_2]
|
721
|
+
src = MockSource(streams=[stream_1, stream_2])
|
674
722
|
catalog = ConfiguredAirbyteCatalog(
|
675
723
|
streams=[
|
676
724
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -683,18 +731,18 @@ class TestIncrementalRead:
|
|
683
731
|
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
684
732
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
685
733
|
_as_record("s1", stream_output[0]),
|
686
|
-
_as_state(
|
734
|
+
_as_state("s1", state),
|
687
735
|
_as_record("s1", stream_output[1]),
|
688
|
-
_as_state(
|
689
|
-
_as_state(
|
736
|
+
_as_state("s1", state),
|
737
|
+
_as_state("s1", state),
|
690
738
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
691
739
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
692
740
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
693
741
|
_as_record("s2", stream_output[0]),
|
694
|
-
_as_state(
|
742
|
+
_as_state("s2", state),
|
695
743
|
_as_record("s2", stream_output[1]),
|
696
|
-
_as_state(
|
697
|
-
_as_state(
|
744
|
+
_as_state("s2", state),
|
745
|
+
_as_state("s2", state),
|
698
746
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
699
747
|
]
|
700
748
|
)
|
@@ -709,14 +757,7 @@ class TestIncrementalRead:
|
|
709
757
|
pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
|
710
758
|
],
|
711
759
|
)
|
712
|
-
|
713
|
-
"per_stream_enabled",
|
714
|
-
[
|
715
|
-
pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
|
716
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
717
|
-
],
|
718
|
-
)
|
719
|
-
def test_with_no_interval(self, mocker, use_legacy, per_stream_enabled):
|
760
|
+
def test_with_no_interval(self, mocker, use_legacy):
|
720
761
|
"""Tests that an incremental read which doesn't specify a checkpoint interval outputs
|
721
762
|
a STATE message only after fully reading the stream and does not output any STATE messages during syncing the stream.
|
722
763
|
"""
|
@@ -739,7 +780,7 @@ class TestIncrementalRead:
|
|
739
780
|
mocker.patch.object(MockStream, "supports_incremental", return_value=True)
|
740
781
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
741
782
|
|
742
|
-
src = MockSource(streams=[stream_1, stream_2]
|
783
|
+
src = MockSource(streams=[stream_1, stream_2])
|
743
784
|
catalog = ConfiguredAirbyteCatalog(
|
744
785
|
streams=[
|
745
786
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -752,12 +793,12 @@ class TestIncrementalRead:
|
|
752
793
|
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
753
794
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
754
795
|
*_as_records("s1", stream_output),
|
755
|
-
_as_state(
|
796
|
+
_as_state("s1", state),
|
756
797
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
757
798
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
758
799
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
759
800
|
*_as_records("s2", stream_output),
|
760
|
-
_as_state(
|
801
|
+
_as_state("s2", state),
|
761
802
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
762
803
|
]
|
763
804
|
)
|
@@ -773,14 +814,7 @@ class TestIncrementalRead:
|
|
773
814
|
pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
|
774
815
|
],
|
775
816
|
)
|
776
|
-
|
777
|
-
"per_stream_enabled",
|
778
|
-
[
|
779
|
-
pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
|
780
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
781
|
-
],
|
782
|
-
)
|
783
|
-
def test_with_slices(self, mocker, use_legacy, per_stream_enabled):
|
817
|
+
def test_with_slices(self, mocker, use_legacy):
|
784
818
|
"""Tests that an incremental read which uses slices outputs each record in the slice followed by a STATE message, for each slice"""
|
785
819
|
if use_legacy:
|
786
820
|
input_state = defaultdict(dict)
|
@@ -823,7 +857,7 @@ class TestIncrementalRead:
|
|
823
857
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
824
858
|
mocker.patch.object(MockStream, "stream_slices", return_value=slices)
|
825
859
|
|
826
|
-
src = MockSource(streams=[stream_1, stream_2]
|
860
|
+
src = MockSource(streams=[stream_1, stream_2])
|
827
861
|
catalog = ConfiguredAirbyteCatalog(
|
828
862
|
streams=[
|
829
863
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -837,19 +871,19 @@ class TestIncrementalRead:
|
|
837
871
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
838
872
|
# stream 1 slice 1
|
839
873
|
*_as_records("s1", stream_output),
|
840
|
-
_as_state(
|
874
|
+
_as_state("s1", state),
|
841
875
|
# stream 1 slice 2
|
842
876
|
*_as_records("s1", stream_output),
|
843
|
-
_as_state(
|
877
|
+
_as_state("s1", state),
|
844
878
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
845
879
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
846
880
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
847
881
|
# stream 2 slice 1
|
848
882
|
*_as_records("s2", stream_output),
|
849
|
-
_as_state(
|
883
|
+
_as_state("s2", state),
|
850
884
|
# stream 2 slice 2
|
851
885
|
*_as_records("s2", stream_output),
|
852
|
-
_as_state(
|
886
|
+
_as_state("s2", state),
|
853
887
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
854
888
|
]
|
855
889
|
)
|
@@ -865,15 +899,8 @@ class TestIncrementalRead:
|
|
865
899
|
pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
|
866
900
|
],
|
867
901
|
)
|
868
|
-
@pytest.mark.parametrize(
|
869
|
-
"per_stream_enabled",
|
870
|
-
[
|
871
|
-
pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
|
872
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
873
|
-
],
|
874
|
-
)
|
875
902
|
@pytest.mark.parametrize("slices", [pytest.param([], id="test_slices_as_list"), pytest.param(iter([]), id="test_slices_as_iterator")])
|
876
|
-
def test_no_slices(self, mocker, use_legacy,
|
903
|
+
def test_no_slices(self, mocker, use_legacy, slices):
|
877
904
|
"""
|
878
905
|
Tests that an incremental read returns at least one state messages even if no records were read:
|
879
906
|
1. outputs a state message after reading the entire stream
|
@@ -926,7 +953,7 @@ class TestIncrementalRead:
|
|
926
953
|
return_value=2,
|
927
954
|
)
|
928
955
|
|
929
|
-
src = MockSource(streams=[stream_1, stream_2]
|
956
|
+
src = MockSource(streams=[stream_1, stream_2])
|
930
957
|
catalog = ConfiguredAirbyteCatalog(
|
931
958
|
streams=[
|
932
959
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -937,10 +964,10 @@ class TestIncrementalRead:
|
|
937
964
|
expected = _fix_emitted_at(
|
938
965
|
[
|
939
966
|
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
940
|
-
_as_state(
|
967
|
+
_as_state("s1", state),
|
941
968
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
942
969
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
943
|
-
_as_state(
|
970
|
+
_as_state("s2", state),
|
944
971
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
945
972
|
]
|
946
973
|
)
|
@@ -956,14 +983,7 @@ class TestIncrementalRead:
|
|
956
983
|
pytest.param(False, id="test_incoming_stream_state_as_per_stream_format"),
|
957
984
|
],
|
958
985
|
)
|
959
|
-
|
960
|
-
"per_stream_enabled",
|
961
|
-
[
|
962
|
-
pytest.param(True, id="test_source_emits_state_as_per_stream_format"),
|
963
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
964
|
-
],
|
965
|
-
)
|
966
|
-
def test_with_slices_and_interval(self, mocker, use_legacy, per_stream_enabled):
|
986
|
+
def test_with_slices_and_interval(self, mocker, use_legacy):
|
967
987
|
"""
|
968
988
|
Tests that an incremental read which uses slices and a checkpoint interval:
|
969
989
|
1. outputs all records
|
@@ -1016,7 +1036,7 @@ class TestIncrementalRead:
|
|
1016
1036
|
return_value=2,
|
1017
1037
|
)
|
1018
1038
|
|
1019
|
-
src = MockSource(streams=[stream_1, stream_2]
|
1039
|
+
src = MockSource(streams=[stream_1, stream_2])
|
1020
1040
|
catalog = ConfiguredAirbyteCatalog(
|
1021
1041
|
streams=[
|
1022
1042
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -1031,32 +1051,32 @@ class TestIncrementalRead:
|
|
1031
1051
|
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
1032
1052
|
_as_record("s1", stream_output[0]),
|
1033
1053
|
_as_record("s1", stream_output[1]),
|
1034
|
-
_as_state(
|
1054
|
+
_as_state("s1", state),
|
1035
1055
|
_as_record("s1", stream_output[2]),
|
1036
|
-
_as_state(
|
1056
|
+
_as_state("s1", state),
|
1037
1057
|
# stream 1 slice 2
|
1038
1058
|
_as_record("s1", stream_output[0]),
|
1039
|
-
_as_state(
|
1059
|
+
_as_state("s1", state),
|
1040
1060
|
_as_record("s1", stream_output[1]),
|
1041
1061
|
_as_record("s1", stream_output[2]),
|
1042
|
-
_as_state(
|
1043
|
-
_as_state(
|
1062
|
+
_as_state("s1", state),
|
1063
|
+
_as_state("s1", state),
|
1044
1064
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1045
1065
|
# stream 2 slice 1
|
1046
1066
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
1047
1067
|
_as_stream_status("s2", AirbyteStreamStatus.RUNNING),
|
1048
1068
|
_as_record("s2", stream_output[0]),
|
1049
1069
|
_as_record("s2", stream_output[1]),
|
1050
|
-
_as_state(
|
1070
|
+
_as_state("s2", state),
|
1051
1071
|
_as_record("s2", stream_output[2]),
|
1052
|
-
_as_state(
|
1072
|
+
_as_state("s2", state),
|
1053
1073
|
# stream 2 slice 2
|
1054
1074
|
_as_record("s2", stream_output[0]),
|
1055
|
-
_as_state(
|
1075
|
+
_as_state("s2", state),
|
1056
1076
|
_as_record("s2", stream_output[1]),
|
1057
1077
|
_as_record("s2", stream_output[2]),
|
1058
|
-
_as_state(
|
1059
|
-
_as_state(
|
1078
|
+
_as_state("s2", state),
|
1079
|
+
_as_state("s2", state),
|
1060
1080
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
1061
1081
|
]
|
1062
1082
|
)
|
@@ -1065,13 +1085,7 @@ class TestIncrementalRead:
|
|
1065
1085
|
|
1066
1086
|
assert messages == expected
|
1067
1087
|
|
1068
|
-
|
1069
|
-
"per_stream_enabled",
|
1070
|
-
[
|
1071
|
-
pytest.param(False, id="test_source_emits_state_as_per_stream_format"),
|
1072
|
-
],
|
1073
|
-
)
|
1074
|
-
def test_emit_non_records(self, mocker, per_stream_enabled):
|
1088
|
+
def test_emit_non_records(self, mocker):
|
1075
1089
|
"""
|
1076
1090
|
Tests that an incremental read which uses slices and a checkpoint interval:
|
1077
1091
|
1. outputs all records
|
@@ -1129,7 +1143,7 @@ class TestIncrementalRead:
|
|
1129
1143
|
return_value=2,
|
1130
1144
|
)
|
1131
1145
|
|
1132
|
-
src = MockSource(streams=[stream_1, stream_2]
|
1146
|
+
src = MockSource(streams=[stream_1, stream_2])
|
1133
1147
|
catalog = ConfiguredAirbyteCatalog(
|
1134
1148
|
streams=[
|
1135
1149
|
_configured_stream(stream_1, SyncMode.incremental),
|
@@ -1145,17 +1159,17 @@ class TestIncrementalRead:
|
|
1145
1159
|
stream_data_to_airbyte_message("s1", stream_output[0]),
|
1146
1160
|
stream_data_to_airbyte_message("s1", stream_output[1]),
|
1147
1161
|
stream_data_to_airbyte_message("s1", stream_output[2]),
|
1148
|
-
_as_state(
|
1162
|
+
_as_state("s1", state),
|
1149
1163
|
stream_data_to_airbyte_message("s1", stream_output[3]),
|
1150
|
-
_as_state(
|
1164
|
+
_as_state("s1", state),
|
1151
1165
|
# stream 1 slice 2
|
1152
1166
|
stream_data_to_airbyte_message("s1", stream_output[0]),
|
1153
|
-
_as_state(
|
1167
|
+
_as_state("s1", state),
|
1154
1168
|
stream_data_to_airbyte_message("s1", stream_output[1]),
|
1155
1169
|
stream_data_to_airbyte_message("s1", stream_output[2]),
|
1156
1170
|
stream_data_to_airbyte_message("s1", stream_output[3]),
|
1157
|
-
_as_state(
|
1158
|
-
_as_state(
|
1171
|
+
_as_state("s1", state),
|
1172
|
+
_as_state("s1", state),
|
1159
1173
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1160
1174
|
# stream 2 slice 1
|
1161
1175
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
@@ -1163,17 +1177,17 @@ class TestIncrementalRead:
|
|
1163
1177
|
stream_data_to_airbyte_message("s2", stream_output[0]),
|
1164
1178
|
stream_data_to_airbyte_message("s2", stream_output[1]),
|
1165
1179
|
stream_data_to_airbyte_message("s2", stream_output[2]),
|
1166
|
-
_as_state(
|
1180
|
+
_as_state("s2", state),
|
1167
1181
|
stream_data_to_airbyte_message("s2", stream_output[3]),
|
1168
|
-
_as_state(
|
1182
|
+
_as_state("s2", state),
|
1169
1183
|
# stream 2 slice 2
|
1170
1184
|
stream_data_to_airbyte_message("s2", stream_output[0]),
|
1171
|
-
_as_state(
|
1185
|
+
_as_state("s2", state),
|
1172
1186
|
stream_data_to_airbyte_message("s2", stream_output[1]),
|
1173
1187
|
stream_data_to_airbyte_message("s2", stream_output[2]),
|
1174
1188
|
stream_data_to_airbyte_message("s2", stream_output[3]),
|
1175
|
-
_as_state(
|
1176
|
-
_as_state(
|
1189
|
+
_as_state("s2", state),
|
1190
|
+
_as_state("s2", state),
|
1177
1191
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
1178
1192
|
]
|
1179
1193
|
)
|
@@ -1200,14 +1214,12 @@ def test_checkpoint_state_from_stream_instance():
|
|
1200
1214
|
|
1201
1215
|
# The stream_state passed to checkpoint_state() should be ignored since stream implements state function
|
1202
1216
|
teams_stream.state = {"updated_at": "2022-09-11"}
|
1203
|
-
actual_message = teams_stream._checkpoint_state({"ignored": "state"}, state_manager
|
1204
|
-
assert actual_message == _as_state(
|
1217
|
+
actual_message = teams_stream._checkpoint_state({"ignored": "state"}, state_manager)
|
1218
|
+
assert actual_message == _as_state("teams", {"updated_at": "2022-09-11"})
|
1205
1219
|
|
1206
1220
|
# The stream_state passed to checkpoint_state() should be used since the stream does not implement state function
|
1207
|
-
actual_message = managers_stream._checkpoint_state({"updated": "expected_here"}, state_manager
|
1208
|
-
assert actual_message == _as_state(
|
1209
|
-
{"teams": {"updated_at": "2022-09-11"}, "managers": {"updated": "expected_here"}}, "managers", {"updated": "expected_here"}
|
1210
|
-
)
|
1221
|
+
actual_message = managers_stream._checkpoint_state({"updated": "expected_here"}, state_manager)
|
1222
|
+
assert actual_message == _as_state("managers", {"updated": "expected_here"})
|
1211
1223
|
|
1212
1224
|
|
1213
1225
|
@pytest.mark.parametrize(
|
@@ -1382,9 +1394,9 @@ def test_continue_sync_with_failed_streams_with_override_false(mocker):
|
|
1382
1394
|
the sync when one stream fails with an error.
|
1383
1395
|
"""
|
1384
1396
|
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
1385
|
-
s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
1397
|
+
s1 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
1386
1398
|
s2 = StreamRaisesException(AirbyteTracedException(message="I was born only to crash like Icarus"))
|
1387
|
-
s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
|
1399
|
+
s3 = MockStream([({"stream_state": {}, "sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
|
1388
1400
|
|
1389
1401
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
1390
1402
|
mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
|