airbyte-cdk 0.67.0__py3-none-any.whl → 0.67.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/abstract_source.py +30 -69
- airbyte_cdk/sources/connector_state_manager.py +12 -26
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +552 -524
- airbyte_cdk/sources/file_based/config/csv_format.py +2 -0
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +32 -14
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +3 -19
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -3
- airbyte_cdk/sources/streams/__init__.py +2 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +3 -19
- airbyte_cdk/sources/streams/concurrent/cursor.py +1 -3
- airbyte_cdk/sources/streams/core.py +36 -34
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/METADATA +3 -2
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/RECORD +31 -31
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +2 -1
- unit_tests/sources/file_based/config/test_csv_format.py +6 -1
- unit_tests/sources/file_based/file_types/test_parquet_parser.py +51 -6
- unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +139 -199
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +91 -133
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +2 -13
- unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +2 -2
- unit_tests/sources/file_based/test_scenarios.py +2 -2
- unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +9 -9
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +5 -5
- unit_tests/sources/streams/concurrent/test_adapters.py +2 -13
- unit_tests/sources/streams/test_stream_read.py +221 -11
- unit_tests/sources/test_abstract_source.py +142 -130
- unit_tests/sources/test_connector_state_manager.py +3 -124
- unit_tests/sources/test_source.py +18 -14
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/top_level.txt +0 -0
@@ -73,10 +73,8 @@ single_csv_input_state_is_earlier_scenario = (
|
|
73
73
|
"stream": "stream1",
|
74
74
|
},
|
75
75
|
{
|
76
|
-
"
|
77
|
-
|
78
|
-
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
|
79
|
-
}
|
76
|
+
"history": {"some_old_file.csv": "2023-06-01T03:54:07.000000Z", "a.csv": "2023-06-05T03:54:07.000000Z"},
|
77
|
+
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
|
80
78
|
},
|
81
79
|
]
|
82
80
|
)
|
@@ -154,10 +152,8 @@ single_csv_file_is_skipped_if_same_modified_at_as_in_history = (
|
|
154
152
|
.set_expected_records(
|
155
153
|
[
|
156
154
|
{
|
157
|
-
"
|
158
|
-
|
159
|
-
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
|
160
|
-
}
|
155
|
+
"history": {"a.csv": "2023-06-05T03:54:07.000000Z"},
|
156
|
+
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
|
161
157
|
}
|
162
158
|
]
|
163
159
|
)
|
@@ -253,10 +249,8 @@ single_csv_file_is_synced_if_modified_at_is_more_recent_than_in_history = (
|
|
253
249
|
"stream": "stream1",
|
254
250
|
},
|
255
251
|
{
|
256
|
-
"
|
257
|
-
|
258
|
-
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
|
259
|
-
}
|
252
|
+
"history": {"a.csv": "2023-06-05T03:54:07.000000Z"},
|
253
|
+
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
|
260
254
|
},
|
261
255
|
]
|
262
256
|
)
|
@@ -365,10 +359,8 @@ single_csv_no_input_state_scenario = (
|
|
365
359
|
"stream": "stream1",
|
366
360
|
},
|
367
361
|
{
|
368
|
-
"
|
369
|
-
|
370
|
-
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
|
371
|
-
}
|
362
|
+
"history": {"a.csv": "2023-06-05T03:54:07.000000Z"},
|
363
|
+
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
|
372
364
|
},
|
373
365
|
]
|
374
366
|
)
|
@@ -488,10 +480,8 @@ multi_csv_same_timestamp_scenario = (
|
|
488
480
|
"stream": "stream1",
|
489
481
|
},
|
490
482
|
{
|
491
|
-
"
|
492
|
-
|
493
|
-
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
|
494
|
-
}
|
483
|
+
"history": {"a.csv": "2023-06-05T03:54:07.000000Z", "b.csv": "2023-06-05T03:54:07.000000Z"},
|
484
|
+
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
|
495
485
|
},
|
496
486
|
]
|
497
487
|
)
|
@@ -580,13 +570,11 @@ single_csv_input_state_is_later_scenario = (
|
|
580
570
|
"stream": "stream1",
|
581
571
|
},
|
582
572
|
{
|
583
|
-
"
|
584
|
-
"
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
"_ab_source_file_last_modified": "2023-07-15T23:59:59.000000Z_recent_file.csv",
|
589
|
-
}
|
573
|
+
"history": {
|
574
|
+
"recent_file.csv": "2023-07-15T23:59:59.000000Z",
|
575
|
+
"a.csv": "2023-06-05T03:54:07.000000Z",
|
576
|
+
},
|
577
|
+
"_ab_source_file_last_modified": "2023-07-15T23:59:59.000000Z_recent_file.csv",
|
590
578
|
},
|
591
579
|
]
|
592
580
|
)
|
@@ -693,12 +681,10 @@ multi_csv_different_timestamps_scenario = (
|
|
693
681
|
"stream": "stream1",
|
694
682
|
},
|
695
683
|
{
|
696
|
-
"
|
697
|
-
"
|
698
|
-
|
699
|
-
|
700
|
-
"_ab_source_file_last_modified": "2023-06-04T03:54:07.000000Z_a.csv",
|
701
|
-
}
|
684
|
+
"history": {
|
685
|
+
"a.csv": "2023-06-04T03:54:07.000000Z",
|
686
|
+
},
|
687
|
+
"_ab_source_file_last_modified": "2023-06-04T03:54:07.000000Z_a.csv",
|
702
688
|
},
|
703
689
|
{
|
704
690
|
"data": {
|
@@ -721,10 +707,8 @@ multi_csv_different_timestamps_scenario = (
|
|
721
707
|
"stream": "stream1",
|
722
708
|
},
|
723
709
|
{
|
724
|
-
"
|
725
|
-
|
726
|
-
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
|
727
|
-
}
|
710
|
+
"history": {"a.csv": "2023-06-04T03:54:07.000000Z", "b.csv": "2023-06-05T03:54:07.000000Z"},
|
711
|
+
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
|
728
712
|
},
|
729
713
|
]
|
730
714
|
)
|
@@ -852,10 +836,8 @@ multi_csv_per_timestamp_scenario = (
|
|
852
836
|
"stream": "stream1",
|
853
837
|
},
|
854
838
|
{
|
855
|
-
"
|
856
|
-
|
857
|
-
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
|
858
|
-
}
|
839
|
+
"history": {"a.csv": "2023-06-05T03:54:07.000000Z", "b.csv": "2023-06-05T03:54:07.000000Z"},
|
840
|
+
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
|
859
841
|
},
|
860
842
|
{
|
861
843
|
"data": {
|
@@ -878,14 +860,12 @@ multi_csv_per_timestamp_scenario = (
|
|
878
860
|
"stream": "stream1",
|
879
861
|
},
|
880
862
|
{
|
881
|
-
"
|
882
|
-
"
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
|
888
|
-
}
|
863
|
+
"history": {
|
864
|
+
"a.csv": "2023-06-05T03:54:07.000000Z",
|
865
|
+
"b.csv": "2023-06-05T03:54:07.000000Z",
|
866
|
+
"c.csv": "2023-06-06T03:54:07.000000Z",
|
867
|
+
},
|
868
|
+
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
|
889
869
|
},
|
890
870
|
]
|
891
871
|
)
|
@@ -997,10 +977,8 @@ multi_csv_skip_file_if_already_in_history = (
|
|
997
977
|
"stream": "stream1",
|
998
978
|
},
|
999
979
|
{
|
1000
|
-
"
|
1001
|
-
|
1002
|
-
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
|
1003
|
-
}
|
980
|
+
"history": {"a.csv": "2023-06-05T03:54:07.000000Z", "b.csv": "2023-06-05T03:54:07.000000Z"},
|
981
|
+
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
|
1004
982
|
},
|
1005
983
|
{
|
1006
984
|
"data": {
|
@@ -1023,14 +1001,12 @@ multi_csv_skip_file_if_already_in_history = (
|
|
1023
1001
|
"stream": "stream1",
|
1024
1002
|
},
|
1025
1003
|
{
|
1026
|
-
"
|
1027
|
-
"
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1032
|
-
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
|
1033
|
-
}
|
1004
|
+
"history": {
|
1005
|
+
"a.csv": "2023-06-05T03:54:07.000000Z",
|
1006
|
+
"b.csv": "2023-06-05T03:54:07.000000Z",
|
1007
|
+
"c.csv": "2023-06-06T03:54:07.000000Z",
|
1008
|
+
},
|
1009
|
+
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
|
1034
1010
|
},
|
1035
1011
|
]
|
1036
1012
|
)
|
@@ -1151,14 +1127,12 @@ multi_csv_include_missing_files_within_history_range = (
|
|
1151
1127
|
# {"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c"}, "stream": "stream1"}, # this file is skipped
|
1152
1128
|
# {"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c"}, "stream": "stream1"}, # this file is skipped
|
1153
1129
|
{
|
1154
|
-
"
|
1155
|
-
"
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1159
|
-
|
1160
|
-
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
|
1161
|
-
}
|
1130
|
+
"history": {
|
1131
|
+
"a.csv": "2023-06-05T03:54:07.000000Z",
|
1132
|
+
"b.csv": "2023-06-05T03:54:07.000000Z",
|
1133
|
+
"c.csv": "2023-06-06T03:54:07.000000Z",
|
1134
|
+
},
|
1135
|
+
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
|
1162
1136
|
},
|
1163
1137
|
]
|
1164
1138
|
)
|
@@ -1273,14 +1247,12 @@ multi_csv_remove_old_files_if_history_is_full_scenario = (
|
|
1273
1247
|
"stream": "stream1",
|
1274
1248
|
},
|
1275
1249
|
{
|
1276
|
-
"
|
1277
|
-
"
|
1278
|
-
|
1279
|
-
|
1280
|
-
|
1281
|
-
|
1282
|
-
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_old_file_same_timestamp_as_a.csv",
|
1283
|
-
}
|
1250
|
+
"history": {
|
1251
|
+
"very_old_file.csv": "2023-06-02T03:54:07.000000Z",
|
1252
|
+
"old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
|
1253
|
+
"a.csv": "2023-06-06T03:54:07.000000Z",
|
1254
|
+
},
|
1255
|
+
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_old_file_same_timestamp_as_a.csv",
|
1284
1256
|
},
|
1285
1257
|
{
|
1286
1258
|
"data": {
|
@@ -1303,14 +1275,12 @@ multi_csv_remove_old_files_if_history_is_full_scenario = (
|
|
1303
1275
|
"stream": "stream1",
|
1304
1276
|
},
|
1305
1277
|
{
|
1306
|
-
"
|
1307
|
-
"
|
1308
|
-
|
1309
|
-
|
1310
|
-
|
1311
|
-
|
1312
|
-
"_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z_b.csv",
|
1313
|
-
}
|
1278
|
+
"history": {
|
1279
|
+
"old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
|
1280
|
+
"a.csv": "2023-06-06T03:54:07.000000Z",
|
1281
|
+
"b.csv": "2023-06-07T03:54:07.000000Z",
|
1282
|
+
},
|
1283
|
+
"_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z_b.csv",
|
1314
1284
|
},
|
1315
1285
|
{
|
1316
1286
|
"data": {
|
@@ -1333,14 +1303,12 @@ multi_csv_remove_old_files_if_history_is_full_scenario = (
|
|
1333
1303
|
"stream": "stream1",
|
1334
1304
|
},
|
1335
1305
|
{
|
1336
|
-
"
|
1337
|
-
"
|
1338
|
-
|
1339
|
-
|
1340
|
-
|
1341
|
-
|
1342
|
-
"_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z_c.csv",
|
1343
|
-
}
|
1306
|
+
"history": {
|
1307
|
+
"old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
|
1308
|
+
"b.csv": "2023-06-07T03:54:07.000000Z",
|
1309
|
+
"c.csv": "2023-06-10T03:54:07.000000Z",
|
1310
|
+
},
|
1311
|
+
"_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z_c.csv",
|
1344
1312
|
},
|
1345
1313
|
]
|
1346
1314
|
)
|
@@ -1528,14 +1496,12 @@ multi_csv_same_timestamp_more_files_than_history_size_scenario = (
|
|
1528
1496
|
"stream": "stream1",
|
1529
1497
|
},
|
1530
1498
|
{
|
1531
|
-
"
|
1532
|
-
"
|
1533
|
-
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1537
|
-
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
|
1538
|
-
}
|
1499
|
+
"history": {
|
1500
|
+
"b.csv": "2023-06-05T03:54:07.000000Z",
|
1501
|
+
"c.csv": "2023-06-05T03:54:07.000000Z",
|
1502
|
+
"d.csv": "2023-06-05T03:54:07.000000Z",
|
1503
|
+
},
|
1504
|
+
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
|
1539
1505
|
},
|
1540
1506
|
]
|
1541
1507
|
)
|
@@ -1634,14 +1600,12 @@ multi_csv_sync_recent_files_if_history_is_incomplete_scenario = (
|
|
1634
1600
|
.set_expected_records(
|
1635
1601
|
[
|
1636
1602
|
{
|
1637
|
-
"
|
1638
|
-
"
|
1639
|
-
|
1640
|
-
|
1641
|
-
|
1642
|
-
|
1643
|
-
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
|
1644
|
-
}
|
1603
|
+
"history": {
|
1604
|
+
"b.csv": "2023-06-05T03:54:07.000000Z",
|
1605
|
+
"c.csv": "2023-06-05T03:54:07.000000Z",
|
1606
|
+
"d.csv": "2023-06-05T03:54:07.000000Z",
|
1607
|
+
},
|
1608
|
+
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
|
1645
1609
|
}
|
1646
1610
|
]
|
1647
1611
|
)
|
@@ -1773,14 +1737,12 @@ multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_time
|
|
1773
1737
|
"stream": "stream1",
|
1774
1738
|
},
|
1775
1739
|
{
|
1776
|
-
"
|
1777
|
-
"
|
1778
|
-
|
1779
|
-
|
1780
|
-
|
1781
|
-
|
1782
|
-
"_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_e.csv",
|
1783
|
-
}
|
1740
|
+
"history": {
|
1741
|
+
"c.csv": "2023-06-07T03:54:07.000000Z",
|
1742
|
+
"d.csv": "2023-06-08T03:54:07.000000Z",
|
1743
|
+
"e.csv": "2023-06-08T03:54:07.000000Z",
|
1744
|
+
},
|
1745
|
+
"_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_e.csv",
|
1784
1746
|
},
|
1785
1747
|
]
|
1786
1748
|
)
|
@@ -1908,14 +1870,12 @@ multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_differe
|
|
1908
1870
|
"stream": "stream1",
|
1909
1871
|
},
|
1910
1872
|
{
|
1911
|
-
"
|
1912
|
-
"
|
1913
|
-
|
1914
|
-
|
1915
|
-
|
1916
|
-
|
1917
|
-
"_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
|
1918
|
-
}
|
1873
|
+
"history": {
|
1874
|
+
"a.csv": "2023-06-05T03:54:07.000000Z",
|
1875
|
+
"c.csv": "2023-06-07T03:54:07.000000Z",
|
1876
|
+
"d.csv": "2023-06-08T03:54:07.000000Z",
|
1877
|
+
},
|
1878
|
+
"_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
|
1919
1879
|
},
|
1920
1880
|
{
|
1921
1881
|
"data": {
|
@@ -1938,14 +1898,12 @@ multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_differe
|
|
1938
1898
|
"stream": "stream1",
|
1939
1899
|
},
|
1940
1900
|
{
|
1941
|
-
"
|
1942
|
-
"
|
1943
|
-
|
1944
|
-
|
1945
|
-
|
1946
|
-
|
1947
|
-
"_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
|
1948
|
-
}
|
1901
|
+
"history": {
|
1902
|
+
"b.csv": "2023-06-06T03:54:07.000000Z",
|
1903
|
+
"c.csv": "2023-06-07T03:54:07.000000Z",
|
1904
|
+
"d.csv": "2023-06-08T03:54:07.000000Z",
|
1905
|
+
},
|
1906
|
+
"_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
|
1949
1907
|
},
|
1950
1908
|
]
|
1951
1909
|
)
|
@@ -226,25 +226,14 @@ class StreamFacadeTest(unittest.TestCase):
|
|
226
226
|
|
227
227
|
assert actual_stream_data == expected_stream_data
|
228
228
|
|
229
|
-
def
|
229
|
+
def test_read_records(self):
|
230
230
|
expected_stream_data = [{"data": 1}, {"data": 2}]
|
231
231
|
records = [Record(data, "stream") for data in expected_stream_data]
|
232
232
|
partition = Mock()
|
233
233
|
partition.read.return_value = records
|
234
234
|
self._abstract_stream.generate_partitions.return_value = [partition]
|
235
235
|
|
236
|
-
actual_stream_data = list(self._facade.
|
237
|
-
|
238
|
-
assert actual_stream_data == expected_stream_data
|
239
|
-
|
240
|
-
def test_read_records_incremental(self):
|
241
|
-
expected_stream_data = [{"data": 1}, {"data": 2}]
|
242
|
-
records = [Record(data, "stream") for data in expected_stream_data]
|
243
|
-
partition = Mock()
|
244
|
-
partition.read.return_value = records
|
245
|
-
self._abstract_stream.generate_partitions.return_value = [partition]
|
246
|
-
|
247
|
-
actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))
|
236
|
+
actual_stream_data = list(self._facade.read(None, None, None, None, None, None))
|
248
237
|
|
249
238
|
assert actual_stream_data == expected_stream_data
|
250
239
|
|
@@ -182,7 +182,7 @@ def test_add_file(
|
|
182
182
|
uri: RemoteFile(uri=uri, last_modified=datetime.strptime(timestamp, DATE_TIME_FORMAT)) for uri, timestamp in expected_pending_files
|
183
183
|
}
|
184
184
|
assert (
|
185
|
-
mock_message_repository.emit_message.call_args_list[0].args[0].state.
|
185
|
+
mock_message_repository.emit_message.call_args_list[0].args[0].state.stream.stream_state._ab_source_file_last_modified
|
186
186
|
== expected_cursor_value
|
187
187
|
)
|
188
188
|
|
@@ -233,7 +233,7 @@ def test_add_file_invalid(
|
|
233
233
|
}
|
234
234
|
assert mock_message_repository.emit_message.call_args_list[0].args[0].log.level.value == "WARN"
|
235
235
|
assert (
|
236
|
-
mock_message_repository.emit_message.call_args_list[1].args[0].state.
|
236
|
+
mock_message_repository.emit_message.call_args_list[1].args[0].state.stream.stream_state._ab_source_file_last_modified
|
237
237
|
== expected_cursor_value
|
238
238
|
)
|
239
239
|
|
@@ -109,10 +109,10 @@ def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[Abstrac
|
|
109
109
|
if hasattr(scenario.source, "cursor_cls") and issubclass(scenario.source.cursor_cls, AbstractConcurrentFileBasedCursor):
|
110
110
|
# Only check the last state emitted because we don't know the order the others will be in.
|
111
111
|
# This may be needed for non-file-based concurrent scenarios too.
|
112
|
-
assert states[-1].state.
|
112
|
+
assert states[-1].state.stream.stream_state.dict() == expected_states[-1]
|
113
113
|
else:
|
114
114
|
for actual, expected in zip(states, expected_states): # states should be emitted in sorted order
|
115
|
-
assert actual.state.
|
115
|
+
assert actual.state.stream.stream_state.dict() == expected
|
116
116
|
|
117
117
|
if scenario.expected_logs:
|
118
118
|
read_logs = scenario.expected_logs.get("read")
|
@@ -73,11 +73,11 @@ test_incremental_stream_with_slice_boundaries_no_input_state = (
|
|
73
73
|
[
|
74
74
|
{"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
|
75
75
|
{"data": {"id": "2", "cursor_field": 1}, "stream": "stream1"},
|
76
|
-
{"
|
76
|
+
{"cursor_field": 1},
|
77
77
|
{"data": {"id": "3", "cursor_field": 2}, "stream": "stream1"},
|
78
78
|
{"data": {"id": "4", "cursor_field": 3}, "stream": "stream1"},
|
79
|
-
{"
|
80
|
-
{"
|
79
|
+
{"cursor_field": 2},
|
80
|
+
{"cursor_field": 2}, # see Cursor.ensure_at_least_one_state_emitted
|
81
81
|
]
|
82
82
|
)
|
83
83
|
.set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
|
@@ -150,11 +150,11 @@ test_incremental_stream_with_slice_boundaries_with_legacy_state = (
|
|
150
150
|
[
|
151
151
|
{"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
|
152
152
|
{"data": {"id": "2", "cursor_field": 1}, "stream": "stream1"},
|
153
|
-
{"
|
153
|
+
{"cursor_field": 1},
|
154
154
|
{"data": {"id": "3", "cursor_field": 2}, "stream": "stream1"},
|
155
155
|
{"data": {"id": "4", "cursor_field": 3}, "stream": "stream1"},
|
156
|
-
{"
|
157
|
-
{"
|
156
|
+
{"cursor_field": 2},
|
157
|
+
{"cursor_field": 2}, # see Cursor.ensure_at_least_one_state_emitted
|
158
158
|
]
|
159
159
|
)
|
160
160
|
.set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
|
@@ -237,11 +237,11 @@ test_incremental_stream_with_slice_boundaries_with_concurrent_state = (
|
|
237
237
|
[
|
238
238
|
{"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
|
239
239
|
{"data": {"id": "2", "cursor_field": 1}, "stream": "stream1"},
|
240
|
-
{"
|
240
|
+
{"cursor_field": 1},
|
241
241
|
{"data": {"id": "3", "cursor_field": 2}, "stream": "stream1"},
|
242
242
|
{"data": {"id": "4", "cursor_field": 3}, "stream": "stream1"},
|
243
|
-
{"
|
244
|
-
{"
|
243
|
+
{"cursor_field": 2},
|
244
|
+
{"cursor_field": 2}, # see Cursor.ensure_at_least_one_state_emitted
|
245
245
|
]
|
246
246
|
)
|
247
247
|
.set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
|
@@ -357,11 +357,11 @@ test_incremental_stream_with_slice_boundaries = (
|
|
357
357
|
[
|
358
358
|
{"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
|
359
359
|
{"data": {"id": "2", "cursor_field": 1}, "stream": "stream1"},
|
360
|
-
{"
|
360
|
+
{"cursor_field": 1},
|
361
361
|
{"data": {"id": "3", "cursor_field": 2}, "stream": "stream1"},
|
362
362
|
{"data": {"id": "4", "cursor_field": 3}, "stream": "stream1"},
|
363
|
-
{"
|
364
|
-
{"
|
363
|
+
{"cursor_field": 2},
|
364
|
+
{"cursor_field": 2}, # see Cursor.ensure_at_least_one_state_emitted
|
365
365
|
]
|
366
366
|
)
|
367
367
|
.set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
|
@@ -403,8 +403,8 @@ test_incremental_stream_without_slice_boundaries = (
|
|
403
403
|
[
|
404
404
|
{"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
|
405
405
|
{"data": {"id": "2", "cursor_field": 3}, "stream": "stream1"},
|
406
|
-
{"
|
407
|
-
{"
|
406
|
+
{"cursor_field": 3},
|
407
|
+
{"cursor_field": 3}, # see Cursor.ensure_at_least_one_state_emitted
|
408
408
|
]
|
409
409
|
)
|
410
410
|
.set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
|
@@ -244,25 +244,14 @@ class StreamFacadeTest(unittest.TestCase):
|
|
244
244
|
|
245
245
|
assert actual_stream_data == expected_stream_data
|
246
246
|
|
247
|
-
def
|
247
|
+
def test_read_records(self):
|
248
248
|
expected_stream_data = [{"data": 1}, {"data": 2}]
|
249
249
|
records = [Record(data, "stream") for data in expected_stream_data]
|
250
250
|
partition = Mock()
|
251
251
|
partition.read.return_value = records
|
252
252
|
self._abstract_stream.generate_partitions.return_value = [partition]
|
253
253
|
|
254
|
-
actual_stream_data = list(self._facade.
|
255
|
-
|
256
|
-
assert actual_stream_data == expected_stream_data
|
257
|
-
|
258
|
-
def test_read_records_incremental(self):
|
259
|
-
expected_stream_data = [{"data": 1}, {"data": 2}]
|
260
|
-
records = [Record(data, "stream") for data in expected_stream_data]
|
261
|
-
partition = Mock()
|
262
|
-
partition.read.return_value = records
|
263
|
-
self._abstract_stream.generate_partitions.return_value = [partition]
|
264
|
-
|
265
|
-
actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))
|
254
|
+
actual_stream_data = list(self._facade.read(None, None, None, None, None, None))
|
266
255
|
|
267
256
|
assert actual_stream_data == expected_stream_data
|
268
257
|
|