airbyte-cdk 0.51.15__py3-none-any.whl → 0.51.17__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +494 -522
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +1 -1
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -37
- airbyte_cdk/sources/file_based/file_based_source.py +1 -1
- airbyte_cdk/sources/file_based/file_types/__init__.py +11 -6
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -1
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -1
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -2
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +5 -5
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +7 -5
- airbyte_cdk/utils/datetime_format_inferrer.py +8 -4
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/RECORD +29 -29
- unit_tests/sources/file_based/file_types/test_avro_parser.py +6 -6
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +5 -6
- unit_tests/sources/file_based/scenarios/check_scenarios.py +8 -8
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +19 -42
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +15 -15
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +13 -12
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +5 -9
- unit_tests/sources/file_based/scenarios/scenario_builder.py +1 -1
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +16 -16
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +9 -9
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +2 -1
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +6 -3
- unit_tests/utils/test_datetime_format_inferrer.py +1 -0
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,7 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
5
6
|
from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError
|
6
7
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
7
8
|
from unit_tests.sources.file_based.helpers import EmptySchemaParser, LowInferenceLimitDiscoveryPolicy
|
@@ -15,7 +16,7 @@ single_csv_scenario = (
|
|
15
16
|
"streams": [
|
16
17
|
{
|
17
18
|
"name": "stream1",
|
18
|
-
"
|
19
|
+
"format": {"filetype": "csv"},
|
19
20
|
"globs": ["*"],
|
20
21
|
"validation_policy": "Emit Record",
|
21
22
|
}
|
@@ -64,11 +65,6 @@ single_csv_scenario = (
|
|
64
65
|
"type": "object",
|
65
66
|
"properties": {
|
66
67
|
"name": {"title": "Name", "description": "The name of the stream.", "type": "string"},
|
67
|
-
"file_type": {
|
68
|
-
"title": "File Type",
|
69
|
-
"description": "The data file type that is being extracted for a stream.",
|
70
|
-
"type": "string",
|
71
|
-
},
|
72
68
|
"globs": {
|
73
69
|
"title": "Globs",
|
74
70
|
"description": 'The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look <a href="https://en.wikipedia.org/wiki/Glob_(programming)">here</a>.',
|
@@ -278,7 +274,7 @@ single_csv_scenario = (
|
|
278
274
|
"type": "boolean",
|
279
275
|
},
|
280
276
|
},
|
281
|
-
"required": ["name", "
|
277
|
+
"required": ["name", "format"],
|
282
278
|
},
|
283
279
|
},
|
284
280
|
},
|
@@ -339,7 +335,7 @@ multi_csv_scenario = (
|
|
339
335
|
"streams": [
|
340
336
|
{
|
341
337
|
"name": "stream1",
|
342
|
-
"
|
338
|
+
"format": {"filetype": "csv"},
|
343
339
|
"globs": ["*"],
|
344
340
|
"validation_policy": "Emit Record",
|
345
341
|
}
|
@@ -441,7 +437,7 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = (
|
|
441
437
|
"streams": [
|
442
438
|
{
|
443
439
|
"name": "stream1",
|
444
|
-
"
|
440
|
+
"format": {"filetype": "csv"},
|
445
441
|
"globs": ["*"],
|
446
442
|
"validation_policy": "Emit Record",
|
447
443
|
}
|
@@ -535,13 +531,13 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = (
|
|
535
531
|
|
536
532
|
invalid_csv_scenario = (
|
537
533
|
TestScenarioBuilder()
|
538
|
-
.set_name("invalid_csv_scenario")
|
534
|
+
.set_name("invalid_csv_scenario") # too many values for the number of headers
|
539
535
|
.set_config(
|
540
536
|
{
|
541
537
|
"streams": [
|
542
538
|
{
|
543
539
|
"name": "stream1",
|
544
|
-
"
|
540
|
+
"format": {"filetype": "csv"},
|
545
541
|
"globs": ["*"],
|
546
542
|
"validation_policy": "Emit Record",
|
547
543
|
}
|
@@ -604,7 +600,7 @@ csv_single_stream_scenario = (
|
|
604
600
|
"streams": [
|
605
601
|
{
|
606
602
|
"name": "stream1",
|
607
|
-
"
|
603
|
+
"format": {"filetype": "csv"},
|
608
604
|
"globs": ["*.csv"],
|
609
605
|
"validation_policy": "Emit Record",
|
610
606
|
}
|
@@ -684,13 +680,13 @@ csv_multi_stream_scenario = (
|
|
684
680
|
"streams": [
|
685
681
|
{
|
686
682
|
"name": "stream1",
|
687
|
-
"
|
683
|
+
"format": {"filetype": "csv"},
|
688
684
|
"globs": ["*.csv"],
|
689
685
|
"validation_policy": "Emit Record",
|
690
686
|
},
|
691
687
|
{
|
692
688
|
"name": "stream2",
|
693
|
-
"
|
689
|
+
"format": {"filetype": "csv"},
|
694
690
|
"globs": ["b.csv"],
|
695
691
|
"validation_policy": "Emit Record",
|
696
692
|
},
|
@@ -802,7 +798,6 @@ csv_custom_format_scenario = (
|
|
802
798
|
"streams": [
|
803
799
|
{
|
804
800
|
"name": "stream1",
|
805
|
-
"file_type": "csv",
|
806
801
|
"globs": ["*"],
|
807
802
|
"validation_policy": "Emit Record",
|
808
803
|
"format": {
|
@@ -908,14 +903,12 @@ multi_stream_custom_format = (
|
|
908
903
|
"streams": [
|
909
904
|
{
|
910
905
|
"name": "stream1",
|
911
|
-
"file_type": "csv",
|
912
906
|
"globs": ["*.csv"],
|
913
907
|
"validation_policy": "Emit Record",
|
914
908
|
"format": {"filetype": "csv", "delimiter": "#", "escape_char": "!", "double_quote": True, "newlines_in_values": False},
|
915
909
|
},
|
916
910
|
{
|
917
911
|
"name": "stream2",
|
918
|
-
"file_type": "csv",
|
919
912
|
"globs": ["b.csv"],
|
920
913
|
"validation_policy": "Emit Record",
|
921
914
|
"format": {
|
@@ -1055,7 +1048,7 @@ empty_schema_inference_scenario = (
|
|
1055
1048
|
"streams": [
|
1056
1049
|
{
|
1057
1050
|
"name": "stream1",
|
1058
|
-
"
|
1051
|
+
"format": {"filetype": "csv"},
|
1059
1052
|
"globs": ["*"],
|
1060
1053
|
"validation_policy": "Emit Record",
|
1061
1054
|
}
|
@@ -1096,7 +1089,7 @@ empty_schema_inference_scenario = (
|
|
1096
1089
|
]
|
1097
1090
|
}
|
1098
1091
|
)
|
1099
|
-
.set_parsers({
|
1092
|
+
.set_parsers({CsvFormat: EmptySchemaParser()})
|
1100
1093
|
.set_expected_discover_error(AirbyteTracedException, FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value)
|
1101
1094
|
.set_expected_records(
|
1102
1095
|
[
|
@@ -1130,7 +1123,7 @@ schemaless_csv_scenario = (
|
|
1130
1123
|
"streams": [
|
1131
1124
|
{
|
1132
1125
|
"name": "stream1",
|
1133
|
-
"
|
1126
|
+
"format": {"filetype": "csv"},
|
1134
1127
|
"globs": ["*"],
|
1135
1128
|
"validation_policy": "Skip Record",
|
1136
1129
|
"schemaless": True,
|
@@ -1225,14 +1218,14 @@ schemaless_csv_multi_stream_scenario = (
|
|
1225
1218
|
"streams": [
|
1226
1219
|
{
|
1227
1220
|
"name": "stream1",
|
1228
|
-
"
|
1221
|
+
"format": {"filetype": "csv"},
|
1229
1222
|
"globs": ["a.csv"],
|
1230
1223
|
"validation_policy": "Skip Record",
|
1231
1224
|
"schemaless": True,
|
1232
1225
|
},
|
1233
1226
|
{
|
1234
1227
|
"name": "stream2",
|
1235
|
-
"
|
1228
|
+
"format": {"filetype": "csv"},
|
1236
1229
|
"globs": ["b.csv"],
|
1237
1230
|
"validation_policy": "Skip Record",
|
1238
1231
|
},
|
@@ -1332,7 +1325,7 @@ schemaless_with_user_input_schema_fails_connection_check_scenario = (
|
|
1332
1325
|
"streams": [
|
1333
1326
|
{
|
1334
1327
|
"name": "stream1",
|
1335
|
-
"
|
1328
|
+
"format": {"filetype": "csv"},
|
1336
1329
|
"globs": ["*"],
|
1337
1330
|
"validation_policy": "Skip Record",
|
1338
1331
|
"input_schema": '{"col1": "string", "col2": "string", "col3": "string"}',
|
@@ -1396,7 +1389,7 @@ schemaless_with_user_input_schema_fails_connection_check_multi_stream_scenario =
|
|
1396
1389
|
"streams": [
|
1397
1390
|
{
|
1398
1391
|
"name": "stream1",
|
1399
|
-
"
|
1392
|
+
"format": {"filetype": "csv"},
|
1400
1393
|
"globs": ["a.csv"],
|
1401
1394
|
"validation_policy": "Skip Record",
|
1402
1395
|
"schemaless": True,
|
@@ -1404,7 +1397,7 @@ schemaless_with_user_input_schema_fails_connection_check_multi_stream_scenario =
|
|
1404
1397
|
},
|
1405
1398
|
{
|
1406
1399
|
"name": "stream2",
|
1407
|
-
"
|
1400
|
+
"format": {"filetype": "csv"},
|
1408
1401
|
"globs": ["b.csv"],
|
1409
1402
|
"validation_policy": "Skip Record",
|
1410
1403
|
},
|
@@ -1480,7 +1473,6 @@ csv_string_can_be_null_with_input_schemas_scenario = (
|
|
1480
1473
|
"streams": [
|
1481
1474
|
{
|
1482
1475
|
"name": "stream1",
|
1483
|
-
"file_type": "csv",
|
1484
1476
|
"globs": ["*"],
|
1485
1477
|
"validation_policy": "Emit Record",
|
1486
1478
|
"input_schema": '{"col1": "string", "col2": "string"}',
|
@@ -1549,7 +1541,6 @@ csv_string_are_not_null_if_strings_can_be_null_is_false_scenario = (
|
|
1549
1541
|
"streams": [
|
1550
1542
|
{
|
1551
1543
|
"name": "stream1",
|
1552
|
-
"file_type": "csv",
|
1553
1544
|
"globs": ["*"],
|
1554
1545
|
"validation_policy": "Emit Record",
|
1555
1546
|
"input_schema": '{"col1": "string", "col2": "string"}',
|
@@ -1619,7 +1610,6 @@ csv_string_not_null_if_no_null_values_scenario = (
|
|
1619
1610
|
"streams": [
|
1620
1611
|
{
|
1621
1612
|
"name": "stream1",
|
1622
|
-
"file_type": "csv",
|
1623
1613
|
"globs": ["*"],
|
1624
1614
|
"validation_policy": "Emit Record",
|
1625
1615
|
"format": {
|
@@ -1686,7 +1676,6 @@ csv_strings_can_be_null_not_quoted_scenario = (
|
|
1686
1676
|
"streams": [
|
1687
1677
|
{
|
1688
1678
|
"name": "stream1",
|
1689
|
-
"file_type": "csv",
|
1690
1679
|
"globs": ["*"],
|
1691
1680
|
"validation_policy": "Emit Record",
|
1692
1681
|
"format": {"filetype": "csv", "null_values": ["null"]},
|
@@ -1751,7 +1740,6 @@ csv_newline_in_values_quoted_value_scenario = (
|
|
1751
1740
|
"streams": [
|
1752
1741
|
{
|
1753
1742
|
"name": "stream1",
|
1754
|
-
"file_type": "csv",
|
1755
1743
|
"globs": ["*"],
|
1756
1744
|
"validation_policy": "Emit Record",
|
1757
1745
|
"format": {
|
@@ -1818,7 +1806,6 @@ csv_newline_in_values_not_quoted_scenario = (
|
|
1818
1806
|
"streams": [
|
1819
1807
|
{
|
1820
1808
|
"name": "stream1",
|
1821
|
-
"file_type": "csv",
|
1822
1809
|
"globs": ["*"],
|
1823
1810
|
"validation_policy": "Emit Record",
|
1824
1811
|
"format": {
|
@@ -1897,7 +1884,6 @@ csv_escape_char_is_set_scenario = (
|
|
1897
1884
|
"streams": [
|
1898
1885
|
{
|
1899
1886
|
"name": "stream1",
|
1900
|
-
"file_type": "csv",
|
1901
1887
|
"globs": ["*"],
|
1902
1888
|
"validation_policy": "Emit Record",
|
1903
1889
|
"format": {
|
@@ -1969,7 +1955,6 @@ csv_double_quote_is_set_scenario = (
|
|
1969
1955
|
"streams": [
|
1970
1956
|
{
|
1971
1957
|
"name": "stream1",
|
1972
|
-
"file_type": "csv",
|
1973
1958
|
"globs": ["*"],
|
1974
1959
|
"validation_policy": "Emit Record",
|
1975
1960
|
"format": {
|
@@ -2040,7 +2025,6 @@ csv_custom_delimiter_with_escape_char_scenario = (
|
|
2040
2025
|
"streams": [
|
2041
2026
|
{
|
2042
2027
|
"name": "stream1",
|
2043
|
-
"file_type": "csv",
|
2044
2028
|
"globs": ["*"],
|
2045
2029
|
"validation_policy": "Emit Record",
|
2046
2030
|
"format": {"filetype": "csv", "double_quotes": True, "quote_char": "@", "delimiter": "|", "escape_char": "+"},
|
@@ -2106,7 +2090,6 @@ csv_custom_delimiter_in_double_quotes_scenario = (
|
|
2106
2090
|
"streams": [
|
2107
2091
|
{
|
2108
2092
|
"name": "stream1",
|
2109
|
-
"file_type": "csv",
|
2110
2093
|
"globs": ["*"],
|
2111
2094
|
"validation_policy": "Emit Record",
|
2112
2095
|
"format": {
|
@@ -2176,7 +2159,6 @@ csv_skip_before_header_scenario = (
|
|
2176
2159
|
"streams": [
|
2177
2160
|
{
|
2178
2161
|
"name": "stream1",
|
2179
|
-
"file_type": "csv",
|
2180
2162
|
"globs": ["*"],
|
2181
2163
|
"validation_policy": "Emit Record",
|
2182
2164
|
"format": {"filetype": "csv", "skip_rows_before_header": 2},
|
@@ -2243,7 +2225,6 @@ csv_skip_after_header_scenario = (
|
|
2243
2225
|
"streams": [
|
2244
2226
|
{
|
2245
2227
|
"name": "stream1",
|
2246
|
-
"file_type": "csv",
|
2247
2228
|
"globs": ["*"],
|
2248
2229
|
"validation_policy": "Emit Record",
|
2249
2230
|
"format": {"filetype": "csv", "skip_rows_after_header": 2},
|
@@ -2310,7 +2291,6 @@ csv_skip_before_and_after_header_scenario = (
|
|
2310
2291
|
"streams": [
|
2311
2292
|
{
|
2312
2293
|
"name": "stream1",
|
2313
|
-
"file_type": "csv",
|
2314
2294
|
"globs": ["*"],
|
2315
2295
|
"validation_policy": "Emit Record",
|
2316
2296
|
"format": {
|
@@ -2381,7 +2361,6 @@ csv_autogenerate_column_names_scenario = (
|
|
2381
2361
|
"streams": [
|
2382
2362
|
{
|
2383
2363
|
"name": "stream1",
|
2384
|
-
"file_type": "csv",
|
2385
2364
|
"globs": ["*"],
|
2386
2365
|
"validation_policy": "Emit Record",
|
2387
2366
|
"format": {
|
@@ -2448,7 +2427,6 @@ csv_custom_bool_values_scenario = (
|
|
2448
2427
|
"streams": [
|
2449
2428
|
{
|
2450
2429
|
"name": "stream1",
|
2451
|
-
"file_type": "csv",
|
2452
2430
|
"globs": ["*"],
|
2453
2431
|
"validation_policy": "Emit Record",
|
2454
2432
|
"input_schema": '{"col1": "boolean", "col2": "boolean"}',
|
@@ -2518,7 +2496,6 @@ csv_custom_null_values_scenario = (
|
|
2518
2496
|
"streams": [
|
2519
2497
|
{
|
2520
2498
|
"name": "stream1",
|
2521
|
-
"file_type": "csv",
|
2522
2499
|
"globs": ["*"],
|
2523
2500
|
"validation_policy": "Emit Record",
|
2524
2501
|
"input_schema": '{"col1": "boolean", "col2": "string"}',
|
@@ -2587,7 +2564,7 @@ earlier_csv_scenario = (
|
|
2587
2564
|
"streams": [
|
2588
2565
|
{
|
2589
2566
|
"name": "stream1",
|
2590
|
-
"
|
2567
|
+
"format": {"filetype": "csv"},
|
2591
2568
|
"globs": ["*"],
|
2592
2569
|
"validation_policy": "Emit Record",
|
2593
2570
|
}
|
@@ -13,7 +13,7 @@ single_csv_input_state_is_earlier_scenario = (
|
|
13
13
|
"streams": [
|
14
14
|
{
|
15
15
|
"name": "stream1",
|
16
|
-
"
|
16
|
+
"format": {"filetype": "csv"},
|
17
17
|
"globs": ["*.csv"],
|
18
18
|
"validation_policy": "Emit Record",
|
19
19
|
}
|
@@ -100,7 +100,7 @@ single_csv_file_is_skipped_if_same_modified_at_as_in_history = (
|
|
100
100
|
"streams": [
|
101
101
|
{
|
102
102
|
"name": "stream1",
|
103
|
-
"
|
103
|
+
"format": {"filetype": "csv"},
|
104
104
|
"globs": ["*.csv"],
|
105
105
|
"validation_policy": "Emit Record",
|
106
106
|
}
|
@@ -184,7 +184,7 @@ single_csv_file_is_synced_if_modified_at_is_more_recent_than_in_history = (
|
|
184
184
|
"streams": [
|
185
185
|
{
|
186
186
|
"name": "stream1",
|
187
|
-
"
|
187
|
+
"format": {"filetype": "csv"},
|
188
188
|
"globs": ["*.csv"],
|
189
189
|
"validation_policy": "Emit Record",
|
190
190
|
}
|
@@ -270,7 +270,7 @@ single_csv_no_input_state_scenario = (
|
|
270
270
|
"streams": [
|
271
271
|
{
|
272
272
|
"name": "stream1",
|
273
|
-
"
|
273
|
+
"format": {"filetype": "csv"},
|
274
274
|
"globs": ["*.csv"],
|
275
275
|
"validation_policy": "Emit Record",
|
276
276
|
}
|
@@ -344,7 +344,7 @@ multi_csv_same_timestamp_scenario = (
|
|
344
344
|
"streams": [
|
345
345
|
{
|
346
346
|
"name": "stream1",
|
347
|
-
"
|
347
|
+
"format": {"filetype": "csv"},
|
348
348
|
"globs": ["*.csv"],
|
349
349
|
"validation_policy": "Emit Record",
|
350
350
|
}
|
@@ -435,7 +435,7 @@ single_csv_input_state_is_later_scenario = (
|
|
435
435
|
"streams": [
|
436
436
|
{
|
437
437
|
"name": "stream1",
|
438
|
-
"
|
438
|
+
"format": {"filetype": "csv"},
|
439
439
|
"globs": ["*.csv"],
|
440
440
|
"validation_policy": "Emit Record",
|
441
441
|
}
|
@@ -521,7 +521,7 @@ multi_csv_different_timestamps_scenario = (
|
|
521
521
|
"streams": [
|
522
522
|
{
|
523
523
|
"name": "stream1",
|
524
|
-
"
|
524
|
+
"format": {"filetype": "csv"},
|
525
525
|
"globs": ["*.csv"],
|
526
526
|
"validation_policy": "Emit Record",
|
527
527
|
}
|
@@ -620,7 +620,7 @@ multi_csv_per_timestamp_scenario = (
|
|
620
620
|
"streams": [
|
621
621
|
{
|
622
622
|
"name": "stream1",
|
623
|
-
"
|
623
|
+
"format": {"filetype": "csv"},
|
624
624
|
"globs": ["*.csv"],
|
625
625
|
"validation_policy": "Emit Record",
|
626
626
|
}
|
@@ -733,7 +733,7 @@ multi_csv_skip_file_if_already_in_history = (
|
|
733
733
|
"streams": [
|
734
734
|
{
|
735
735
|
"name": "stream1",
|
736
|
-
"
|
736
|
+
"format": {"filetype": "csv"},
|
737
737
|
"globs": ["*.csv"],
|
738
738
|
"validation_policy": "Emit Record",
|
739
739
|
}
|
@@ -855,7 +855,7 @@ multi_csv_include_missing_files_within_history_range = (
|
|
855
855
|
"streams": [
|
856
856
|
{
|
857
857
|
"name": "stream1",
|
858
|
-
"
|
858
|
+
"format": {"filetype": "csv"},
|
859
859
|
"globs": ["*.csv"],
|
860
860
|
"validation_policy": "Emit Record",
|
861
861
|
}
|
@@ -969,7 +969,7 @@ multi_csv_remove_old_files_if_history_is_full_scenario = (
|
|
969
969
|
"streams": [
|
970
970
|
{
|
971
971
|
"name": "stream1",
|
972
|
-
"
|
972
|
+
"format": {"filetype": "csv"},
|
973
973
|
"globs": ["*.csv"],
|
974
974
|
"validation_policy": "Emit Record",
|
975
975
|
}
|
@@ -1107,7 +1107,7 @@ multi_csv_same_timestamp_more_files_than_history_size_scenario = (
|
|
1107
1107
|
"streams": [
|
1108
1108
|
{
|
1109
1109
|
"name": "stream1",
|
1110
|
-
"
|
1110
|
+
"format": {"filetype": "csv"},
|
1111
1111
|
"globs": ["*.csv"],
|
1112
1112
|
"validation_policy": "Emit Record",
|
1113
1113
|
"days_to_sync_if_history_is_full": 3,
|
@@ -1225,7 +1225,7 @@ multi_csv_sync_recent_files_if_history_is_incomplete_scenario = (
|
|
1225
1225
|
"streams": [
|
1226
1226
|
{
|
1227
1227
|
"name": "stream1",
|
1228
|
-
"
|
1228
|
+
"format": {"filetype": "csv"},
|
1229
1229
|
"globs": ["*.csv"],
|
1230
1230
|
"validation_policy": "Emit Record",
|
1231
1231
|
"days_to_sync_if_history_is_full": 3,
|
@@ -1342,7 +1342,7 @@ multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_time
|
|
1342
1342
|
"streams": [
|
1343
1343
|
{
|
1344
1344
|
"name": "stream1",
|
1345
|
-
"
|
1345
|
+
"format": {"filetype": "csv"},
|
1346
1346
|
"globs": ["*.csv"],
|
1347
1347
|
"validation_policy": "Emit Record",
|
1348
1348
|
"days_to_sync_if_history_is_full": 3,
|
@@ -1465,7 +1465,7 @@ multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_differe
|
|
1465
1465
|
"streams": [
|
1466
1466
|
{
|
1467
1467
|
"name": "stream1",
|
1468
|
-
"
|
1468
|
+
"format": {"filetype": "csv"},
|
1469
1469
|
"globs": ["*.csv"],
|
1470
1470
|
"validation_policy": "Emit Record",
|
1471
1471
|
"days_to_sync_if_history_is_full": 3,
|
@@ -2,6 +2,7 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
|
5
6
|
from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError
|
6
7
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
7
8
|
from unit_tests.sources.file_based.helpers import LowInferenceBytesJsonlParser, LowInferenceLimitDiscoveryPolicy
|
@@ -15,7 +16,7 @@ single_jsonl_scenario = (
|
|
15
16
|
"streams": [
|
16
17
|
{
|
17
18
|
"name": "stream1",
|
18
|
-
"
|
19
|
+
"format": {"filetype": "jsonl"},
|
19
20
|
"globs": ["*"],
|
20
21
|
"validation_policy": "Emit Record",
|
21
22
|
}
|
@@ -82,7 +83,7 @@ multi_jsonl_with_different_keys_scenario = (
|
|
82
83
|
"streams": [
|
83
84
|
{
|
84
85
|
"name": "stream1",
|
85
|
-
"
|
86
|
+
"format": {"filetype": "jsonl"},
|
86
87
|
"globs": ["*"],
|
87
88
|
"validation_policy": "Emit Record",
|
88
89
|
}
|
@@ -163,7 +164,7 @@ multi_jsonl_stream_n_file_exceeds_limit_for_inference = (
|
|
163
164
|
"streams": [
|
164
165
|
{
|
165
166
|
"name": "stream1",
|
166
|
-
"
|
167
|
+
"format": {"filetype": "jsonl"},
|
167
168
|
"globs": ["*"],
|
168
169
|
"validation_policy": "Emit Record",
|
169
170
|
}
|
@@ -241,7 +242,7 @@ multi_jsonl_stream_n_bytes_exceeds_limit_for_inference = (
|
|
241
242
|
"streams": [
|
242
243
|
{
|
243
244
|
"name": "stream1",
|
244
|
-
"
|
245
|
+
"format": {"filetype": "jsonl"},
|
245
246
|
"globs": ["*"],
|
246
247
|
"validation_policy": "Emit Record",
|
247
248
|
}
|
@@ -307,7 +308,7 @@ multi_jsonl_stream_n_bytes_exceeds_limit_for_inference = (
|
|
307
308
|
"_ab_source_file_url": "b.jsonl"}, "stream": "stream1"},
|
308
309
|
]
|
309
310
|
)
|
310
|
-
.set_parsers({
|
311
|
+
.set_parsers({JsonlFormat: LowInferenceBytesJsonlParser()})
|
311
312
|
).build()
|
312
313
|
|
313
314
|
|
@@ -319,7 +320,7 @@ invalid_jsonl_scenario = (
|
|
319
320
|
"streams": [
|
320
321
|
{
|
321
322
|
"name": "stream1",
|
322
|
-
"
|
323
|
+
"format": {"filetype": "jsonl"},
|
323
324
|
"globs": ["*"],
|
324
325
|
"validation_policy": "Emit Record",
|
325
326
|
}
|
@@ -390,13 +391,13 @@ jsonl_multi_stream_scenario = (
|
|
390
391
|
"streams": [
|
391
392
|
{
|
392
393
|
"name": "stream1",
|
393
|
-
"
|
394
|
+
"format": {"filetype": "jsonl"},
|
394
395
|
"globs": ["*.jsonl"],
|
395
396
|
"validation_policy": "Emit Record",
|
396
397
|
},
|
397
398
|
{
|
398
399
|
"name": "stream2",
|
399
|
-
"
|
400
|
+
"format": {"filetype": "jsonl"},
|
400
401
|
"globs": ["b.jsonl"],
|
401
402
|
"validation_policy": "Emit Record",
|
402
403
|
}
|
@@ -501,7 +502,7 @@ schemaless_jsonl_scenario = (
|
|
501
502
|
"streams": [
|
502
503
|
{
|
503
504
|
"name": "stream1",
|
504
|
-
"
|
505
|
+
"format": {"filetype": "jsonl"},
|
505
506
|
"globs": ["*"],
|
506
507
|
"validation_policy": "Skip Record",
|
507
508
|
"schemaless": True,
|
@@ -577,14 +578,14 @@ schemaless_jsonl_multi_stream_scenario = (
|
|
577
578
|
"streams": [
|
578
579
|
{
|
579
580
|
"name": "stream1",
|
580
|
-
"
|
581
|
+
"format": {"filetype": "jsonl"},
|
581
582
|
"globs": ["a.jsonl"],
|
582
583
|
"validation_policy": "Skip Record",
|
583
584
|
"schemaless": True,
|
584
585
|
},
|
585
586
|
{
|
586
587
|
"name": "stream2",
|
587
|
-
"
|
588
|
+
"format": {"filetype": "jsonl"},
|
588
589
|
"globs": ["b.jsonl"],
|
589
590
|
"validation_policy": "Skip Record",
|
590
591
|
}
|
@@ -678,7 +679,7 @@ jsonl_user_input_schema_scenario = (
|
|
678
679
|
"streams": [
|
679
680
|
{
|
680
681
|
"name": "stream1",
|
681
|
-
"
|
682
|
+
"format": {"filetype": "jsonl"},
|
682
683
|
"globs": ["*"],
|
683
684
|
"validation_policy": "Emit Record",
|
684
685
|
"input_schema": '{"col1": "integer", "col2": "string"}'
|
@@ -171,7 +171,7 @@ single_parquet_scenario = (
|
|
171
171
|
"streams": [
|
172
172
|
{
|
173
173
|
"name": "stream1",
|
174
|
-
"
|
174
|
+
"format": {"filetype": "parquet"},
|
175
175
|
"globs": ["*"],
|
176
176
|
"validation_policy": "Emit Record",
|
177
177
|
}
|
@@ -227,7 +227,7 @@ single_partitioned_parquet_scenario = (
|
|
227
227
|
"streams": [
|
228
228
|
{
|
229
229
|
"name": "stream1",
|
230
|
-
"
|
230
|
+
"format": {"filetype": "parquet"},
|
231
231
|
"globs": ["path_prefix/**/*"],
|
232
232
|
"validation_policy": "Emit Record",
|
233
233
|
}
|
@@ -289,7 +289,7 @@ multi_parquet_scenario = (
|
|
289
289
|
"streams": [
|
290
290
|
{
|
291
291
|
"name": "stream1",
|
292
|
-
"
|
292
|
+
"format": {"filetype": "parquet"},
|
293
293
|
"globs": ["*"],
|
294
294
|
"validation_policy": "Emit Record",
|
295
295
|
}
|
@@ -352,7 +352,7 @@ parquet_various_types_scenario = (
|
|
352
352
|
"streams": [
|
353
353
|
{
|
354
354
|
"name": "stream1",
|
355
|
-
"
|
355
|
+
"format": {"filetype": "parquet"},
|
356
356
|
"globs": ["*"],
|
357
357
|
"validation_policy": "Emit Record",
|
358
358
|
}
|
@@ -493,7 +493,7 @@ parquet_file_with_decimal_no_config_scenario = (
|
|
493
493
|
"streams": [
|
494
494
|
{
|
495
495
|
"name": "stream1",
|
496
|
-
"
|
496
|
+
"format": {"filetype": "parquet"},
|
497
497
|
"globs": ["*"],
|
498
498
|
"validation_policy": "Emit Record",
|
499
499
|
}
|
@@ -544,7 +544,6 @@ parquet_file_with_decimal_as_string_scenario = (
|
|
544
544
|
"streams": [
|
545
545
|
{
|
546
546
|
"name": "stream1",
|
547
|
-
"file_type": "parquet",
|
548
547
|
"globs": ["*"],
|
549
548
|
"validation_policy": "Emit Record",
|
550
549
|
"format": {
|
@@ -599,7 +598,6 @@ parquet_file_with_decimal_as_float_scenario = (
|
|
599
598
|
"streams": [
|
600
599
|
{
|
601
600
|
"name": "stream1",
|
602
|
-
"file_type": "parquet",
|
603
601
|
"globs": ["*"],
|
604
602
|
"validation_policy": "Emit Record",
|
605
603
|
"format": {
|
@@ -654,7 +652,6 @@ parquet_file_with_decimal_legacy_config_scenario = (
|
|
654
652
|
"streams": [
|
655
653
|
{
|
656
654
|
"name": "stream1",
|
657
|
-
"file_type": "parquet",
|
658
655
|
"format": {
|
659
656
|
"filetype": "parquet",
|
660
657
|
},
|
@@ -708,7 +705,6 @@ parquet_with_invalid_config_scenario = (
|
|
708
705
|
"streams": [
|
709
706
|
{
|
710
707
|
"name": "stream1",
|
711
|
-
"file_type": "parquet",
|
712
708
|
"globs": ["*"],
|
713
709
|
"validation_policy": "Emit Record",
|
714
710
|
"format": {
|
@@ -163,7 +163,7 @@ class TestScenarioBuilder:
|
|
163
163
|
self._expected_records = expected_records
|
164
164
|
return self
|
165
165
|
|
166
|
-
def set_parsers(self, parsers: Mapping[
|
166
|
+
def set_parsers(self, parsers: Mapping[Type[Any], FileTypeParser]) -> "TestScenarioBuilder":
|
167
167
|
self._parsers = parsers
|
168
168
|
return self
|
169
169
|
|