airbyte-cdk 0.51.15__py3-none-any.whl → 0.51.17__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (29) hide show
  1. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +494 -522
  2. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +1 -1
  3. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -37
  4. airbyte_cdk/sources/file_based/file_based_source.py +1 -1
  5. airbyte_cdk/sources/file_based/file_types/__init__.py +11 -6
  6. airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -1
  7. airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -1
  8. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -2
  9. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +5 -5
  10. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +7 -5
  11. airbyte_cdk/utils/datetime_format_inferrer.py +8 -4
  12. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/RECORD +29 -29
  14. unit_tests/sources/file_based/file_types/test_avro_parser.py +6 -6
  15. unit_tests/sources/file_based/scenarios/avro_scenarios.py +5 -6
  16. unit_tests/sources/file_based/scenarios/check_scenarios.py +8 -8
  17. unit_tests/sources/file_based/scenarios/csv_scenarios.py +19 -42
  18. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +15 -15
  19. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +13 -12
  20. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +5 -9
  21. unit_tests/sources/file_based/scenarios/scenario_builder.py +1 -1
  22. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +16 -16
  23. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +9 -9
  24. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +2 -1
  25. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +6 -3
  26. unit_tests/utils/test_datetime_format_inferrer.py +1 -0
  27. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/LICENSE.txt +0 -0
  28. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/WHEEL +0 -0
  29. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,7 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
5
6
  from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError
6
7
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
7
8
  from unit_tests.sources.file_based.helpers import EmptySchemaParser, LowInferenceLimitDiscoveryPolicy
@@ -15,7 +16,7 @@ single_csv_scenario = (
15
16
  "streams": [
16
17
  {
17
18
  "name": "stream1",
18
- "file_type": "csv",
19
+ "format": {"filetype": "csv"},
19
20
  "globs": ["*"],
20
21
  "validation_policy": "Emit Record",
21
22
  }
@@ -64,11 +65,6 @@ single_csv_scenario = (
64
65
  "type": "object",
65
66
  "properties": {
66
67
  "name": {"title": "Name", "description": "The name of the stream.", "type": "string"},
67
- "file_type": {
68
- "title": "File Type",
69
- "description": "The data file type that is being extracted for a stream.",
70
- "type": "string",
71
- },
72
68
  "globs": {
73
69
  "title": "Globs",
74
70
  "description": 'The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look <a href="https://en.wikipedia.org/wiki/Glob_(programming)">here</a>.',
@@ -278,7 +274,7 @@ single_csv_scenario = (
278
274
  "type": "boolean",
279
275
  },
280
276
  },
281
- "required": ["name", "file_type"],
277
+ "required": ["name", "format"],
282
278
  },
283
279
  },
284
280
  },
@@ -339,7 +335,7 @@ multi_csv_scenario = (
339
335
  "streams": [
340
336
  {
341
337
  "name": "stream1",
342
- "file_type": "csv",
338
+ "format": {"filetype": "csv"},
343
339
  "globs": ["*"],
344
340
  "validation_policy": "Emit Record",
345
341
  }
@@ -441,7 +437,7 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = (
441
437
  "streams": [
442
438
  {
443
439
  "name": "stream1",
444
- "file_type": "csv",
440
+ "format": {"filetype": "csv"},
445
441
  "globs": ["*"],
446
442
  "validation_policy": "Emit Record",
447
443
  }
@@ -535,13 +531,13 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = (
535
531
 
536
532
  invalid_csv_scenario = (
537
533
  TestScenarioBuilder()
538
- .set_name("invalid_csv_scenario")
534
+ .set_name("invalid_csv_scenario") # too many values for the number of headers
539
535
  .set_config(
540
536
  {
541
537
  "streams": [
542
538
  {
543
539
  "name": "stream1",
544
- "file_type": "csv",
540
+ "format": {"filetype": "csv"},
545
541
  "globs": ["*"],
546
542
  "validation_policy": "Emit Record",
547
543
  }
@@ -604,7 +600,7 @@ csv_single_stream_scenario = (
604
600
  "streams": [
605
601
  {
606
602
  "name": "stream1",
607
- "file_type": "csv",
603
+ "format": {"filetype": "csv"},
608
604
  "globs": ["*.csv"],
609
605
  "validation_policy": "Emit Record",
610
606
  }
@@ -684,13 +680,13 @@ csv_multi_stream_scenario = (
684
680
  "streams": [
685
681
  {
686
682
  "name": "stream1",
687
- "file_type": "csv",
683
+ "format": {"filetype": "csv"},
688
684
  "globs": ["*.csv"],
689
685
  "validation_policy": "Emit Record",
690
686
  },
691
687
  {
692
688
  "name": "stream2",
693
- "file_type": "csv",
689
+ "format": {"filetype": "csv"},
694
690
  "globs": ["b.csv"],
695
691
  "validation_policy": "Emit Record",
696
692
  },
@@ -802,7 +798,6 @@ csv_custom_format_scenario = (
802
798
  "streams": [
803
799
  {
804
800
  "name": "stream1",
805
- "file_type": "csv",
806
801
  "globs": ["*"],
807
802
  "validation_policy": "Emit Record",
808
803
  "format": {
@@ -908,14 +903,12 @@ multi_stream_custom_format = (
908
903
  "streams": [
909
904
  {
910
905
  "name": "stream1",
911
- "file_type": "csv",
912
906
  "globs": ["*.csv"],
913
907
  "validation_policy": "Emit Record",
914
908
  "format": {"filetype": "csv", "delimiter": "#", "escape_char": "!", "double_quote": True, "newlines_in_values": False},
915
909
  },
916
910
  {
917
911
  "name": "stream2",
918
- "file_type": "csv",
919
912
  "globs": ["b.csv"],
920
913
  "validation_policy": "Emit Record",
921
914
  "format": {
@@ -1055,7 +1048,7 @@ empty_schema_inference_scenario = (
1055
1048
  "streams": [
1056
1049
  {
1057
1050
  "name": "stream1",
1058
- "file_type": "csv",
1051
+ "format": {"filetype": "csv"},
1059
1052
  "globs": ["*"],
1060
1053
  "validation_policy": "Emit Record",
1061
1054
  }
@@ -1096,7 +1089,7 @@ empty_schema_inference_scenario = (
1096
1089
  ]
1097
1090
  }
1098
1091
  )
1099
- .set_parsers({"csv": EmptySchemaParser()})
1092
+ .set_parsers({CsvFormat: EmptySchemaParser()})
1100
1093
  .set_expected_discover_error(AirbyteTracedException, FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value)
1101
1094
  .set_expected_records(
1102
1095
  [
@@ -1130,7 +1123,7 @@ schemaless_csv_scenario = (
1130
1123
  "streams": [
1131
1124
  {
1132
1125
  "name": "stream1",
1133
- "file_type": "csv",
1126
+ "format": {"filetype": "csv"},
1134
1127
  "globs": ["*"],
1135
1128
  "validation_policy": "Skip Record",
1136
1129
  "schemaless": True,
@@ -1225,14 +1218,14 @@ schemaless_csv_multi_stream_scenario = (
1225
1218
  "streams": [
1226
1219
  {
1227
1220
  "name": "stream1",
1228
- "file_type": "csv",
1221
+ "format": {"filetype": "csv"},
1229
1222
  "globs": ["a.csv"],
1230
1223
  "validation_policy": "Skip Record",
1231
1224
  "schemaless": True,
1232
1225
  },
1233
1226
  {
1234
1227
  "name": "stream2",
1235
- "file_type": "csv",
1228
+ "format": {"filetype": "csv"},
1236
1229
  "globs": ["b.csv"],
1237
1230
  "validation_policy": "Skip Record",
1238
1231
  },
@@ -1332,7 +1325,7 @@ schemaless_with_user_input_schema_fails_connection_check_scenario = (
1332
1325
  "streams": [
1333
1326
  {
1334
1327
  "name": "stream1",
1335
- "file_type": "csv",
1328
+ "format": {"filetype": "csv"},
1336
1329
  "globs": ["*"],
1337
1330
  "validation_policy": "Skip Record",
1338
1331
  "input_schema": '{"col1": "string", "col2": "string", "col3": "string"}',
@@ -1396,7 +1389,7 @@ schemaless_with_user_input_schema_fails_connection_check_multi_stream_scenario =
1396
1389
  "streams": [
1397
1390
  {
1398
1391
  "name": "stream1",
1399
- "file_type": "csv",
1392
+ "format": {"filetype": "csv"},
1400
1393
  "globs": ["a.csv"],
1401
1394
  "validation_policy": "Skip Record",
1402
1395
  "schemaless": True,
@@ -1404,7 +1397,7 @@ schemaless_with_user_input_schema_fails_connection_check_multi_stream_scenario =
1404
1397
  },
1405
1398
  {
1406
1399
  "name": "stream2",
1407
- "file_type": "csv",
1400
+ "format": {"filetype": "csv"},
1408
1401
  "globs": ["b.csv"],
1409
1402
  "validation_policy": "Skip Record",
1410
1403
  },
@@ -1480,7 +1473,6 @@ csv_string_can_be_null_with_input_schemas_scenario = (
1480
1473
  "streams": [
1481
1474
  {
1482
1475
  "name": "stream1",
1483
- "file_type": "csv",
1484
1476
  "globs": ["*"],
1485
1477
  "validation_policy": "Emit Record",
1486
1478
  "input_schema": '{"col1": "string", "col2": "string"}',
@@ -1549,7 +1541,6 @@ csv_string_are_not_null_if_strings_can_be_null_is_false_scenario = (
1549
1541
  "streams": [
1550
1542
  {
1551
1543
  "name": "stream1",
1552
- "file_type": "csv",
1553
1544
  "globs": ["*"],
1554
1545
  "validation_policy": "Emit Record",
1555
1546
  "input_schema": '{"col1": "string", "col2": "string"}',
@@ -1619,7 +1610,6 @@ csv_string_not_null_if_no_null_values_scenario = (
1619
1610
  "streams": [
1620
1611
  {
1621
1612
  "name": "stream1",
1622
- "file_type": "csv",
1623
1613
  "globs": ["*"],
1624
1614
  "validation_policy": "Emit Record",
1625
1615
  "format": {
@@ -1686,7 +1676,6 @@ csv_strings_can_be_null_not_quoted_scenario = (
1686
1676
  "streams": [
1687
1677
  {
1688
1678
  "name": "stream1",
1689
- "file_type": "csv",
1690
1679
  "globs": ["*"],
1691
1680
  "validation_policy": "Emit Record",
1692
1681
  "format": {"filetype": "csv", "null_values": ["null"]},
@@ -1751,7 +1740,6 @@ csv_newline_in_values_quoted_value_scenario = (
1751
1740
  "streams": [
1752
1741
  {
1753
1742
  "name": "stream1",
1754
- "file_type": "csv",
1755
1743
  "globs": ["*"],
1756
1744
  "validation_policy": "Emit Record",
1757
1745
  "format": {
@@ -1818,7 +1806,6 @@ csv_newline_in_values_not_quoted_scenario = (
1818
1806
  "streams": [
1819
1807
  {
1820
1808
  "name": "stream1",
1821
- "file_type": "csv",
1822
1809
  "globs": ["*"],
1823
1810
  "validation_policy": "Emit Record",
1824
1811
  "format": {
@@ -1897,7 +1884,6 @@ csv_escape_char_is_set_scenario = (
1897
1884
  "streams": [
1898
1885
  {
1899
1886
  "name": "stream1",
1900
- "file_type": "csv",
1901
1887
  "globs": ["*"],
1902
1888
  "validation_policy": "Emit Record",
1903
1889
  "format": {
@@ -1969,7 +1955,6 @@ csv_double_quote_is_set_scenario = (
1969
1955
  "streams": [
1970
1956
  {
1971
1957
  "name": "stream1",
1972
- "file_type": "csv",
1973
1958
  "globs": ["*"],
1974
1959
  "validation_policy": "Emit Record",
1975
1960
  "format": {
@@ -2040,7 +2025,6 @@ csv_custom_delimiter_with_escape_char_scenario = (
2040
2025
  "streams": [
2041
2026
  {
2042
2027
  "name": "stream1",
2043
- "file_type": "csv",
2044
2028
  "globs": ["*"],
2045
2029
  "validation_policy": "Emit Record",
2046
2030
  "format": {"filetype": "csv", "double_quotes": True, "quote_char": "@", "delimiter": "|", "escape_char": "+"},
@@ -2106,7 +2090,6 @@ csv_custom_delimiter_in_double_quotes_scenario = (
2106
2090
  "streams": [
2107
2091
  {
2108
2092
  "name": "stream1",
2109
- "file_type": "csv",
2110
2093
  "globs": ["*"],
2111
2094
  "validation_policy": "Emit Record",
2112
2095
  "format": {
@@ -2176,7 +2159,6 @@ csv_skip_before_header_scenario = (
2176
2159
  "streams": [
2177
2160
  {
2178
2161
  "name": "stream1",
2179
- "file_type": "csv",
2180
2162
  "globs": ["*"],
2181
2163
  "validation_policy": "Emit Record",
2182
2164
  "format": {"filetype": "csv", "skip_rows_before_header": 2},
@@ -2243,7 +2225,6 @@ csv_skip_after_header_scenario = (
2243
2225
  "streams": [
2244
2226
  {
2245
2227
  "name": "stream1",
2246
- "file_type": "csv",
2247
2228
  "globs": ["*"],
2248
2229
  "validation_policy": "Emit Record",
2249
2230
  "format": {"filetype": "csv", "skip_rows_after_header": 2},
@@ -2310,7 +2291,6 @@ csv_skip_before_and_after_header_scenario = (
2310
2291
  "streams": [
2311
2292
  {
2312
2293
  "name": "stream1",
2313
- "file_type": "csv",
2314
2294
  "globs": ["*"],
2315
2295
  "validation_policy": "Emit Record",
2316
2296
  "format": {
@@ -2381,7 +2361,6 @@ csv_autogenerate_column_names_scenario = (
2381
2361
  "streams": [
2382
2362
  {
2383
2363
  "name": "stream1",
2384
- "file_type": "csv",
2385
2364
  "globs": ["*"],
2386
2365
  "validation_policy": "Emit Record",
2387
2366
  "format": {
@@ -2448,7 +2427,6 @@ csv_custom_bool_values_scenario = (
2448
2427
  "streams": [
2449
2428
  {
2450
2429
  "name": "stream1",
2451
- "file_type": "csv",
2452
2430
  "globs": ["*"],
2453
2431
  "validation_policy": "Emit Record",
2454
2432
  "input_schema": '{"col1": "boolean", "col2": "boolean"}',
@@ -2518,7 +2496,6 @@ csv_custom_null_values_scenario = (
2518
2496
  "streams": [
2519
2497
  {
2520
2498
  "name": "stream1",
2521
- "file_type": "csv",
2522
2499
  "globs": ["*"],
2523
2500
  "validation_policy": "Emit Record",
2524
2501
  "input_schema": '{"col1": "boolean", "col2": "string"}',
@@ -2587,7 +2564,7 @@ earlier_csv_scenario = (
2587
2564
  "streams": [
2588
2565
  {
2589
2566
  "name": "stream1",
2590
- "file_type": "csv",
2567
+ "format": {"filetype": "csv"},
2591
2568
  "globs": ["*"],
2592
2569
  "validation_policy": "Emit Record",
2593
2570
  }
@@ -13,7 +13,7 @@ single_csv_input_state_is_earlier_scenario = (
13
13
  "streams": [
14
14
  {
15
15
  "name": "stream1",
16
- "file_type": "csv",
16
+ "format": {"filetype": "csv"},
17
17
  "globs": ["*.csv"],
18
18
  "validation_policy": "Emit Record",
19
19
  }
@@ -100,7 +100,7 @@ single_csv_file_is_skipped_if_same_modified_at_as_in_history = (
100
100
  "streams": [
101
101
  {
102
102
  "name": "stream1",
103
- "file_type": "csv",
103
+ "format": {"filetype": "csv"},
104
104
  "globs": ["*.csv"],
105
105
  "validation_policy": "Emit Record",
106
106
  }
@@ -184,7 +184,7 @@ single_csv_file_is_synced_if_modified_at_is_more_recent_than_in_history = (
184
184
  "streams": [
185
185
  {
186
186
  "name": "stream1",
187
- "file_type": "csv",
187
+ "format": {"filetype": "csv"},
188
188
  "globs": ["*.csv"],
189
189
  "validation_policy": "Emit Record",
190
190
  }
@@ -270,7 +270,7 @@ single_csv_no_input_state_scenario = (
270
270
  "streams": [
271
271
  {
272
272
  "name": "stream1",
273
- "file_type": "csv",
273
+ "format": {"filetype": "csv"},
274
274
  "globs": ["*.csv"],
275
275
  "validation_policy": "Emit Record",
276
276
  }
@@ -344,7 +344,7 @@ multi_csv_same_timestamp_scenario = (
344
344
  "streams": [
345
345
  {
346
346
  "name": "stream1",
347
- "file_type": "csv",
347
+ "format": {"filetype": "csv"},
348
348
  "globs": ["*.csv"],
349
349
  "validation_policy": "Emit Record",
350
350
  }
@@ -435,7 +435,7 @@ single_csv_input_state_is_later_scenario = (
435
435
  "streams": [
436
436
  {
437
437
  "name": "stream1",
438
- "file_type": "csv",
438
+ "format": {"filetype": "csv"},
439
439
  "globs": ["*.csv"],
440
440
  "validation_policy": "Emit Record",
441
441
  }
@@ -521,7 +521,7 @@ multi_csv_different_timestamps_scenario = (
521
521
  "streams": [
522
522
  {
523
523
  "name": "stream1",
524
- "file_type": "csv",
524
+ "format": {"filetype": "csv"},
525
525
  "globs": ["*.csv"],
526
526
  "validation_policy": "Emit Record",
527
527
  }
@@ -620,7 +620,7 @@ multi_csv_per_timestamp_scenario = (
620
620
  "streams": [
621
621
  {
622
622
  "name": "stream1",
623
- "file_type": "csv",
623
+ "format": {"filetype": "csv"},
624
624
  "globs": ["*.csv"],
625
625
  "validation_policy": "Emit Record",
626
626
  }
@@ -733,7 +733,7 @@ multi_csv_skip_file_if_already_in_history = (
733
733
  "streams": [
734
734
  {
735
735
  "name": "stream1",
736
- "file_type": "csv",
736
+ "format": {"filetype": "csv"},
737
737
  "globs": ["*.csv"],
738
738
  "validation_policy": "Emit Record",
739
739
  }
@@ -855,7 +855,7 @@ multi_csv_include_missing_files_within_history_range = (
855
855
  "streams": [
856
856
  {
857
857
  "name": "stream1",
858
- "file_type": "csv",
858
+ "format": {"filetype": "csv"},
859
859
  "globs": ["*.csv"],
860
860
  "validation_policy": "Emit Record",
861
861
  }
@@ -969,7 +969,7 @@ multi_csv_remove_old_files_if_history_is_full_scenario = (
969
969
  "streams": [
970
970
  {
971
971
  "name": "stream1",
972
- "file_type": "csv",
972
+ "format": {"filetype": "csv"},
973
973
  "globs": ["*.csv"],
974
974
  "validation_policy": "Emit Record",
975
975
  }
@@ -1107,7 +1107,7 @@ multi_csv_same_timestamp_more_files_than_history_size_scenario = (
1107
1107
  "streams": [
1108
1108
  {
1109
1109
  "name": "stream1",
1110
- "file_type": "csv",
1110
+ "format": {"filetype": "csv"},
1111
1111
  "globs": ["*.csv"],
1112
1112
  "validation_policy": "Emit Record",
1113
1113
  "days_to_sync_if_history_is_full": 3,
@@ -1225,7 +1225,7 @@ multi_csv_sync_recent_files_if_history_is_incomplete_scenario = (
1225
1225
  "streams": [
1226
1226
  {
1227
1227
  "name": "stream1",
1228
- "file_type": "csv",
1228
+ "format": {"filetype": "csv"},
1229
1229
  "globs": ["*.csv"],
1230
1230
  "validation_policy": "Emit Record",
1231
1231
  "days_to_sync_if_history_is_full": 3,
@@ -1342,7 +1342,7 @@ multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_time
1342
1342
  "streams": [
1343
1343
  {
1344
1344
  "name": "stream1",
1345
- "file_type": "csv",
1345
+ "format": {"filetype": "csv"},
1346
1346
  "globs": ["*.csv"],
1347
1347
  "validation_policy": "Emit Record",
1348
1348
  "days_to_sync_if_history_is_full": 3,
@@ -1465,7 +1465,7 @@ multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_differe
1465
1465
  "streams": [
1466
1466
  {
1467
1467
  "name": "stream1",
1468
- "file_type": "csv",
1468
+ "format": {"filetype": "csv"},
1469
1469
  "globs": ["*.csv"],
1470
1470
  "validation_policy": "Emit Record",
1471
1471
  "days_to_sync_if_history_is_full": 3,
@@ -2,6 +2,7 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
5
6
  from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError
6
7
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
7
8
  from unit_tests.sources.file_based.helpers import LowInferenceBytesJsonlParser, LowInferenceLimitDiscoveryPolicy
@@ -15,7 +16,7 @@ single_jsonl_scenario = (
15
16
  "streams": [
16
17
  {
17
18
  "name": "stream1",
18
- "file_type": "jsonl",
19
+ "format": {"filetype": "jsonl"},
19
20
  "globs": ["*"],
20
21
  "validation_policy": "Emit Record",
21
22
  }
@@ -82,7 +83,7 @@ multi_jsonl_with_different_keys_scenario = (
82
83
  "streams": [
83
84
  {
84
85
  "name": "stream1",
85
- "file_type": "jsonl",
86
+ "format": {"filetype": "jsonl"},
86
87
  "globs": ["*"],
87
88
  "validation_policy": "Emit Record",
88
89
  }
@@ -163,7 +164,7 @@ multi_jsonl_stream_n_file_exceeds_limit_for_inference = (
163
164
  "streams": [
164
165
  {
165
166
  "name": "stream1",
166
- "file_type": "jsonl",
167
+ "format": {"filetype": "jsonl"},
167
168
  "globs": ["*"],
168
169
  "validation_policy": "Emit Record",
169
170
  }
@@ -241,7 +242,7 @@ multi_jsonl_stream_n_bytes_exceeds_limit_for_inference = (
241
242
  "streams": [
242
243
  {
243
244
  "name": "stream1",
244
- "file_type": "jsonl",
245
+ "format": {"filetype": "jsonl"},
245
246
  "globs": ["*"],
246
247
  "validation_policy": "Emit Record",
247
248
  }
@@ -307,7 +308,7 @@ multi_jsonl_stream_n_bytes_exceeds_limit_for_inference = (
307
308
  "_ab_source_file_url": "b.jsonl"}, "stream": "stream1"},
308
309
  ]
309
310
  )
310
- .set_parsers({"jsonl": LowInferenceBytesJsonlParser()})
311
+ .set_parsers({JsonlFormat: LowInferenceBytesJsonlParser()})
311
312
  ).build()
312
313
 
313
314
 
@@ -319,7 +320,7 @@ invalid_jsonl_scenario = (
319
320
  "streams": [
320
321
  {
321
322
  "name": "stream1",
322
- "file_type": "jsonl",
323
+ "format": {"filetype": "jsonl"},
323
324
  "globs": ["*"],
324
325
  "validation_policy": "Emit Record",
325
326
  }
@@ -390,13 +391,13 @@ jsonl_multi_stream_scenario = (
390
391
  "streams": [
391
392
  {
392
393
  "name": "stream1",
393
- "file_type": "jsonl",
394
+ "format": {"filetype": "jsonl"},
394
395
  "globs": ["*.jsonl"],
395
396
  "validation_policy": "Emit Record",
396
397
  },
397
398
  {
398
399
  "name": "stream2",
399
- "file_type": "jsonl",
400
+ "format": {"filetype": "jsonl"},
400
401
  "globs": ["b.jsonl"],
401
402
  "validation_policy": "Emit Record",
402
403
  }
@@ -501,7 +502,7 @@ schemaless_jsonl_scenario = (
501
502
  "streams": [
502
503
  {
503
504
  "name": "stream1",
504
- "file_type": "jsonl",
505
+ "format": {"filetype": "jsonl"},
505
506
  "globs": ["*"],
506
507
  "validation_policy": "Skip Record",
507
508
  "schemaless": True,
@@ -577,14 +578,14 @@ schemaless_jsonl_multi_stream_scenario = (
577
578
  "streams": [
578
579
  {
579
580
  "name": "stream1",
580
- "file_type": "jsonl",
581
+ "format": {"filetype": "jsonl"},
581
582
  "globs": ["a.jsonl"],
582
583
  "validation_policy": "Skip Record",
583
584
  "schemaless": True,
584
585
  },
585
586
  {
586
587
  "name": "stream2",
587
- "file_type": "jsonl",
588
+ "format": {"filetype": "jsonl"},
588
589
  "globs": ["b.jsonl"],
589
590
  "validation_policy": "Skip Record",
590
591
  }
@@ -678,7 +679,7 @@ jsonl_user_input_schema_scenario = (
678
679
  "streams": [
679
680
  {
680
681
  "name": "stream1",
681
- "file_type": "jsonl",
682
+ "format": {"filetype": "jsonl"},
682
683
  "globs": ["*"],
683
684
  "validation_policy": "Emit Record",
684
685
  "input_schema": '{"col1": "integer", "col2": "string"}'
@@ -171,7 +171,7 @@ single_parquet_scenario = (
171
171
  "streams": [
172
172
  {
173
173
  "name": "stream1",
174
- "file_type": "parquet",
174
+ "format": {"filetype": "parquet"},
175
175
  "globs": ["*"],
176
176
  "validation_policy": "Emit Record",
177
177
  }
@@ -227,7 +227,7 @@ single_partitioned_parquet_scenario = (
227
227
  "streams": [
228
228
  {
229
229
  "name": "stream1",
230
- "file_type": "parquet",
230
+ "format": {"filetype": "parquet"},
231
231
  "globs": ["path_prefix/**/*"],
232
232
  "validation_policy": "Emit Record",
233
233
  }
@@ -289,7 +289,7 @@ multi_parquet_scenario = (
289
289
  "streams": [
290
290
  {
291
291
  "name": "stream1",
292
- "file_type": "parquet",
292
+ "format": {"filetype": "parquet"},
293
293
  "globs": ["*"],
294
294
  "validation_policy": "Emit Record",
295
295
  }
@@ -352,7 +352,7 @@ parquet_various_types_scenario = (
352
352
  "streams": [
353
353
  {
354
354
  "name": "stream1",
355
- "file_type": "parquet",
355
+ "format": {"filetype": "parquet"},
356
356
  "globs": ["*"],
357
357
  "validation_policy": "Emit Record",
358
358
  }
@@ -493,7 +493,7 @@ parquet_file_with_decimal_no_config_scenario = (
493
493
  "streams": [
494
494
  {
495
495
  "name": "stream1",
496
- "file_type": "parquet",
496
+ "format": {"filetype": "parquet"},
497
497
  "globs": ["*"],
498
498
  "validation_policy": "Emit Record",
499
499
  }
@@ -544,7 +544,6 @@ parquet_file_with_decimal_as_string_scenario = (
544
544
  "streams": [
545
545
  {
546
546
  "name": "stream1",
547
- "file_type": "parquet",
548
547
  "globs": ["*"],
549
548
  "validation_policy": "Emit Record",
550
549
  "format": {
@@ -599,7 +598,6 @@ parquet_file_with_decimal_as_float_scenario = (
599
598
  "streams": [
600
599
  {
601
600
  "name": "stream1",
602
- "file_type": "parquet",
603
601
  "globs": ["*"],
604
602
  "validation_policy": "Emit Record",
605
603
  "format": {
@@ -654,7 +652,6 @@ parquet_file_with_decimal_legacy_config_scenario = (
654
652
  "streams": [
655
653
  {
656
654
  "name": "stream1",
657
- "file_type": "parquet",
658
655
  "format": {
659
656
  "filetype": "parquet",
660
657
  },
@@ -708,7 +705,6 @@ parquet_with_invalid_config_scenario = (
708
705
  "streams": [
709
706
  {
710
707
  "name": "stream1",
711
- "file_type": "parquet",
712
708
  "globs": ["*"],
713
709
  "validation_policy": "Emit Record",
714
710
  "format": {
@@ -163,7 +163,7 @@ class TestScenarioBuilder:
163
163
  self._expected_records = expected_records
164
164
  return self
165
165
 
166
- def set_parsers(self, parsers: Mapping[str, FileTypeParser]) -> "TestScenarioBuilder":
166
+ def set_parsers(self, parsers: Mapping[Type[Any], FileTypeParser]) -> "TestScenarioBuilder":
167
167
  self._parsers = parsers
168
168
  return self
169
169