airbyte-cdk 0.51.15__py3-none-any.whl → 0.51.17__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (29)
  1. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +494 -522
  2. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +1 -1
  3. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -37
  4. airbyte_cdk/sources/file_based/file_based_source.py +1 -1
  5. airbyte_cdk/sources/file_based/file_types/__init__.py +11 -6
  6. airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -1
  7. airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -1
  8. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -2
  9. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +5 -5
  10. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +7 -5
  11. airbyte_cdk/utils/datetime_format_inferrer.py +8 -4
  12. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/RECORD +29 -29
  14. unit_tests/sources/file_based/file_types/test_avro_parser.py +6 -6
  15. unit_tests/sources/file_based/scenarios/avro_scenarios.py +5 -6
  16. unit_tests/sources/file_based/scenarios/check_scenarios.py +8 -8
  17. unit_tests/sources/file_based/scenarios/csv_scenarios.py +19 -42
  18. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +15 -15
  19. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +13 -12
  20. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +5 -9
  21. unit_tests/sources/file_based/scenarios/scenario_builder.py +1 -1
  22. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +16 -16
  23. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +9 -9
  24. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +2 -1
  25. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +6 -3
  26. unit_tests/utils/test_datetime_format_inferrer.py +1 -0
  27. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/LICENSE.txt +0 -0
  28. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/WHEEL +0 -0
  29. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,7 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
5
6
  from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError
6
7
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
7
8
  from unit_tests.sources.file_based.helpers import EmptySchemaParser, LowInferenceLimitDiscoveryPolicy
@@ -15,7 +16,7 @@ single_csv_scenario = (
15
16
  "streams": [
16
17
  {
17
18
  "name": "stream1",
18
- "file_type": "csv",
19
+ "format": {"filetype": "csv"},
19
20
  "globs": ["*"],
20
21
  "validation_policy": "Emit Record",
21
22
  }
@@ -64,11 +65,6 @@ single_csv_scenario = (
64
65
  "type": "object",
65
66
  "properties": {
66
67
  "name": {"title": "Name", "description": "The name of the stream.", "type": "string"},
67
- "file_type": {
68
- "title": "File Type",
69
- "description": "The data file type that is being extracted for a stream.",
70
- "type": "string",
71
- },
72
68
  "globs": {
73
69
  "title": "Globs",
74
70
  "description": 'The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look <a href="https://en.wikipedia.org/wiki/Glob_(programming)">here</a>.',
@@ -278,7 +274,7 @@ single_csv_scenario = (
278
274
  "type": "boolean",
279
275
  },
280
276
  },
281
- "required": ["name", "file_type"],
277
+ "required": ["name", "format"],
282
278
  },
283
279
  },
284
280
  },
@@ -339,7 +335,7 @@ multi_csv_scenario = (
339
335
  "streams": [
340
336
  {
341
337
  "name": "stream1",
342
- "file_type": "csv",
338
+ "format": {"filetype": "csv"},
343
339
  "globs": ["*"],
344
340
  "validation_policy": "Emit Record",
345
341
  }
@@ -441,7 +437,7 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = (
441
437
  "streams": [
442
438
  {
443
439
  "name": "stream1",
444
- "file_type": "csv",
440
+ "format": {"filetype": "csv"},
445
441
  "globs": ["*"],
446
442
  "validation_policy": "Emit Record",
447
443
  }
@@ -535,13 +531,13 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = (
535
531
 
536
532
  invalid_csv_scenario = (
537
533
  TestScenarioBuilder()
538
- .set_name("invalid_csv_scenario")
534
+ .set_name("invalid_csv_scenario") # too many values for the number of headers
539
535
  .set_config(
540
536
  {
541
537
  "streams": [
542
538
  {
543
539
  "name": "stream1",
544
- "file_type": "csv",
540
+ "format": {"filetype": "csv"},
545
541
  "globs": ["*"],
546
542
  "validation_policy": "Emit Record",
547
543
  }
@@ -604,7 +600,7 @@ csv_single_stream_scenario = (
604
600
  "streams": [
605
601
  {
606
602
  "name": "stream1",
607
- "file_type": "csv",
603
+ "format": {"filetype": "csv"},
608
604
  "globs": ["*.csv"],
609
605
  "validation_policy": "Emit Record",
610
606
  }
@@ -684,13 +680,13 @@ csv_multi_stream_scenario = (
684
680
  "streams": [
685
681
  {
686
682
  "name": "stream1",
687
- "file_type": "csv",
683
+ "format": {"filetype": "csv"},
688
684
  "globs": ["*.csv"],
689
685
  "validation_policy": "Emit Record",
690
686
  },
691
687
  {
692
688
  "name": "stream2",
693
- "file_type": "csv",
689
+ "format": {"filetype": "csv"},
694
690
  "globs": ["b.csv"],
695
691
  "validation_policy": "Emit Record",
696
692
  },
@@ -802,7 +798,6 @@ csv_custom_format_scenario = (
802
798
  "streams": [
803
799
  {
804
800
  "name": "stream1",
805
- "file_type": "csv",
806
801
  "globs": ["*"],
807
802
  "validation_policy": "Emit Record",
808
803
  "format": {
@@ -908,14 +903,12 @@ multi_stream_custom_format = (
908
903
  "streams": [
909
904
  {
910
905
  "name": "stream1",
911
- "file_type": "csv",
912
906
  "globs": ["*.csv"],
913
907
  "validation_policy": "Emit Record",
914
908
  "format": {"filetype": "csv", "delimiter": "#", "escape_char": "!", "double_quote": True, "newlines_in_values": False},
915
909
  },
916
910
  {
917
911
  "name": "stream2",
918
- "file_type": "csv",
919
912
  "globs": ["b.csv"],
920
913
  "validation_policy": "Emit Record",
921
914
  "format": {
@@ -1055,7 +1048,7 @@ empty_schema_inference_scenario = (
1055
1048
  "streams": [
1056
1049
  {
1057
1050
  "name": "stream1",
1058
- "file_type": "csv",
1051
+ "format": {"filetype": "csv"},
1059
1052
  "globs": ["*"],
1060
1053
  "validation_policy": "Emit Record",
1061
1054
  }
@@ -1096,7 +1089,7 @@ empty_schema_inference_scenario = (
1096
1089
  ]
1097
1090
  }
1098
1091
  )
1099
- .set_parsers({"csv": EmptySchemaParser()})
1092
+ .set_parsers({CsvFormat: EmptySchemaParser()})
1100
1093
  .set_expected_discover_error(AirbyteTracedException, FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value)
1101
1094
  .set_expected_records(
1102
1095
  [
@@ -1130,7 +1123,7 @@ schemaless_csv_scenario = (
1130
1123
  "streams": [
1131
1124
  {
1132
1125
  "name": "stream1",
1133
- "file_type": "csv",
1126
+ "format": {"filetype": "csv"},
1134
1127
  "globs": ["*"],
1135
1128
  "validation_policy": "Skip Record",
1136
1129
  "schemaless": True,
@@ -1225,14 +1218,14 @@ schemaless_csv_multi_stream_scenario = (
1225
1218
  "streams": [
1226
1219
  {
1227
1220
  "name": "stream1",
1228
- "file_type": "csv",
1221
+ "format": {"filetype": "csv"},
1229
1222
  "globs": ["a.csv"],
1230
1223
  "validation_policy": "Skip Record",
1231
1224
  "schemaless": True,
1232
1225
  },
1233
1226
  {
1234
1227
  "name": "stream2",
1235
- "file_type": "csv",
1228
+ "format": {"filetype": "csv"},
1236
1229
  "globs": ["b.csv"],
1237
1230
  "validation_policy": "Skip Record",
1238
1231
  },
@@ -1332,7 +1325,7 @@ schemaless_with_user_input_schema_fails_connection_check_scenario = (
1332
1325
  "streams": [
1333
1326
  {
1334
1327
  "name": "stream1",
1335
- "file_type": "csv",
1328
+ "format": {"filetype": "csv"},
1336
1329
  "globs": ["*"],
1337
1330
  "validation_policy": "Skip Record",
1338
1331
  "input_schema": '{"col1": "string", "col2": "string", "col3": "string"}',
@@ -1396,7 +1389,7 @@ schemaless_with_user_input_schema_fails_connection_check_multi_stream_scenario =
1396
1389
  "streams": [
1397
1390
  {
1398
1391
  "name": "stream1",
1399
- "file_type": "csv",
1392
+ "format": {"filetype": "csv"},
1400
1393
  "globs": ["a.csv"],
1401
1394
  "validation_policy": "Skip Record",
1402
1395
  "schemaless": True,
@@ -1404,7 +1397,7 @@ schemaless_with_user_input_schema_fails_connection_check_multi_stream_scenario =
1404
1397
  },
1405
1398
  {
1406
1399
  "name": "stream2",
1407
- "file_type": "csv",
1400
+ "format": {"filetype": "csv"},
1408
1401
  "globs": ["b.csv"],
1409
1402
  "validation_policy": "Skip Record",
1410
1403
  },
@@ -1480,7 +1473,6 @@ csv_string_can_be_null_with_input_schemas_scenario = (
1480
1473
  "streams": [
1481
1474
  {
1482
1475
  "name": "stream1",
1483
- "file_type": "csv",
1484
1476
  "globs": ["*"],
1485
1477
  "validation_policy": "Emit Record",
1486
1478
  "input_schema": '{"col1": "string", "col2": "string"}',
@@ -1549,7 +1541,6 @@ csv_string_are_not_null_if_strings_can_be_null_is_false_scenario = (
1549
1541
  "streams": [
1550
1542
  {
1551
1543
  "name": "stream1",
1552
- "file_type": "csv",
1553
1544
  "globs": ["*"],
1554
1545
  "validation_policy": "Emit Record",
1555
1546
  "input_schema": '{"col1": "string", "col2": "string"}',
@@ -1619,7 +1610,6 @@ csv_string_not_null_if_no_null_values_scenario = (
1619
1610
  "streams": [
1620
1611
  {
1621
1612
  "name": "stream1",
1622
- "file_type": "csv",
1623
1613
  "globs": ["*"],
1624
1614
  "validation_policy": "Emit Record",
1625
1615
  "format": {
@@ -1686,7 +1676,6 @@ csv_strings_can_be_null_not_quoted_scenario = (
1686
1676
  "streams": [
1687
1677
  {
1688
1678
  "name": "stream1",
1689
- "file_type": "csv",
1690
1679
  "globs": ["*"],
1691
1680
  "validation_policy": "Emit Record",
1692
1681
  "format": {"filetype": "csv", "null_values": ["null"]},
@@ -1751,7 +1740,6 @@ csv_newline_in_values_quoted_value_scenario = (
1751
1740
  "streams": [
1752
1741
  {
1753
1742
  "name": "stream1",
1754
- "file_type": "csv",
1755
1743
  "globs": ["*"],
1756
1744
  "validation_policy": "Emit Record",
1757
1745
  "format": {
@@ -1818,7 +1806,6 @@ csv_newline_in_values_not_quoted_scenario = (
1818
1806
  "streams": [
1819
1807
  {
1820
1808
  "name": "stream1",
1821
- "file_type": "csv",
1822
1809
  "globs": ["*"],
1823
1810
  "validation_policy": "Emit Record",
1824
1811
  "format": {
@@ -1897,7 +1884,6 @@ csv_escape_char_is_set_scenario = (
1897
1884
  "streams": [
1898
1885
  {
1899
1886
  "name": "stream1",
1900
- "file_type": "csv",
1901
1887
  "globs": ["*"],
1902
1888
  "validation_policy": "Emit Record",
1903
1889
  "format": {
@@ -1969,7 +1955,6 @@ csv_double_quote_is_set_scenario = (
1969
1955
  "streams": [
1970
1956
  {
1971
1957
  "name": "stream1",
1972
- "file_type": "csv",
1973
1958
  "globs": ["*"],
1974
1959
  "validation_policy": "Emit Record",
1975
1960
  "format": {
@@ -2040,7 +2025,6 @@ csv_custom_delimiter_with_escape_char_scenario = (
2040
2025
  "streams": [
2041
2026
  {
2042
2027
  "name": "stream1",
2043
- "file_type": "csv",
2044
2028
  "globs": ["*"],
2045
2029
  "validation_policy": "Emit Record",
2046
2030
  "format": {"filetype": "csv", "double_quotes": True, "quote_char": "@", "delimiter": "|", "escape_char": "+"},
@@ -2106,7 +2090,6 @@ csv_custom_delimiter_in_double_quotes_scenario = (
2106
2090
  "streams": [
2107
2091
  {
2108
2092
  "name": "stream1",
2109
- "file_type": "csv",
2110
2093
  "globs": ["*"],
2111
2094
  "validation_policy": "Emit Record",
2112
2095
  "format": {
@@ -2176,7 +2159,6 @@ csv_skip_before_header_scenario = (
2176
2159
  "streams": [
2177
2160
  {
2178
2161
  "name": "stream1",
2179
- "file_type": "csv",
2180
2162
  "globs": ["*"],
2181
2163
  "validation_policy": "Emit Record",
2182
2164
  "format": {"filetype": "csv", "skip_rows_before_header": 2},
@@ -2243,7 +2225,6 @@ csv_skip_after_header_scenario = (
2243
2225
  "streams": [
2244
2226
  {
2245
2227
  "name": "stream1",
2246
- "file_type": "csv",
2247
2228
  "globs": ["*"],
2248
2229
  "validation_policy": "Emit Record",
2249
2230
  "format": {"filetype": "csv", "skip_rows_after_header": 2},
@@ -2310,7 +2291,6 @@ csv_skip_before_and_after_header_scenario = (
2310
2291
  "streams": [
2311
2292
  {
2312
2293
  "name": "stream1",
2313
- "file_type": "csv",
2314
2294
  "globs": ["*"],
2315
2295
  "validation_policy": "Emit Record",
2316
2296
  "format": {
@@ -2381,7 +2361,6 @@ csv_autogenerate_column_names_scenario = (
2381
2361
  "streams": [
2382
2362
  {
2383
2363
  "name": "stream1",
2384
- "file_type": "csv",
2385
2364
  "globs": ["*"],
2386
2365
  "validation_policy": "Emit Record",
2387
2366
  "format": {
@@ -2448,7 +2427,6 @@ csv_custom_bool_values_scenario = (
2448
2427
  "streams": [
2449
2428
  {
2450
2429
  "name": "stream1",
2451
- "file_type": "csv",
2452
2430
  "globs": ["*"],
2453
2431
  "validation_policy": "Emit Record",
2454
2432
  "input_schema": '{"col1": "boolean", "col2": "boolean"}',
@@ -2518,7 +2496,6 @@ csv_custom_null_values_scenario = (
2518
2496
  "streams": [
2519
2497
  {
2520
2498
  "name": "stream1",
2521
- "file_type": "csv",
2522
2499
  "globs": ["*"],
2523
2500
  "validation_policy": "Emit Record",
2524
2501
  "input_schema": '{"col1": "boolean", "col2": "string"}',
@@ -2587,7 +2564,7 @@ earlier_csv_scenario = (
2587
2564
  "streams": [
2588
2565
  {
2589
2566
  "name": "stream1",
2590
- "file_type": "csv",
2567
+ "format": {"filetype": "csv"},
2591
2568
  "globs": ["*"],
2592
2569
  "validation_policy": "Emit Record",
2593
2570
  }
@@ -13,7 +13,7 @@ single_csv_input_state_is_earlier_scenario = (
13
13
  "streams": [
14
14
  {
15
15
  "name": "stream1",
16
- "file_type": "csv",
16
+ "format": {"filetype": "csv"},
17
17
  "globs": ["*.csv"],
18
18
  "validation_policy": "Emit Record",
19
19
  }
@@ -100,7 +100,7 @@ single_csv_file_is_skipped_if_same_modified_at_as_in_history = (
100
100
  "streams": [
101
101
  {
102
102
  "name": "stream1",
103
- "file_type": "csv",
103
+ "format": {"filetype": "csv"},
104
104
  "globs": ["*.csv"],
105
105
  "validation_policy": "Emit Record",
106
106
  }
@@ -184,7 +184,7 @@ single_csv_file_is_synced_if_modified_at_is_more_recent_than_in_history = (
184
184
  "streams": [
185
185
  {
186
186
  "name": "stream1",
187
- "file_type": "csv",
187
+ "format": {"filetype": "csv"},
188
188
  "globs": ["*.csv"],
189
189
  "validation_policy": "Emit Record",
190
190
  }
@@ -270,7 +270,7 @@ single_csv_no_input_state_scenario = (
270
270
  "streams": [
271
271
  {
272
272
  "name": "stream1",
273
- "file_type": "csv",
273
+ "format": {"filetype": "csv"},
274
274
  "globs": ["*.csv"],
275
275
  "validation_policy": "Emit Record",
276
276
  }
@@ -344,7 +344,7 @@ multi_csv_same_timestamp_scenario = (
344
344
  "streams": [
345
345
  {
346
346
  "name": "stream1",
347
- "file_type": "csv",
347
+ "format": {"filetype": "csv"},
348
348
  "globs": ["*.csv"],
349
349
  "validation_policy": "Emit Record",
350
350
  }
@@ -435,7 +435,7 @@ single_csv_input_state_is_later_scenario = (
435
435
  "streams": [
436
436
  {
437
437
  "name": "stream1",
438
- "file_type": "csv",
438
+ "format": {"filetype": "csv"},
439
439
  "globs": ["*.csv"],
440
440
  "validation_policy": "Emit Record",
441
441
  }
@@ -521,7 +521,7 @@ multi_csv_different_timestamps_scenario = (
521
521
  "streams": [
522
522
  {
523
523
  "name": "stream1",
524
- "file_type": "csv",
524
+ "format": {"filetype": "csv"},
525
525
  "globs": ["*.csv"],
526
526
  "validation_policy": "Emit Record",
527
527
  }
@@ -620,7 +620,7 @@ multi_csv_per_timestamp_scenario = (
620
620
  "streams": [
621
621
  {
622
622
  "name": "stream1",
623
- "file_type": "csv",
623
+ "format": {"filetype": "csv"},
624
624
  "globs": ["*.csv"],
625
625
  "validation_policy": "Emit Record",
626
626
  }
@@ -733,7 +733,7 @@ multi_csv_skip_file_if_already_in_history = (
733
733
  "streams": [
734
734
  {
735
735
  "name": "stream1",
736
- "file_type": "csv",
736
+ "format": {"filetype": "csv"},
737
737
  "globs": ["*.csv"],
738
738
  "validation_policy": "Emit Record",
739
739
  }
@@ -855,7 +855,7 @@ multi_csv_include_missing_files_within_history_range = (
855
855
  "streams": [
856
856
  {
857
857
  "name": "stream1",
858
- "file_type": "csv",
858
+ "format": {"filetype": "csv"},
859
859
  "globs": ["*.csv"],
860
860
  "validation_policy": "Emit Record",
861
861
  }
@@ -969,7 +969,7 @@ multi_csv_remove_old_files_if_history_is_full_scenario = (
969
969
  "streams": [
970
970
  {
971
971
  "name": "stream1",
972
- "file_type": "csv",
972
+ "format": {"filetype": "csv"},
973
973
  "globs": ["*.csv"],
974
974
  "validation_policy": "Emit Record",
975
975
  }
@@ -1107,7 +1107,7 @@ multi_csv_same_timestamp_more_files_than_history_size_scenario = (
1107
1107
  "streams": [
1108
1108
  {
1109
1109
  "name": "stream1",
1110
- "file_type": "csv",
1110
+ "format": {"filetype": "csv"},
1111
1111
  "globs": ["*.csv"],
1112
1112
  "validation_policy": "Emit Record",
1113
1113
  "days_to_sync_if_history_is_full": 3,
@@ -1225,7 +1225,7 @@ multi_csv_sync_recent_files_if_history_is_incomplete_scenario = (
1225
1225
  "streams": [
1226
1226
  {
1227
1227
  "name": "stream1",
1228
- "file_type": "csv",
1228
+ "format": {"filetype": "csv"},
1229
1229
  "globs": ["*.csv"],
1230
1230
  "validation_policy": "Emit Record",
1231
1231
  "days_to_sync_if_history_is_full": 3,
@@ -1342,7 +1342,7 @@ multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_time
1342
1342
  "streams": [
1343
1343
  {
1344
1344
  "name": "stream1",
1345
- "file_type": "csv",
1345
+ "format": {"filetype": "csv"},
1346
1346
  "globs": ["*.csv"],
1347
1347
  "validation_policy": "Emit Record",
1348
1348
  "days_to_sync_if_history_is_full": 3,
@@ -1465,7 +1465,7 @@ multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_differe
1465
1465
  "streams": [
1466
1466
  {
1467
1467
  "name": "stream1",
1468
- "file_type": "csv",
1468
+ "format": {"filetype": "csv"},
1469
1469
  "globs": ["*.csv"],
1470
1470
  "validation_policy": "Emit Record",
1471
1471
  "days_to_sync_if_history_is_full": 3,
@@ -2,6 +2,7 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
5
6
  from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError
6
7
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
7
8
  from unit_tests.sources.file_based.helpers import LowInferenceBytesJsonlParser, LowInferenceLimitDiscoveryPolicy
@@ -15,7 +16,7 @@ single_jsonl_scenario = (
15
16
  "streams": [
16
17
  {
17
18
  "name": "stream1",
18
- "file_type": "jsonl",
19
+ "format": {"filetype": "jsonl"},
19
20
  "globs": ["*"],
20
21
  "validation_policy": "Emit Record",
21
22
  }
@@ -82,7 +83,7 @@ multi_jsonl_with_different_keys_scenario = (
82
83
  "streams": [
83
84
  {
84
85
  "name": "stream1",
85
- "file_type": "jsonl",
86
+ "format": {"filetype": "jsonl"},
86
87
  "globs": ["*"],
87
88
  "validation_policy": "Emit Record",
88
89
  }
@@ -163,7 +164,7 @@ multi_jsonl_stream_n_file_exceeds_limit_for_inference = (
163
164
  "streams": [
164
165
  {
165
166
  "name": "stream1",
166
- "file_type": "jsonl",
167
+ "format": {"filetype": "jsonl"},
167
168
  "globs": ["*"],
168
169
  "validation_policy": "Emit Record",
169
170
  }
@@ -241,7 +242,7 @@ multi_jsonl_stream_n_bytes_exceeds_limit_for_inference = (
241
242
  "streams": [
242
243
  {
243
244
  "name": "stream1",
244
- "file_type": "jsonl",
245
+ "format": {"filetype": "jsonl"},
245
246
  "globs": ["*"],
246
247
  "validation_policy": "Emit Record",
247
248
  }
@@ -307,7 +308,7 @@ multi_jsonl_stream_n_bytes_exceeds_limit_for_inference = (
307
308
  "_ab_source_file_url": "b.jsonl"}, "stream": "stream1"},
308
309
  ]
309
310
  )
310
- .set_parsers({"jsonl": LowInferenceBytesJsonlParser()})
311
+ .set_parsers({JsonlFormat: LowInferenceBytesJsonlParser()})
311
312
  ).build()
312
313
 
313
314
 
@@ -319,7 +320,7 @@ invalid_jsonl_scenario = (
319
320
  "streams": [
320
321
  {
321
322
  "name": "stream1",
322
- "file_type": "jsonl",
323
+ "format": {"filetype": "jsonl"},
323
324
  "globs": ["*"],
324
325
  "validation_policy": "Emit Record",
325
326
  }
@@ -390,13 +391,13 @@ jsonl_multi_stream_scenario = (
390
391
  "streams": [
391
392
  {
392
393
  "name": "stream1",
393
- "file_type": "jsonl",
394
+ "format": {"filetype": "jsonl"},
394
395
  "globs": ["*.jsonl"],
395
396
  "validation_policy": "Emit Record",
396
397
  },
397
398
  {
398
399
  "name": "stream2",
399
- "file_type": "jsonl",
400
+ "format": {"filetype": "jsonl"},
400
401
  "globs": ["b.jsonl"],
401
402
  "validation_policy": "Emit Record",
402
403
  }
@@ -501,7 +502,7 @@ schemaless_jsonl_scenario = (
501
502
  "streams": [
502
503
  {
503
504
  "name": "stream1",
504
- "file_type": "jsonl",
505
+ "format": {"filetype": "jsonl"},
505
506
  "globs": ["*"],
506
507
  "validation_policy": "Skip Record",
507
508
  "schemaless": True,
@@ -577,14 +578,14 @@ schemaless_jsonl_multi_stream_scenario = (
577
578
  "streams": [
578
579
  {
579
580
  "name": "stream1",
580
- "file_type": "jsonl",
581
+ "format": {"filetype": "jsonl"},
581
582
  "globs": ["a.jsonl"],
582
583
  "validation_policy": "Skip Record",
583
584
  "schemaless": True,
584
585
  },
585
586
  {
586
587
  "name": "stream2",
587
- "file_type": "jsonl",
588
+ "format": {"filetype": "jsonl"},
588
589
  "globs": ["b.jsonl"],
589
590
  "validation_policy": "Skip Record",
590
591
  }
@@ -678,7 +679,7 @@ jsonl_user_input_schema_scenario = (
678
679
  "streams": [
679
680
  {
680
681
  "name": "stream1",
681
- "file_type": "jsonl",
682
+ "format": {"filetype": "jsonl"},
682
683
  "globs": ["*"],
683
684
  "validation_policy": "Emit Record",
684
685
  "input_schema": '{"col1": "integer", "col2": "string"}'
@@ -171,7 +171,7 @@ single_parquet_scenario = (
171
171
  "streams": [
172
172
  {
173
173
  "name": "stream1",
174
- "file_type": "parquet",
174
+ "format": {"filetype": "parquet"},
175
175
  "globs": ["*"],
176
176
  "validation_policy": "Emit Record",
177
177
  }
@@ -227,7 +227,7 @@ single_partitioned_parquet_scenario = (
227
227
  "streams": [
228
228
  {
229
229
  "name": "stream1",
230
- "file_type": "parquet",
230
+ "format": {"filetype": "parquet"},
231
231
  "globs": ["path_prefix/**/*"],
232
232
  "validation_policy": "Emit Record",
233
233
  }
@@ -289,7 +289,7 @@ multi_parquet_scenario = (
289
289
  "streams": [
290
290
  {
291
291
  "name": "stream1",
292
- "file_type": "parquet",
292
+ "format": {"filetype": "parquet"},
293
293
  "globs": ["*"],
294
294
  "validation_policy": "Emit Record",
295
295
  }
@@ -352,7 +352,7 @@ parquet_various_types_scenario = (
352
352
  "streams": [
353
353
  {
354
354
  "name": "stream1",
355
- "file_type": "parquet",
355
+ "format": {"filetype": "parquet"},
356
356
  "globs": ["*"],
357
357
  "validation_policy": "Emit Record",
358
358
  }
@@ -493,7 +493,7 @@ parquet_file_with_decimal_no_config_scenario = (
493
493
  "streams": [
494
494
  {
495
495
  "name": "stream1",
496
- "file_type": "parquet",
496
+ "format": {"filetype": "parquet"},
497
497
  "globs": ["*"],
498
498
  "validation_policy": "Emit Record",
499
499
  }
@@ -544,7 +544,6 @@ parquet_file_with_decimal_as_string_scenario = (
544
544
  "streams": [
545
545
  {
546
546
  "name": "stream1",
547
- "file_type": "parquet",
548
547
  "globs": ["*"],
549
548
  "validation_policy": "Emit Record",
550
549
  "format": {
@@ -599,7 +598,6 @@ parquet_file_with_decimal_as_float_scenario = (
599
598
  "streams": [
600
599
  {
601
600
  "name": "stream1",
602
- "file_type": "parquet",
603
601
  "globs": ["*"],
604
602
  "validation_policy": "Emit Record",
605
603
  "format": {
@@ -654,7 +652,6 @@ parquet_file_with_decimal_legacy_config_scenario = (
654
652
  "streams": [
655
653
  {
656
654
  "name": "stream1",
657
- "file_type": "parquet",
658
655
  "format": {
659
656
  "filetype": "parquet",
660
657
  },
@@ -708,7 +705,6 @@ parquet_with_invalid_config_scenario = (
708
705
  "streams": [
709
706
  {
710
707
  "name": "stream1",
711
- "file_type": "parquet",
712
708
  "globs": ["*"],
713
709
  "validation_policy": "Emit Record",
714
710
  "format": {
@@ -163,7 +163,7 @@ class TestScenarioBuilder:
163
163
  self._expected_records = expected_records
164
164
  return self
165
165
 
166
- def set_parsers(self, parsers: Mapping[str, FileTypeParser]) -> "TestScenarioBuilder":
166
+ def set_parsers(self, parsers: Mapping[Type[Any], FileTypeParser]) -> "TestScenarioBuilder":
167
167
  self._parsers = parsers
168
168
  return self
169
169