airbyte-cdk 0.51.15__py3-none-any.whl → 0.51.17__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +494 -522
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +1 -1
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -37
- airbyte_cdk/sources/file_based/file_based_source.py +1 -1
- airbyte_cdk/sources/file_based/file_types/__init__.py +11 -6
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -1
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -1
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -2
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +5 -5
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +7 -5
- airbyte_cdk/utils/datetime_format_inferrer.py +8 -4
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/RECORD +29 -29
- unit_tests/sources/file_based/file_types/test_avro_parser.py +6 -6
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +5 -6
- unit_tests/sources/file_based/scenarios/check_scenarios.py +8 -8
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +19 -42
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +15 -15
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +13 -12
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +5 -9
- unit_tests/sources/file_based/scenarios/scenario_builder.py +1 -1
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +16 -16
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +9 -9
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +2 -1
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +6 -3
- unit_tests/utils/test_datetime_format_inferrer.py +1 -0
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/top_level.txt +0 -0
@@ -78,7 +78,7 @@ valid_single_stream_user_input_schema_scenario = (
|
|
78
78
|
"streams": [
|
79
79
|
{
|
80
80
|
"name": "stream1",
|
81
|
-
"
|
81
|
+
"format": {"filetype": "csv"},
|
82
82
|
"globs": ["*"],
|
83
83
|
"validation_policy": "Emit Record",
|
84
84
|
"input_schema": '{"col1": "string", "col2": "string"}',
|
@@ -98,7 +98,7 @@ single_stream_user_input_schema_scenario_schema_is_invalid = (
|
|
98
98
|
"streams": [
|
99
99
|
{
|
100
100
|
"name": "stream1",
|
101
|
-
"
|
101
|
+
"format": {"filetype": "csv"},
|
102
102
|
"globs": ["*"],
|
103
103
|
"validation_policy": "Emit Record",
|
104
104
|
"input_schema": '{"col1": "x", "col2": "string"}',
|
@@ -121,7 +121,7 @@ single_stream_user_input_schema_scenario_emit_nonconforming_records = (
|
|
121
121
|
"streams": [
|
122
122
|
{
|
123
123
|
"name": "stream1",
|
124
|
-
"
|
124
|
+
"format": {"filetype": "csv"},
|
125
125
|
"globs": ["*"],
|
126
126
|
"validation_policy": "Emit Record",
|
127
127
|
"input_schema": '{"col1": "integer", "col2": "string"}',
|
@@ -171,7 +171,7 @@ single_stream_user_input_schema_scenario_skip_nonconforming_records = (
|
|
171
171
|
"streams": [
|
172
172
|
{
|
173
173
|
"name": "stream1",
|
174
|
-
"
|
174
|
+
"format": {"filetype": "csv"},
|
175
175
|
"globs": ["*"],
|
176
176
|
"validation_policy": "Skip Record",
|
177
177
|
"input_schema": '{"col1": "integer", "col2": "string"}',
|
@@ -364,21 +364,21 @@ valid_multi_stream_user_input_schema_scenario = (
|
|
364
364
|
"streams": [
|
365
365
|
{
|
366
366
|
"name": "stream1",
|
367
|
-
"
|
367
|
+
"format": {"filetype": "csv"},
|
368
368
|
"globs": ["a.csv"],
|
369
369
|
"validation_policy": "Emit Record",
|
370
370
|
"input_schema": '{"col1": "string", "col2": "integer"}',
|
371
371
|
},
|
372
372
|
{
|
373
373
|
"name": "stream2",
|
374
|
-
"
|
374
|
+
"format": {"filetype": "csv"},
|
375
375
|
"globs": ["b.csv"],
|
376
376
|
"validation_policy": "Emit Record",
|
377
377
|
"input_schema": '{"col1": "string", "col2": "string", "col3": "string"}',
|
378
378
|
},
|
379
379
|
{
|
380
380
|
"name": "stream3",
|
381
|
-
"
|
381
|
+
"format": {"filetype": "csv"},
|
382
382
|
"globs": ["c.csv"],
|
383
383
|
"validation_policy": "Emit Record",
|
384
384
|
},
|
@@ -398,21 +398,21 @@ multi_stream_user_input_schema_scenario_schema_is_invalid = (
|
|
398
398
|
"streams": [
|
399
399
|
{
|
400
400
|
"name": "stream1",
|
401
|
-
"
|
401
|
+
"format": {"filetype": "csv"},
|
402
402
|
"globs": ["a.csv"],
|
403
403
|
"validation_policy": "Emit Record",
|
404
404
|
"input_schema": '{"col1": "string", "col2": "integer"}',
|
405
405
|
},
|
406
406
|
{
|
407
407
|
"name": "stream2",
|
408
|
-
"
|
408
|
+
"format": {"filetype": "csv"},
|
409
409
|
"globs": ["b.csv"],
|
410
410
|
"validation_policy": "Emit Record",
|
411
411
|
"input_schema": '{"col1": "x", "col2": "string", "col3": "string"}', # this stream's schema is invalid
|
412
412
|
},
|
413
413
|
{
|
414
414
|
"name": "stream3",
|
415
|
-
"
|
415
|
+
"format": {"filetype": "csv"},
|
416
416
|
"globs": ["c.csv"],
|
417
417
|
"validation_policy": "Emit Record",
|
418
418
|
},
|
@@ -435,21 +435,21 @@ multi_stream_user_input_schema_scenario_emit_nonconforming_records = (
|
|
435
435
|
"streams": [
|
436
436
|
{
|
437
437
|
"name": "stream1",
|
438
|
-
"
|
438
|
+
"format": {"filetype": "csv"},
|
439
439
|
"globs": ["a.csv"],
|
440
440
|
"validation_policy": "Emit Record",
|
441
441
|
"input_schema": '{"col1": "string", "col2": "integer"}',
|
442
442
|
},
|
443
443
|
{
|
444
444
|
"name": "stream2",
|
445
|
-
"
|
445
|
+
"format": {"filetype": "csv"},
|
446
446
|
"globs": ["b.csv"],
|
447
447
|
"validation_policy": "Emit Record",
|
448
448
|
"input_schema": '{"col1": "string", "col2": "integer", "col3": "string"}', # this stream's records do not conform to the schema
|
449
449
|
},
|
450
450
|
{
|
451
451
|
"name": "stream3",
|
452
|
-
"
|
452
|
+
"format": {"filetype": "csv"},
|
453
453
|
"globs": ["c.csv"],
|
454
454
|
"validation_policy": "Emit Record",
|
455
455
|
},
|
@@ -574,21 +574,21 @@ multi_stream_user_input_schema_scenario_skip_nonconforming_records = (
|
|
574
574
|
"streams": [
|
575
575
|
{
|
576
576
|
"name": "stream1",
|
577
|
-
"
|
577
|
+
"format": {"filetype": "csv"},
|
578
578
|
"globs": ["a.csv"],
|
579
579
|
"validation_policy": "Emit Record",
|
580
580
|
"input_schema": '{"col1": "string", "col2": "integer"}',
|
581
581
|
},
|
582
582
|
{
|
583
583
|
"name": "stream2",
|
584
|
-
"
|
584
|
+
"format": {"filetype": "csv"},
|
585
585
|
"globs": ["b.csv"],
|
586
586
|
"validation_policy": "Skip Record",
|
587
587
|
"input_schema": '{"col1": "string", "col2": "integer", "col3": "string"}', # this stream's records do not conform to the schema
|
588
588
|
},
|
589
589
|
{
|
590
590
|
"name": "stream3",
|
591
|
-
"
|
591
|
+
"format": {"filetype": "csv"},
|
592
592
|
"globs": ["c.csv"],
|
593
593
|
"validation_policy": "Emit Record",
|
594
594
|
},
|
@@ -204,7 +204,7 @@ skip_record_scenario_single_stream = (
|
|
204
204
|
"streams": [
|
205
205
|
{
|
206
206
|
"name": "stream1",
|
207
|
-
"
|
207
|
+
"format": {"filetype": "csv"},
|
208
208
|
"globs": ["*.csv"],
|
209
209
|
"validation_policy": "Skip Record",
|
210
210
|
}
|
@@ -250,13 +250,13 @@ skip_record_scenario_multi_stream = (
|
|
250
250
|
"streams": [
|
251
251
|
{
|
252
252
|
"name": "stream1",
|
253
|
-
"
|
253
|
+
"format": {"filetype": "csv"},
|
254
254
|
"globs": ["a/*.csv"],
|
255
255
|
"validation_policy": "Skip Record",
|
256
256
|
},
|
257
257
|
{
|
258
258
|
"name": "stream2",
|
259
|
-
"
|
259
|
+
"format": {"filetype": "csv"},
|
260
260
|
"globs": ["b/*.csv"],
|
261
261
|
"validation_policy": "Skip Record",
|
262
262
|
}
|
@@ -317,7 +317,7 @@ emit_record_scenario_single_stream = (
|
|
317
317
|
"streams": [
|
318
318
|
{
|
319
319
|
"name": "stream1",
|
320
|
-
"
|
320
|
+
"format": {"filetype": "csv"},
|
321
321
|
"globs": ["*.csv"],
|
322
322
|
"validation_policy": "Emit Record",
|
323
323
|
}
|
@@ -359,13 +359,13 @@ emit_record_scenario_multi_stream = (
|
|
359
359
|
"streams": [
|
360
360
|
{
|
361
361
|
"name": "stream1",
|
362
|
-
"
|
362
|
+
"format": {"filetype": "csv"},
|
363
363
|
"globs": ["a/*.csv"],
|
364
364
|
"validation_policy": "Emit Record",
|
365
365
|
},
|
366
366
|
{
|
367
367
|
"name": "stream2",
|
368
|
-
"
|
368
|
+
"format": {"filetype": "csv"},
|
369
369
|
"globs": ["b/*.csv"],
|
370
370
|
"validation_policy": "Emit Record",
|
371
371
|
}
|
@@ -418,7 +418,7 @@ wait_for_rediscovery_scenario_single_stream = (
|
|
418
418
|
"streams": [
|
419
419
|
{
|
420
420
|
"name": "stream1",
|
421
|
-
"
|
421
|
+
"format": {"filetype": "csv"},
|
422
422
|
"globs": ["*.csv"],
|
423
423
|
"validation_policy": "Wait for Discover",
|
424
424
|
}
|
@@ -453,13 +453,13 @@ wait_for_rediscovery_scenario_multi_stream = (
|
|
453
453
|
"streams": [
|
454
454
|
{
|
455
455
|
"name": "stream1",
|
456
|
-
"
|
456
|
+
"format": {"filetype": "csv"},
|
457
457
|
"globs": ["a/*.csv"],
|
458
458
|
"validation_policy": "Wait for Discover",
|
459
459
|
},
|
460
460
|
{
|
461
461
|
"name": "stream2",
|
462
|
-
"
|
462
|
+
"format": {"filetype": "csv"},
|
463
463
|
"globs": ["b/*.csv"],
|
464
464
|
"validation_policy": "Wait for Discover",
|
465
465
|
}
|
@@ -7,6 +7,7 @@ from typing import Any, List, Mapping
|
|
7
7
|
from unittest.mock import MagicMock
|
8
8
|
|
9
9
|
import pytest
|
10
|
+
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
10
11
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig, ValidationPolicy
|
11
12
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
12
13
|
from airbyte_cdk.sources.file_based.stream.cursor.default_file_based_cursor import DefaultFileBasedCursor
|
@@ -264,5 +265,5 @@ def get_cursor(max_history_size: int, days_to_sync_if_history_is_full: int) -> D
|
|
264
265
|
cursor_cls = DefaultFileBasedCursor
|
265
266
|
cursor_cls.DEFAULT_MAX_HISTORY_SIZE = max_history_size
|
266
267
|
config = FileBasedStreamConfig(
|
267
|
-
|
268
|
+
format=CsvFormat(), name="test", validation_policy=ValidationPolicy.emit_record, days_to_sync_if_history_is_full=days_to_sync_if_history_is_full)
|
268
269
|
return cursor_cls(config)
|
@@ -19,6 +19,10 @@ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
|
|
19
19
|
from airbyte_cdk.sources.file_based.stream.default_file_based_stream import DefaultFileBasedStream
|
20
20
|
|
21
21
|
|
22
|
+
class MockFormat:
|
23
|
+
pass
|
24
|
+
|
25
|
+
|
22
26
|
@pytest.mark.parametrize(
|
23
27
|
"input_schema, expected_output",
|
24
28
|
[
|
@@ -60,13 +64,12 @@ def test_fill_nulls(input_schema: Mapping[str, Any], expected_output: Mapping[st
|
|
60
64
|
|
61
65
|
|
62
66
|
class DefaultFileBasedStreamTest(unittest.TestCase):
|
63
|
-
_FILE_TYPE = "file_type"
|
64
67
|
_NOW = datetime(2022, 10, 22, tzinfo=timezone.utc)
|
65
68
|
_A_RECORD = {"a_record": 1}
|
66
69
|
|
67
70
|
def setUp(self) -> None:
|
68
71
|
self._stream_config = Mock()
|
69
|
-
self._stream_config.
|
72
|
+
self._stream_config.format = MockFormat()
|
70
73
|
self._stream_config.name = "a stream name"
|
71
74
|
self._catalog_schema = Mock()
|
72
75
|
self._stream_reader = Mock(spec=AbstractFileBasedStreamReader)
|
@@ -83,7 +86,7 @@ class DefaultFileBasedStreamTest(unittest.TestCase):
|
|
83
86
|
stream_reader=self._stream_reader,
|
84
87
|
availability_strategy=self._availability_strategy,
|
85
88
|
discovery_policy=self._discovery_policy,
|
86
|
-
parsers={
|
89
|
+
parsers={MockFormat: self._parser},
|
87
90
|
validation_policy=self._validation_policy,
|
88
91
|
cursor=self._cursor,
|
89
92
|
)
|
@@ -22,6 +22,7 @@ NOW = 1234567
|
|
22
22
|
("timestamp_ms_match_string", [{"d": "1686058051000"}], {"d": "%ms"}),
|
23
23
|
("timestamp_no_match_integer", [{"d": 99}], {}),
|
24
24
|
("timestamp_no_match_string", [{"d": "99999999999999999999"}], {}),
|
25
|
+
("timestamp_overflow", [{"d": f"{10**100}_100"}], {}), # this case was previously causing OverflowError hence this test
|
25
26
|
("simple_no_match", [{"d": "20220203"}], {}),
|
26
27
|
("multiple_match", [{"d": "2022-02-03", "e": "2022-02-03"}], {"d": "%Y-%m-%d", "e": "%Y-%m-%d"}),
|
27
28
|
(
|
File without changes
|
File without changes
|
File without changes
|