airbyte-cdk 0.51.16__py3-none-any.whl → 0.51.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +494 -522
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +1 -1
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -37
- airbyte_cdk/sources/file_based/file_based_source.py +1 -1
- airbyte_cdk/sources/file_based/file_types/__init__.py +11 -6
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -1
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -1
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -2
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +5 -5
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +7 -5
- {airbyte_cdk-0.51.16.dist-info → airbyte_cdk-0.51.17.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.51.16.dist-info → airbyte_cdk-0.51.17.dist-info}/RECORD +27 -27
- unit_tests/sources/file_based/file_types/test_avro_parser.py +6 -6
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +5 -6
- unit_tests/sources/file_based/scenarios/check_scenarios.py +8 -8
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +19 -42
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +15 -15
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +13 -12
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +5 -9
- unit_tests/sources/file_based/scenarios/scenario_builder.py +1 -1
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +16 -16
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +9 -9
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +2 -1
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +6 -3
- {airbyte_cdk-0.51.16.dist-info → airbyte_cdk-0.51.17.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.51.16.dist-info → airbyte_cdk-0.51.17.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.51.16.dist-info → airbyte_cdk-0.51.17.dist-info}/top_level.txt +0 -0
@@ -78,7 +78,7 @@ valid_single_stream_user_input_schema_scenario = (
|
|
78
78
|
"streams": [
|
79
79
|
{
|
80
80
|
"name": "stream1",
|
81
|
-
"
|
81
|
+
"format": {"filetype": "csv"},
|
82
82
|
"globs": ["*"],
|
83
83
|
"validation_policy": "Emit Record",
|
84
84
|
"input_schema": '{"col1": "string", "col2": "string"}',
|
@@ -98,7 +98,7 @@ single_stream_user_input_schema_scenario_schema_is_invalid = (
|
|
98
98
|
"streams": [
|
99
99
|
{
|
100
100
|
"name": "stream1",
|
101
|
-
"
|
101
|
+
"format": {"filetype": "csv"},
|
102
102
|
"globs": ["*"],
|
103
103
|
"validation_policy": "Emit Record",
|
104
104
|
"input_schema": '{"col1": "x", "col2": "string"}',
|
@@ -121,7 +121,7 @@ single_stream_user_input_schema_scenario_emit_nonconforming_records = (
|
|
121
121
|
"streams": [
|
122
122
|
{
|
123
123
|
"name": "stream1",
|
124
|
-
"
|
124
|
+
"format": {"filetype": "csv"},
|
125
125
|
"globs": ["*"],
|
126
126
|
"validation_policy": "Emit Record",
|
127
127
|
"input_schema": '{"col1": "integer", "col2": "string"}',
|
@@ -171,7 +171,7 @@ single_stream_user_input_schema_scenario_skip_nonconforming_records = (
|
|
171
171
|
"streams": [
|
172
172
|
{
|
173
173
|
"name": "stream1",
|
174
|
-
"
|
174
|
+
"format": {"filetype": "csv"},
|
175
175
|
"globs": ["*"],
|
176
176
|
"validation_policy": "Skip Record",
|
177
177
|
"input_schema": '{"col1": "integer", "col2": "string"}',
|
@@ -364,21 +364,21 @@ valid_multi_stream_user_input_schema_scenario = (
|
|
364
364
|
"streams": [
|
365
365
|
{
|
366
366
|
"name": "stream1",
|
367
|
-
"
|
367
|
+
"format": {"filetype": "csv"},
|
368
368
|
"globs": ["a.csv"],
|
369
369
|
"validation_policy": "Emit Record",
|
370
370
|
"input_schema": '{"col1": "string", "col2": "integer"}',
|
371
371
|
},
|
372
372
|
{
|
373
373
|
"name": "stream2",
|
374
|
-
"
|
374
|
+
"format": {"filetype": "csv"},
|
375
375
|
"globs": ["b.csv"],
|
376
376
|
"validation_policy": "Emit Record",
|
377
377
|
"input_schema": '{"col1": "string", "col2": "string", "col3": "string"}',
|
378
378
|
},
|
379
379
|
{
|
380
380
|
"name": "stream3",
|
381
|
-
"
|
381
|
+
"format": {"filetype": "csv"},
|
382
382
|
"globs": ["c.csv"],
|
383
383
|
"validation_policy": "Emit Record",
|
384
384
|
},
|
@@ -398,21 +398,21 @@ multi_stream_user_input_schema_scenario_schema_is_invalid = (
|
|
398
398
|
"streams": [
|
399
399
|
{
|
400
400
|
"name": "stream1",
|
401
|
-
"
|
401
|
+
"format": {"filetype": "csv"},
|
402
402
|
"globs": ["a.csv"],
|
403
403
|
"validation_policy": "Emit Record",
|
404
404
|
"input_schema": '{"col1": "string", "col2": "integer"}',
|
405
405
|
},
|
406
406
|
{
|
407
407
|
"name": "stream2",
|
408
|
-
"
|
408
|
+
"format": {"filetype": "csv"},
|
409
409
|
"globs": ["b.csv"],
|
410
410
|
"validation_policy": "Emit Record",
|
411
411
|
"input_schema": '{"col1": "x", "col2": "string", "col3": "string"}', # this stream's schema is invalid
|
412
412
|
},
|
413
413
|
{
|
414
414
|
"name": "stream3",
|
415
|
-
"
|
415
|
+
"format": {"filetype": "csv"},
|
416
416
|
"globs": ["c.csv"],
|
417
417
|
"validation_policy": "Emit Record",
|
418
418
|
},
|
@@ -435,21 +435,21 @@ multi_stream_user_input_schema_scenario_emit_nonconforming_records = (
|
|
435
435
|
"streams": [
|
436
436
|
{
|
437
437
|
"name": "stream1",
|
438
|
-
"
|
438
|
+
"format": {"filetype": "csv"},
|
439
439
|
"globs": ["a.csv"],
|
440
440
|
"validation_policy": "Emit Record",
|
441
441
|
"input_schema": '{"col1": "string", "col2": "integer"}',
|
442
442
|
},
|
443
443
|
{
|
444
444
|
"name": "stream2",
|
445
|
-
"
|
445
|
+
"format": {"filetype": "csv"},
|
446
446
|
"globs": ["b.csv"],
|
447
447
|
"validation_policy": "Emit Record",
|
448
448
|
"input_schema": '{"col1": "string", "col2": "integer", "col3": "string"}', # this stream's records do not conform to the schema
|
449
449
|
},
|
450
450
|
{
|
451
451
|
"name": "stream3",
|
452
|
-
"
|
452
|
+
"format": {"filetype": "csv"},
|
453
453
|
"globs": ["c.csv"],
|
454
454
|
"validation_policy": "Emit Record",
|
455
455
|
},
|
@@ -574,21 +574,21 @@ multi_stream_user_input_schema_scenario_skip_nonconforming_records = (
|
|
574
574
|
"streams": [
|
575
575
|
{
|
576
576
|
"name": "stream1",
|
577
|
-
"
|
577
|
+
"format": {"filetype": "csv"},
|
578
578
|
"globs": ["a.csv"],
|
579
579
|
"validation_policy": "Emit Record",
|
580
580
|
"input_schema": '{"col1": "string", "col2": "integer"}',
|
581
581
|
},
|
582
582
|
{
|
583
583
|
"name": "stream2",
|
584
|
-
"
|
584
|
+
"format": {"filetype": "csv"},
|
585
585
|
"globs": ["b.csv"],
|
586
586
|
"validation_policy": "Skip Record",
|
587
587
|
"input_schema": '{"col1": "string", "col2": "integer", "col3": "string"}', # this stream's records do not conform to the schema
|
588
588
|
},
|
589
589
|
{
|
590
590
|
"name": "stream3",
|
591
|
-
"
|
591
|
+
"format": {"filetype": "csv"},
|
592
592
|
"globs": ["c.csv"],
|
593
593
|
"validation_policy": "Emit Record",
|
594
594
|
},
|
@@ -204,7 +204,7 @@ skip_record_scenario_single_stream = (
|
|
204
204
|
"streams": [
|
205
205
|
{
|
206
206
|
"name": "stream1",
|
207
|
-
"
|
207
|
+
"format": {"filetype": "csv"},
|
208
208
|
"globs": ["*.csv"],
|
209
209
|
"validation_policy": "Skip Record",
|
210
210
|
}
|
@@ -250,13 +250,13 @@ skip_record_scenario_multi_stream = (
|
|
250
250
|
"streams": [
|
251
251
|
{
|
252
252
|
"name": "stream1",
|
253
|
-
"
|
253
|
+
"format": {"filetype": "csv"},
|
254
254
|
"globs": ["a/*.csv"],
|
255
255
|
"validation_policy": "Skip Record",
|
256
256
|
},
|
257
257
|
{
|
258
258
|
"name": "stream2",
|
259
|
-
"
|
259
|
+
"format": {"filetype": "csv"},
|
260
260
|
"globs": ["b/*.csv"],
|
261
261
|
"validation_policy": "Skip Record",
|
262
262
|
}
|
@@ -317,7 +317,7 @@ emit_record_scenario_single_stream = (
|
|
317
317
|
"streams": [
|
318
318
|
{
|
319
319
|
"name": "stream1",
|
320
|
-
"
|
320
|
+
"format": {"filetype": "csv"},
|
321
321
|
"globs": ["*.csv"],
|
322
322
|
"validation_policy": "Emit Record",
|
323
323
|
}
|
@@ -359,13 +359,13 @@ emit_record_scenario_multi_stream = (
|
|
359
359
|
"streams": [
|
360
360
|
{
|
361
361
|
"name": "stream1",
|
362
|
-
"
|
362
|
+
"format": {"filetype": "csv"},
|
363
363
|
"globs": ["a/*.csv"],
|
364
364
|
"validation_policy": "Emit Record",
|
365
365
|
},
|
366
366
|
{
|
367
367
|
"name": "stream2",
|
368
|
-
"
|
368
|
+
"format": {"filetype": "csv"},
|
369
369
|
"globs": ["b/*.csv"],
|
370
370
|
"validation_policy": "Emit Record",
|
371
371
|
}
|
@@ -418,7 +418,7 @@ wait_for_rediscovery_scenario_single_stream = (
|
|
418
418
|
"streams": [
|
419
419
|
{
|
420
420
|
"name": "stream1",
|
421
|
-
"
|
421
|
+
"format": {"filetype": "csv"},
|
422
422
|
"globs": ["*.csv"],
|
423
423
|
"validation_policy": "Wait for Discover",
|
424
424
|
}
|
@@ -453,13 +453,13 @@ wait_for_rediscovery_scenario_multi_stream = (
|
|
453
453
|
"streams": [
|
454
454
|
{
|
455
455
|
"name": "stream1",
|
456
|
-
"
|
456
|
+
"format": {"filetype": "csv"},
|
457
457
|
"globs": ["a/*.csv"],
|
458
458
|
"validation_policy": "Wait for Discover",
|
459
459
|
},
|
460
460
|
{
|
461
461
|
"name": "stream2",
|
462
|
-
"
|
462
|
+
"format": {"filetype": "csv"},
|
463
463
|
"globs": ["b/*.csv"],
|
464
464
|
"validation_policy": "Wait for Discover",
|
465
465
|
}
|
@@ -7,6 +7,7 @@ from typing import Any, List, Mapping
|
|
7
7
|
from unittest.mock import MagicMock
|
8
8
|
|
9
9
|
import pytest
|
10
|
+
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
10
11
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig, ValidationPolicy
|
11
12
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
12
13
|
from airbyte_cdk.sources.file_based.stream.cursor.default_file_based_cursor import DefaultFileBasedCursor
|
@@ -264,5 +265,5 @@ def get_cursor(max_history_size: int, days_to_sync_if_history_is_full: int) -> D
|
|
264
265
|
cursor_cls = DefaultFileBasedCursor
|
265
266
|
cursor_cls.DEFAULT_MAX_HISTORY_SIZE = max_history_size
|
266
267
|
config = FileBasedStreamConfig(
|
267
|
-
|
268
|
+
format=CsvFormat(), name="test", validation_policy=ValidationPolicy.emit_record, days_to_sync_if_history_is_full=days_to_sync_if_history_is_full)
|
268
269
|
return cursor_cls(config)
|
@@ -19,6 +19,10 @@ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
|
|
19
19
|
from airbyte_cdk.sources.file_based.stream.default_file_based_stream import DefaultFileBasedStream
|
20
20
|
|
21
21
|
|
22
|
+
class MockFormat:
|
23
|
+
pass
|
24
|
+
|
25
|
+
|
22
26
|
@pytest.mark.parametrize(
|
23
27
|
"input_schema, expected_output",
|
24
28
|
[
|
@@ -60,13 +64,12 @@ def test_fill_nulls(input_schema: Mapping[str, Any], expected_output: Mapping[st
|
|
60
64
|
|
61
65
|
|
62
66
|
class DefaultFileBasedStreamTest(unittest.TestCase):
|
63
|
-
_FILE_TYPE = "file_type"
|
64
67
|
_NOW = datetime(2022, 10, 22, tzinfo=timezone.utc)
|
65
68
|
_A_RECORD = {"a_record": 1}
|
66
69
|
|
67
70
|
def setUp(self) -> None:
|
68
71
|
self._stream_config = Mock()
|
69
|
-
self._stream_config.
|
72
|
+
self._stream_config.format = MockFormat()
|
70
73
|
self._stream_config.name = "a stream name"
|
71
74
|
self._catalog_schema = Mock()
|
72
75
|
self._stream_reader = Mock(spec=AbstractFileBasedStreamReader)
|
@@ -83,7 +86,7 @@ class DefaultFileBasedStreamTest(unittest.TestCase):
|
|
83
86
|
stream_reader=self._stream_reader,
|
84
87
|
availability_strategy=self._availability_strategy,
|
85
88
|
discovery_policy=self._discovery_policy,
|
86
|
-
parsers={
|
89
|
+
parsers={MockFormat: self._parser},
|
87
90
|
validation_policy=self._validation_policy,
|
88
91
|
cursor=self._cursor,
|
89
92
|
)
|
File without changes
|
File without changes
|
File without changes
|