airbyte-cdk 0.51.15__py3-none-any.whl → 0.51.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +494 -522
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +1 -1
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -37
- airbyte_cdk/sources/file_based/file_based_source.py +1 -1
- airbyte_cdk/sources/file_based/file_types/__init__.py +11 -6
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -1
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -1
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -2
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +5 -5
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +7 -5
- airbyte_cdk/utils/datetime_format_inferrer.py +8 -4
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/RECORD +29 -29
- unit_tests/sources/file_based/file_types/test_avro_parser.py +6 -6
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +5 -6
- unit_tests/sources/file_based/scenarios/check_scenarios.py +8 -8
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +19 -42
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +15 -15
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +13 -12
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +5 -9
- unit_tests/sources/file_based/scenarios/scenario_builder.py +1 -1
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +16 -16
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +9 -9
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +2 -1
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +6 -3
- unit_tests/utils/test_datetime_format_inferrer.py +1 -0
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/top_level.txt +0 -0
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py
CHANGED
@@ -73,7 +73,7 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
|
|
73
73
|
return files
|
74
74
|
|
75
75
|
def _check_parse_record(self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger) -> None:
|
76
|
-
parser = stream.get_parser(
|
76
|
+
parser = stream.get_parser()
|
77
77
|
|
78
78
|
try:
|
79
79
|
record = next(iter(parser.parse_records(stream.config, file, self.stream_reader, logger, discovered_schema=None)))
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from enum import Enum
|
6
|
-
from typing import Any, List, Mapping, Optional,
|
6
|
+
from typing import Any, List, Mapping, Optional, Union
|
7
7
|
|
8
8
|
from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
|
9
9
|
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
@@ -16,9 +16,6 @@ from pydantic import BaseModel, Field, validator
|
|
16
16
|
PrimaryKeyType = Optional[Union[str, List[str]]]
|
17
17
|
|
18
18
|
|
19
|
-
VALID_FILE_TYPES: Mapping[str, Type[BaseModel]] = {"avro": AvroFormat, "csv": CsvFormat, "jsonl": JsonlFormat, "parquet": ParquetFormat}
|
20
|
-
|
21
|
-
|
22
19
|
class ValidationPolicy(Enum):
|
23
20
|
emit_record = "Emit Record"
|
24
21
|
skip_record = "Skip Record"
|
@@ -27,7 +24,6 @@ class ValidationPolicy(Enum):
|
|
27
24
|
|
28
25
|
class FileBasedStreamConfig(BaseModel):
|
29
26
|
name: str = Field(title="Name", description="The name of the stream.")
|
30
|
-
file_type: str = Field(title="File Type", description="The data file type that is being extracted for a stream.")
|
31
27
|
globs: Optional[List[str]] = Field(
|
32
28
|
title="Globs",
|
33
29
|
description='The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look <a href="https://en.wikipedia.org/wiki/Glob_(programming)">here</a>.',
|
@@ -54,7 +50,7 @@ class FileBasedStreamConfig(BaseModel):
|
|
54
50
|
description="When the state history of the file store is full, syncs will only read files that were last modified in the provided day range.",
|
55
51
|
default=3,
|
56
52
|
)
|
57
|
-
format:
|
53
|
+
format: Union[AvroFormat, CsvFormat, JsonlFormat, ParquetFormat] = Field(
|
58
54
|
title="Format",
|
59
55
|
description="The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.",
|
60
56
|
)
|
@@ -64,37 +60,6 @@ class FileBasedStreamConfig(BaseModel):
|
|
64
60
|
default=False,
|
65
61
|
)
|
66
62
|
|
67
|
-
@validator("file_type", pre=True)
|
68
|
-
def validate_file_type(cls, v: str) -> str:
|
69
|
-
if v not in VALID_FILE_TYPES:
|
70
|
-
raise ValueError(f"Format filetype {v} is not a supported file type")
|
71
|
-
return v
|
72
|
-
|
73
|
-
@classmethod
|
74
|
-
def _transform_legacy_config(cls, legacy_config: Mapping[str, Any], file_type: str) -> Mapping[str, Any]:
|
75
|
-
if file_type.casefold() not in VALID_FILE_TYPES:
|
76
|
-
raise ValueError(f"Format filetype {file_type} is not a supported file type")
|
77
|
-
if file_type.casefold() == "parquet" or file_type.casefold() == "avro":
|
78
|
-
legacy_config = cls._transform_legacy_parquet_or_avro_config(legacy_config)
|
79
|
-
return {file_type: VALID_FILE_TYPES[file_type.casefold()].parse_obj({key: val for key, val in legacy_config.items()})}
|
80
|
-
|
81
|
-
@classmethod
|
82
|
-
def _transform_legacy_parquet_or_avro_config(cls, config: Mapping[str, Any]) -> Mapping[str, Any]:
|
83
|
-
"""
|
84
|
-
The legacy parquet parser converts decimal fields to numbers. This isn't desirable because it can lead to precision loss.
|
85
|
-
To avoid introducing a breaking change with the new default, we will set decimal_as_float to True in the legacy configs.
|
86
|
-
"""
|
87
|
-
filetype = config.get("filetype")
|
88
|
-
if filetype != "parquet" and filetype != "avro":
|
89
|
-
raise ValueError(
|
90
|
-
f"Expected {filetype} format, got {config}. This is probably due to a CDK bug. Please reach out to the Airbyte team for support."
|
91
|
-
)
|
92
|
-
if config.get("decimal_as_float"):
|
93
|
-
raise ValueError(
|
94
|
-
f"Received legacy {filetype} file form with 'decimal_as_float' set. This is unexpected. Please reach out to the Airbyte team for support."
|
95
|
-
)
|
96
|
-
return {**config, **{"decimal_as_float": True}}
|
97
|
-
|
98
63
|
@validator("input_schema", pre=True)
|
99
64
|
def validate_input_schema(cls, v: Optional[str]) -> Optional[str]:
|
100
65
|
if v:
|
@@ -33,7 +33,7 @@ class FileBasedSource(AbstractSource, ABC):
|
|
33
33
|
catalog_path: Optional[str] = None,
|
34
34
|
availability_strategy: Optional[AbstractFileBasedAvailabilityStrategy] = None,
|
35
35
|
discovery_policy: AbstractDiscoveryPolicy = DefaultDiscoveryPolicy(),
|
36
|
-
parsers: Mapping[
|
36
|
+
parsers: Mapping[Type[Any], FileTypeParser] = default_parsers,
|
37
37
|
validation_policies: Mapping[ValidationPolicy, AbstractSchemaValidationPolicy] = DEFAULT_SCHEMA_VALIDATION_POLICIES,
|
38
38
|
cursor_cls: Type[AbstractFileBasedCursor] = DefaultFileBasedCursor,
|
39
39
|
):
|
@@ -1,4 +1,9 @@
|
|
1
|
-
from typing import Mapping
|
1
|
+
from typing import Any, Mapping, Type
|
2
|
+
|
3
|
+
from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
|
4
|
+
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
5
|
+
from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
|
6
|
+
from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
|
2
7
|
|
3
8
|
from .avro_parser import AvroParser
|
4
9
|
from .csv_parser import CsvParser
|
@@ -6,11 +11,11 @@ from .file_type_parser import FileTypeParser
|
|
6
11
|
from .jsonl_parser import JsonlParser
|
7
12
|
from .parquet_parser import ParquetParser
|
8
13
|
|
9
|
-
default_parsers: Mapping[
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
+
default_parsers: Mapping[Type[Any], FileTypeParser] = {
|
15
|
+
AvroFormat: AvroParser(),
|
16
|
+
CsvFormat: CsvParser(),
|
17
|
+
JsonlFormat: JsonlParser(),
|
18
|
+
ParquetFormat: ParquetParser(),
|
14
19
|
}
|
15
20
|
|
16
21
|
__all__ = ["AvroParser", "CsvParser", "JsonlParser", "ParquetParser", "default_parsers"]
|
@@ -49,7 +49,7 @@ class AvroParser(FileTypeParser):
|
|
49
49
|
stream_reader: AbstractFileBasedStreamReader,
|
50
50
|
logger: logging.Logger,
|
51
51
|
) -> SchemaType:
|
52
|
-
avro_format = config.format
|
52
|
+
avro_format = config.format
|
53
53
|
if not isinstance(avro_format, AvroFormat):
|
54
54
|
raise ValueError(f"Expected ParquetFormat, got {avro_format}")
|
55
55
|
|
@@ -422,7 +422,7 @@ def _no_cast(row: Mapping[str, str]) -> Mapping[str, str]:
|
|
422
422
|
|
423
423
|
|
424
424
|
def _extract_format(config: FileBasedStreamConfig) -> CsvFormat:
|
425
|
-
config_format = config.format
|
425
|
+
config_format = config.format
|
426
426
|
if not isinstance(config_format, CsvFormat):
|
427
427
|
raise ValueError(f"Invalid format config: {config_format}")
|
428
428
|
return config_format
|
@@ -30,7 +30,7 @@ class ParquetParser(FileTypeParser):
|
|
30
30
|
stream_reader: AbstractFileBasedStreamReader,
|
31
31
|
logger: logging.Logger,
|
32
32
|
) -> SchemaType:
|
33
|
-
parquet_format = config.format
|
33
|
+
parquet_format = config.format
|
34
34
|
if not isinstance(parquet_format, ParquetFormat):
|
35
35
|
raise ValueError(f"Expected ParquetFormat, got {parquet_format}")
|
36
36
|
|
@@ -54,7 +54,7 @@ class ParquetParser(FileTypeParser):
|
|
54
54
|
logger: logging.Logger,
|
55
55
|
discovered_schema: Optional[Mapping[str, SchemaType]],
|
56
56
|
) -> Iterable[Dict[str, Any]]:
|
57
|
-
parquet_format = config.format
|
57
|
+
parquet_format = config.format
|
58
58
|
if not isinstance(parquet_format, ParquetFormat):
|
59
59
|
logger.info(f"Expected ParquetFormat, got {parquet_format}")
|
60
60
|
raise ConfigValidationError(FileBasedSourceError.CONFIG_VALIDATION_ERROR)
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from abc import abstractmethod
|
6
6
|
from functools import cached_property, lru_cache
|
7
|
-
from typing import Any, Dict, Iterable, List, Mapping, Optional
|
7
|
+
from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
|
8
8
|
|
9
9
|
from airbyte_cdk.models import SyncMode
|
10
10
|
from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy
|
@@ -42,7 +42,7 @@ class AbstractFileBasedStream(Stream):
|
|
42
42
|
stream_reader: AbstractFileBasedStreamReader,
|
43
43
|
availability_strategy: AbstractFileBasedAvailabilityStrategy,
|
44
44
|
discovery_policy: AbstractDiscoveryPolicy,
|
45
|
-
parsers: Dict[
|
45
|
+
parsers: Dict[Type[Any], FileTypeParser],
|
46
46
|
validation_policy: AbstractSchemaValidationPolicy,
|
47
47
|
):
|
48
48
|
super().__init__()
|
@@ -121,11 +121,11 @@ class AbstractFileBasedStream(Stream):
|
|
121
121
|
"""
|
122
122
|
...
|
123
123
|
|
124
|
-
def get_parser(self
|
124
|
+
def get_parser(self) -> FileTypeParser:
|
125
125
|
try:
|
126
|
-
return self._parsers[
|
126
|
+
return self._parsers[type(self.config.format)]
|
127
127
|
except KeyError:
|
128
|
-
raise UndefinedParserError(FileBasedSourceError.UNDEFINED_PARSER, stream=self.name,
|
128
|
+
raise UndefinedParserError(FileBasedSourceError.UNDEFINED_PARSER, stream=self.name, format=type(self.config.format))
|
129
129
|
|
130
130
|
def record_passes_validation_policy(self, record: Mapping[str, Any]) -> bool:
|
131
131
|
if self.validation_policy:
|
@@ -5,6 +5,7 @@
|
|
5
5
|
import asyncio
|
6
6
|
import itertools
|
7
7
|
import traceback
|
8
|
+
from copy import deepcopy
|
8
9
|
from functools import cache
|
9
10
|
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Set, Union
|
10
11
|
|
@@ -79,7 +80,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
79
80
|
# On read requests we should always have the catalog available
|
80
81
|
raise MissingSchemaError(FileBasedSourceError.MISSING_SCHEMA, stream=self.name)
|
81
82
|
# The stream only supports a single file type, so we can use the same parser for all files
|
82
|
-
parser = self.get_parser(
|
83
|
+
parser = self.get_parser()
|
83
84
|
for file in stream_slice["files"]:
|
84
85
|
# only serialize the datetime once
|
85
86
|
file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
|
@@ -190,7 +191,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
190
191
|
if not inferred_schema:
|
191
192
|
raise InvalidSchemaError(
|
192
193
|
FileBasedSourceError.INVALID_SCHEMA_ERROR,
|
193
|
-
details=f"Empty schema. Please check that the files are valid {self.config.
|
194
|
+
details=f"Empty schema. Please check that the files are valid for format {self.config.format}",
|
194
195
|
stream=self.name,
|
195
196
|
)
|
196
197
|
|
@@ -210,7 +211,8 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
210
211
|
def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
|
211
212
|
loop = asyncio.get_event_loop()
|
212
213
|
schema = loop.run_until_complete(self._infer_schema(files))
|
213
|
-
|
214
|
+
# as infer schema returns a Mapping that is assumed to be immutable, we need to create a deepcopy to avoid modifying the reference
|
215
|
+
return self._fill_nulls(deepcopy(schema))
|
214
216
|
|
215
217
|
@staticmethod
|
216
218
|
def _fill_nulls(schema: Mapping[str, Any]) -> Mapping[str, Any]:
|
@@ -258,11 +260,11 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
258
260
|
|
259
261
|
async def _infer_file_schema(self, file: RemoteFile) -> SchemaType:
|
260
262
|
try:
|
261
|
-
return await self.get_parser(
|
263
|
+
return await self.get_parser().infer_schema(self.config, file, self._stream_reader, self.logger)
|
262
264
|
except Exception as exc:
|
263
265
|
raise SchemaInferenceError(
|
264
266
|
FileBasedSourceError.SCHEMA_INFERENCE_ERROR,
|
265
267
|
file=file.uri,
|
266
|
-
|
268
|
+
format=str(self.config.format),
|
267
269
|
stream=self.name,
|
268
270
|
) from exc
|
@@ -36,10 +36,14 @@ class DatetimeFormatInferrer:
|
|
36
36
|
This is the case if the value is a string or an integer between 1_000_000_000 and 2_000_000_000 for seconds
|
37
37
|
or between 1_000_000_000_000 and 2_000_000_000_000 for milliseconds.
|
38
38
|
This is separate from the format check for performance reasons"""
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
39
|
+
if isinstance(value, (str, int)):
|
40
|
+
try:
|
41
|
+
value_as_int = int(value)
|
42
|
+
for timestamp_range in self._timestamp_heuristic_ranges:
|
43
|
+
if value_as_int in timestamp_range:
|
44
|
+
return True
|
45
|
+
except ValueError:
|
46
|
+
# given that it's not parsable as an int, it can represent a datetime with one of the self._formats
|
43
47
|
return True
|
44
48
|
return False
|
45
49
|
|
@@ -73,7 +73,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
|
|
73
73
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=Dc0F87nElWsz_Ikj938eQ9uqZvyqgFhZ8Dqf_-hvndc,4800
|
74
74
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
|
75
75
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
|
76
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
76
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=ebor38wlQVqYD2QXk5X8v9xDZl0cEpIc2mFaKvpuiPE,57170
|
77
77
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
78
78
|
airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=bK4a74opm6WHyV7HqOVws6GE5Z7cLNc5MaTha69abIQ,6086
|
79
79
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
|
@@ -146,36 +146,36 @@ airbyte_cdk/sources/embedded/runner.py,sha256=kZ0CcUANuMjdZ4fmvp_w9P2IcsS9WSHxNq
|
|
146
146
|
airbyte_cdk/sources/embedded/tools.py,sha256=-Z4tZ4AP1OTi_zrqFM3YV8Rt7c60wvsrv0Dc-rTZ2uw,744
|
147
147
|
airbyte_cdk/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
148
148
|
airbyte_cdk/sources/file_based/exceptions.py,sha256=K3b0IH4xxY75GwRzueAoWfBZDSkctGWDtDEKkSwnrM4,4344
|
149
|
-
airbyte_cdk/sources/file_based/file_based_source.py,sha256=
|
149
|
+
airbyte_cdk/sources/file_based/file_based_source.py,sha256=NKS3p_cClQoKC0elj8tJr4ayAUnUXXgTjGr3SVHxC4Q,6937
|
150
150
|
airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=K9fFHcSL4E8v-X2l38wRAcZCjpyifr35orvby8vQt84,3749
|
151
151
|
airbyte_cdk/sources/file_based/remote_file.py,sha256=xIDwDDBPhJI1K8YZuXjEfjxakZPMieBKJM6vmq6G5tw,248
|
152
152
|
airbyte_cdk/sources/file_based/schema_helpers.py,sha256=XBkOutIw_n6SNYU34qbyTbl0Ppt0i4k3sVFMSaX3wJo,9103
|
153
153
|
airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
|
154
154
|
airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=WiPPuQGfmQlFUMFR5h3ECc-VzBj4vair6_4WAL87AEI,277
|
155
155
|
airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=NeHCiG4FFohzYpQQFfmTL4-5oI0nElHWgXX1xrm8-SU,1269
|
156
|
-
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=
|
156
|
+
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=RWcRhNjytH-09_d4MVKDC37B3EGsqe2DheHpLNNMmzE,4243
|
157
157
|
airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
158
158
|
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=WrV4sKtJoZ1dK31HK7NdBKlnYHkmu6NqjmEpkVqJ6tQ,4582
|
159
159
|
airbyte_cdk/sources/file_based/config/avro_format.py,sha256=oLJIuNInu-MgjkVFqwHvmQ4CPZa4NZingq_I0_trQ3g,589
|
160
160
|
airbyte_cdk/sources/file_based/config/csv_format.py,sha256=xlBZ5WyAshagjjjbUV_je1JyZ1oY1GbIzJRUZ9UfSvo,7095
|
161
|
-
airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=
|
161
|
+
airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=BTlc2Sw1UP9XF1D4ZYTjubI_msEijBfh9vW8GhOGtIA,3858
|
162
162
|
airbyte_cdk/sources/file_based/config/jsonl_format.py,sha256=usmTeTw8xw8OKwrz8MsiS5E1LQiVEbedGHMHNAfOOlk,252
|
163
163
|
airbyte_cdk/sources/file_based/config/parquet_format.py,sha256=O_Eq0yVzjPiKDz8H1-f9yMowtCcJwT9F2prNYpXZkp0,614
|
164
164
|
airbyte_cdk/sources/file_based/discovery_policy/__init__.py,sha256=x_7JsQGiS7Ytmr0ZDS0SNYGcNUzC4wCm3_1-Mf3ZFnw,283
|
165
165
|
airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha256=cz9po5Cn6u50uq3hDy46pqnPR4JDcnRItZX9k0WDUJU,520
|
166
166
|
airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=Mx3zT9Dem4uNfaUT0oOtrESsuB1LrGAi5N-uw2swZZA,701
|
167
|
-
airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=
|
168
|
-
airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256
|
169
|
-
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=
|
167
|
+
airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=wWLnHooFrnCwfhMoguDODtQxRVQyrjy0mDrSi4TWwPM,808
|
168
|
+
airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=-C_BODUMg8X4jzN85C9Q2R__cpFeLlKycLC_JbctLF8,8670
|
169
|
+
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=VVV829XszmDRqmgv6VBshMF4CSEzmP8rL-OlGttRu7c,17649
|
170
170
|
airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=cThTLc1YKSAapOn70lB09SzruRIPSShGIMz1f92QYV8,1555
|
171
171
|
airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=Kz6HLF0CrFHQ1Y6rJKGr7KmBWSLeDYFQmkg0WIi7Frg,5395
|
172
|
-
airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=
|
172
|
+
airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=gk2PnjKYT3aL5H7N5jo6OL4vpeNjC588xKrz2_UCNSU,8679
|
173
173
|
airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=sEVnRhZ8x9f7PNjo6lewxid9z0PI8eSj7gSoFC3MH1Y,527
|
174
174
|
airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py,sha256=uwk6Ugf23xKG4PRPVVRVwpcNjTwPgxejl03vLSEzK0s,604
|
175
175
|
airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py,sha256=ZeAa0z50ywMU2chNjQ7JpL4yePU1NajhBa8FS7rXLVo,1643
|
176
176
|
airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaFS_3pIwm-chEabZHiPJi0,265
|
177
|
-
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=
|
178
|
-
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=
|
177
|
+
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=Vyrfn24nvM4JDgZgbIdHK0vaX66sl7vLSNvYS-D5ZtY,5800
|
178
|
+
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=OT7QYytD1V2JY0CHHhIKp62QOnHJquM-gjUrV12rBdM,12379
|
179
179
|
airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
|
180
180
|
airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=i-FPeK8lwCzX34GCcmvL5Yvdh8-uu7FeCVYDoFbD7IY,1920
|
181
181
|
airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=kuJRKgDYOGXRk0V0I8BpFxg0hGv7SfV_nBpmmn45F88,6815
|
@@ -214,7 +214,7 @@ airbyte_cdk/sources/utils/transform.py,sha256=4GYmO6bq33HF-a1in0dKQKqUOYI1bWItyu
|
|
214
214
|
airbyte_cdk/sources/utils/types.py,sha256=41ZQR681t5TUnOScij58d088sb99klH_ZENFcaYro_g,175
|
215
215
|
airbyte_cdk/utils/__init__.py,sha256=kFLcs2P-tbPyeVOJS9rOv1jZdnSpjG24ro0CHgt_CIk,215
|
216
216
|
airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=q3aDl8T10ufGbeqnUPqbZLxQcHdkf2kDfQK_upWzBbI,2894
|
217
|
-
airbyte_cdk/utils/datetime_format_inferrer.py,sha256=
|
217
|
+
airbyte_cdk/utils/datetime_format_inferrer.py,sha256=gGKDQ3OdY18R5CVFhq4c7zB_E4Cxe6J6SLA29cz3cJM,3954
|
218
218
|
airbyte_cdk/utils/event_timing.py,sha256=Hn5kCc9xGKLcV5EYpJCZwNiz9neKKu2WG8FJF_hy278,2377
|
219
219
|
airbyte_cdk/utils/mapping_helpers.py,sha256=tVkbgnxy12Ah2Jxh_3tKW7CTKTAVIcPexsBhsiyTbp4,1729
|
220
220
|
airbyte_cdk/utils/schema_inferrer.py,sha256=D8vFVgeK6VLcAug4YVAHfa3D29On0A_nMlwq9SPlfPI,3799
|
@@ -327,23 +327,23 @@ unit_tests/sources/file_based/config/test_abstract_file_based_spec.py,sha256=wmZ
|
|
327
327
|
unit_tests/sources/file_based/config/test_csv_format.py,sha256=VYL-9Ec8hW_yO2Pj9F8pDfITcgEAFtSublYda7ut7QE,1132
|
328
328
|
unit_tests/sources/file_based/config/test_file_based_stream_config.py,sha256=1eMsHlMQIFwyw20HjnhgKuiw6399sMcLTQ4LP09kTT4,3060
|
329
329
|
unit_tests/sources/file_based/file_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
330
|
-
unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=
|
330
|
+
unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=Rk-FB6-o350BpDp2QemprAgBHKlFIiom0EBgDD5Tygg,10991
|
331
331
|
unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=PalrxCRHAyoIp12IWWyePS9QF4LcvNVkqrKdwkrayJ4,22457
|
332
332
|
unit_tests/sources/file_based/file_types/test_jsonl_parser.py,sha256=foTf9U9LyAS8OR0BonwNgFWPqTrmzFV2lpPUfRMrioE,6134
|
333
333
|
unit_tests/sources/file_based/file_types/test_parquet_parser.py,sha256=D7sKTty8aEqMDWWGKWUqDbWjTxhGkygU7ns4-_JceRY,13543
|
334
334
|
unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
335
|
-
unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=
|
336
|
-
unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=
|
337
|
-
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=
|
338
|
-
unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=
|
339
|
-
unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=
|
340
|
-
unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=
|
341
|
-
unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=
|
342
|
-
unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=
|
343
|
-
unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=
|
335
|
+
unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=UFGCnb_nDU7NFhzxy854OeIx-PbNGvL4v2Nny5CTukE,29809
|
336
|
+
unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=wW8Z1-oAJk7o94zkd_nn7qQdughkx65oGuyhs75ZiVk,5676
|
337
|
+
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=w33c5Tm-0MMjWYSJS1p8d3P2OJR-5kL74AbkkGovyho,97990
|
338
|
+
unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=ZSRWFnkVEeg1rPMpmoKRU4u4U9tdor8nYaj-aaZ7oag,60875
|
339
|
+
unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=rgFxNgy7I3L4Mz3PvtHB2ar_lFbe58WY3RatiddpTso,27753
|
340
|
+
unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=x4xQ9c3vJj5Fs-hmxnqovI2hRT56aHARbZR61Ws7mC8,26831
|
341
|
+
unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=RrFXL1pdkyjidl3yq40FKcBCAIv_Pmd-0mcGJ6E8mcI,10183
|
342
|
+
unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=P11FDBe1mtaPLUKeSxrZ7Z9yGXw6hvwnACfkU9sJRhw,28483
|
343
|
+
unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=9IKNyJ8YX0hO0rWOhQugrDswVLMj5PZIpklVrmgiYVY,26814
|
344
344
|
unit_tests/sources/file_based/stream/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
345
|
-
unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=
|
346
|
-
unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=
|
345
|
+
unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=acUTX704mpw68ljH0atQx--f7STX_UynO8UtROw11Gw,12565
|
346
|
+
unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=DLEzbhfSjzbo7M-GMlJUx6XKgw7T5UuBeM2Ola0Cm7k,5771
|
347
347
|
unit_tests/sources/fixtures/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
348
348
|
unit_tests/sources/fixtures/source_test_fixture.py,sha256=r-UtR241EGQMZTw1RoKaatrpCGeQn7OIuRPWfG9f7nI,5380
|
349
349
|
unit_tests/sources/message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -359,14 +359,14 @@ unit_tests/sources/streams/http/auth/test_auth.py,sha256=gdWpJ-cR64qRXmmPOQWhVd4
|
|
359
359
|
unit_tests/sources/streams/http/requests_native_auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
360
360
|
unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py,sha256=h5LIjR_1hEdsnXmyp7fISa0isYjXIEjMw-8I8L0ZNLE,15024
|
361
361
|
unit_tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
362
|
-
unit_tests/utils/test_datetime_format_inferrer.py,sha256=
|
362
|
+
unit_tests/utils/test_datetime_format_inferrer.py,sha256=1EUW1_afccMDrZM6YZyyPqrdwsUxZTaBxJNVa4TjiN8,3616
|
363
363
|
unit_tests/utils/test_mapping_helpers.py,sha256=hqRppuban9hGKviiNFqp2fNdAz77d1_gjvgg8L7-jy8,1408
|
364
364
|
unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg4MNPAG-xhpk,7817
|
365
365
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
366
366
|
unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
|
367
367
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
368
|
-
airbyte_cdk-0.51.
|
369
|
-
airbyte_cdk-0.51.
|
370
|
-
airbyte_cdk-0.51.
|
371
|
-
airbyte_cdk-0.51.
|
372
|
-
airbyte_cdk-0.51.
|
368
|
+
airbyte_cdk-0.51.17.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
369
|
+
airbyte_cdk-0.51.17.dist-info/METADATA,sha256=yC2mwyB29fJpI-FDENnoicx-VLHUApYTF2b1SY_u0Lo,11516
|
370
|
+
airbyte_cdk-0.51.17.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
371
|
+
airbyte_cdk-0.51.17.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
372
|
+
airbyte_cdk-0.51.17.dist-info/RECORD,,
|
@@ -142,17 +142,17 @@ _double_as_string_avro_format = AvroFormat(double_as_string=True)
|
|
142
142
|
id="test_decimal_missing_precision"),
|
143
143
|
pytest.param(_default_avro_format, {"type": "bytes", "logicalType": "decimal", "precision": 9}, None, ValueError,
|
144
144
|
id="test_decimal_missing_scale"),
|
145
|
-
pytest.param(_default_avro_format, {"type": "bytes", "logicalType": "uuid"}, {"type":
|
146
|
-
pytest.param(_default_avro_format, {"type": "int", "logicalType": "date"}, {"type":
|
145
|
+
pytest.param(_default_avro_format, {"type": "bytes", "logicalType": "uuid"}, {"type": "string"}, None, id="test_uuid"),
|
146
|
+
pytest.param(_default_avro_format, {"type": "int", "logicalType": "date"}, {"type": "string", "format": "date"}, None,
|
147
147
|
id="test_date"),
|
148
|
-
pytest.param(_default_avro_format, {"type": "int", "logicalType": "time-millis"}, {"type":
|
149
|
-
pytest.param(_default_avro_format, {"type": "long", "logicalType": "time-micros"}, {"type":
|
148
|
+
pytest.param(_default_avro_format, {"type": "int", "logicalType": "time-millis"}, {"type": "integer"}, None, id="test_time_millis"),
|
149
|
+
pytest.param(_default_avro_format, {"type": "long", "logicalType": "time-micros"}, {"type": "integer"}, None,
|
150
150
|
id="test_time_micros"),
|
151
151
|
pytest.param(
|
152
152
|
_default_avro_format,
|
153
|
-
{"type": "long", "logicalType": "timestamp-millis"}, {"type":
|
153
|
+
{"type": "long", "logicalType": "timestamp-millis"}, {"type": "string", "format": "date-time"}, None, id="test_timestamp_millis"
|
154
154
|
),
|
155
|
-
pytest.param(_default_avro_format, {"type": "long", "logicalType": "timestamp-micros"}, {"type":
|
155
|
+
pytest.param(_default_avro_format, {"type": "long", "logicalType": "timestamp-micros"}, {"type": "string"}, None,
|
156
156
|
id="test_timestamp_micros"),
|
157
157
|
pytest.param(
|
158
158
|
_default_avro_format,
|
@@ -203,7 +203,7 @@ single_avro_scenario = (
|
|
203
203
|
"streams": [
|
204
204
|
{
|
205
205
|
"name": "stream1",
|
206
|
-
"
|
206
|
+
"format": {"filetype": "avro"},
|
207
207
|
"globs": ["*"],
|
208
208
|
"validation_policy": "Emit Record",
|
209
209
|
}
|
@@ -266,7 +266,7 @@ multiple_avro_combine_schema_scenario = (
|
|
266
266
|
"streams": [
|
267
267
|
{
|
268
268
|
"name": "stream1",
|
269
|
-
"
|
269
|
+
"format": {"filetype": "avro"},
|
270
270
|
"globs": ["*"],
|
271
271
|
"validation_policy": "Emit Record",
|
272
272
|
}
|
@@ -362,7 +362,7 @@ avro_all_types_scenario = (
|
|
362
362
|
"streams": [
|
363
363
|
{
|
364
364
|
"name": "stream1",
|
365
|
-
"
|
365
|
+
"format": {"filetype": "avro"},
|
366
366
|
"globs": ["*"],
|
367
367
|
"validation_policy": "Emit Record",
|
368
368
|
}
|
@@ -463,13 +463,13 @@ multiple_streams_avro_scenario = (
|
|
463
463
|
"streams": [
|
464
464
|
{
|
465
465
|
"name": "songs_stream",
|
466
|
-
"
|
466
|
+
"format": {"filetype": "avro"},
|
467
467
|
"globs": ["*_songs.avro"],
|
468
468
|
"validation_policy": "Emit Record",
|
469
469
|
},
|
470
470
|
{
|
471
471
|
"name": "festivals_stream",
|
472
|
-
"
|
472
|
+
"format": {"filetype": "avro"},
|
473
473
|
"globs": ["*_festivals.avro"],
|
474
474
|
"validation_policy": "Emit Record",
|
475
475
|
},
|
@@ -629,7 +629,6 @@ avro_file_with_double_as_number_scenario = (
|
|
629
629
|
"streams": [
|
630
630
|
{
|
631
631
|
"name": "stream1",
|
632
|
-
"file_type": "avro",
|
633
632
|
"globs": ["*"],
|
634
633
|
"validation_policy": "Emit Record",
|
635
634
|
"format": {
|
@@ -17,7 +17,7 @@ _base_success_scenario = (
|
|
17
17
|
"streams": [
|
18
18
|
{
|
19
19
|
"name": "stream1",
|
20
|
-
"
|
20
|
+
"format": {"filetype": "csv"},
|
21
21
|
"globs": ["*.csv"],
|
22
22
|
"validation_policy": "Emit Record",
|
23
23
|
}
|
@@ -55,13 +55,13 @@ success_multi_stream_scenario = (
|
|
55
55
|
"streams": [
|
56
56
|
{
|
57
57
|
"name": "stream1",
|
58
|
-
"
|
58
|
+
"format": {"filetype": "csv"},
|
59
59
|
"globs": ["*.csv", "*.gz"],
|
60
60
|
"validation_policy": "Emit Record",
|
61
61
|
},
|
62
62
|
{
|
63
63
|
"name": "stream2",
|
64
|
-
"
|
64
|
+
"format": {"filetype": "csv"},
|
65
65
|
"globs": ["*.csv", "*.gz"],
|
66
66
|
"validation_policy": "Emit Record",
|
67
67
|
}
|
@@ -79,7 +79,7 @@ success_extensionless_scenario = (
|
|
79
79
|
"streams": [
|
80
80
|
{
|
81
81
|
"name": "stream1",
|
82
|
-
"
|
82
|
+
"format": {"filetype": "csv"},
|
83
83
|
"globs": ["*"],
|
84
84
|
"validation_policy": "Emit Record",
|
85
85
|
}
|
@@ -109,7 +109,7 @@ success_user_provided_schema_scenario = (
|
|
109
109
|
"streams": [
|
110
110
|
{
|
111
111
|
"name": "stream1",
|
112
|
-
"
|
112
|
+
"format": {"filetype": "csv"},
|
113
113
|
"globs": ["*.csv"],
|
114
114
|
"validation_policy": "Emit Record",
|
115
115
|
"input_schema": '{"col1": "string", "col2": "string"}',
|
@@ -158,7 +158,7 @@ error_record_validation_user_provided_schema_scenario = (
|
|
158
158
|
"streams": [
|
159
159
|
{
|
160
160
|
"name": "stream1",
|
161
|
-
"
|
161
|
+
"format": {"filetype": "csv"},
|
162
162
|
"globs": ["*.csv"],
|
163
163
|
"validation_policy": "always_fail",
|
164
164
|
"input_schema": '{"col1": "number", "col2": "string"}',
|
@@ -179,13 +179,13 @@ error_multi_stream_scenario = (
|
|
179
179
|
"streams": [
|
180
180
|
{
|
181
181
|
"name": "stream1",
|
182
|
-
"
|
182
|
+
"format": {"filetype": "csv"},
|
183
183
|
"globs": ["*.csv"],
|
184
184
|
"validation_policy": "Emit Record",
|
185
185
|
},
|
186
186
|
{
|
187
187
|
"name": "stream2",
|
188
|
-
"
|
188
|
+
"format": {"filetype": "jsonl"},
|
189
189
|
"globs": ["*.csv"],
|
190
190
|
"validation_policy": "Emit Record",
|
191
191
|
}
|