airbyte-cdk 0.51.15__py3-none-any.whl → 0.51.17__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +494 -522
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +1 -1
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -37
- airbyte_cdk/sources/file_based/file_based_source.py +1 -1
- airbyte_cdk/sources/file_based/file_types/__init__.py +11 -6
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -1
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -1
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -2
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +5 -5
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +7 -5
- airbyte_cdk/utils/datetime_format_inferrer.py +8 -4
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/RECORD +29 -29
- unit_tests/sources/file_based/file_types/test_avro_parser.py +6 -6
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +5 -6
- unit_tests/sources/file_based/scenarios/check_scenarios.py +8 -8
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +19 -42
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +15 -15
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +13 -12
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +5 -9
- unit_tests/sources/file_based/scenarios/scenario_builder.py +1 -1
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +16 -16
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +9 -9
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +2 -1
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +6 -3
- unit_tests/utils/test_datetime_format_inferrer.py +1 -0
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/top_level.txt +0 -0
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py
CHANGED
@@ -73,7 +73,7 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
|
|
73
73
|
return files
|
74
74
|
|
75
75
|
def _check_parse_record(self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger) -> None:
|
76
|
-
parser = stream.get_parser(
|
76
|
+
parser = stream.get_parser()
|
77
77
|
|
78
78
|
try:
|
79
79
|
record = next(iter(parser.parse_records(stream.config, file, self.stream_reader, logger, discovered_schema=None)))
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from enum import Enum
|
6
|
-
from typing import Any, List, Mapping, Optional, Type, Union
|
6
|
+
from typing import Any, List, Mapping, Optional, Union
|
7
7
|
|
8
8
|
from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
|
9
9
|
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
@@ -16,9 +16,6 @@ from pydantic import BaseModel, Field, validator
|
|
16
16
|
PrimaryKeyType = Optional[Union[str, List[str]]]
|
17
17
|
|
18
18
|
|
19
|
-
VALID_FILE_TYPES: Mapping[str, Type[BaseModel]] = {"avro": AvroFormat, "csv": CsvFormat, "jsonl": JsonlFormat, "parquet": ParquetFormat}
|
20
|
-
|
21
|
-
|
22
19
|
class ValidationPolicy(Enum):
|
23
20
|
emit_record = "Emit Record"
|
24
21
|
skip_record = "Skip Record"
|
@@ -27,7 +24,6 @@ class ValidationPolicy(Enum):
|
|
27
24
|
|
28
25
|
class FileBasedStreamConfig(BaseModel):
|
29
26
|
name: str = Field(title="Name", description="The name of the stream.")
|
30
|
-
file_type: str = Field(title="File Type", description="The data file type that is being extracted for a stream.")
|
31
27
|
globs: Optional[List[str]] = Field(
|
32
28
|
title="Globs",
|
33
29
|
description='The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look <a href="https://en.wikipedia.org/wiki/Glob_(programming)">here</a>.',
|
@@ -54,7 +50,7 @@ class FileBasedStreamConfig(BaseModel):
|
|
54
50
|
description="When the state history of the file store is full, syncs will only read files that were last modified in the provided day range.",
|
55
51
|
default=3,
|
56
52
|
)
|
57
|
-
format:
|
53
|
+
format: Union[AvroFormat, CsvFormat, JsonlFormat, ParquetFormat] = Field(
|
58
54
|
title="Format",
|
59
55
|
description="The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.",
|
60
56
|
)
|
@@ -64,37 +60,6 @@ class FileBasedStreamConfig(BaseModel):
|
|
64
60
|
default=False,
|
65
61
|
)
|
66
62
|
|
67
|
-
@validator("file_type", pre=True)
|
68
|
-
def validate_file_type(cls, v: str) -> str:
|
69
|
-
if v not in VALID_FILE_TYPES:
|
70
|
-
raise ValueError(f"Format filetype {v} is not a supported file type")
|
71
|
-
return v
|
72
|
-
|
73
|
-
@classmethod
|
74
|
-
def _transform_legacy_config(cls, legacy_config: Mapping[str, Any], file_type: str) -> Mapping[str, Any]:
|
75
|
-
if file_type.casefold() not in VALID_FILE_TYPES:
|
76
|
-
raise ValueError(f"Format filetype {file_type} is not a supported file type")
|
77
|
-
if file_type.casefold() == "parquet" or file_type.casefold() == "avro":
|
78
|
-
legacy_config = cls._transform_legacy_parquet_or_avro_config(legacy_config)
|
79
|
-
return {file_type: VALID_FILE_TYPES[file_type.casefold()].parse_obj({key: val for key, val in legacy_config.items()})}
|
80
|
-
|
81
|
-
@classmethod
|
82
|
-
def _transform_legacy_parquet_or_avro_config(cls, config: Mapping[str, Any]) -> Mapping[str, Any]:
|
83
|
-
"""
|
84
|
-
The legacy parquet parser converts decimal fields to numbers. This isn't desirable because it can lead to precision loss.
|
85
|
-
To avoid introducing a breaking change with the new default, we will set decimal_as_float to True in the legacy configs.
|
86
|
-
"""
|
87
|
-
filetype = config.get("filetype")
|
88
|
-
if filetype != "parquet" and filetype != "avro":
|
89
|
-
raise ValueError(
|
90
|
-
f"Expected {filetype} format, got {config}. This is probably due to a CDK bug. Please reach out to the Airbyte team for support."
|
91
|
-
)
|
92
|
-
if config.get("decimal_as_float"):
|
93
|
-
raise ValueError(
|
94
|
-
f"Received legacy {filetype} file form with 'decimal_as_float' set. This is unexpected. Please reach out to the Airbyte team for support."
|
95
|
-
)
|
96
|
-
return {**config, **{"decimal_as_float": True}}
|
97
|
-
|
98
63
|
@validator("input_schema", pre=True)
|
99
64
|
def validate_input_schema(cls, v: Optional[str]) -> Optional[str]:
|
100
65
|
if v:
|
@@ -33,7 +33,7 @@ class FileBasedSource(AbstractSource, ABC):
|
|
33
33
|
catalog_path: Optional[str] = None,
|
34
34
|
availability_strategy: Optional[AbstractFileBasedAvailabilityStrategy] = None,
|
35
35
|
discovery_policy: AbstractDiscoveryPolicy = DefaultDiscoveryPolicy(),
|
36
|
-
parsers: Mapping[
|
36
|
+
parsers: Mapping[Type[Any], FileTypeParser] = default_parsers,
|
37
37
|
validation_policies: Mapping[ValidationPolicy, AbstractSchemaValidationPolicy] = DEFAULT_SCHEMA_VALIDATION_POLICIES,
|
38
38
|
cursor_cls: Type[AbstractFileBasedCursor] = DefaultFileBasedCursor,
|
39
39
|
):
|
@@ -1,4 +1,9 @@
|
|
1
|
-
from typing import Mapping
|
1
|
+
from typing import Any, Mapping, Type
|
2
|
+
|
3
|
+
from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
|
4
|
+
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
5
|
+
from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
|
6
|
+
from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
|
2
7
|
|
3
8
|
from .avro_parser import AvroParser
|
4
9
|
from .csv_parser import CsvParser
|
@@ -6,11 +11,11 @@ from .file_type_parser import FileTypeParser
|
|
6
11
|
from .jsonl_parser import JsonlParser
|
7
12
|
from .parquet_parser import ParquetParser
|
8
13
|
|
9
|
-
default_parsers: Mapping[
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
+
default_parsers: Mapping[Type[Any], FileTypeParser] = {
|
15
|
+
AvroFormat: AvroParser(),
|
16
|
+
CsvFormat: CsvParser(),
|
17
|
+
JsonlFormat: JsonlParser(),
|
18
|
+
ParquetFormat: ParquetParser(),
|
14
19
|
}
|
15
20
|
|
16
21
|
__all__ = ["AvroParser", "CsvParser", "JsonlParser", "ParquetParser", "default_parsers"]
|
@@ -49,7 +49,7 @@ class AvroParser(FileTypeParser):
|
|
49
49
|
stream_reader: AbstractFileBasedStreamReader,
|
50
50
|
logger: logging.Logger,
|
51
51
|
) -> SchemaType:
|
52
|
-
avro_format = config.format
|
52
|
+
avro_format = config.format
|
53
53
|
if not isinstance(avro_format, AvroFormat):
|
54
54
|
raise ValueError(f"Expected ParquetFormat, got {avro_format}")
|
55
55
|
|
@@ -422,7 +422,7 @@ def _no_cast(row: Mapping[str, str]) -> Mapping[str, str]:
|
|
422
422
|
|
423
423
|
|
424
424
|
def _extract_format(config: FileBasedStreamConfig) -> CsvFormat:
|
425
|
-
config_format = config.format
|
425
|
+
config_format = config.format
|
426
426
|
if not isinstance(config_format, CsvFormat):
|
427
427
|
raise ValueError(f"Invalid format config: {config_format}")
|
428
428
|
return config_format
|
@@ -30,7 +30,7 @@ class ParquetParser(FileTypeParser):
|
|
30
30
|
stream_reader: AbstractFileBasedStreamReader,
|
31
31
|
logger: logging.Logger,
|
32
32
|
) -> SchemaType:
|
33
|
-
parquet_format = config.format
|
33
|
+
parquet_format = config.format
|
34
34
|
if not isinstance(parquet_format, ParquetFormat):
|
35
35
|
raise ValueError(f"Expected ParquetFormat, got {parquet_format}")
|
36
36
|
|
@@ -54,7 +54,7 @@ class ParquetParser(FileTypeParser):
|
|
54
54
|
logger: logging.Logger,
|
55
55
|
discovered_schema: Optional[Mapping[str, SchemaType]],
|
56
56
|
) -> Iterable[Dict[str, Any]]:
|
57
|
-
parquet_format = config.format
|
57
|
+
parquet_format = config.format
|
58
58
|
if not isinstance(parquet_format, ParquetFormat):
|
59
59
|
logger.info(f"Expected ParquetFormat, got {parquet_format}")
|
60
60
|
raise ConfigValidationError(FileBasedSourceError.CONFIG_VALIDATION_ERROR)
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from abc import abstractmethod
|
6
6
|
from functools import cached_property, lru_cache
|
7
|
-
from typing import Any, Dict, Iterable, List, Mapping, Optional
|
7
|
+
from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
|
8
8
|
|
9
9
|
from airbyte_cdk.models import SyncMode
|
10
10
|
from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy
|
@@ -42,7 +42,7 @@ class AbstractFileBasedStream(Stream):
|
|
42
42
|
stream_reader: AbstractFileBasedStreamReader,
|
43
43
|
availability_strategy: AbstractFileBasedAvailabilityStrategy,
|
44
44
|
discovery_policy: AbstractDiscoveryPolicy,
|
45
|
-
parsers: Dict[
|
45
|
+
parsers: Dict[Type[Any], FileTypeParser],
|
46
46
|
validation_policy: AbstractSchemaValidationPolicy,
|
47
47
|
):
|
48
48
|
super().__init__()
|
@@ -121,11 +121,11 @@ class AbstractFileBasedStream(Stream):
|
|
121
121
|
"""
|
122
122
|
...
|
123
123
|
|
124
|
-
def get_parser(self
|
124
|
+
def get_parser(self) -> FileTypeParser:
|
125
125
|
try:
|
126
|
-
return self._parsers[
|
126
|
+
return self._parsers[type(self.config.format)]
|
127
127
|
except KeyError:
|
128
|
-
raise UndefinedParserError(FileBasedSourceError.UNDEFINED_PARSER, stream=self.name,
|
128
|
+
raise UndefinedParserError(FileBasedSourceError.UNDEFINED_PARSER, stream=self.name, format=type(self.config.format))
|
129
129
|
|
130
130
|
def record_passes_validation_policy(self, record: Mapping[str, Any]) -> bool:
|
131
131
|
if self.validation_policy:
|
@@ -5,6 +5,7 @@
|
|
5
5
|
import asyncio
|
6
6
|
import itertools
|
7
7
|
import traceback
|
8
|
+
from copy import deepcopy
|
8
9
|
from functools import cache
|
9
10
|
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Set, Union
|
10
11
|
|
@@ -79,7 +80,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
79
80
|
# On read requests we should always have the catalog available
|
80
81
|
raise MissingSchemaError(FileBasedSourceError.MISSING_SCHEMA, stream=self.name)
|
81
82
|
# The stream only supports a single file type, so we can use the same parser for all files
|
82
|
-
parser = self.get_parser(
|
83
|
+
parser = self.get_parser()
|
83
84
|
for file in stream_slice["files"]:
|
84
85
|
# only serialize the datetime once
|
85
86
|
file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
|
@@ -190,7 +191,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
190
191
|
if not inferred_schema:
|
191
192
|
raise InvalidSchemaError(
|
192
193
|
FileBasedSourceError.INVALID_SCHEMA_ERROR,
|
193
|
-
details=f"Empty schema. Please check that the files are valid {self.config.
|
194
|
+
details=f"Empty schema. Please check that the files are valid for format {self.config.format}",
|
194
195
|
stream=self.name,
|
195
196
|
)
|
196
197
|
|
@@ -210,7 +211,8 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
210
211
|
def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
|
211
212
|
loop = asyncio.get_event_loop()
|
212
213
|
schema = loop.run_until_complete(self._infer_schema(files))
|
213
|
-
|
214
|
+
# as infer schema returns a Mapping that is assumed to be immutable, we need to create a deepcopy to avoid modifying the reference
|
215
|
+
return self._fill_nulls(deepcopy(schema))
|
214
216
|
|
215
217
|
@staticmethod
|
216
218
|
def _fill_nulls(schema: Mapping[str, Any]) -> Mapping[str, Any]:
|
@@ -258,11 +260,11 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
258
260
|
|
259
261
|
async def _infer_file_schema(self, file: RemoteFile) -> SchemaType:
|
260
262
|
try:
|
261
|
-
return await self.get_parser(
|
263
|
+
return await self.get_parser().infer_schema(self.config, file, self._stream_reader, self.logger)
|
262
264
|
except Exception as exc:
|
263
265
|
raise SchemaInferenceError(
|
264
266
|
FileBasedSourceError.SCHEMA_INFERENCE_ERROR,
|
265
267
|
file=file.uri,
|
266
|
-
|
268
|
+
format=str(self.config.format),
|
267
269
|
stream=self.name,
|
268
270
|
) from exc
|
@@ -36,10 +36,14 @@ class DatetimeFormatInferrer:
|
|
36
36
|
This is the case if the value is a string or an integer between 1_000_000_000 and 2_000_000_000 for seconds
|
37
37
|
or between 1_000_000_000_000 and 2_000_000_000_000 for milliseconds.
|
38
38
|
This is separate from the format check for performance reasons"""
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
39
|
+
if isinstance(value, (str, int)):
|
40
|
+
try:
|
41
|
+
value_as_int = int(value)
|
42
|
+
for timestamp_range in self._timestamp_heuristic_ranges:
|
43
|
+
if value_as_int in timestamp_range:
|
44
|
+
return True
|
45
|
+
except ValueError:
|
46
|
+
# given that it's not parsable as an int, it can represent a datetime with one of the self._formats
|
43
47
|
return True
|
44
48
|
return False
|
45
49
|
|
@@ -73,7 +73,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
|
|
73
73
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=Dc0F87nElWsz_Ikj938eQ9uqZvyqgFhZ8Dqf_-hvndc,4800
|
74
74
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
|
75
75
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
|
76
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
76
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=ebor38wlQVqYD2QXk5X8v9xDZl0cEpIc2mFaKvpuiPE,57170
|
77
77
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
78
78
|
airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=bK4a74opm6WHyV7HqOVws6GE5Z7cLNc5MaTha69abIQ,6086
|
79
79
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
|
@@ -146,36 +146,36 @@ airbyte_cdk/sources/embedded/runner.py,sha256=kZ0CcUANuMjdZ4fmvp_w9P2IcsS9WSHxNq
|
|
146
146
|
airbyte_cdk/sources/embedded/tools.py,sha256=-Z4tZ4AP1OTi_zrqFM3YV8Rt7c60wvsrv0Dc-rTZ2uw,744
|
147
147
|
airbyte_cdk/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
148
148
|
airbyte_cdk/sources/file_based/exceptions.py,sha256=K3b0IH4xxY75GwRzueAoWfBZDSkctGWDtDEKkSwnrM4,4344
|
149
|
-
airbyte_cdk/sources/file_based/file_based_source.py,sha256=
|
149
|
+
airbyte_cdk/sources/file_based/file_based_source.py,sha256=NKS3p_cClQoKC0elj8tJr4ayAUnUXXgTjGr3SVHxC4Q,6937
|
150
150
|
airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=K9fFHcSL4E8v-X2l38wRAcZCjpyifr35orvby8vQt84,3749
|
151
151
|
airbyte_cdk/sources/file_based/remote_file.py,sha256=xIDwDDBPhJI1K8YZuXjEfjxakZPMieBKJM6vmq6G5tw,248
|
152
152
|
airbyte_cdk/sources/file_based/schema_helpers.py,sha256=XBkOutIw_n6SNYU34qbyTbl0Ppt0i4k3sVFMSaX3wJo,9103
|
153
153
|
airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
|
154
154
|
airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=WiPPuQGfmQlFUMFR5h3ECc-VzBj4vair6_4WAL87AEI,277
|
155
155
|
airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=NeHCiG4FFohzYpQQFfmTL4-5oI0nElHWgXX1xrm8-SU,1269
|
156
|
-
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=
|
156
|
+
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=RWcRhNjytH-09_d4MVKDC37B3EGsqe2DheHpLNNMmzE,4243
|
157
157
|
airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
158
158
|
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=WrV4sKtJoZ1dK31HK7NdBKlnYHkmu6NqjmEpkVqJ6tQ,4582
|
159
159
|
airbyte_cdk/sources/file_based/config/avro_format.py,sha256=oLJIuNInu-MgjkVFqwHvmQ4CPZa4NZingq_I0_trQ3g,589
|
160
160
|
airbyte_cdk/sources/file_based/config/csv_format.py,sha256=xlBZ5WyAshagjjjbUV_je1JyZ1oY1GbIzJRUZ9UfSvo,7095
|
161
|
-
airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=
|
161
|
+
airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=BTlc2Sw1UP9XF1D4ZYTjubI_msEijBfh9vW8GhOGtIA,3858
|
162
162
|
airbyte_cdk/sources/file_based/config/jsonl_format.py,sha256=usmTeTw8xw8OKwrz8MsiS5E1LQiVEbedGHMHNAfOOlk,252
|
163
163
|
airbyte_cdk/sources/file_based/config/parquet_format.py,sha256=O_Eq0yVzjPiKDz8H1-f9yMowtCcJwT9F2prNYpXZkp0,614
|
164
164
|
airbyte_cdk/sources/file_based/discovery_policy/__init__.py,sha256=x_7JsQGiS7Ytmr0ZDS0SNYGcNUzC4wCm3_1-Mf3ZFnw,283
|
165
165
|
airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha256=cz9po5Cn6u50uq3hDy46pqnPR4JDcnRItZX9k0WDUJU,520
|
166
166
|
airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=Mx3zT9Dem4uNfaUT0oOtrESsuB1LrGAi5N-uw2swZZA,701
|
167
|
-
airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=
|
168
|
-
airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256
|
169
|
-
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=
|
167
|
+
airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=wWLnHooFrnCwfhMoguDODtQxRVQyrjy0mDrSi4TWwPM,808
|
168
|
+
airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=-C_BODUMg8X4jzN85C9Q2R__cpFeLlKycLC_JbctLF8,8670
|
169
|
+
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=VVV829XszmDRqmgv6VBshMF4CSEzmP8rL-OlGttRu7c,17649
|
170
170
|
airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=cThTLc1YKSAapOn70lB09SzruRIPSShGIMz1f92QYV8,1555
|
171
171
|
airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=Kz6HLF0CrFHQ1Y6rJKGr7KmBWSLeDYFQmkg0WIi7Frg,5395
|
172
|
-
airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=
|
172
|
+
airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=gk2PnjKYT3aL5H7N5jo6OL4vpeNjC588xKrz2_UCNSU,8679
|
173
173
|
airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=sEVnRhZ8x9f7PNjo6lewxid9z0PI8eSj7gSoFC3MH1Y,527
|
174
174
|
airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py,sha256=uwk6Ugf23xKG4PRPVVRVwpcNjTwPgxejl03vLSEzK0s,604
|
175
175
|
airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py,sha256=ZeAa0z50ywMU2chNjQ7JpL4yePU1NajhBa8FS7rXLVo,1643
|
176
176
|
airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaFS_3pIwm-chEabZHiPJi0,265
|
177
|
-
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=
|
178
|
-
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=
|
177
|
+
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=Vyrfn24nvM4JDgZgbIdHK0vaX66sl7vLSNvYS-D5ZtY,5800
|
178
|
+
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=OT7QYytD1V2JY0CHHhIKp62QOnHJquM-gjUrV12rBdM,12379
|
179
179
|
airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
|
180
180
|
airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=i-FPeK8lwCzX34GCcmvL5Yvdh8-uu7FeCVYDoFbD7IY,1920
|
181
181
|
airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=kuJRKgDYOGXRk0V0I8BpFxg0hGv7SfV_nBpmmn45F88,6815
|
@@ -214,7 +214,7 @@ airbyte_cdk/sources/utils/transform.py,sha256=4GYmO6bq33HF-a1in0dKQKqUOYI1bWItyu
|
|
214
214
|
airbyte_cdk/sources/utils/types.py,sha256=41ZQR681t5TUnOScij58d088sb99klH_ZENFcaYro_g,175
|
215
215
|
airbyte_cdk/utils/__init__.py,sha256=kFLcs2P-tbPyeVOJS9rOv1jZdnSpjG24ro0CHgt_CIk,215
|
216
216
|
airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=q3aDl8T10ufGbeqnUPqbZLxQcHdkf2kDfQK_upWzBbI,2894
|
217
|
-
airbyte_cdk/utils/datetime_format_inferrer.py,sha256=
|
217
|
+
airbyte_cdk/utils/datetime_format_inferrer.py,sha256=gGKDQ3OdY18R5CVFhq4c7zB_E4Cxe6J6SLA29cz3cJM,3954
|
218
218
|
airbyte_cdk/utils/event_timing.py,sha256=Hn5kCc9xGKLcV5EYpJCZwNiz9neKKu2WG8FJF_hy278,2377
|
219
219
|
airbyte_cdk/utils/mapping_helpers.py,sha256=tVkbgnxy12Ah2Jxh_3tKW7CTKTAVIcPexsBhsiyTbp4,1729
|
220
220
|
airbyte_cdk/utils/schema_inferrer.py,sha256=D8vFVgeK6VLcAug4YVAHfa3D29On0A_nMlwq9SPlfPI,3799
|
@@ -327,23 +327,23 @@ unit_tests/sources/file_based/config/test_abstract_file_based_spec.py,sha256=wmZ
|
|
327
327
|
unit_tests/sources/file_based/config/test_csv_format.py,sha256=VYL-9Ec8hW_yO2Pj9F8pDfITcgEAFtSublYda7ut7QE,1132
|
328
328
|
unit_tests/sources/file_based/config/test_file_based_stream_config.py,sha256=1eMsHlMQIFwyw20HjnhgKuiw6399sMcLTQ4LP09kTT4,3060
|
329
329
|
unit_tests/sources/file_based/file_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
330
|
-
unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=
|
330
|
+
unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=Rk-FB6-o350BpDp2QemprAgBHKlFIiom0EBgDD5Tygg,10991
|
331
331
|
unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=PalrxCRHAyoIp12IWWyePS9QF4LcvNVkqrKdwkrayJ4,22457
|
332
332
|
unit_tests/sources/file_based/file_types/test_jsonl_parser.py,sha256=foTf9U9LyAS8OR0BonwNgFWPqTrmzFV2lpPUfRMrioE,6134
|
333
333
|
unit_tests/sources/file_based/file_types/test_parquet_parser.py,sha256=D7sKTty8aEqMDWWGKWUqDbWjTxhGkygU7ns4-_JceRY,13543
|
334
334
|
unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
335
|
-
unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=
|
336
|
-
unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=
|
337
|
-
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=
|
338
|
-
unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=
|
339
|
-
unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=
|
340
|
-
unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=
|
341
|
-
unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=
|
342
|
-
unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=
|
343
|
-
unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=
|
335
|
+
unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=UFGCnb_nDU7NFhzxy854OeIx-PbNGvL4v2Nny5CTukE,29809
|
336
|
+
unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=wW8Z1-oAJk7o94zkd_nn7qQdughkx65oGuyhs75ZiVk,5676
|
337
|
+
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=w33c5Tm-0MMjWYSJS1p8d3P2OJR-5kL74AbkkGovyho,97990
|
338
|
+
unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=ZSRWFnkVEeg1rPMpmoKRU4u4U9tdor8nYaj-aaZ7oag,60875
|
339
|
+
unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=rgFxNgy7I3L4Mz3PvtHB2ar_lFbe58WY3RatiddpTso,27753
|
340
|
+
unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=x4xQ9c3vJj5Fs-hmxnqovI2hRT56aHARbZR61Ws7mC8,26831
|
341
|
+
unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=RrFXL1pdkyjidl3yq40FKcBCAIv_Pmd-0mcGJ6E8mcI,10183
|
342
|
+
unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=P11FDBe1mtaPLUKeSxrZ7Z9yGXw6hvwnACfkU9sJRhw,28483
|
343
|
+
unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=9IKNyJ8YX0hO0rWOhQugrDswVLMj5PZIpklVrmgiYVY,26814
|
344
344
|
unit_tests/sources/file_based/stream/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
345
|
-
unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=
|
346
|
-
unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=
|
345
|
+
unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=acUTX704mpw68ljH0atQx--f7STX_UynO8UtROw11Gw,12565
|
346
|
+
unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=DLEzbhfSjzbo7M-GMlJUx6XKgw7T5UuBeM2Ola0Cm7k,5771
|
347
347
|
unit_tests/sources/fixtures/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
348
348
|
unit_tests/sources/fixtures/source_test_fixture.py,sha256=r-UtR241EGQMZTw1RoKaatrpCGeQn7OIuRPWfG9f7nI,5380
|
349
349
|
unit_tests/sources/message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -359,14 +359,14 @@ unit_tests/sources/streams/http/auth/test_auth.py,sha256=gdWpJ-cR64qRXmmPOQWhVd4
|
|
359
359
|
unit_tests/sources/streams/http/requests_native_auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
360
360
|
unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py,sha256=h5LIjR_1hEdsnXmyp7fISa0isYjXIEjMw-8I8L0ZNLE,15024
|
361
361
|
unit_tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
362
|
-
unit_tests/utils/test_datetime_format_inferrer.py,sha256=
|
362
|
+
unit_tests/utils/test_datetime_format_inferrer.py,sha256=1EUW1_afccMDrZM6YZyyPqrdwsUxZTaBxJNVa4TjiN8,3616
|
363
363
|
unit_tests/utils/test_mapping_helpers.py,sha256=hqRppuban9hGKviiNFqp2fNdAz77d1_gjvgg8L7-jy8,1408
|
364
364
|
unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg4MNPAG-xhpk,7817
|
365
365
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
366
366
|
unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
|
367
367
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
368
|
-
airbyte_cdk-0.51.
|
369
|
-
airbyte_cdk-0.51.
|
370
|
-
airbyte_cdk-0.51.
|
371
|
-
airbyte_cdk-0.51.
|
372
|
-
airbyte_cdk-0.51.
|
368
|
+
airbyte_cdk-0.51.17.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
369
|
+
airbyte_cdk-0.51.17.dist-info/METADATA,sha256=yC2mwyB29fJpI-FDENnoicx-VLHUApYTF2b1SY_u0Lo,11516
|
370
|
+
airbyte_cdk-0.51.17.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
371
|
+
airbyte_cdk-0.51.17.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
372
|
+
airbyte_cdk-0.51.17.dist-info/RECORD,,
|
@@ -142,17 +142,17 @@ _double_as_string_avro_format = AvroFormat(double_as_string=True)
|
|
142
142
|
id="test_decimal_missing_precision"),
|
143
143
|
pytest.param(_default_avro_format, {"type": "bytes", "logicalType": "decimal", "precision": 9}, None, ValueError,
|
144
144
|
id="test_decimal_missing_scale"),
|
145
|
-
pytest.param(_default_avro_format, {"type": "bytes", "logicalType": "uuid"}, {"type":
|
146
|
-
pytest.param(_default_avro_format, {"type": "int", "logicalType": "date"}, {"type":
|
145
|
+
pytest.param(_default_avro_format, {"type": "bytes", "logicalType": "uuid"}, {"type": "string"}, None, id="test_uuid"),
|
146
|
+
pytest.param(_default_avro_format, {"type": "int", "logicalType": "date"}, {"type": "string", "format": "date"}, None,
|
147
147
|
id="test_date"),
|
148
|
-
pytest.param(_default_avro_format, {"type": "int", "logicalType": "time-millis"}, {"type":
|
149
|
-
pytest.param(_default_avro_format, {"type": "long", "logicalType": "time-micros"}, {"type":
|
148
|
+
pytest.param(_default_avro_format, {"type": "int", "logicalType": "time-millis"}, {"type": "integer"}, None, id="test_time_millis"),
|
149
|
+
pytest.param(_default_avro_format, {"type": "long", "logicalType": "time-micros"}, {"type": "integer"}, None,
|
150
150
|
id="test_time_micros"),
|
151
151
|
pytest.param(
|
152
152
|
_default_avro_format,
|
153
|
-
{"type": "long", "logicalType": "timestamp-millis"}, {"type":
|
153
|
+
{"type": "long", "logicalType": "timestamp-millis"}, {"type": "string", "format": "date-time"}, None, id="test_timestamp_millis"
|
154
154
|
),
|
155
|
-
pytest.param(_default_avro_format, {"type": "long", "logicalType": "timestamp-micros"}, {"type":
|
155
|
+
pytest.param(_default_avro_format, {"type": "long", "logicalType": "timestamp-micros"}, {"type": "string"}, None,
|
156
156
|
id="test_timestamp_micros"),
|
157
157
|
pytest.param(
|
158
158
|
_default_avro_format,
|
@@ -203,7 +203,7 @@ single_avro_scenario = (
|
|
203
203
|
"streams": [
|
204
204
|
{
|
205
205
|
"name": "stream1",
|
206
|
-
"
|
206
|
+
"format": {"filetype": "avro"},
|
207
207
|
"globs": ["*"],
|
208
208
|
"validation_policy": "Emit Record",
|
209
209
|
}
|
@@ -266,7 +266,7 @@ multiple_avro_combine_schema_scenario = (
|
|
266
266
|
"streams": [
|
267
267
|
{
|
268
268
|
"name": "stream1",
|
269
|
-
"
|
269
|
+
"format": {"filetype": "avro"},
|
270
270
|
"globs": ["*"],
|
271
271
|
"validation_policy": "Emit Record",
|
272
272
|
}
|
@@ -362,7 +362,7 @@ avro_all_types_scenario = (
|
|
362
362
|
"streams": [
|
363
363
|
{
|
364
364
|
"name": "stream1",
|
365
|
-
"
|
365
|
+
"format": {"filetype": "avro"},
|
366
366
|
"globs": ["*"],
|
367
367
|
"validation_policy": "Emit Record",
|
368
368
|
}
|
@@ -463,13 +463,13 @@ multiple_streams_avro_scenario = (
|
|
463
463
|
"streams": [
|
464
464
|
{
|
465
465
|
"name": "songs_stream",
|
466
|
-
"
|
466
|
+
"format": {"filetype": "avro"},
|
467
467
|
"globs": ["*_songs.avro"],
|
468
468
|
"validation_policy": "Emit Record",
|
469
469
|
},
|
470
470
|
{
|
471
471
|
"name": "festivals_stream",
|
472
|
-
"
|
472
|
+
"format": {"filetype": "avro"},
|
473
473
|
"globs": ["*_festivals.avro"],
|
474
474
|
"validation_policy": "Emit Record",
|
475
475
|
},
|
@@ -629,7 +629,6 @@ avro_file_with_double_as_number_scenario = (
|
|
629
629
|
"streams": [
|
630
630
|
{
|
631
631
|
"name": "stream1",
|
632
|
-
"file_type": "avro",
|
633
632
|
"globs": ["*"],
|
634
633
|
"validation_policy": "Emit Record",
|
635
634
|
"format": {
|
@@ -17,7 +17,7 @@ _base_success_scenario = (
|
|
17
17
|
"streams": [
|
18
18
|
{
|
19
19
|
"name": "stream1",
|
20
|
-
"
|
20
|
+
"format": {"filetype": "csv"},
|
21
21
|
"globs": ["*.csv"],
|
22
22
|
"validation_policy": "Emit Record",
|
23
23
|
}
|
@@ -55,13 +55,13 @@ success_multi_stream_scenario = (
|
|
55
55
|
"streams": [
|
56
56
|
{
|
57
57
|
"name": "stream1",
|
58
|
-
"
|
58
|
+
"format": {"filetype": "csv"},
|
59
59
|
"globs": ["*.csv", "*.gz"],
|
60
60
|
"validation_policy": "Emit Record",
|
61
61
|
},
|
62
62
|
{
|
63
63
|
"name": "stream2",
|
64
|
-
"
|
64
|
+
"format": {"filetype": "csv"},
|
65
65
|
"globs": ["*.csv", "*.gz"],
|
66
66
|
"validation_policy": "Emit Record",
|
67
67
|
}
|
@@ -79,7 +79,7 @@ success_extensionless_scenario = (
|
|
79
79
|
"streams": [
|
80
80
|
{
|
81
81
|
"name": "stream1",
|
82
|
-
"
|
82
|
+
"format": {"filetype": "csv"},
|
83
83
|
"globs": ["*"],
|
84
84
|
"validation_policy": "Emit Record",
|
85
85
|
}
|
@@ -109,7 +109,7 @@ success_user_provided_schema_scenario = (
|
|
109
109
|
"streams": [
|
110
110
|
{
|
111
111
|
"name": "stream1",
|
112
|
-
"
|
112
|
+
"format": {"filetype": "csv"},
|
113
113
|
"globs": ["*.csv"],
|
114
114
|
"validation_policy": "Emit Record",
|
115
115
|
"input_schema": '{"col1": "string", "col2": "string"}',
|
@@ -158,7 +158,7 @@ error_record_validation_user_provided_schema_scenario = (
|
|
158
158
|
"streams": [
|
159
159
|
{
|
160
160
|
"name": "stream1",
|
161
|
-
"
|
161
|
+
"format": {"filetype": "csv"},
|
162
162
|
"globs": ["*.csv"],
|
163
163
|
"validation_policy": "always_fail",
|
164
164
|
"input_schema": '{"col1": "number", "col2": "string"}',
|
@@ -179,13 +179,13 @@ error_multi_stream_scenario = (
|
|
179
179
|
"streams": [
|
180
180
|
{
|
181
181
|
"name": "stream1",
|
182
|
-
"
|
182
|
+
"format": {"filetype": "csv"},
|
183
183
|
"globs": ["*.csv"],
|
184
184
|
"validation_policy": "Emit Record",
|
185
185
|
},
|
186
186
|
{
|
187
187
|
"name": "stream2",
|
188
|
-
"
|
188
|
+
"format": {"filetype": "jsonl"},
|
189
189
|
"globs": ["*.csv"],
|
190
190
|
"validation_policy": "Emit Record",
|
191
191
|
}
|