airbyte-cdk 0.51.15__py3-none-any.whl → 0.51.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +494 -522
  2. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +1 -1
  3. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -37
  4. airbyte_cdk/sources/file_based/file_based_source.py +1 -1
  5. airbyte_cdk/sources/file_based/file_types/__init__.py +11 -6
  6. airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -1
  7. airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -1
  8. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -2
  9. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +5 -5
  10. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +7 -5
  11. airbyte_cdk/utils/datetime_format_inferrer.py +8 -4
  12. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/RECORD +29 -29
  14. unit_tests/sources/file_based/file_types/test_avro_parser.py +6 -6
  15. unit_tests/sources/file_based/scenarios/avro_scenarios.py +5 -6
  16. unit_tests/sources/file_based/scenarios/check_scenarios.py +8 -8
  17. unit_tests/sources/file_based/scenarios/csv_scenarios.py +19 -42
  18. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +15 -15
  19. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +13 -12
  20. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +5 -9
  21. unit_tests/sources/file_based/scenarios/scenario_builder.py +1 -1
  22. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +16 -16
  23. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +9 -9
  24. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +2 -1
  25. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +6 -3
  26. unit_tests/utils/test_datetime_format_inferrer.py +1 -0
  27. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/LICENSE.txt +0 -0
  28. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/WHEEL +0 -0
  29. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/top_level.txt +0 -0
@@ -73,7 +73,7 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
73
73
  return files
74
74
 
75
75
  def _check_parse_record(self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger) -> None:
76
- parser = stream.get_parser(stream.config.file_type)
76
+ parser = stream.get_parser()
77
77
 
78
78
  try:
79
79
  record = next(iter(parser.parse_records(stream.config, file, self.stream_reader, logger, discovered_schema=None)))
@@ -3,7 +3,7 @@
3
3
  #
4
4
 
5
5
  from enum import Enum
6
- from typing import Any, List, Mapping, Optional, Type, Union
6
+ from typing import Any, List, Mapping, Optional, Union
7
7
 
8
8
  from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
9
9
  from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
@@ -16,9 +16,6 @@ from pydantic import BaseModel, Field, validator
16
16
  PrimaryKeyType = Optional[Union[str, List[str]]]
17
17
 
18
18
 
19
- VALID_FILE_TYPES: Mapping[str, Type[BaseModel]] = {"avro": AvroFormat, "csv": CsvFormat, "jsonl": JsonlFormat, "parquet": ParquetFormat}
20
-
21
-
22
19
  class ValidationPolicy(Enum):
23
20
  emit_record = "Emit Record"
24
21
  skip_record = "Skip Record"
@@ -27,7 +24,6 @@ class ValidationPolicy(Enum):
27
24
 
28
25
  class FileBasedStreamConfig(BaseModel):
29
26
  name: str = Field(title="Name", description="The name of the stream.")
30
- file_type: str = Field(title="File Type", description="The data file type that is being extracted for a stream.")
31
27
  globs: Optional[List[str]] = Field(
32
28
  title="Globs",
33
29
  description='The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look <a href="https://en.wikipedia.org/wiki/Glob_(programming)">here</a>.',
@@ -54,7 +50,7 @@ class FileBasedStreamConfig(BaseModel):
54
50
  description="When the state history of the file store is full, syncs will only read files that were last modified in the provided day range.",
55
51
  default=3,
56
52
  )
57
- format: Optional[Union[AvroFormat, CsvFormat, JsonlFormat, ParquetFormat]] = Field(
53
+ format: Union[AvroFormat, CsvFormat, JsonlFormat, ParquetFormat] = Field(
58
54
  title="Format",
59
55
  description="The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.",
60
56
  )
@@ -64,37 +60,6 @@ class FileBasedStreamConfig(BaseModel):
64
60
  default=False,
65
61
  )
66
62
 
67
- @validator("file_type", pre=True)
68
- def validate_file_type(cls, v: str) -> str:
69
- if v not in VALID_FILE_TYPES:
70
- raise ValueError(f"Format filetype {v} is not a supported file type")
71
- return v
72
-
73
- @classmethod
74
- def _transform_legacy_config(cls, legacy_config: Mapping[str, Any], file_type: str) -> Mapping[str, Any]:
75
- if file_type.casefold() not in VALID_FILE_TYPES:
76
- raise ValueError(f"Format filetype {file_type} is not a supported file type")
77
- if file_type.casefold() == "parquet" or file_type.casefold() == "avro":
78
- legacy_config = cls._transform_legacy_parquet_or_avro_config(legacy_config)
79
- return {file_type: VALID_FILE_TYPES[file_type.casefold()].parse_obj({key: val for key, val in legacy_config.items()})}
80
-
81
- @classmethod
82
- def _transform_legacy_parquet_or_avro_config(cls, config: Mapping[str, Any]) -> Mapping[str, Any]:
83
- """
84
- The legacy parquet parser converts decimal fields to numbers. This isn't desirable because it can lead to precision loss.
85
- To avoid introducing a breaking change with the new default, we will set decimal_as_float to True in the legacy configs.
86
- """
87
- filetype = config.get("filetype")
88
- if filetype != "parquet" and filetype != "avro":
89
- raise ValueError(
90
- f"Expected {filetype} format, got {config}. This is probably due to a CDK bug. Please reach out to the Airbyte team for support."
91
- )
92
- if config.get("decimal_as_float"):
93
- raise ValueError(
94
- f"Received legacy {filetype} file form with 'decimal_as_float' set. This is unexpected. Please reach out to the Airbyte team for support."
95
- )
96
- return {**config, **{"decimal_as_float": True}}
97
-
98
63
  @validator("input_schema", pre=True)
99
64
  def validate_input_schema(cls, v: Optional[str]) -> Optional[str]:
100
65
  if v:
@@ -33,7 +33,7 @@ class FileBasedSource(AbstractSource, ABC):
33
33
  catalog_path: Optional[str] = None,
34
34
  availability_strategy: Optional[AbstractFileBasedAvailabilityStrategy] = None,
35
35
  discovery_policy: AbstractDiscoveryPolicy = DefaultDiscoveryPolicy(),
36
- parsers: Mapping[str, FileTypeParser] = default_parsers,
36
+ parsers: Mapping[Type[Any], FileTypeParser] = default_parsers,
37
37
  validation_policies: Mapping[ValidationPolicy, AbstractSchemaValidationPolicy] = DEFAULT_SCHEMA_VALIDATION_POLICIES,
38
38
  cursor_cls: Type[AbstractFileBasedCursor] = DefaultFileBasedCursor,
39
39
  ):
@@ -1,4 +1,9 @@
1
- from typing import Mapping
1
+ from typing import Any, Mapping, Type
2
+
3
+ from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
4
+ from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
5
+ from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
6
+ from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
2
7
 
3
8
  from .avro_parser import AvroParser
4
9
  from .csv_parser import CsvParser
@@ -6,11 +11,11 @@ from .file_type_parser import FileTypeParser
6
11
  from .jsonl_parser import JsonlParser
7
12
  from .parquet_parser import ParquetParser
8
13
 
9
- default_parsers: Mapping[str, FileTypeParser] = {
10
- "avro": AvroParser(),
11
- "csv": CsvParser(),
12
- "jsonl": JsonlParser(),
13
- "parquet": ParquetParser(),
14
+ default_parsers: Mapping[Type[Any], FileTypeParser] = {
15
+ AvroFormat: AvroParser(),
16
+ CsvFormat: CsvParser(),
17
+ JsonlFormat: JsonlParser(),
18
+ ParquetFormat: ParquetParser(),
14
19
  }
15
20
 
16
21
  __all__ = ["AvroParser", "CsvParser", "JsonlParser", "ParquetParser", "default_parsers"]
@@ -49,7 +49,7 @@ class AvroParser(FileTypeParser):
49
49
  stream_reader: AbstractFileBasedStreamReader,
50
50
  logger: logging.Logger,
51
51
  ) -> SchemaType:
52
- avro_format = config.format or AvroFormat()
52
+ avro_format = config.format
53
53
  if not isinstance(avro_format, AvroFormat):
54
54
  raise ValueError(f"Expected ParquetFormat, got {avro_format}")
55
55
 
@@ -422,7 +422,7 @@ def _no_cast(row: Mapping[str, str]) -> Mapping[str, str]:
422
422
 
423
423
 
424
424
  def _extract_format(config: FileBasedStreamConfig) -> CsvFormat:
425
- config_format = config.format or CsvFormat()
425
+ config_format = config.format
426
426
  if not isinstance(config_format, CsvFormat):
427
427
  raise ValueError(f"Invalid format config: {config_format}")
428
428
  return config_format
@@ -30,7 +30,7 @@ class ParquetParser(FileTypeParser):
30
30
  stream_reader: AbstractFileBasedStreamReader,
31
31
  logger: logging.Logger,
32
32
  ) -> SchemaType:
33
- parquet_format = config.format or ParquetFormat()
33
+ parquet_format = config.format
34
34
  if not isinstance(parquet_format, ParquetFormat):
35
35
  raise ValueError(f"Expected ParquetFormat, got {parquet_format}")
36
36
 
@@ -54,7 +54,7 @@ class ParquetParser(FileTypeParser):
54
54
  logger: logging.Logger,
55
55
  discovered_schema: Optional[Mapping[str, SchemaType]],
56
56
  ) -> Iterable[Dict[str, Any]]:
57
- parquet_format = config.format or ParquetFormat()
57
+ parquet_format = config.format
58
58
  if not isinstance(parquet_format, ParquetFormat):
59
59
  logger.info(f"Expected ParquetFormat, got {parquet_format}")
60
60
  raise ConfigValidationError(FileBasedSourceError.CONFIG_VALIDATION_ERROR)
@@ -4,7 +4,7 @@
4
4
 
5
5
  from abc import abstractmethod
6
6
  from functools import cached_property, lru_cache
7
- from typing import Any, Dict, Iterable, List, Mapping, Optional
7
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
8
8
 
9
9
  from airbyte_cdk.models import SyncMode
10
10
  from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy
@@ -42,7 +42,7 @@ class AbstractFileBasedStream(Stream):
42
42
  stream_reader: AbstractFileBasedStreamReader,
43
43
  availability_strategy: AbstractFileBasedAvailabilityStrategy,
44
44
  discovery_policy: AbstractDiscoveryPolicy,
45
- parsers: Dict[str, FileTypeParser],
45
+ parsers: Dict[Type[Any], FileTypeParser],
46
46
  validation_policy: AbstractSchemaValidationPolicy,
47
47
  ):
48
48
  super().__init__()
@@ -121,11 +121,11 @@ class AbstractFileBasedStream(Stream):
121
121
  """
122
122
  ...
123
123
 
124
- def get_parser(self, file_type: str) -> FileTypeParser:
124
+ def get_parser(self) -> FileTypeParser:
125
125
  try:
126
- return self._parsers[file_type]
126
+ return self._parsers[type(self.config.format)]
127
127
  except KeyError:
128
- raise UndefinedParserError(FileBasedSourceError.UNDEFINED_PARSER, stream=self.name, file_type=file_type)
128
+ raise UndefinedParserError(FileBasedSourceError.UNDEFINED_PARSER, stream=self.name, format=type(self.config.format))
129
129
 
130
130
  def record_passes_validation_policy(self, record: Mapping[str, Any]) -> bool:
131
131
  if self.validation_policy:
@@ -5,6 +5,7 @@
5
5
  import asyncio
6
6
  import itertools
7
7
  import traceback
8
+ from copy import deepcopy
8
9
  from functools import cache
9
10
  from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Set, Union
10
11
 
@@ -79,7 +80,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
79
80
  # On read requests we should always have the catalog available
80
81
  raise MissingSchemaError(FileBasedSourceError.MISSING_SCHEMA, stream=self.name)
81
82
  # The stream only supports a single file type, so we can use the same parser for all files
82
- parser = self.get_parser(self.config.file_type)
83
+ parser = self.get_parser()
83
84
  for file in stream_slice["files"]:
84
85
  # only serialize the datetime once
85
86
  file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
@@ -190,7 +191,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
190
191
  if not inferred_schema:
191
192
  raise InvalidSchemaError(
192
193
  FileBasedSourceError.INVALID_SCHEMA_ERROR,
193
- details=f"Empty schema. Please check that the files are valid {self.config.file_type}",
194
+ details=f"Empty schema. Please check that the files are valid for format {self.config.format}",
194
195
  stream=self.name,
195
196
  )
196
197
 
@@ -210,7 +211,8 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
210
211
  def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
211
212
  loop = asyncio.get_event_loop()
212
213
  schema = loop.run_until_complete(self._infer_schema(files))
213
- return self._fill_nulls(schema)
214
+ # as infer schema returns a Mapping that is assumed to be immutable, we need to create a deepcopy to avoid modifying the reference
215
+ return self._fill_nulls(deepcopy(schema))
214
216
 
215
217
  @staticmethod
216
218
  def _fill_nulls(schema: Mapping[str, Any]) -> Mapping[str, Any]:
@@ -258,11 +260,11 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
258
260
 
259
261
  async def _infer_file_schema(self, file: RemoteFile) -> SchemaType:
260
262
  try:
261
- return await self.get_parser(self.config.file_type).infer_schema(self.config, file, self._stream_reader, self.logger)
263
+ return await self.get_parser().infer_schema(self.config, file, self._stream_reader, self.logger)
262
264
  except Exception as exc:
263
265
  raise SchemaInferenceError(
264
266
  FileBasedSourceError.SCHEMA_INFERENCE_ERROR,
265
267
  file=file.uri,
266
- stream_file_type=self.config.file_type,
268
+ format=str(self.config.format),
267
269
  stream=self.name,
268
270
  ) from exc
@@ -36,10 +36,14 @@ class DatetimeFormatInferrer:
36
36
  This is the case if the value is a string or an integer between 1_000_000_000 and 2_000_000_000 for seconds
37
37
  or between 1_000_000_000_000 and 2_000_000_000_000 for milliseconds.
38
38
  This is separate from the format check for performance reasons"""
39
- for timestamp_range in self._timestamp_heuristic_ranges:
40
- if isinstance(value, str) and (not value.isdecimal() or int(value) in timestamp_range):
41
- return True
42
- if isinstance(value, int) and value in timestamp_range:
39
+ if isinstance(value, (str, int)):
40
+ try:
41
+ value_as_int = int(value)
42
+ for timestamp_range in self._timestamp_heuristic_ranges:
43
+ if value_as_int in timestamp_range:
44
+ return True
45
+ except ValueError:
46
+ # given that it's not parsable as an int, it can represent a datetime with one of the self._formats
43
47
  return True
44
48
  return False
45
49
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.51.15
3
+ Version: 0.51.17
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -73,7 +73,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
73
73
  airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=Dc0F87nElWsz_Ikj938eQ9uqZvyqgFhZ8Dqf_-hvndc,4800
74
74
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
75
75
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
76
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=q5VS1aiJwgK2nOsqpnc_lQWTvxhbH078jufxcedon9Q,57422
76
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=ebor38wlQVqYD2QXk5X8v9xDZl0cEpIc2mFaKvpuiPE,57170
77
77
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
78
78
  airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=bK4a74opm6WHyV7HqOVws6GE5Z7cLNc5MaTha69abIQ,6086
79
79
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
@@ -146,36 +146,36 @@ airbyte_cdk/sources/embedded/runner.py,sha256=kZ0CcUANuMjdZ4fmvp_w9P2IcsS9WSHxNq
146
146
  airbyte_cdk/sources/embedded/tools.py,sha256=-Z4tZ4AP1OTi_zrqFM3YV8Rt7c60wvsrv0Dc-rTZ2uw,744
147
147
  airbyte_cdk/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
148
148
  airbyte_cdk/sources/file_based/exceptions.py,sha256=K3b0IH4xxY75GwRzueAoWfBZDSkctGWDtDEKkSwnrM4,4344
149
- airbyte_cdk/sources/file_based/file_based_source.py,sha256=NCbXAGPWBQSPAf5x2U2eCdOLUd26RhO5s6K87_AF8Es,6931
149
+ airbyte_cdk/sources/file_based/file_based_source.py,sha256=NKS3p_cClQoKC0elj8tJr4ayAUnUXXgTjGr3SVHxC4Q,6937
150
150
  airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=K9fFHcSL4E8v-X2l38wRAcZCjpyifr35orvby8vQt84,3749
151
151
  airbyte_cdk/sources/file_based/remote_file.py,sha256=xIDwDDBPhJI1K8YZuXjEfjxakZPMieBKJM6vmq6G5tw,248
152
152
  airbyte_cdk/sources/file_based/schema_helpers.py,sha256=XBkOutIw_n6SNYU34qbyTbl0Ppt0i4k3sVFMSaX3wJo,9103
153
153
  airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
154
154
  airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=WiPPuQGfmQlFUMFR5h3ECc-VzBj4vair6_4WAL87AEI,277
155
155
  airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=NeHCiG4FFohzYpQQFfmTL4-5oI0nElHWgXX1xrm8-SU,1269
156
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=InGBlGbInuNUQ6oaK5A9oICVc7ZNHMSYo8g5Vy2smOo,4266
156
+ airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=RWcRhNjytH-09_d4MVKDC37B3EGsqe2DheHpLNNMmzE,4243
157
157
  airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
158
  airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=WrV4sKtJoZ1dK31HK7NdBKlnYHkmu6NqjmEpkVqJ6tQ,4582
159
159
  airbyte_cdk/sources/file_based/config/avro_format.py,sha256=oLJIuNInu-MgjkVFqwHvmQ4CPZa4NZingq_I0_trQ3g,589
160
160
  airbyte_cdk/sources/file_based/config/csv_format.py,sha256=xlBZ5WyAshagjjjbUV_je1JyZ1oY1GbIzJRUZ9UfSvo,7095
161
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=5R0UlPJUGGx5OnpezZ0Fd8dyO4y2vMZtiPZR_3rfvSk,5916
161
+ airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=BTlc2Sw1UP9XF1D4ZYTjubI_msEijBfh9vW8GhOGtIA,3858
162
162
  airbyte_cdk/sources/file_based/config/jsonl_format.py,sha256=usmTeTw8xw8OKwrz8MsiS5E1LQiVEbedGHMHNAfOOlk,252
163
163
  airbyte_cdk/sources/file_based/config/parquet_format.py,sha256=O_Eq0yVzjPiKDz8H1-f9yMowtCcJwT9F2prNYpXZkp0,614
164
164
  airbyte_cdk/sources/file_based/discovery_policy/__init__.py,sha256=x_7JsQGiS7Ytmr0ZDS0SNYGcNUzC4wCm3_1-Mf3ZFnw,283
165
165
  airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha256=cz9po5Cn6u50uq3hDy46pqnPR4JDcnRItZX9k0WDUJU,520
166
166
  airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=Mx3zT9Dem4uNfaUT0oOtrESsuB1LrGAi5N-uw2swZZA,701
167
- airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=N3a8cjXwRUN2__46IJTwrWlsyFiSA1xtSgPcPH28sn0,476
168
- airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=7PVaW17wn80HYW1mu074X2dy0UgFoqFqGIOKN2ZMKD0,8686
169
- airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=EYjhQzomn0_wZ8bnhXjKXkRL5xJcZhW2mSoTN21eyO8,17664
167
+ airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=wWLnHooFrnCwfhMoguDODtQxRVQyrjy0mDrSi4TWwPM,808
168
+ airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=-C_BODUMg8X4jzN85C9Q2R__cpFeLlKycLC_JbctLF8,8670
169
+ airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=VVV829XszmDRqmgv6VBshMF4CSEzmP8rL-OlGttRu7c,17649
170
170
  airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=cThTLc1YKSAapOn70lB09SzruRIPSShGIMz1f92QYV8,1555
171
171
  airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=Kz6HLF0CrFHQ1Y6rJKGr7KmBWSLeDYFQmkg0WIi7Frg,5395
172
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=QulQ_soGb1LpQ_KTxqWZjmfACGkTUDUOeuSmNFtcSLk,8717
172
+ airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=gk2PnjKYT3aL5H7N5jo6OL4vpeNjC588xKrz2_UCNSU,8679
173
173
  airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=sEVnRhZ8x9f7PNjo6lewxid9z0PI8eSj7gSoFC3MH1Y,527
174
174
  airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py,sha256=uwk6Ugf23xKG4PRPVVRVwpcNjTwPgxejl03vLSEzK0s,604
175
175
  airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py,sha256=ZeAa0z50ywMU2chNjQ7JpL4yePU1NajhBa8FS7rXLVo,1643
176
176
  airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaFS_3pIwm-chEabZHiPJi0,265
177
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=BXO0b4iHNJUsF7GVIWTnY5Zlj-IjHS_JmqQlKsSDgz8,5777
178
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=4mZz9cn0gFDC4CepZXUxZoH9J7Z41BrN6zA9J5IfI9w,12246
177
+ airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=Vyrfn24nvM4JDgZgbIdHK0vaX66sl7vLSNvYS-D5ZtY,5800
178
+ airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=OT7QYytD1V2JY0CHHhIKp62QOnHJquM-gjUrV12rBdM,12379
179
179
  airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
180
180
  airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=i-FPeK8lwCzX34GCcmvL5Yvdh8-uu7FeCVYDoFbD7IY,1920
181
181
  airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=kuJRKgDYOGXRk0V0I8BpFxg0hGv7SfV_nBpmmn45F88,6815
@@ -214,7 +214,7 @@ airbyte_cdk/sources/utils/transform.py,sha256=4GYmO6bq33HF-a1in0dKQKqUOYI1bWItyu
214
214
  airbyte_cdk/sources/utils/types.py,sha256=41ZQR681t5TUnOScij58d088sb99klH_ZENFcaYro_g,175
215
215
  airbyte_cdk/utils/__init__.py,sha256=kFLcs2P-tbPyeVOJS9rOv1jZdnSpjG24ro0CHgt_CIk,215
216
216
  airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=q3aDl8T10ufGbeqnUPqbZLxQcHdkf2kDfQK_upWzBbI,2894
217
- airbyte_cdk/utils/datetime_format_inferrer.py,sha256=3Se5Jv-skHiG8PlaDh67CTpN525BaPwmMzo4dAWMqcA,3802
217
+ airbyte_cdk/utils/datetime_format_inferrer.py,sha256=gGKDQ3OdY18R5CVFhq4c7zB_E4Cxe6J6SLA29cz3cJM,3954
218
218
  airbyte_cdk/utils/event_timing.py,sha256=Hn5kCc9xGKLcV5EYpJCZwNiz9neKKu2WG8FJF_hy278,2377
219
219
  airbyte_cdk/utils/mapping_helpers.py,sha256=tVkbgnxy12Ah2Jxh_3tKW7CTKTAVIcPexsBhsiyTbp4,1729
220
220
  airbyte_cdk/utils/schema_inferrer.py,sha256=D8vFVgeK6VLcAug4YVAHfa3D29On0A_nMlwq9SPlfPI,3799
@@ -327,23 +327,23 @@ unit_tests/sources/file_based/config/test_abstract_file_based_spec.py,sha256=wmZ
327
327
  unit_tests/sources/file_based/config/test_csv_format.py,sha256=VYL-9Ec8hW_yO2Pj9F8pDfITcgEAFtSublYda7ut7QE,1132
328
328
  unit_tests/sources/file_based/config/test_file_based_stream_config.py,sha256=1eMsHlMQIFwyw20HjnhgKuiw6399sMcLTQ4LP09kTT4,3060
329
329
  unit_tests/sources/file_based/file_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
330
- unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=INqwKXcgNb3h_tktNXYU6WNUD-iNwRYHCd3IrnQa5R4,11051
330
+ unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=Rk-FB6-o350BpDp2QemprAgBHKlFIiom0EBgDD5Tygg,10991
331
331
  unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=PalrxCRHAyoIp12IWWyePS9QF4LcvNVkqrKdwkrayJ4,22457
332
332
  unit_tests/sources/file_based/file_types/test_jsonl_parser.py,sha256=foTf9U9LyAS8OR0BonwNgFWPqTrmzFV2lpPUfRMrioE,6134
333
333
  unit_tests/sources/file_based/file_types/test_parquet_parser.py,sha256=D7sKTty8aEqMDWWGKWUqDbWjTxhGkygU7ns4-_JceRY,13543
334
334
  unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
335
- unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=xUfw0crAvSTgQ2-chJx2ZiigQyo5IfrCuOFC1TWXXsQ,29795
336
- unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=xZf28rlH93ap8JCkAjwocng-uAW-mvMx6BDOLbvVCig,5588
337
- unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=HTu2I391NQ1-xo5mV5Y62jvPa1MATMT6eN0gpk-Om9s,98773
338
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=0maHng11cFmvzFLOniyBxOEYoKj4DYR3NO9-pSYoFLs,60710
339
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=3vcgKCvsLLPkIpYErGnjsm_sN5-vMqaLctZzvQ_BkBc,27553
340
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=bBYFkAEvEr5O7zyG8dbK-Dbg-TTbdD_Os4HMrylatHQ,26952
341
- unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=iktYJ-WuDDLdT1rsWOvx-2z3Nt4VgkrvpzsMlMDfBGA,10177
342
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=sxO3f4mMu6wUt5TISAycfIOSszkrkQYDnKLWgu-2GcM,28307
343
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=GFACt5-0BMFVtrYueA1_6va2euHfhKcBLcYfDHHnmFI,26715
335
+ unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=UFGCnb_nDU7NFhzxy854OeIx-PbNGvL4v2Nny5CTukE,29809
336
+ unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=wW8Z1-oAJk7o94zkd_nn7qQdughkx65oGuyhs75ZiVk,5676
337
+ unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=w33c5Tm-0MMjWYSJS1p8d3P2OJR-5kL74AbkkGovyho,97990
338
+ unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=ZSRWFnkVEeg1rPMpmoKRU4u4U9tdor8nYaj-aaZ7oag,60875
339
+ unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=rgFxNgy7I3L4Mz3PvtHB2ar_lFbe58WY3RatiddpTso,27753
340
+ unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=x4xQ9c3vJj5Fs-hmxnqovI2hRT56aHARbZR61Ws7mC8,26831
341
+ unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=RrFXL1pdkyjidl3yq40FKcBCAIv_Pmd-0mcGJ6E8mcI,10183
342
+ unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=P11FDBe1mtaPLUKeSxrZ7Z9yGXw6hvwnACfkU9sJRhw,28483
343
+ unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=9IKNyJ8YX0hO0rWOhQugrDswVLMj5PZIpklVrmgiYVY,26814
344
344
  unit_tests/sources/file_based/stream/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
345
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=fzggaGwtXgNk-sAjQ8D71CPTCNBVxBS6HW63FKdkKME,12491
346
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=gZlNs6s9dRgcFLrLZtkJDRSje_8gDylHj3xktjsUMVo,5782
345
+ unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=acUTX704mpw68ljH0atQx--f7STX_UynO8UtROw11Gw,12565
346
+ unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=DLEzbhfSjzbo7M-GMlJUx6XKgw7T5UuBeM2Ola0Cm7k,5771
347
347
  unit_tests/sources/fixtures/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
348
348
  unit_tests/sources/fixtures/source_test_fixture.py,sha256=r-UtR241EGQMZTw1RoKaatrpCGeQn7OIuRPWfG9f7nI,5380
349
349
  unit_tests/sources/message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -359,14 +359,14 @@ unit_tests/sources/streams/http/auth/test_auth.py,sha256=gdWpJ-cR64qRXmmPOQWhVd4
359
359
  unit_tests/sources/streams/http/requests_native_auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
360
360
  unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py,sha256=h5LIjR_1hEdsnXmyp7fISa0isYjXIEjMw-8I8L0ZNLE,15024
361
361
  unit_tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
362
- unit_tests/utils/test_datetime_format_inferrer.py,sha256=DiNIGjEnj3DibDdlX9jhRgOkpo9h834hle2dLKEoRWQ,3487
362
+ unit_tests/utils/test_datetime_format_inferrer.py,sha256=1EUW1_afccMDrZM6YZyyPqrdwsUxZTaBxJNVa4TjiN8,3616
363
363
  unit_tests/utils/test_mapping_helpers.py,sha256=hqRppuban9hGKviiNFqp2fNdAz77d1_gjvgg8L7-jy8,1408
364
364
  unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg4MNPAG-xhpk,7817
365
365
  unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
366
366
  unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
367
367
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
368
- airbyte_cdk-0.51.15.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
369
- airbyte_cdk-0.51.15.dist-info/METADATA,sha256=pBjKGDqHo4jxenU32mYsXyq1UjGBwdlJRlVuy_SxxJg,11516
370
- airbyte_cdk-0.51.15.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
371
- airbyte_cdk-0.51.15.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
372
- airbyte_cdk-0.51.15.dist-info/RECORD,,
368
+ airbyte_cdk-0.51.17.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
369
+ airbyte_cdk-0.51.17.dist-info/METADATA,sha256=yC2mwyB29fJpI-FDENnoicx-VLHUApYTF2b1SY_u0Lo,11516
370
+ airbyte_cdk-0.51.17.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
371
+ airbyte_cdk-0.51.17.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
372
+ airbyte_cdk-0.51.17.dist-info/RECORD,,
@@ -142,17 +142,17 @@ _double_as_string_avro_format = AvroFormat(double_as_string=True)
142
142
  id="test_decimal_missing_precision"),
143
143
  pytest.param(_default_avro_format, {"type": "bytes", "logicalType": "decimal", "precision": 9}, None, ValueError,
144
144
  id="test_decimal_missing_scale"),
145
- pytest.param(_default_avro_format, {"type": "bytes", "logicalType": "uuid"}, {"type": ["null", "string"]}, None, id="test_uuid"),
146
- pytest.param(_default_avro_format, {"type": "int", "logicalType": "date"}, {"type": ["null", "string"], "format": "date"}, None,
145
+ pytest.param(_default_avro_format, {"type": "bytes", "logicalType": "uuid"}, {"type": "string"}, None, id="test_uuid"),
146
+ pytest.param(_default_avro_format, {"type": "int", "logicalType": "date"}, {"type": "string", "format": "date"}, None,
147
147
  id="test_date"),
148
- pytest.param(_default_avro_format, {"type": "int", "logicalType": "time-millis"}, {"type": ["null", "integer"]}, None, id="test_time_millis"),
149
- pytest.param(_default_avro_format, {"type": "long", "logicalType": "time-micros"}, {"type": ["null", "integer"]}, None,
148
+ pytest.param(_default_avro_format, {"type": "int", "logicalType": "time-millis"}, {"type": "integer"}, None, id="test_time_millis"),
149
+ pytest.param(_default_avro_format, {"type": "long", "logicalType": "time-micros"}, {"type": "integer"}, None,
150
150
  id="test_time_micros"),
151
151
  pytest.param(
152
152
  _default_avro_format,
153
- {"type": "long", "logicalType": "timestamp-millis"}, {"type": ["null", "string"], "format": "date-time"}, None, id="test_timestamp_millis"
153
+ {"type": "long", "logicalType": "timestamp-millis"}, {"type": "string", "format": "date-time"}, None, id="test_timestamp_millis"
154
154
  ),
155
- pytest.param(_default_avro_format, {"type": "long", "logicalType": "timestamp-micros"}, {"type": ["null", "string"]}, None,
155
+ pytest.param(_default_avro_format, {"type": "long", "logicalType": "timestamp-micros"}, {"type": "string"}, None,
156
156
  id="test_timestamp_micros"),
157
157
  pytest.param(
158
158
  _default_avro_format,
@@ -203,7 +203,7 @@ single_avro_scenario = (
203
203
  "streams": [
204
204
  {
205
205
  "name": "stream1",
206
- "file_type": "avro",
206
+ "format": {"filetype": "avro"},
207
207
  "globs": ["*"],
208
208
  "validation_policy": "Emit Record",
209
209
  }
@@ -266,7 +266,7 @@ multiple_avro_combine_schema_scenario = (
266
266
  "streams": [
267
267
  {
268
268
  "name": "stream1",
269
- "file_type": "avro",
269
+ "format": {"filetype": "avro"},
270
270
  "globs": ["*"],
271
271
  "validation_policy": "Emit Record",
272
272
  }
@@ -362,7 +362,7 @@ avro_all_types_scenario = (
362
362
  "streams": [
363
363
  {
364
364
  "name": "stream1",
365
- "file_type": "avro",
365
+ "format": {"filetype": "avro"},
366
366
  "globs": ["*"],
367
367
  "validation_policy": "Emit Record",
368
368
  }
@@ -463,13 +463,13 @@ multiple_streams_avro_scenario = (
463
463
  "streams": [
464
464
  {
465
465
  "name": "songs_stream",
466
- "file_type": "avro",
466
+ "format": {"filetype": "avro"},
467
467
  "globs": ["*_songs.avro"],
468
468
  "validation_policy": "Emit Record",
469
469
  },
470
470
  {
471
471
  "name": "festivals_stream",
472
- "file_type": "avro",
472
+ "format": {"filetype": "avro"},
473
473
  "globs": ["*_festivals.avro"],
474
474
  "validation_policy": "Emit Record",
475
475
  },
@@ -629,7 +629,6 @@ avro_file_with_double_as_number_scenario = (
629
629
  "streams": [
630
630
  {
631
631
  "name": "stream1",
632
- "file_type": "avro",
633
632
  "globs": ["*"],
634
633
  "validation_policy": "Emit Record",
635
634
  "format": {
@@ -17,7 +17,7 @@ _base_success_scenario = (
17
17
  "streams": [
18
18
  {
19
19
  "name": "stream1",
20
- "file_type": "csv",
20
+ "format": {"filetype": "csv"},
21
21
  "globs": ["*.csv"],
22
22
  "validation_policy": "Emit Record",
23
23
  }
@@ -55,13 +55,13 @@ success_multi_stream_scenario = (
55
55
  "streams": [
56
56
  {
57
57
  "name": "stream1",
58
- "file_type": "csv",
58
+ "format": {"filetype": "csv"},
59
59
  "globs": ["*.csv", "*.gz"],
60
60
  "validation_policy": "Emit Record",
61
61
  },
62
62
  {
63
63
  "name": "stream2",
64
- "file_type": "csv",
64
+ "format": {"filetype": "csv"},
65
65
  "globs": ["*.csv", "*.gz"],
66
66
  "validation_policy": "Emit Record",
67
67
  }
@@ -79,7 +79,7 @@ success_extensionless_scenario = (
79
79
  "streams": [
80
80
  {
81
81
  "name": "stream1",
82
- "file_type": "csv",
82
+ "format": {"filetype": "csv"},
83
83
  "globs": ["*"],
84
84
  "validation_policy": "Emit Record",
85
85
  }
@@ -109,7 +109,7 @@ success_user_provided_schema_scenario = (
109
109
  "streams": [
110
110
  {
111
111
  "name": "stream1",
112
- "file_type": "csv",
112
+ "format": {"filetype": "csv"},
113
113
  "globs": ["*.csv"],
114
114
  "validation_policy": "Emit Record",
115
115
  "input_schema": '{"col1": "string", "col2": "string"}',
@@ -158,7 +158,7 @@ error_record_validation_user_provided_schema_scenario = (
158
158
  "streams": [
159
159
  {
160
160
  "name": "stream1",
161
- "file_type": "csv",
161
+ "format": {"filetype": "csv"},
162
162
  "globs": ["*.csv"],
163
163
  "validation_policy": "always_fail",
164
164
  "input_schema": '{"col1": "number", "col2": "string"}',
@@ -179,13 +179,13 @@ error_multi_stream_scenario = (
179
179
  "streams": [
180
180
  {
181
181
  "name": "stream1",
182
- "file_type": "csv",
182
+ "format": {"filetype": "csv"},
183
183
  "globs": ["*.csv"],
184
184
  "validation_policy": "Emit Record",
185
185
  },
186
186
  {
187
187
  "name": "stream2",
188
- "file_type": "jsonl",
188
+ "format": {"filetype": "jsonl"},
189
189
  "globs": ["*.csv"],
190
190
  "validation_policy": "Emit Record",
191
191
  }