airbyte-cdk 0.51.15__py3-none-any.whl → 0.51.17__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (29) hide show
  1. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +494 -522
  2. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +1 -1
  3. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -37
  4. airbyte_cdk/sources/file_based/file_based_source.py +1 -1
  5. airbyte_cdk/sources/file_based/file_types/__init__.py +11 -6
  6. airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -1
  7. airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -1
  8. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -2
  9. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +5 -5
  10. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +7 -5
  11. airbyte_cdk/utils/datetime_format_inferrer.py +8 -4
  12. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/RECORD +29 -29
  14. unit_tests/sources/file_based/file_types/test_avro_parser.py +6 -6
  15. unit_tests/sources/file_based/scenarios/avro_scenarios.py +5 -6
  16. unit_tests/sources/file_based/scenarios/check_scenarios.py +8 -8
  17. unit_tests/sources/file_based/scenarios/csv_scenarios.py +19 -42
  18. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +15 -15
  19. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +13 -12
  20. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +5 -9
  21. unit_tests/sources/file_based/scenarios/scenario_builder.py +1 -1
  22. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +16 -16
  23. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +9 -9
  24. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +2 -1
  25. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +6 -3
  26. unit_tests/utils/test_datetime_format_inferrer.py +1 -0
  27. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/LICENSE.txt +0 -0
  28. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/WHEEL +0 -0
  29. {airbyte_cdk-0.51.15.dist-info → airbyte_cdk-0.51.17.dist-info}/top_level.txt +0 -0
@@ -73,7 +73,7 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
73
73
  return files
74
74
 
75
75
  def _check_parse_record(self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger) -> None:
76
- parser = stream.get_parser(stream.config.file_type)
76
+ parser = stream.get_parser()
77
77
 
78
78
  try:
79
79
  record = next(iter(parser.parse_records(stream.config, file, self.stream_reader, logger, discovered_schema=None)))
@@ -3,7 +3,7 @@
3
3
  #
4
4
 
5
5
  from enum import Enum
6
- from typing import Any, List, Mapping, Optional, Type, Union
6
+ from typing import Any, List, Mapping, Optional, Union
7
7
 
8
8
  from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
9
9
  from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
@@ -16,9 +16,6 @@ from pydantic import BaseModel, Field, validator
16
16
  PrimaryKeyType = Optional[Union[str, List[str]]]
17
17
 
18
18
 
19
- VALID_FILE_TYPES: Mapping[str, Type[BaseModel]] = {"avro": AvroFormat, "csv": CsvFormat, "jsonl": JsonlFormat, "parquet": ParquetFormat}
20
-
21
-
22
19
  class ValidationPolicy(Enum):
23
20
  emit_record = "Emit Record"
24
21
  skip_record = "Skip Record"
@@ -27,7 +24,6 @@ class ValidationPolicy(Enum):
27
24
 
28
25
  class FileBasedStreamConfig(BaseModel):
29
26
  name: str = Field(title="Name", description="The name of the stream.")
30
- file_type: str = Field(title="File Type", description="The data file type that is being extracted for a stream.")
31
27
  globs: Optional[List[str]] = Field(
32
28
  title="Globs",
33
29
  description='The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look <a href="https://en.wikipedia.org/wiki/Glob_(programming)">here</a>.',
@@ -54,7 +50,7 @@ class FileBasedStreamConfig(BaseModel):
54
50
  description="When the state history of the file store is full, syncs will only read files that were last modified in the provided day range.",
55
51
  default=3,
56
52
  )
57
- format: Optional[Union[AvroFormat, CsvFormat, JsonlFormat, ParquetFormat]] = Field(
53
+ format: Union[AvroFormat, CsvFormat, JsonlFormat, ParquetFormat] = Field(
58
54
  title="Format",
59
55
  description="The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.",
60
56
  )
@@ -64,37 +60,6 @@ class FileBasedStreamConfig(BaseModel):
64
60
  default=False,
65
61
  )
66
62
 
67
- @validator("file_type", pre=True)
68
- def validate_file_type(cls, v: str) -> str:
69
- if v not in VALID_FILE_TYPES:
70
- raise ValueError(f"Format filetype {v} is not a supported file type")
71
- return v
72
-
73
- @classmethod
74
- def _transform_legacy_config(cls, legacy_config: Mapping[str, Any], file_type: str) -> Mapping[str, Any]:
75
- if file_type.casefold() not in VALID_FILE_TYPES:
76
- raise ValueError(f"Format filetype {file_type} is not a supported file type")
77
- if file_type.casefold() == "parquet" or file_type.casefold() == "avro":
78
- legacy_config = cls._transform_legacy_parquet_or_avro_config(legacy_config)
79
- return {file_type: VALID_FILE_TYPES[file_type.casefold()].parse_obj({key: val for key, val in legacy_config.items()})}
80
-
81
- @classmethod
82
- def _transform_legacy_parquet_or_avro_config(cls, config: Mapping[str, Any]) -> Mapping[str, Any]:
83
- """
84
- The legacy parquet parser converts decimal fields to numbers. This isn't desirable because it can lead to precision loss.
85
- To avoid introducing a breaking change with the new default, we will set decimal_as_float to True in the legacy configs.
86
- """
87
- filetype = config.get("filetype")
88
- if filetype != "parquet" and filetype != "avro":
89
- raise ValueError(
90
- f"Expected {filetype} format, got {config}. This is probably due to a CDK bug. Please reach out to the Airbyte team for support."
91
- )
92
- if config.get("decimal_as_float"):
93
- raise ValueError(
94
- f"Received legacy {filetype} file form with 'decimal_as_float' set. This is unexpected. Please reach out to the Airbyte team for support."
95
- )
96
- return {**config, **{"decimal_as_float": True}}
97
-
98
63
  @validator("input_schema", pre=True)
99
64
  def validate_input_schema(cls, v: Optional[str]) -> Optional[str]:
100
65
  if v:
@@ -33,7 +33,7 @@ class FileBasedSource(AbstractSource, ABC):
33
33
  catalog_path: Optional[str] = None,
34
34
  availability_strategy: Optional[AbstractFileBasedAvailabilityStrategy] = None,
35
35
  discovery_policy: AbstractDiscoveryPolicy = DefaultDiscoveryPolicy(),
36
- parsers: Mapping[str, FileTypeParser] = default_parsers,
36
+ parsers: Mapping[Type[Any], FileTypeParser] = default_parsers,
37
37
  validation_policies: Mapping[ValidationPolicy, AbstractSchemaValidationPolicy] = DEFAULT_SCHEMA_VALIDATION_POLICIES,
38
38
  cursor_cls: Type[AbstractFileBasedCursor] = DefaultFileBasedCursor,
39
39
  ):
@@ -1,4 +1,9 @@
1
- from typing import Mapping
1
+ from typing import Any, Mapping, Type
2
+
3
+ from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
4
+ from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
5
+ from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
6
+ from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
2
7
 
3
8
  from .avro_parser import AvroParser
4
9
  from .csv_parser import CsvParser
@@ -6,11 +11,11 @@ from .file_type_parser import FileTypeParser
6
11
  from .jsonl_parser import JsonlParser
7
12
  from .parquet_parser import ParquetParser
8
13
 
9
- default_parsers: Mapping[str, FileTypeParser] = {
10
- "avro": AvroParser(),
11
- "csv": CsvParser(),
12
- "jsonl": JsonlParser(),
13
- "parquet": ParquetParser(),
14
+ default_parsers: Mapping[Type[Any], FileTypeParser] = {
15
+ AvroFormat: AvroParser(),
16
+ CsvFormat: CsvParser(),
17
+ JsonlFormat: JsonlParser(),
18
+ ParquetFormat: ParquetParser(),
14
19
  }
15
20
 
16
21
  __all__ = ["AvroParser", "CsvParser", "JsonlParser", "ParquetParser", "default_parsers"]
@@ -49,7 +49,7 @@ class AvroParser(FileTypeParser):
49
49
  stream_reader: AbstractFileBasedStreamReader,
50
50
  logger: logging.Logger,
51
51
  ) -> SchemaType:
52
- avro_format = config.format or AvroFormat()
52
+ avro_format = config.format
53
53
  if not isinstance(avro_format, AvroFormat):
54
54
  raise ValueError(f"Expected ParquetFormat, got {avro_format}")
55
55
 
@@ -422,7 +422,7 @@ def _no_cast(row: Mapping[str, str]) -> Mapping[str, str]:
422
422
 
423
423
 
424
424
  def _extract_format(config: FileBasedStreamConfig) -> CsvFormat:
425
- config_format = config.format or CsvFormat()
425
+ config_format = config.format
426
426
  if not isinstance(config_format, CsvFormat):
427
427
  raise ValueError(f"Invalid format config: {config_format}")
428
428
  return config_format
@@ -30,7 +30,7 @@ class ParquetParser(FileTypeParser):
30
30
  stream_reader: AbstractFileBasedStreamReader,
31
31
  logger: logging.Logger,
32
32
  ) -> SchemaType:
33
- parquet_format = config.format or ParquetFormat()
33
+ parquet_format = config.format
34
34
  if not isinstance(parquet_format, ParquetFormat):
35
35
  raise ValueError(f"Expected ParquetFormat, got {parquet_format}")
36
36
 
@@ -54,7 +54,7 @@ class ParquetParser(FileTypeParser):
54
54
  logger: logging.Logger,
55
55
  discovered_schema: Optional[Mapping[str, SchemaType]],
56
56
  ) -> Iterable[Dict[str, Any]]:
57
- parquet_format = config.format or ParquetFormat()
57
+ parquet_format = config.format
58
58
  if not isinstance(parquet_format, ParquetFormat):
59
59
  logger.info(f"Expected ParquetFormat, got {parquet_format}")
60
60
  raise ConfigValidationError(FileBasedSourceError.CONFIG_VALIDATION_ERROR)
@@ -4,7 +4,7 @@
4
4
 
5
5
  from abc import abstractmethod
6
6
  from functools import cached_property, lru_cache
7
- from typing import Any, Dict, Iterable, List, Mapping, Optional
7
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
8
8
 
9
9
  from airbyte_cdk.models import SyncMode
10
10
  from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy
@@ -42,7 +42,7 @@ class AbstractFileBasedStream(Stream):
42
42
  stream_reader: AbstractFileBasedStreamReader,
43
43
  availability_strategy: AbstractFileBasedAvailabilityStrategy,
44
44
  discovery_policy: AbstractDiscoveryPolicy,
45
- parsers: Dict[str, FileTypeParser],
45
+ parsers: Dict[Type[Any], FileTypeParser],
46
46
  validation_policy: AbstractSchemaValidationPolicy,
47
47
  ):
48
48
  super().__init__()
@@ -121,11 +121,11 @@ class AbstractFileBasedStream(Stream):
121
121
  """
122
122
  ...
123
123
 
124
- def get_parser(self, file_type: str) -> FileTypeParser:
124
+ def get_parser(self) -> FileTypeParser:
125
125
  try:
126
- return self._parsers[file_type]
126
+ return self._parsers[type(self.config.format)]
127
127
  except KeyError:
128
- raise UndefinedParserError(FileBasedSourceError.UNDEFINED_PARSER, stream=self.name, file_type=file_type)
128
+ raise UndefinedParserError(FileBasedSourceError.UNDEFINED_PARSER, stream=self.name, format=type(self.config.format))
129
129
 
130
130
  def record_passes_validation_policy(self, record: Mapping[str, Any]) -> bool:
131
131
  if self.validation_policy:
@@ -5,6 +5,7 @@
5
5
  import asyncio
6
6
  import itertools
7
7
  import traceback
8
+ from copy import deepcopy
8
9
  from functools import cache
9
10
  from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Set, Union
10
11
 
@@ -79,7 +80,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
79
80
  # On read requests we should always have the catalog available
80
81
  raise MissingSchemaError(FileBasedSourceError.MISSING_SCHEMA, stream=self.name)
81
82
  # The stream only supports a single file type, so we can use the same parser for all files
82
- parser = self.get_parser(self.config.file_type)
83
+ parser = self.get_parser()
83
84
  for file in stream_slice["files"]:
84
85
  # only serialize the datetime once
85
86
  file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
@@ -190,7 +191,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
190
191
  if not inferred_schema:
191
192
  raise InvalidSchemaError(
192
193
  FileBasedSourceError.INVALID_SCHEMA_ERROR,
193
- details=f"Empty schema. Please check that the files are valid {self.config.file_type}",
194
+ details=f"Empty schema. Please check that the files are valid for format {self.config.format}",
194
195
  stream=self.name,
195
196
  )
196
197
 
@@ -210,7 +211,8 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
210
211
  def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
211
212
  loop = asyncio.get_event_loop()
212
213
  schema = loop.run_until_complete(self._infer_schema(files))
213
- return self._fill_nulls(schema)
214
+ # as infer schema returns a Mapping that is assumed to be immutable, we need to create a deepcopy to avoid modifying the reference
215
+ return self._fill_nulls(deepcopy(schema))
214
216
 
215
217
  @staticmethod
216
218
  def _fill_nulls(schema: Mapping[str, Any]) -> Mapping[str, Any]:
@@ -258,11 +260,11 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
258
260
 
259
261
  async def _infer_file_schema(self, file: RemoteFile) -> SchemaType:
260
262
  try:
261
- return await self.get_parser(self.config.file_type).infer_schema(self.config, file, self._stream_reader, self.logger)
263
+ return await self.get_parser().infer_schema(self.config, file, self._stream_reader, self.logger)
262
264
  except Exception as exc:
263
265
  raise SchemaInferenceError(
264
266
  FileBasedSourceError.SCHEMA_INFERENCE_ERROR,
265
267
  file=file.uri,
266
- stream_file_type=self.config.file_type,
268
+ format=str(self.config.format),
267
269
  stream=self.name,
268
270
  ) from exc
@@ -36,10 +36,14 @@ class DatetimeFormatInferrer:
36
36
  This is the case if the value is a string or an integer between 1_000_000_000 and 2_000_000_000 for seconds
37
37
  or between 1_000_000_000_000 and 2_000_000_000_000 for milliseconds.
38
38
  This is separate from the format check for performance reasons"""
39
- for timestamp_range in self._timestamp_heuristic_ranges:
40
- if isinstance(value, str) and (not value.isdecimal() or int(value) in timestamp_range):
41
- return True
42
- if isinstance(value, int) and value in timestamp_range:
39
+ if isinstance(value, (str, int)):
40
+ try:
41
+ value_as_int = int(value)
42
+ for timestamp_range in self._timestamp_heuristic_ranges:
43
+ if value_as_int in timestamp_range:
44
+ return True
45
+ except ValueError:
46
+ # given that it's not parsable as an int, it can represent a datetime with one of the self._formats
43
47
  return True
44
48
  return False
45
49
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.51.15
3
+ Version: 0.51.17
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -73,7 +73,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
73
73
  airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=Dc0F87nElWsz_Ikj938eQ9uqZvyqgFhZ8Dqf_-hvndc,4800
74
74
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
75
75
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
76
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=q5VS1aiJwgK2nOsqpnc_lQWTvxhbH078jufxcedon9Q,57422
76
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=ebor38wlQVqYD2QXk5X8v9xDZl0cEpIc2mFaKvpuiPE,57170
77
77
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
78
78
  airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=bK4a74opm6WHyV7HqOVws6GE5Z7cLNc5MaTha69abIQ,6086
79
79
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
@@ -146,36 +146,36 @@ airbyte_cdk/sources/embedded/runner.py,sha256=kZ0CcUANuMjdZ4fmvp_w9P2IcsS9WSHxNq
146
146
  airbyte_cdk/sources/embedded/tools.py,sha256=-Z4tZ4AP1OTi_zrqFM3YV8Rt7c60wvsrv0Dc-rTZ2uw,744
147
147
  airbyte_cdk/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
148
148
  airbyte_cdk/sources/file_based/exceptions.py,sha256=K3b0IH4xxY75GwRzueAoWfBZDSkctGWDtDEKkSwnrM4,4344
149
- airbyte_cdk/sources/file_based/file_based_source.py,sha256=NCbXAGPWBQSPAf5x2U2eCdOLUd26RhO5s6K87_AF8Es,6931
149
+ airbyte_cdk/sources/file_based/file_based_source.py,sha256=NKS3p_cClQoKC0elj8tJr4ayAUnUXXgTjGr3SVHxC4Q,6937
150
150
  airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=K9fFHcSL4E8v-X2l38wRAcZCjpyifr35orvby8vQt84,3749
151
151
  airbyte_cdk/sources/file_based/remote_file.py,sha256=xIDwDDBPhJI1K8YZuXjEfjxakZPMieBKJM6vmq6G5tw,248
152
152
  airbyte_cdk/sources/file_based/schema_helpers.py,sha256=XBkOutIw_n6SNYU34qbyTbl0Ppt0i4k3sVFMSaX3wJo,9103
153
153
  airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
154
154
  airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=WiPPuQGfmQlFUMFR5h3ECc-VzBj4vair6_4WAL87AEI,277
155
155
  airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=NeHCiG4FFohzYpQQFfmTL4-5oI0nElHWgXX1xrm8-SU,1269
156
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=InGBlGbInuNUQ6oaK5A9oICVc7ZNHMSYo8g5Vy2smOo,4266
156
+ airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=RWcRhNjytH-09_d4MVKDC37B3EGsqe2DheHpLNNMmzE,4243
157
157
  airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
158
  airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=WrV4sKtJoZ1dK31HK7NdBKlnYHkmu6NqjmEpkVqJ6tQ,4582
159
159
  airbyte_cdk/sources/file_based/config/avro_format.py,sha256=oLJIuNInu-MgjkVFqwHvmQ4CPZa4NZingq_I0_trQ3g,589
160
160
  airbyte_cdk/sources/file_based/config/csv_format.py,sha256=xlBZ5WyAshagjjjbUV_je1JyZ1oY1GbIzJRUZ9UfSvo,7095
161
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=5R0UlPJUGGx5OnpezZ0Fd8dyO4y2vMZtiPZR_3rfvSk,5916
161
+ airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=BTlc2Sw1UP9XF1D4ZYTjubI_msEijBfh9vW8GhOGtIA,3858
162
162
  airbyte_cdk/sources/file_based/config/jsonl_format.py,sha256=usmTeTw8xw8OKwrz8MsiS5E1LQiVEbedGHMHNAfOOlk,252
163
163
  airbyte_cdk/sources/file_based/config/parquet_format.py,sha256=O_Eq0yVzjPiKDz8H1-f9yMowtCcJwT9F2prNYpXZkp0,614
164
164
  airbyte_cdk/sources/file_based/discovery_policy/__init__.py,sha256=x_7JsQGiS7Ytmr0ZDS0SNYGcNUzC4wCm3_1-Mf3ZFnw,283
165
165
  airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha256=cz9po5Cn6u50uq3hDy46pqnPR4JDcnRItZX9k0WDUJU,520
166
166
  airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=Mx3zT9Dem4uNfaUT0oOtrESsuB1LrGAi5N-uw2swZZA,701
167
- airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=N3a8cjXwRUN2__46IJTwrWlsyFiSA1xtSgPcPH28sn0,476
168
- airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=7PVaW17wn80HYW1mu074X2dy0UgFoqFqGIOKN2ZMKD0,8686
169
- airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=EYjhQzomn0_wZ8bnhXjKXkRL5xJcZhW2mSoTN21eyO8,17664
167
+ airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=wWLnHooFrnCwfhMoguDODtQxRVQyrjy0mDrSi4TWwPM,808
168
+ airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=-C_BODUMg8X4jzN85C9Q2R__cpFeLlKycLC_JbctLF8,8670
169
+ airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=VVV829XszmDRqmgv6VBshMF4CSEzmP8rL-OlGttRu7c,17649
170
170
  airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=cThTLc1YKSAapOn70lB09SzruRIPSShGIMz1f92QYV8,1555
171
171
  airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=Kz6HLF0CrFHQ1Y6rJKGr7KmBWSLeDYFQmkg0WIi7Frg,5395
172
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=QulQ_soGb1LpQ_KTxqWZjmfACGkTUDUOeuSmNFtcSLk,8717
172
+ airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=gk2PnjKYT3aL5H7N5jo6OL4vpeNjC588xKrz2_UCNSU,8679
173
173
  airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=sEVnRhZ8x9f7PNjo6lewxid9z0PI8eSj7gSoFC3MH1Y,527
174
174
  airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py,sha256=uwk6Ugf23xKG4PRPVVRVwpcNjTwPgxejl03vLSEzK0s,604
175
175
  airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py,sha256=ZeAa0z50ywMU2chNjQ7JpL4yePU1NajhBa8FS7rXLVo,1643
176
176
  airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaFS_3pIwm-chEabZHiPJi0,265
177
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=BXO0b4iHNJUsF7GVIWTnY5Zlj-IjHS_JmqQlKsSDgz8,5777
178
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=4mZz9cn0gFDC4CepZXUxZoH9J7Z41BrN6zA9J5IfI9w,12246
177
+ airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=Vyrfn24nvM4JDgZgbIdHK0vaX66sl7vLSNvYS-D5ZtY,5800
178
+ airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=OT7QYytD1V2JY0CHHhIKp62QOnHJquM-gjUrV12rBdM,12379
179
179
  airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
180
180
  airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=i-FPeK8lwCzX34GCcmvL5Yvdh8-uu7FeCVYDoFbD7IY,1920
181
181
  airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=kuJRKgDYOGXRk0V0I8BpFxg0hGv7SfV_nBpmmn45F88,6815
@@ -214,7 +214,7 @@ airbyte_cdk/sources/utils/transform.py,sha256=4GYmO6bq33HF-a1in0dKQKqUOYI1bWItyu
214
214
  airbyte_cdk/sources/utils/types.py,sha256=41ZQR681t5TUnOScij58d088sb99klH_ZENFcaYro_g,175
215
215
  airbyte_cdk/utils/__init__.py,sha256=kFLcs2P-tbPyeVOJS9rOv1jZdnSpjG24ro0CHgt_CIk,215
216
216
  airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=q3aDl8T10ufGbeqnUPqbZLxQcHdkf2kDfQK_upWzBbI,2894
217
- airbyte_cdk/utils/datetime_format_inferrer.py,sha256=3Se5Jv-skHiG8PlaDh67CTpN525BaPwmMzo4dAWMqcA,3802
217
+ airbyte_cdk/utils/datetime_format_inferrer.py,sha256=gGKDQ3OdY18R5CVFhq4c7zB_E4Cxe6J6SLA29cz3cJM,3954
218
218
  airbyte_cdk/utils/event_timing.py,sha256=Hn5kCc9xGKLcV5EYpJCZwNiz9neKKu2WG8FJF_hy278,2377
219
219
  airbyte_cdk/utils/mapping_helpers.py,sha256=tVkbgnxy12Ah2Jxh_3tKW7CTKTAVIcPexsBhsiyTbp4,1729
220
220
  airbyte_cdk/utils/schema_inferrer.py,sha256=D8vFVgeK6VLcAug4YVAHfa3D29On0A_nMlwq9SPlfPI,3799
@@ -327,23 +327,23 @@ unit_tests/sources/file_based/config/test_abstract_file_based_spec.py,sha256=wmZ
327
327
  unit_tests/sources/file_based/config/test_csv_format.py,sha256=VYL-9Ec8hW_yO2Pj9F8pDfITcgEAFtSublYda7ut7QE,1132
328
328
  unit_tests/sources/file_based/config/test_file_based_stream_config.py,sha256=1eMsHlMQIFwyw20HjnhgKuiw6399sMcLTQ4LP09kTT4,3060
329
329
  unit_tests/sources/file_based/file_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
330
- unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=INqwKXcgNb3h_tktNXYU6WNUD-iNwRYHCd3IrnQa5R4,11051
330
+ unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=Rk-FB6-o350BpDp2QemprAgBHKlFIiom0EBgDD5Tygg,10991
331
331
  unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=PalrxCRHAyoIp12IWWyePS9QF4LcvNVkqrKdwkrayJ4,22457
332
332
  unit_tests/sources/file_based/file_types/test_jsonl_parser.py,sha256=foTf9U9LyAS8OR0BonwNgFWPqTrmzFV2lpPUfRMrioE,6134
333
333
  unit_tests/sources/file_based/file_types/test_parquet_parser.py,sha256=D7sKTty8aEqMDWWGKWUqDbWjTxhGkygU7ns4-_JceRY,13543
334
334
  unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
335
- unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=xUfw0crAvSTgQ2-chJx2ZiigQyo5IfrCuOFC1TWXXsQ,29795
336
- unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=xZf28rlH93ap8JCkAjwocng-uAW-mvMx6BDOLbvVCig,5588
337
- unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=HTu2I391NQ1-xo5mV5Y62jvPa1MATMT6eN0gpk-Om9s,98773
338
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=0maHng11cFmvzFLOniyBxOEYoKj4DYR3NO9-pSYoFLs,60710
339
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=3vcgKCvsLLPkIpYErGnjsm_sN5-vMqaLctZzvQ_BkBc,27553
340
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=bBYFkAEvEr5O7zyG8dbK-Dbg-TTbdD_Os4HMrylatHQ,26952
341
- unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=iktYJ-WuDDLdT1rsWOvx-2z3Nt4VgkrvpzsMlMDfBGA,10177
342
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=sxO3f4mMu6wUt5TISAycfIOSszkrkQYDnKLWgu-2GcM,28307
343
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=GFACt5-0BMFVtrYueA1_6va2euHfhKcBLcYfDHHnmFI,26715
335
+ unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=UFGCnb_nDU7NFhzxy854OeIx-PbNGvL4v2Nny5CTukE,29809
336
+ unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=wW8Z1-oAJk7o94zkd_nn7qQdughkx65oGuyhs75ZiVk,5676
337
+ unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=w33c5Tm-0MMjWYSJS1p8d3P2OJR-5kL74AbkkGovyho,97990
338
+ unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=ZSRWFnkVEeg1rPMpmoKRU4u4U9tdor8nYaj-aaZ7oag,60875
339
+ unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=rgFxNgy7I3L4Mz3PvtHB2ar_lFbe58WY3RatiddpTso,27753
340
+ unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=x4xQ9c3vJj5Fs-hmxnqovI2hRT56aHARbZR61Ws7mC8,26831
341
+ unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=RrFXL1pdkyjidl3yq40FKcBCAIv_Pmd-0mcGJ6E8mcI,10183
342
+ unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=P11FDBe1mtaPLUKeSxrZ7Z9yGXw6hvwnACfkU9sJRhw,28483
343
+ unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=9IKNyJ8YX0hO0rWOhQugrDswVLMj5PZIpklVrmgiYVY,26814
344
344
  unit_tests/sources/file_based/stream/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
345
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=fzggaGwtXgNk-sAjQ8D71CPTCNBVxBS6HW63FKdkKME,12491
346
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=gZlNs6s9dRgcFLrLZtkJDRSje_8gDylHj3xktjsUMVo,5782
345
+ unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=acUTX704mpw68ljH0atQx--f7STX_UynO8UtROw11Gw,12565
346
+ unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=DLEzbhfSjzbo7M-GMlJUx6XKgw7T5UuBeM2Ola0Cm7k,5771
347
347
  unit_tests/sources/fixtures/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
348
348
  unit_tests/sources/fixtures/source_test_fixture.py,sha256=r-UtR241EGQMZTw1RoKaatrpCGeQn7OIuRPWfG9f7nI,5380
349
349
  unit_tests/sources/message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -359,14 +359,14 @@ unit_tests/sources/streams/http/auth/test_auth.py,sha256=gdWpJ-cR64qRXmmPOQWhVd4
359
359
  unit_tests/sources/streams/http/requests_native_auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
360
360
  unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py,sha256=h5LIjR_1hEdsnXmyp7fISa0isYjXIEjMw-8I8L0ZNLE,15024
361
361
  unit_tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
362
- unit_tests/utils/test_datetime_format_inferrer.py,sha256=DiNIGjEnj3DibDdlX9jhRgOkpo9h834hle2dLKEoRWQ,3487
362
+ unit_tests/utils/test_datetime_format_inferrer.py,sha256=1EUW1_afccMDrZM6YZyyPqrdwsUxZTaBxJNVa4TjiN8,3616
363
363
  unit_tests/utils/test_mapping_helpers.py,sha256=hqRppuban9hGKviiNFqp2fNdAz77d1_gjvgg8L7-jy8,1408
364
364
  unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg4MNPAG-xhpk,7817
365
365
  unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
366
366
  unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
367
367
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
368
- airbyte_cdk-0.51.15.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
369
- airbyte_cdk-0.51.15.dist-info/METADATA,sha256=pBjKGDqHo4jxenU32mYsXyq1UjGBwdlJRlVuy_SxxJg,11516
370
- airbyte_cdk-0.51.15.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
371
- airbyte_cdk-0.51.15.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
372
- airbyte_cdk-0.51.15.dist-info/RECORD,,
368
+ airbyte_cdk-0.51.17.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
369
+ airbyte_cdk-0.51.17.dist-info/METADATA,sha256=yC2mwyB29fJpI-FDENnoicx-VLHUApYTF2b1SY_u0Lo,11516
370
+ airbyte_cdk-0.51.17.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
371
+ airbyte_cdk-0.51.17.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
372
+ airbyte_cdk-0.51.17.dist-info/RECORD,,
@@ -142,17 +142,17 @@ _double_as_string_avro_format = AvroFormat(double_as_string=True)
142
142
  id="test_decimal_missing_precision"),
143
143
  pytest.param(_default_avro_format, {"type": "bytes", "logicalType": "decimal", "precision": 9}, None, ValueError,
144
144
  id="test_decimal_missing_scale"),
145
- pytest.param(_default_avro_format, {"type": "bytes", "logicalType": "uuid"}, {"type": ["null", "string"]}, None, id="test_uuid"),
146
- pytest.param(_default_avro_format, {"type": "int", "logicalType": "date"}, {"type": ["null", "string"], "format": "date"}, None,
145
+ pytest.param(_default_avro_format, {"type": "bytes", "logicalType": "uuid"}, {"type": "string"}, None, id="test_uuid"),
146
+ pytest.param(_default_avro_format, {"type": "int", "logicalType": "date"}, {"type": "string", "format": "date"}, None,
147
147
  id="test_date"),
148
- pytest.param(_default_avro_format, {"type": "int", "logicalType": "time-millis"}, {"type": ["null", "integer"]}, None, id="test_time_millis"),
149
- pytest.param(_default_avro_format, {"type": "long", "logicalType": "time-micros"}, {"type": ["null", "integer"]}, None,
148
+ pytest.param(_default_avro_format, {"type": "int", "logicalType": "time-millis"}, {"type": "integer"}, None, id="test_time_millis"),
149
+ pytest.param(_default_avro_format, {"type": "long", "logicalType": "time-micros"}, {"type": "integer"}, None,
150
150
  id="test_time_micros"),
151
151
  pytest.param(
152
152
  _default_avro_format,
153
- {"type": "long", "logicalType": "timestamp-millis"}, {"type": ["null", "string"], "format": "date-time"}, None, id="test_timestamp_millis"
153
+ {"type": "long", "logicalType": "timestamp-millis"}, {"type": "string", "format": "date-time"}, None, id="test_timestamp_millis"
154
154
  ),
155
- pytest.param(_default_avro_format, {"type": "long", "logicalType": "timestamp-micros"}, {"type": ["null", "string"]}, None,
155
+ pytest.param(_default_avro_format, {"type": "long", "logicalType": "timestamp-micros"}, {"type": "string"}, None,
156
156
  id="test_timestamp_micros"),
157
157
  pytest.param(
158
158
  _default_avro_format,
@@ -203,7 +203,7 @@ single_avro_scenario = (
203
203
  "streams": [
204
204
  {
205
205
  "name": "stream1",
206
- "file_type": "avro",
206
+ "format": {"filetype": "avro"},
207
207
  "globs": ["*"],
208
208
  "validation_policy": "Emit Record",
209
209
  }
@@ -266,7 +266,7 @@ multiple_avro_combine_schema_scenario = (
266
266
  "streams": [
267
267
  {
268
268
  "name": "stream1",
269
- "file_type": "avro",
269
+ "format": {"filetype": "avro"},
270
270
  "globs": ["*"],
271
271
  "validation_policy": "Emit Record",
272
272
  }
@@ -362,7 +362,7 @@ avro_all_types_scenario = (
362
362
  "streams": [
363
363
  {
364
364
  "name": "stream1",
365
- "file_type": "avro",
365
+ "format": {"filetype": "avro"},
366
366
  "globs": ["*"],
367
367
  "validation_policy": "Emit Record",
368
368
  }
@@ -463,13 +463,13 @@ multiple_streams_avro_scenario = (
463
463
  "streams": [
464
464
  {
465
465
  "name": "songs_stream",
466
- "file_type": "avro",
466
+ "format": {"filetype": "avro"},
467
467
  "globs": ["*_songs.avro"],
468
468
  "validation_policy": "Emit Record",
469
469
  },
470
470
  {
471
471
  "name": "festivals_stream",
472
- "file_type": "avro",
472
+ "format": {"filetype": "avro"},
473
473
  "globs": ["*_festivals.avro"],
474
474
  "validation_policy": "Emit Record",
475
475
  },
@@ -629,7 +629,6 @@ avro_file_with_double_as_number_scenario = (
629
629
  "streams": [
630
630
  {
631
631
  "name": "stream1",
632
- "file_type": "avro",
633
632
  "globs": ["*"],
634
633
  "validation_policy": "Emit Record",
635
634
  "format": {
@@ -17,7 +17,7 @@ _base_success_scenario = (
17
17
  "streams": [
18
18
  {
19
19
  "name": "stream1",
20
- "file_type": "csv",
20
+ "format": {"filetype": "csv"},
21
21
  "globs": ["*.csv"],
22
22
  "validation_policy": "Emit Record",
23
23
  }
@@ -55,13 +55,13 @@ success_multi_stream_scenario = (
55
55
  "streams": [
56
56
  {
57
57
  "name": "stream1",
58
- "file_type": "csv",
58
+ "format": {"filetype": "csv"},
59
59
  "globs": ["*.csv", "*.gz"],
60
60
  "validation_policy": "Emit Record",
61
61
  },
62
62
  {
63
63
  "name": "stream2",
64
- "file_type": "csv",
64
+ "format": {"filetype": "csv"},
65
65
  "globs": ["*.csv", "*.gz"],
66
66
  "validation_policy": "Emit Record",
67
67
  }
@@ -79,7 +79,7 @@ success_extensionless_scenario = (
79
79
  "streams": [
80
80
  {
81
81
  "name": "stream1",
82
- "file_type": "csv",
82
+ "format": {"filetype": "csv"},
83
83
  "globs": ["*"],
84
84
  "validation_policy": "Emit Record",
85
85
  }
@@ -109,7 +109,7 @@ success_user_provided_schema_scenario = (
109
109
  "streams": [
110
110
  {
111
111
  "name": "stream1",
112
- "file_type": "csv",
112
+ "format": {"filetype": "csv"},
113
113
  "globs": ["*.csv"],
114
114
  "validation_policy": "Emit Record",
115
115
  "input_schema": '{"col1": "string", "col2": "string"}',
@@ -158,7 +158,7 @@ error_record_validation_user_provided_schema_scenario = (
158
158
  "streams": [
159
159
  {
160
160
  "name": "stream1",
161
- "file_type": "csv",
161
+ "format": {"filetype": "csv"},
162
162
  "globs": ["*.csv"],
163
163
  "validation_policy": "always_fail",
164
164
  "input_schema": '{"col1": "number", "col2": "string"}',
@@ -179,13 +179,13 @@ error_multi_stream_scenario = (
179
179
  "streams": [
180
180
  {
181
181
  "name": "stream1",
182
- "file_type": "csv",
182
+ "format": {"filetype": "csv"},
183
183
  "globs": ["*.csv"],
184
184
  "validation_policy": "Emit Record",
185
185
  },
186
186
  {
187
187
  "name": "stream2",
188
- "file_type": "jsonl",
188
+ "format": {"filetype": "jsonl"},
189
189
  "globs": ["*.csv"],
190
190
  "validation_policy": "Emit Record",
191
191
  }