airbyte-cdk 0.51.5__py3-none-any.whl → 0.51.7__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +0 -4
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +0 -6
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +21 -6
- airbyte_cdk/sources/file_based/config/csv_format.py +64 -6
- airbyte_cdk/sources/file_based/exceptions.py +0 -1
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +6 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -2
- {airbyte_cdk-0.51.5.dist-info → airbyte_cdk-0.51.7.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.51.5.dist-info → airbyte_cdk-0.51.7.dist-info}/RECORD +19 -16
- unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +52 -0
- unit_tests/sources/file_based/config/test_csv_format.py +28 -0
- unit_tests/sources/file_based/file_types/test_csv_parser.py +24 -2
- unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -8
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +38 -17
- unit_tests/sources/file_based/test_scenarios.py +0 -2
- {airbyte_cdk-0.51.5.dist-info → airbyte_cdk-0.51.7.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.51.5.dist-info → airbyte_cdk-0.51.7.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.51.5.dist-info → airbyte_cdk-0.51.7.dist-info}/top_level.txt +0 -0
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py
CHANGED
@@ -55,7 +55,6 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
|
|
55
55
|
"""
|
56
56
|
try:
|
57
57
|
files = self._check_list_files(stream)
|
58
|
-
self._check_extensions(stream, files)
|
59
58
|
self._check_parse_record(stream, files[0], logger)
|
60
59
|
except CheckAvailabilityError:
|
61
60
|
return False, "".join(traceback.format_exc())
|
@@ -73,11 +72,6 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
|
|
73
72
|
|
74
73
|
return files
|
75
74
|
|
76
|
-
def _check_extensions(self, stream: "AbstractFileBasedStream", files: List[RemoteFile]) -> None:
|
77
|
-
if not all(f.extension_agrees_with_file_type(stream.config.file_type) for f in files):
|
78
|
-
raise CheckAvailabilityError(FileBasedSourceError.EXTENSION_MISMATCH, stream=stream.name)
|
79
|
-
return None
|
80
|
-
|
81
75
|
def _check_parse_record(self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger) -> None:
|
82
76
|
parser = stream.get_parser(stream.config.file_type)
|
83
77
|
|
@@ -66,9 +66,7 @@ class AbstractFileBasedSpec(BaseModel):
|
|
66
66
|
for format in objects_to_check["oneOf"]:
|
67
67
|
for key in format["properties"]:
|
68
68
|
object_property = format["properties"][key]
|
69
|
-
|
70
|
-
object_property["enum"] = object_property["allOf"][0]["enum"]
|
71
|
-
object_property.pop("allOf")
|
69
|
+
AbstractFileBasedSpec.move_enum_to_root(object_property)
|
72
70
|
|
73
71
|
properties_to_change = ["validation_policy"]
|
74
72
|
for property_to_change in properties_to_change:
|
@@ -76,7 +74,24 @@ class AbstractFileBasedSpec(BaseModel):
|
|
76
74
|
if "anyOf" in property_object:
|
77
75
|
schema["properties"]["streams"]["items"]["properties"][property_to_change]["type"] = "object"
|
78
76
|
schema["properties"]["streams"]["items"]["properties"][property_to_change]["oneOf"] = property_object.pop("anyOf")
|
79
|
-
|
80
|
-
|
81
|
-
|
77
|
+
AbstractFileBasedSpec.move_enum_to_root(property_object)
|
78
|
+
|
79
|
+
csv_format_schemas = list(
|
80
|
+
filter(
|
81
|
+
lambda format: format["properties"]["filetype"]["default"] == "csv",
|
82
|
+
schema["properties"]["streams"]["items"]["properties"]["format"]["oneOf"],
|
83
|
+
)
|
84
|
+
)
|
85
|
+
if len(csv_format_schemas) != 1:
|
86
|
+
raise ValueError(f"Expecting only one CSV format but got {csv_format_schemas}")
|
87
|
+
csv_format_schemas[0]["properties"]["header_definition"]["oneOf"] = csv_format_schemas[0]["properties"]["header_definition"].pop(
|
88
|
+
"anyOf", []
|
89
|
+
)
|
90
|
+
csv_format_schemas[0]["properties"]["header_definition"]["type"] = "object"
|
82
91
|
return schema
|
92
|
+
|
93
|
+
@staticmethod
|
94
|
+
def move_enum_to_root(object_property: Dict[str, Any]) -> None:
|
95
|
+
if "allOf" in object_property and "enum" in object_property["allOf"][0]:
|
96
|
+
object_property["enum"] = object_property["allOf"][0]["enum"]
|
97
|
+
object_property.pop("allOf")
|
@@ -4,9 +4,9 @@
|
|
4
4
|
|
5
5
|
import codecs
|
6
6
|
from enum import Enum
|
7
|
-
from typing import Optional, Set
|
7
|
+
from typing import Any, Dict, List, Optional, Set, Union
|
8
8
|
|
9
|
-
from pydantic import BaseModel, Field, validator
|
9
|
+
from pydantic import BaseModel, Field, ValidationError, root_validator, validator
|
10
10
|
from typing_extensions import Literal
|
11
11
|
|
12
12
|
|
@@ -15,6 +15,52 @@ class InferenceType(Enum):
|
|
15
15
|
PRIMITIVE_TYPES_ONLY = "Primitive Types Only"
|
16
16
|
|
17
17
|
|
18
|
+
class CsvHeaderDefinitionType(Enum):
|
19
|
+
FROM_CSV = "From CSV"
|
20
|
+
AUTOGENERATED = "Autogenerated"
|
21
|
+
USER_PROVIDED = "User Provided"
|
22
|
+
|
23
|
+
|
24
|
+
class CsvHeaderFromCsv(BaseModel):
|
25
|
+
class Config:
|
26
|
+
title = "From CSV"
|
27
|
+
|
28
|
+
header_definition_type: Literal[CsvHeaderDefinitionType.FROM_CSV.value] = CsvHeaderDefinitionType.FROM_CSV.value # type: ignore
|
29
|
+
|
30
|
+
def has_header_row(self) -> bool:
|
31
|
+
return True
|
32
|
+
|
33
|
+
|
34
|
+
class CsvHeaderAutogenerated(BaseModel):
|
35
|
+
class Config:
|
36
|
+
title = "Autogenerated"
|
37
|
+
|
38
|
+
header_definition_type: Literal[CsvHeaderDefinitionType.AUTOGENERATED.value] = CsvHeaderDefinitionType.AUTOGENERATED.value # type: ignore
|
39
|
+
|
40
|
+
def has_header_row(self) -> bool:
|
41
|
+
return False
|
42
|
+
|
43
|
+
|
44
|
+
class CsvHeaderUserProvided(BaseModel):
|
45
|
+
class Config:
|
46
|
+
title = "User Provided"
|
47
|
+
|
48
|
+
header_definition_type: Literal[CsvHeaderDefinitionType.USER_PROVIDED.value] = CsvHeaderDefinitionType.USER_PROVIDED.value # type: ignore
|
49
|
+
column_names: List[str] = Field(
|
50
|
+
title="Column Names",
|
51
|
+
description="The column names that will be used while emitting the CSV records",
|
52
|
+
)
|
53
|
+
|
54
|
+
def has_header_row(self) -> bool:
|
55
|
+
return False
|
56
|
+
|
57
|
+
@validator("column_names")
|
58
|
+
def validate_column_names(cls, v: List[str]) -> List[str]:
|
59
|
+
if not v:
|
60
|
+
raise ValueError("At least one column name needs to be provided when using user provided headers")
|
61
|
+
return v
|
62
|
+
|
63
|
+
|
18
64
|
DEFAULT_TRUE_VALUES = ["y", "yes", "t", "true", "on", "1"]
|
19
65
|
DEFAULT_FALSE_VALUES = ["n", "no", "f", "false", "off", "0"]
|
20
66
|
|
@@ -64,10 +110,10 @@ class CsvFormat(BaseModel):
|
|
64
110
|
skip_rows_after_header: int = Field(
|
65
111
|
title="Skip Rows After Header", default=0, description="The number of rows to skip after the header row."
|
66
112
|
)
|
67
|
-
|
68
|
-
title="
|
69
|
-
default=
|
70
|
-
description="
|
113
|
+
header_definition: Union[CsvHeaderFromCsv, CsvHeaderAutogenerated, CsvHeaderUserProvided] = Field(
|
114
|
+
title="CSV Header Definition",
|
115
|
+
default=CsvHeaderFromCsv(),
|
116
|
+
description="How headers will be defined. `User Provided` assumes the CSV does not have a header row and uses the headers provided and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers using for `f{i}` where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows.",
|
71
117
|
)
|
72
118
|
true_values: Set[str] = Field(
|
73
119
|
title="True Values",
|
@@ -113,3 +159,15 @@ class CsvFormat(BaseModel):
|
|
113
159
|
except LookupError:
|
114
160
|
raise ValueError(f"invalid encoding format: {v}")
|
115
161
|
return v
|
162
|
+
|
163
|
+
@root_validator
|
164
|
+
def validate_optional_args(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
165
|
+
definition_type = values.get("header_definition_type")
|
166
|
+
column_names = values.get("user_provided_column_names")
|
167
|
+
if definition_type == CsvHeaderDefinitionType.USER_PROVIDED and not column_names:
|
168
|
+
raise ValidationError("`user_provided_column_names` should be defined if the definition 'User Provided'.", model=CsvFormat)
|
169
|
+
if definition_type != CsvHeaderDefinitionType.USER_PROVIDED and column_names:
|
170
|
+
raise ValidationError(
|
171
|
+
"`user_provided_column_names` should not be defined if the definition is not 'User Provided'.", model=CsvFormat
|
172
|
+
)
|
173
|
+
return values
|
@@ -7,7 +7,6 @@ from enum import Enum
|
|
7
7
|
|
8
8
|
class FileBasedSourceError(Enum):
|
9
9
|
EMPTY_STREAM = "No files were identified in the stream. This may be because there are no files in the specified container, or because your glob patterns did not match any files. Please verify that your source contains files last modified after the start_date and that your glob patterns are not overly strict."
|
10
|
-
EXTENSION_MISMATCH = "The file type that you specified for this stream does not agree with the extension of one or more files in the stream. You may need to modify your glob patterns."
|
11
10
|
GLOB_PARSE_ERROR = (
|
12
11
|
"Error parsing glob pattern. Please refer to the glob pattern rules at https://facelessuser.github.io/wcmatch/glob/#split."
|
13
12
|
)
|
@@ -11,7 +11,7 @@ from functools import partial
|
|
11
11
|
from io import IOBase
|
12
12
|
from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set
|
13
13
|
|
14
|
-
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat, InferenceType
|
14
|
+
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat, CsvHeaderAutogenerated, CsvHeaderUserProvided, InferenceType
|
15
15
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
16
16
|
from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
|
17
17
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
@@ -48,11 +48,9 @@ class _CsvReader:
|
|
48
48
|
with stream_reader.open_file(file, file_read_mode, config_format.encoding, logger) as fp:
|
49
49
|
headers = self._get_headers(fp, config_format, dialect_name)
|
50
50
|
|
51
|
-
# we assume that if we autogenerate columns, it is because we don't have headers
|
52
|
-
# if a user wants to autogenerate_column_names with a CSV having headers, he can skip rows
|
53
51
|
rows_to_skip = (
|
54
52
|
config_format.skip_rows_before_header
|
55
|
-
+ (
|
53
|
+
+ (1 if config_format.header_definition.has_header_row() else 0)
|
56
54
|
+ config_format.skip_rows_after_header
|
57
55
|
)
|
58
56
|
self._skip_rows(fp, rows_to_skip)
|
@@ -74,8 +72,11 @@ class _CsvReader:
|
|
74
72
|
Assumes the fp is pointing to the beginning of the files and will reset it as such
|
75
73
|
"""
|
76
74
|
# Note that this method assumes the dialect has already been registered if we're parsing the headers
|
75
|
+
if isinstance(config_format.header_definition, CsvHeaderUserProvided):
|
76
|
+
return config_format.header_definition.column_names # type: ignore # should be CsvHeaderUserProvided given the type
|
77
|
+
|
77
78
|
self._skip_rows(fp, config_format.skip_rows_before_header)
|
78
|
-
if config_format.
|
79
|
+
if isinstance(config_format.header_definition, CsvHeaderAutogenerated):
|
79
80
|
headers = self._auto_generate_headers(fp, dialect_name)
|
80
81
|
else:
|
81
82
|
# Then read the header
|
@@ -6,7 +6,7 @@ from abc import abstractmethod
|
|
6
6
|
from functools import cached_property, lru_cache
|
7
7
|
from typing import Any, Dict, Iterable, List, Mapping, Optional
|
8
8
|
|
9
|
-
from airbyte_cdk.models import
|
9
|
+
from airbyte_cdk.models import SyncMode
|
10
10
|
from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy
|
11
11
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig, PrimaryKeyType
|
12
12
|
from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy
|
@@ -38,7 +38,7 @@ class AbstractFileBasedStream(Stream):
|
|
38
38
|
def __init__(
|
39
39
|
self,
|
40
40
|
config: FileBasedStreamConfig,
|
41
|
-
catalog_schema: Optional[
|
41
|
+
catalog_schema: Optional[Mapping[str, Any]],
|
42
42
|
stream_reader: AbstractFileBasedStreamReader,
|
43
43
|
availability_strategy: AbstractFileBasedAvailabilityStrategy,
|
44
44
|
discovery_policy: AbstractDiscoveryPolicy,
|
@@ -64,7 +64,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
|
|
64
64
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=Dc0F87nElWsz_Ikj938eQ9uqZvyqgFhZ8Dqf_-hvndc,4800
|
65
65
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
|
66
66
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
|
67
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256
|
67
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=ebor38wlQVqYD2QXk5X8v9xDZl0cEpIc2mFaKvpuiPE,57170
|
68
68
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
69
69
|
airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=bK4a74opm6WHyV7HqOVws6GE5Z7cLNc5MaTha69abIQ,6086
|
70
70
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
|
@@ -136,7 +136,7 @@ airbyte_cdk/sources/embedded/catalog.py,sha256=mIM7rO5CZAUIHKbrKwn1-Zn9_e3sLiHrT
|
|
136
136
|
airbyte_cdk/sources/embedded/runner.py,sha256=kZ0CcUANuMjdZ4fmvp_w9P2IcsS9WSHxNqYHqMwcfXI,1390
|
137
137
|
airbyte_cdk/sources/embedded/tools.py,sha256=-Z4tZ4AP1OTi_zrqFM3YV8Rt7c60wvsrv0Dc-rTZ2uw,744
|
138
138
|
airbyte_cdk/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
139
|
-
airbyte_cdk/sources/file_based/exceptions.py,sha256=
|
139
|
+
airbyte_cdk/sources/file_based/exceptions.py,sha256=4jwHysXT6r2o37Z7ch00nbo45wPVsmCorRYbYTmWd2Q,3656
|
140
140
|
airbyte_cdk/sources/file_based/file_based_source.py,sha256=NCbXAGPWBQSPAf5x2U2eCdOLUd26RhO5s6K87_AF8Es,6931
|
141
141
|
airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=K9fFHcSL4E8v-X2l38wRAcZCjpyifr35orvby8vQt84,3749
|
142
142
|
airbyte_cdk/sources/file_based/remote_file.py,sha256=s3Qz2N786yqSMXqcWmsTOvYhgs-ry0xFcn5fGyyz7bY,581
|
@@ -144,11 +144,11 @@ airbyte_cdk/sources/file_based/schema_helpers.py,sha256=XBkOutIw_n6SNYU34qbyTbl0
|
|
144
144
|
airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
|
145
145
|
airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=WiPPuQGfmQlFUMFR5h3ECc-VzBj4vair6_4WAL87AEI,277
|
146
146
|
airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=NeHCiG4FFohzYpQQFfmTL4-5oI0nElHWgXX1xrm8-SU,1269
|
147
|
-
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=
|
147
|
+
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=InGBlGbInuNUQ6oaK5A9oICVc7ZNHMSYo8g5Vy2smOo,4266
|
148
148
|
airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
149
|
-
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=
|
149
|
+
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=WrV4sKtJoZ1dK31HK7NdBKlnYHkmu6NqjmEpkVqJ6tQ,4582
|
150
150
|
airbyte_cdk/sources/file_based/config/avro_format.py,sha256=qGBB0RTjWDGZW-ilIwIq9OZl2BC-jBaq2WGrI3WVBsQ,597
|
151
|
-
airbyte_cdk/sources/file_based/config/csv_format.py,sha256
|
151
|
+
airbyte_cdk/sources/file_based/config/csv_format.py,sha256=-r-uGQlo-nXfhPuOR05XtYx_1vht74r8_am2_p8mcP8,7166
|
152
152
|
airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=5R0UlPJUGGx5OnpezZ0Fd8dyO4y2vMZtiPZR_3rfvSk,5916
|
153
153
|
airbyte_cdk/sources/file_based/config/jsonl_format.py,sha256=B-s1uy9RiKpKMwmMlR7UT3WeQPlTI-xclD0fVM4IU1Q,254
|
154
154
|
airbyte_cdk/sources/file_based/config/parquet_format.py,sha256=zvcHATNKoBIgU2UXuGnoldqLoRXG_X8ZzAkpqGPJtq4,625
|
@@ -157,7 +157,7 @@ airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha
|
|
157
157
|
airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=Mx3zT9Dem4uNfaUT0oOtrESsuB1LrGAi5N-uw2swZZA,701
|
158
158
|
airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=N3a8cjXwRUN2__46IJTwrWlsyFiSA1xtSgPcPH28sn0,476
|
159
159
|
airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=7PVaW17wn80HYW1mu074X2dy0UgFoqFqGIOKN2ZMKD0,8686
|
160
|
-
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=
|
160
|
+
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=KgdpdkHAFducvXM2jQr356M0WVol-vX0cm42n9Kf_Yc,16684
|
161
161
|
airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=cThTLc1YKSAapOn70lB09SzruRIPSShGIMz1f92QYV8,1555
|
162
162
|
airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=Kz6HLF0CrFHQ1Y6rJKGr7KmBWSLeDYFQmkg0WIi7Frg,5395
|
163
163
|
airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=QulQ_soGb1LpQ_KTxqWZjmfACGkTUDUOeuSmNFtcSLk,8717
|
@@ -165,7 +165,7 @@ airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=sEV
|
|
165
165
|
airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py,sha256=uwk6Ugf23xKG4PRPVVRVwpcNjTwPgxejl03vLSEzK0s,604
|
166
166
|
airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py,sha256=ZeAa0z50ywMU2chNjQ7JpL4yePU1NajhBa8FS7rXLVo,1643
|
167
167
|
airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaFS_3pIwm-chEabZHiPJi0,265
|
168
|
-
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=
|
168
|
+
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=BXO0b4iHNJUsF7GVIWTnY5Zlj-IjHS_JmqQlKsSDgz8,5777
|
169
169
|
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=eFYqN657J5A0sf9og_w7qea8lu2xtUobjYYDldfmbmA,11839
|
170
170
|
airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
|
171
171
|
airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=i-FPeK8lwCzX34GCcmvL5Yvdh8-uu7FeCVYDoFbD7IY,1920
|
@@ -308,20 +308,23 @@ unit_tests/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
|
|
308
308
|
unit_tests/sources/file_based/helpers.py,sha256=JNCRl13oLRRun2XyYLSKLzfrzzOAMT57yUY0vZasxL4,2567
|
309
309
|
unit_tests/sources/file_based/in_memory_files_source.py,sha256=HSZEtN7wb_NhBx4LVAEeAaeTByIBYZLr6xXJLI0FFLU,7777
|
310
310
|
unit_tests/sources/file_based/test_file_based_stream_reader.py,sha256=gVJcjj8Q83LTxcU3LL9gv-9SAY21umtOMDTy1Z7A9OU,7552
|
311
|
-
unit_tests/sources/file_based/test_scenarios.py,sha256=
|
311
|
+
unit_tests/sources/file_based/test_scenarios.py,sha256=1s3hN6xkmqHKGa348rK3sDLf-PPiEx0w-qfRi70gQnc,18167
|
312
312
|
unit_tests/sources/file_based/test_schema_helpers.py,sha256=XJ27ecw0sjlSnKgQqV1DgnnjKB1TR2btq22OITh1Qdk,12333
|
313
|
+
unit_tests/sources/file_based/availability_strategy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
314
|
+
unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py,sha256=HzxFJVJFv3YpjVmJm45ZyS2HpbnhtEX2hm4r8VjkRFE,2463
|
313
315
|
unit_tests/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
314
316
|
unit_tests/sources/file_based/config/test_abstract_file_based_spec.py,sha256=wmZAC-nBiUedMZi0n4zaC9oiZD9UTuYP5zJC1xxRnME,1216
|
317
|
+
unit_tests/sources/file_based/config/test_csv_format.py,sha256=VYL-9Ec8hW_yO2Pj9F8pDfITcgEAFtSublYda7ut7QE,1132
|
315
318
|
unit_tests/sources/file_based/config/test_file_based_stream_config.py,sha256=1eMsHlMQIFwyw20HjnhgKuiw6399sMcLTQ4LP09kTT4,3060
|
316
319
|
unit_tests/sources/file_based/file_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
317
320
|
unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=INqwKXcgNb3h_tktNXYU6WNUD-iNwRYHCd3IrnQa5R4,11051
|
318
|
-
unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=
|
321
|
+
unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=4onvErJCMNeSquZr7c1dX4TzqJlvQ3wulYCjAU_IblU,21266
|
319
322
|
unit_tests/sources/file_based/file_types/test_jsonl_parser.py,sha256=foTf9U9LyAS8OR0BonwNgFWPqTrmzFV2lpPUfRMrioE,6134
|
320
323
|
unit_tests/sources/file_based/file_types/test_parquet_parser.py,sha256=D7sKTty8aEqMDWWGKWUqDbWjTxhGkygU7ns4-_JceRY,13543
|
321
324
|
unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
322
325
|
unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=xUfw0crAvSTgQ2-chJx2ZiigQyo5IfrCuOFC1TWXXsQ,29795
|
323
|
-
unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=
|
324
|
-
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=
|
326
|
+
unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=xZf28rlH93ap8JCkAjwocng-uAW-mvMx6BDOLbvVCig,5588
|
327
|
+
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=nG4O2Ah0Uwgjg6SVTuioO_gPOigKxm-PlM2Tw21svYw,98724
|
325
328
|
unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=0maHng11cFmvzFLOniyBxOEYoKj4DYR3NO9-pSYoFLs,60710
|
326
329
|
unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=N83fga4gMKkbm6hYnen1Z5p5eEgjnMB_M_sXx6B96cU,27503
|
327
330
|
unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=-cBO1ZwberBxNMqDOtKz8yGwm3zB7elz_st2NKHeczM,26955
|
@@ -352,8 +355,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
|
|
352
355
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
353
356
|
unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
|
354
357
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
355
|
-
airbyte_cdk-0.51.
|
356
|
-
airbyte_cdk-0.51.
|
357
|
-
airbyte_cdk-0.51.
|
358
|
-
airbyte_cdk-0.51.
|
359
|
-
airbyte_cdk-0.51.
|
358
|
+
airbyte_cdk-0.51.7.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
359
|
+
airbyte_cdk-0.51.7.dist-info/METADATA,sha256=YOrAlHsZod4Nq3VugY7nbE7MDd8r8ZU7gcvX4YzTuk0,9399
|
360
|
+
airbyte_cdk-0.51.7.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
361
|
+
airbyte_cdk-0.51.7.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
362
|
+
airbyte_cdk-0.51.7.dist-info/RECORD,,
|
File without changes
|
unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import unittest
|
6
|
+
from datetime import datetime
|
7
|
+
from unittest.mock import Mock, PropertyMock
|
8
|
+
|
9
|
+
from airbyte_cdk.sources.file_based.availability_strategy.default_file_based_availability_strategy import (
|
10
|
+
DefaultFileBasedAvailabilityStrategy,
|
11
|
+
)
|
12
|
+
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
13
|
+
from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
|
14
|
+
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
|
15
|
+
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
|
16
|
+
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
17
|
+
from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream
|
18
|
+
|
19
|
+
_FILE_WITH_UNKNOWN_EXTENSION = RemoteFile(uri="a.unknown_extension", last_modified=datetime.now(), file_type="csv")
|
20
|
+
_ANY_CONFIG = FileBasedStreamConfig(
|
21
|
+
name="config.name",
|
22
|
+
file_type="parquet",
|
23
|
+
format=JsonlFormat(),
|
24
|
+
)
|
25
|
+
_ANY_SCHEMA = {"key": "value"}
|
26
|
+
|
27
|
+
|
28
|
+
class DefaultFileBasedAvailabilityStrategyTest(unittest.TestCase):
|
29
|
+
|
30
|
+
def setUp(self) -> None:
|
31
|
+
self._stream_reader = Mock(spec=AbstractFileBasedStreamReader)
|
32
|
+
self._strategy = DefaultFileBasedAvailabilityStrategy(self._stream_reader)
|
33
|
+
|
34
|
+
self._parser = Mock(spec=FileTypeParser)
|
35
|
+
self._stream = Mock(spec=AbstractFileBasedStream)
|
36
|
+
self._stream.get_parser.return_value = self._parser
|
37
|
+
self._stream.catalog_schema = _ANY_SCHEMA
|
38
|
+
self._stream.config = _ANY_CONFIG
|
39
|
+
self._stream.validation_policy = PropertyMock(validate_schema_before_sync=False)
|
40
|
+
|
41
|
+
def test_given_file_extension_does_not_match_when_check_availability_and_parsability_then_stream_is_still_available(self) -> None:
|
42
|
+
"""
|
43
|
+
Before, we had a validation on the file extension but it turns out that in production, users sometimes have mismatch there. The
|
44
|
+
example we've seen was for JSONL parser but the file extension was just `.json`. Note that there was more than one record extracted
|
45
|
+
from this stream so it's not just that the file is one JSON object
|
46
|
+
"""
|
47
|
+
self._stream.list_files.return_value = [_FILE_WITH_UNKNOWN_EXTENSION]
|
48
|
+
self._parser.parse_records.return_value = [{"a record": 1}]
|
49
|
+
|
50
|
+
is_available, reason = self._strategy.check_availability_and_parsability(self._stream, Mock(), Mock())
|
51
|
+
|
52
|
+
assert is_available
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import unittest
|
6
|
+
|
7
|
+
import pytest
|
8
|
+
from airbyte_cdk.sources.file_based.config.csv_format import CsvHeaderAutogenerated, CsvHeaderFromCsv, CsvHeaderUserProvided
|
9
|
+
from pydantic import ValidationError
|
10
|
+
|
11
|
+
|
12
|
+
class CsvHeaderDefinitionTest(unittest.TestCase):
|
13
|
+
def test_given_user_provided_and_not_column_names_provided_then_raise_exception(self) -> None:
|
14
|
+
with pytest.raises(ValidationError):
|
15
|
+
CsvHeaderUserProvided(column_names=[])
|
16
|
+
|
17
|
+
def test_given_user_provided_and_column_names_then_config_is_valid(self) -> None:
|
18
|
+
# no error means that this test succeeds
|
19
|
+
CsvHeaderUserProvided(column_names=["1", "2", "3"])
|
20
|
+
|
21
|
+
def test_given_user_provided_then_csv_does_not_have_header_row(self) -> None:
|
22
|
+
assert not CsvHeaderUserProvided(column_names=["1", "2", "3"]).has_header_row()
|
23
|
+
|
24
|
+
def test_given_autogenerated_then_csv_does_not_have_header_row(self) -> None:
|
25
|
+
assert not CsvHeaderAutogenerated().has_header_row()
|
26
|
+
|
27
|
+
def test_given_from_csv_then_csv_has_header_row(self) -> None:
|
28
|
+
assert CsvHeaderFromCsv().has_header_row()
|
@@ -13,7 +13,14 @@ from unittest import TestCase, mock
|
|
13
13
|
from unittest.mock import Mock
|
14
14
|
|
15
15
|
import pytest
|
16
|
-
from airbyte_cdk.sources.file_based.config.csv_format import
|
16
|
+
from airbyte_cdk.sources.file_based.config.csv_format import (
|
17
|
+
DEFAULT_FALSE_VALUES,
|
18
|
+
DEFAULT_TRUE_VALUES,
|
19
|
+
CsvFormat,
|
20
|
+
CsvHeaderAutogenerated,
|
21
|
+
CsvHeaderUserProvided,
|
22
|
+
InferenceType,
|
23
|
+
)
|
17
24
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
18
25
|
from airbyte_cdk.sources.file_based.exceptions import RecordParseError
|
19
26
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
@@ -278,13 +285,28 @@ class CsvReaderTest(unittest.TestCase):
|
|
278
285
|
assert list(data_generator) == [{"header": "a value"}, {"header": "another value"}]
|
279
286
|
|
280
287
|
def test_given_autogenerated_headers_when_read_data_then_generate_headers_with_format_fX(self) -> None:
|
281
|
-
self._config_format.
|
288
|
+
self._config_format.header_definition = CsvHeaderAutogenerated()
|
282
289
|
self._stream_reader.open_file.return_value = CsvFileBuilder().with_data(["0,1,2,3,4,5,6"]).build()
|
283
290
|
|
284
291
|
data_generator = self._read_data()
|
285
292
|
|
286
293
|
assert list(data_generator) == [{"f0": "0", "f1": "1", "f2": "2", "f3": "3", "f4": "4", "f5": "5", "f6": "6"}]
|
287
294
|
|
295
|
+
def test_given_user_provided_headers_when_read_data_then_use_user_provided_headers(self) -> None:
|
296
|
+
self._config_format.header_definition = CsvHeaderUserProvided(column_names=["first", "second", "third", "fourth"])
|
297
|
+
self._stream_reader.open_file.return_value = CsvFileBuilder().with_data(["0,1,2,3"]).build()
|
298
|
+
|
299
|
+
data_generator = self._read_data()
|
300
|
+
|
301
|
+
assert list(data_generator) == [{"first": "0", "second": "1", "third": "2", "fourth": "3"}]
|
302
|
+
|
303
|
+
def test_given_len_mistmatch_on_user_provided_headers_when_read_data_then_raise_error(self) -> None:
|
304
|
+
self._config_format.header_definition = CsvHeaderUserProvided(column_names=["missing", "one", "column"])
|
305
|
+
self._stream_reader.open_file.return_value = CsvFileBuilder().with_data(["0,1,2,3"]).build()
|
306
|
+
|
307
|
+
with pytest.raises(RecordParseError):
|
308
|
+
list(self._read_data())
|
309
|
+
|
288
310
|
def test_given_skip_rows_after_header_when_read_data_then_do_not_parse_skipped_rows(self) -> None:
|
289
311
|
self._config_format.skip_rows_after_header = 1
|
290
312
|
self._stream_reader.open_file.return_value = (
|
@@ -134,14 +134,6 @@ error_empty_stream_scenario = (
|
|
134
134
|
).build()
|
135
135
|
|
136
136
|
|
137
|
-
error_extension_mismatch_scenario = (
|
138
|
-
_base_failure_scenario.copy()
|
139
|
-
.set_name("error_extension_mismatch_scenario")
|
140
|
-
.set_file_type("jsonl")
|
141
|
-
.set_expected_check_error(None, FileBasedSourceError.EXTENSION_MISMATCH.value)
|
142
|
-
).build()
|
143
|
-
|
144
|
-
|
145
137
|
error_listing_files_scenario = (
|
146
138
|
_base_failure_scenario.copy()
|
147
139
|
.set_name("error_listing_files_scenario")
|
@@ -180,11 +180,43 @@ single_csv_scenario = (
|
|
180
180
|
"default": 0,
|
181
181
|
"type": "integer",
|
182
182
|
},
|
183
|
-
"
|
184
|
-
"title": "
|
185
|
-
"
|
186
|
-
"
|
187
|
-
"
|
183
|
+
"header_definition": {
|
184
|
+
"title": "CSV Header Definition",
|
185
|
+
"type": "object",
|
186
|
+
"description": "How headers will be defined. `User Provided` assumes the CSV does not have a header row and uses the headers provided and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers using for `f{i}` where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows.",
|
187
|
+
"default": {"header_definition_type": "From CSV"},
|
188
|
+
"oneOf": [
|
189
|
+
{
|
190
|
+
"title": "From CSV",
|
191
|
+
"type": "object",
|
192
|
+
"properties": {
|
193
|
+
"header_definition_type": {"title": "Header Definition Type", "default": "From CSV", "enum": ["From CSV"], "type": "string"},
|
194
|
+
},
|
195
|
+
},
|
196
|
+
{
|
197
|
+
"title": "Autogenerated",
|
198
|
+
"type": "object",
|
199
|
+
"properties": {
|
200
|
+
"header_definition_type": {"title": "Header Definition Type", "default": "Autogenerated", "enum": ["Autogenerated"], "type": "string"},
|
201
|
+
},
|
202
|
+
},
|
203
|
+
{
|
204
|
+
"title": "User Provided",
|
205
|
+
"type": "object",
|
206
|
+
"properties": {
|
207
|
+
"header_definition_type": {"title": "Header Definition Type", "default": "User Provided", "enum": ["User Provided"], "type": "string"},
|
208
|
+
"column_names": {
|
209
|
+
"title": "Column Names",
|
210
|
+
"description": "The column names that will be used while emitting the CSV records",
|
211
|
+
"type": "array",
|
212
|
+
"items": {
|
213
|
+
"type": "string"
|
214
|
+
},
|
215
|
+
}
|
216
|
+
},
|
217
|
+
"required": ["column_names"]
|
218
|
+
},
|
219
|
+
]
|
188
220
|
},
|
189
221
|
"true_values": {
|
190
222
|
"title": "True Values",
|
@@ -761,7 +793,6 @@ csv_multi_stream_scenario = (
|
|
761
793
|
)
|
762
794
|
).build()
|
763
795
|
|
764
|
-
|
765
796
|
csv_custom_format_scenario = (
|
766
797
|
TestScenarioBuilder()
|
767
798
|
.set_name("csv_custom_format")
|
@@ -868,7 +899,6 @@ csv_custom_format_scenario = (
|
|
868
899
|
)
|
869
900
|
).build()
|
870
901
|
|
871
|
-
|
872
902
|
multi_stream_custom_format = (
|
873
903
|
TestScenarioBuilder()
|
874
904
|
.set_name("multi_stream_custom_format_scenario")
|
@@ -1016,7 +1046,6 @@ multi_stream_custom_format = (
|
|
1016
1046
|
)
|
1017
1047
|
).build()
|
1018
1048
|
|
1019
|
-
|
1020
1049
|
empty_schema_inference_scenario = (
|
1021
1050
|
TestScenarioBuilder()
|
1022
1051
|
.set_name("empty_schema_inference_scenario")
|
@@ -1092,7 +1121,6 @@ empty_schema_inference_scenario = (
|
|
1092
1121
|
)
|
1093
1122
|
).build()
|
1094
1123
|
|
1095
|
-
|
1096
1124
|
schemaless_csv_scenario = (
|
1097
1125
|
TestScenarioBuilder()
|
1098
1126
|
.set_name("schemaless_csv_scenario")
|
@@ -1188,7 +1216,6 @@ schemaless_csv_scenario = (
|
|
1188
1216
|
)
|
1189
1217
|
).build()
|
1190
1218
|
|
1191
|
-
|
1192
1219
|
schemaless_csv_multi_stream_scenario = (
|
1193
1220
|
TestScenarioBuilder()
|
1194
1221
|
.set_name("schemaless_csv_multi_stream_scenario")
|
@@ -1296,7 +1323,6 @@ schemaless_csv_multi_stream_scenario = (
|
|
1296
1323
|
)
|
1297
1324
|
).build()
|
1298
1325
|
|
1299
|
-
|
1300
1326
|
schemaless_with_user_input_schema_fails_connection_check_scenario = (
|
1301
1327
|
TestScenarioBuilder()
|
1302
1328
|
.set_name("schemaless_with_user_input_schema_fails_connection_check_scenario")
|
@@ -1361,7 +1387,6 @@ schemaless_with_user_input_schema_fails_connection_check_scenario = (
|
|
1361
1387
|
.set_expected_read_error(ConfigValidationError, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value)
|
1362
1388
|
).build()
|
1363
1389
|
|
1364
|
-
|
1365
1390
|
schemaless_with_user_input_schema_fails_connection_check_multi_stream_scenario = (
|
1366
1391
|
TestScenarioBuilder()
|
1367
1392
|
.set_name("schemaless_with_user_input_schema_fails_connection_check_multi_stream_scenario")
|
@@ -1446,7 +1471,6 @@ schemaless_with_user_input_schema_fails_connection_check_multi_stream_scenario =
|
|
1446
1471
|
.set_expected_read_error(ConfigValidationError, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value)
|
1447
1472
|
).build()
|
1448
1473
|
|
1449
|
-
|
1450
1474
|
csv_string_can_be_null_with_input_schemas_scenario = (
|
1451
1475
|
TestScenarioBuilder()
|
1452
1476
|
.set_name("csv_string_can_be_null_with_input_schema")
|
@@ -2143,7 +2167,6 @@ csv_custom_delimiter_in_double_quotes_scenario = (
|
|
2143
2167
|
)
|
2144
2168
|
).build()
|
2145
2169
|
|
2146
|
-
|
2147
2170
|
csv_skip_before_header_scenario = (
|
2148
2171
|
TestScenarioBuilder()
|
2149
2172
|
.set_name("csv_skip_before_header")
|
@@ -2278,7 +2301,6 @@ csv_skip_after_header_scenario = (
|
|
2278
2301
|
)
|
2279
2302
|
).build()
|
2280
2303
|
|
2281
|
-
|
2282
2304
|
csv_skip_before_and_after_header_scenario = (
|
2283
2305
|
TestScenarioBuilder()
|
2284
2306
|
.set_name("csv_skip_before_after_header")
|
@@ -2363,7 +2385,7 @@ csv_autogenerate_column_names_scenario = (
|
|
2363
2385
|
"validation_policy": "Emit Record",
|
2364
2386
|
"format": {
|
2365
2387
|
"filetype": "csv",
|
2366
|
-
"
|
2388
|
+
"header_definition": {"header_definition_type": "Autogenerated"},
|
2367
2389
|
},
|
2368
2390
|
}
|
2369
2391
|
],
|
@@ -2556,7 +2578,6 @@ csv_custom_null_values_scenario = (
|
|
2556
2578
|
)
|
2557
2579
|
).build()
|
2558
2580
|
|
2559
|
-
|
2560
2581
|
earlier_csv_scenario = (
|
2561
2582
|
TestScenarioBuilder()
|
2562
2583
|
.set_name("earlier_csv_stream")
|
@@ -24,7 +24,6 @@ from unit_tests.sources.file_based.scenarios.avro_scenarios import (
|
|
24
24
|
)
|
25
25
|
from unit_tests.sources.file_based.scenarios.check_scenarios import (
|
26
26
|
error_empty_stream_scenario,
|
27
|
-
error_extension_mismatch_scenario,
|
28
27
|
error_listing_files_scenario,
|
29
28
|
error_multi_stream_scenario,
|
30
29
|
error_reading_file_scenario,
|
@@ -309,7 +308,6 @@ def test_spec(capsys: CaptureFixture[str], scenario: TestScenario) -> None:
|
|
309
308
|
|
310
309
|
check_scenarios = [
|
311
310
|
error_empty_stream_scenario,
|
312
|
-
error_extension_mismatch_scenario,
|
313
311
|
error_listing_files_scenario,
|
314
312
|
error_reading_file_scenario,
|
315
313
|
error_record_validation_user_provided_schema_scenario,
|
File without changes
|
File without changes
|
File without changes
|