airbyte-cdk 0.51.3__py3-none-any.whl → 0.51.4__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +4 -5
- {airbyte_cdk-0.51.3.dist-info → airbyte_cdk-0.51.4.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.51.3.dist-info → airbyte_cdk-0.51.4.dist-info}/RECORD +8 -8
- {airbyte_cdk-0.51.3.dist-info → airbyte_cdk-0.51.4.dist-info}/WHEEL +1 -1
- unit_tests/sources/file_based/file_types/test_csv_parser.py +1 -1
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -2
- {airbyte_cdk-0.51.3.dist-info → airbyte_cdk-0.51.4.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.51.3.dist-info → airbyte_cdk-0.51.4.dist-info}/top_level.txt +0 -0
@@ -158,7 +158,7 @@ class CsvParser(FileTypeParser):
|
|
158
158
|
deduped_property_types = CsvParser._pre_propcess_property_types(property_types)
|
159
159
|
else:
|
160
160
|
deduped_property_types = {}
|
161
|
-
cast_fn = CsvParser._get_cast_function(deduped_property_types, config_format, logger)
|
161
|
+
cast_fn = CsvParser._get_cast_function(deduped_property_types, config_format, logger, config.schemaless)
|
162
162
|
data_generator = self._csv_reader.read_data(config, file, stream_reader, logger, self.file_read_mode)
|
163
163
|
for row in data_generator:
|
164
164
|
yield CsvParser._to_nullable(cast_fn(row), deduped_property_types, config_format.null_values, config_format.strings_can_be_null)
|
@@ -170,10 +170,10 @@ class CsvParser(FileTypeParser):
|
|
170
170
|
|
171
171
|
@staticmethod
|
172
172
|
def _get_cast_function(
|
173
|
-
deduped_property_types: Mapping[str, str], config_format: CsvFormat, logger: logging.Logger
|
173
|
+
deduped_property_types: Mapping[str, str], config_format: CsvFormat, logger: logging.Logger, schemaless: bool
|
174
174
|
) -> Callable[[Mapping[str, str]], Mapping[str, str]]:
|
175
175
|
# Only cast values if the schema is provided
|
176
|
-
if deduped_property_types:
|
176
|
+
if deduped_property_types and not schemaless:
|
177
177
|
return partial(CsvParser._cast_types, deduped_property_types=deduped_property_types, config_format=config_format, logger=logger)
|
178
178
|
else:
|
179
179
|
# If no schema is provided, yield the rows as they are
|
@@ -275,11 +275,10 @@ class CsvParser(FileTypeParser):
|
|
275
275
|
except ValueError:
|
276
276
|
warnings.append(_format_warning(key, value, prop_type))
|
277
277
|
|
278
|
+
result[key] = cast_value
|
278
279
|
else:
|
279
280
|
warnings.append(_format_warning(key, value, prop_type))
|
280
281
|
|
281
|
-
result[key] = cast_value
|
282
|
-
|
283
282
|
if warnings:
|
284
283
|
logger.warning(
|
285
284
|
f"{FileBasedSourceError.ERROR_CASTING_VALUE.value}: {','.join([w for w in warnings])}",
|
@@ -157,7 +157,7 @@ airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha
|
|
157
157
|
airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=Mx3zT9Dem4uNfaUT0oOtrESsuB1LrGAi5N-uw2swZZA,701
|
158
158
|
airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=N3a8cjXwRUN2__46IJTwrWlsyFiSA1xtSgPcPH28sn0,476
|
159
159
|
airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=7PVaW17wn80HYW1mu074X2dy0UgFoqFqGIOKN2ZMKD0,8686
|
160
|
-
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=
|
160
|
+
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=SsWy_8KunUz1MYKroix6fWv37mbPEH0h5SoW4g3Qjf4,16676
|
161
161
|
airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=cThTLc1YKSAapOn70lB09SzruRIPSShGIMz1f92QYV8,1555
|
162
162
|
airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=Kz6HLF0CrFHQ1Y6rJKGr7KmBWSLeDYFQmkg0WIi7Frg,5395
|
163
163
|
airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=QulQ_soGb1LpQ_KTxqWZjmfACGkTUDUOeuSmNFtcSLk,8717
|
@@ -315,13 +315,13 @@ unit_tests/sources/file_based/config/test_abstract_file_based_spec.py,sha256=wmZ
|
|
315
315
|
unit_tests/sources/file_based/config/test_file_based_stream_config.py,sha256=1eMsHlMQIFwyw20HjnhgKuiw6399sMcLTQ4LP09kTT4,3060
|
316
316
|
unit_tests/sources/file_based/file_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
317
317
|
unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=INqwKXcgNb3h_tktNXYU6WNUD-iNwRYHCd3IrnQa5R4,11051
|
318
|
-
unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=
|
318
|
+
unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=KB4WDy3aMAZ0CmJiqFaTUOZlK4urpvG9bwcwQ-h2-VY,20303
|
319
319
|
unit_tests/sources/file_based/file_types/test_jsonl_parser.py,sha256=foTf9U9LyAS8OR0BonwNgFWPqTrmzFV2lpPUfRMrioE,6134
|
320
320
|
unit_tests/sources/file_based/file_types/test_parquet_parser.py,sha256=D7sKTty8aEqMDWWGKWUqDbWjTxhGkygU7ns4-_JceRY,13543
|
321
321
|
unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
322
322
|
unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=xUfw0crAvSTgQ2-chJx2ZiigQyo5IfrCuOFC1TWXXsQ,29795
|
323
323
|
unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=7DR49LCjns72Dv5-R-cg6_SUR1zpHtE9_uFEWoYwx1s,5834
|
324
|
-
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=
|
324
|
+
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=UVdDblKj3R5qQkh-dj4xqZ2822GyJuymaAerWbX9HeE,95707
|
325
325
|
unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=0maHng11cFmvzFLOniyBxOEYoKj4DYR3NO9-pSYoFLs,60710
|
326
326
|
unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=N83fga4gMKkbm6hYnen1Z5p5eEgjnMB_M_sXx6B96cU,27503
|
327
327
|
unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=-cBO1ZwberBxNMqDOtKz8yGwm3zB7elz_st2NKHeczM,26955
|
@@ -352,8 +352,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
|
|
352
352
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
353
353
|
unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
|
354
354
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
355
|
-
airbyte_cdk-0.51.
|
356
|
-
airbyte_cdk-0.51.
|
357
|
-
airbyte_cdk-0.51.
|
358
|
-
airbyte_cdk-0.51.
|
359
|
-
airbyte_cdk-0.51.
|
355
|
+
airbyte_cdk-0.51.4.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
356
|
+
airbyte_cdk-0.51.4.dist-info/METADATA,sha256=imzQYjx2YvmpJDtuZK6lUo96gKmLJBYNOZf06KeAg-s,9399
|
357
|
+
airbyte_cdk-0.51.4.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
358
|
+
airbyte_cdk-0.51.4.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
359
|
+
airbyte_cdk-0.51.4.dist-info/RECORD,,
|
@@ -100,7 +100,7 @@ logger = logging.getLogger()
|
|
100
100
|
pytest.param(
|
101
101
|
{"col9": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col9": "['a', 'b']"}, id="cannot-cast-to-list-of-objects"
|
102
102
|
),
|
103
|
-
pytest.param({"col11": "x"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {
|
103
|
+
pytest.param({"col11": "x"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {}, id="item-not-in-props-doesn't-error"),
|
104
104
|
],
|
105
105
|
)
|
106
106
|
def test_cast_to_python_type(row: Dict[str, str], true_values: Set[str], false_values: Set[str], expected_output: Dict[str, Any]) -> None:
|
@@ -481,7 +481,6 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = (
|
|
481
481
|
"data": {
|
482
482
|
"col1": "val11b",
|
483
483
|
"col2": "val12b",
|
484
|
-
"col3": "val13b",
|
485
484
|
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
486
485
|
"_ab_source_file_url": "b.csv",
|
487
486
|
},
|
@@ -491,7 +490,6 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = (
|
|
491
490
|
"data": {
|
492
491
|
"col1": "val21b",
|
493
492
|
"col2": "val22b",
|
494
|
-
"col3": "val23b",
|
495
493
|
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
496
494
|
"_ab_source_file_url": "b.csv",
|
497
495
|
},
|
File without changes
|
File without changes
|