airbyte-cdk 0.51.3__py3-none-any.whl → 0.51.4__py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +4 -5
- {airbyte_cdk-0.51.3.dist-info → airbyte_cdk-0.51.4.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.51.3.dist-info → airbyte_cdk-0.51.4.dist-info}/RECORD +8 -8
- {airbyte_cdk-0.51.3.dist-info → airbyte_cdk-0.51.4.dist-info}/WHEEL +1 -1
- unit_tests/sources/file_based/file_types/test_csv_parser.py +1 -1
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -2
- {airbyte_cdk-0.51.3.dist-info → airbyte_cdk-0.51.4.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.51.3.dist-info → airbyte_cdk-0.51.4.dist-info}/top_level.txt +0 -0
| @@ -158,7 +158,7 @@ class CsvParser(FileTypeParser): | |
| 158 158 | 
             
                        deduped_property_types = CsvParser._pre_propcess_property_types(property_types)
         | 
| 159 159 | 
             
                    else:
         | 
| 160 160 | 
             
                        deduped_property_types = {}
         | 
| 161 | 
            -
                    cast_fn = CsvParser._get_cast_function(deduped_property_types, config_format, logger)
         | 
| 161 | 
            +
                    cast_fn = CsvParser._get_cast_function(deduped_property_types, config_format, logger, config.schemaless)
         | 
| 162 162 | 
             
                    data_generator = self._csv_reader.read_data(config, file, stream_reader, logger, self.file_read_mode)
         | 
| 163 163 | 
             
                    for row in data_generator:
         | 
| 164 164 | 
             
                        yield CsvParser._to_nullable(cast_fn(row), deduped_property_types, config_format.null_values, config_format.strings_can_be_null)
         | 
| @@ -170,10 +170,10 @@ class CsvParser(FileTypeParser): | |
| 170 170 |  | 
| 171 171 | 
             
                @staticmethod
         | 
| 172 172 | 
             
                def _get_cast_function(
         | 
| 173 | 
            -
                    deduped_property_types: Mapping[str, str], config_format: CsvFormat, logger: logging.Logger
         | 
| 173 | 
            +
                    deduped_property_types: Mapping[str, str], config_format: CsvFormat, logger: logging.Logger, schemaless: bool
         | 
| 174 174 | 
             
                ) -> Callable[[Mapping[str, str]], Mapping[str, str]]:
         | 
| 175 175 | 
             
                    # Only cast values if the schema is provided
         | 
| 176 | 
            -
                    if deduped_property_types:
         | 
| 176 | 
            +
                    if deduped_property_types and not schemaless:
         | 
| 177 177 | 
             
                        return partial(CsvParser._cast_types, deduped_property_types=deduped_property_types, config_format=config_format, logger=logger)
         | 
| 178 178 | 
             
                    else:
         | 
| 179 179 | 
             
                        # If no schema is provided, yield the rows as they are
         | 
| @@ -275,11 +275,10 @@ class CsvParser(FileTypeParser): | |
| 275 275 | 
             
                                except ValueError:
         | 
| 276 276 | 
             
                                    warnings.append(_format_warning(key, value, prop_type))
         | 
| 277 277 |  | 
| 278 | 
            +
                            result[key] = cast_value
         | 
| 278 279 | 
             
                        else:
         | 
| 279 280 | 
             
                            warnings.append(_format_warning(key, value, prop_type))
         | 
| 280 281 |  | 
| 281 | 
            -
                        result[key] = cast_value
         | 
| 282 | 
            -
             | 
| 283 282 | 
             
                    if warnings:
         | 
| 284 283 | 
             
                        logger.warning(
         | 
| 285 284 | 
             
                            f"{FileBasedSourceError.ERROR_CASTING_VALUE.value}: {','.join([w for w in warnings])}",
         | 
| @@ -157,7 +157,7 @@ airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha | |
| 157 157 | 
             
            airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=Mx3zT9Dem4uNfaUT0oOtrESsuB1LrGAi5N-uw2swZZA,701
         | 
| 158 158 | 
             
            airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=N3a8cjXwRUN2__46IJTwrWlsyFiSA1xtSgPcPH28sn0,476
         | 
| 159 159 | 
             
            airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=7PVaW17wn80HYW1mu074X2dy0UgFoqFqGIOKN2ZMKD0,8686
         | 
| 160 | 
            -
            airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256= | 
| 160 | 
            +
            airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=SsWy_8KunUz1MYKroix6fWv37mbPEH0h5SoW4g3Qjf4,16676
         | 
| 161 161 | 
             
            airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=cThTLc1YKSAapOn70lB09SzruRIPSShGIMz1f92QYV8,1555
         | 
| 162 162 | 
             
            airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=Kz6HLF0CrFHQ1Y6rJKGr7KmBWSLeDYFQmkg0WIi7Frg,5395
         | 
| 163 163 | 
             
            airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=QulQ_soGb1LpQ_KTxqWZjmfACGkTUDUOeuSmNFtcSLk,8717
         | 
| @@ -315,13 +315,13 @@ unit_tests/sources/file_based/config/test_abstract_file_based_spec.py,sha256=wmZ | |
| 315 315 | 
             
            unit_tests/sources/file_based/config/test_file_based_stream_config.py,sha256=1eMsHlMQIFwyw20HjnhgKuiw6399sMcLTQ4LP09kTT4,3060
         | 
| 316 316 | 
             
            unit_tests/sources/file_based/file_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         | 
| 317 317 | 
             
            unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=INqwKXcgNb3h_tktNXYU6WNUD-iNwRYHCd3IrnQa5R4,11051
         | 
| 318 | 
            -
            unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256= | 
| 318 | 
            +
            unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=KB4WDy3aMAZ0CmJiqFaTUOZlK4urpvG9bwcwQ-h2-VY,20303
         | 
| 319 319 | 
             
            unit_tests/sources/file_based/file_types/test_jsonl_parser.py,sha256=foTf9U9LyAS8OR0BonwNgFWPqTrmzFV2lpPUfRMrioE,6134
         | 
| 320 320 | 
             
            unit_tests/sources/file_based/file_types/test_parquet_parser.py,sha256=D7sKTty8aEqMDWWGKWUqDbWjTxhGkygU7ns4-_JceRY,13543
         | 
| 321 321 | 
             
            unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         | 
| 322 322 | 
             
            unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=xUfw0crAvSTgQ2-chJx2ZiigQyo5IfrCuOFC1TWXXsQ,29795
         | 
| 323 323 | 
             
            unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=7DR49LCjns72Dv5-R-cg6_SUR1zpHtE9_uFEWoYwx1s,5834
         | 
| 324 | 
            -
            unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256= | 
| 324 | 
            +
            unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=UVdDblKj3R5qQkh-dj4xqZ2822GyJuymaAerWbX9HeE,95707
         | 
| 325 325 | 
             
            unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=0maHng11cFmvzFLOniyBxOEYoKj4DYR3NO9-pSYoFLs,60710
         | 
| 326 326 | 
             
            unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=N83fga4gMKkbm6hYnen1Z5p5eEgjnMB_M_sXx6B96cU,27503
         | 
| 327 327 | 
             
            unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=-cBO1ZwberBxNMqDOtKz8yGwm3zB7elz_st2NKHeczM,26955
         | 
| @@ -352,8 +352,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg | |
| 352 352 | 
             
            unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
         | 
| 353 353 | 
             
            unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
         | 
| 354 354 | 
             
            unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
         | 
| 355 | 
            -
            airbyte_cdk-0.51. | 
| 356 | 
            -
            airbyte_cdk-0.51. | 
| 357 | 
            -
            airbyte_cdk-0.51. | 
| 358 | 
            -
            airbyte_cdk-0.51. | 
| 359 | 
            -
            airbyte_cdk-0.51. | 
| 355 | 
            +
            airbyte_cdk-0.51.4.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
         | 
| 356 | 
            +
            airbyte_cdk-0.51.4.dist-info/METADATA,sha256=imzQYjx2YvmpJDtuZK6lUo96gKmLJBYNOZf06KeAg-s,9399
         | 
| 357 | 
            +
            airbyte_cdk-0.51.4.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
         | 
| 358 | 
            +
            airbyte_cdk-0.51.4.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
         | 
| 359 | 
            +
            airbyte_cdk-0.51.4.dist-info/RECORD,,
         | 
| @@ -100,7 +100,7 @@ logger = logging.getLogger() | |
| 100 100 | 
             
                    pytest.param(
         | 
| 101 101 | 
             
                        {"col9": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col9": "['a', 'b']"}, id="cannot-cast-to-list-of-objects"
         | 
| 102 102 | 
             
                    ),
         | 
| 103 | 
            -
                    pytest.param({"col11": "x"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, { | 
| 103 | 
            +
                    pytest.param({"col11": "x"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {}, id="item-not-in-props-doesn't-error"),
         | 
| 104 104 | 
             
                ],
         | 
| 105 105 | 
             
            )
         | 
| 106 106 | 
             
            def test_cast_to_python_type(row: Dict[str, str], true_values: Set[str], false_values: Set[str], expected_output: Dict[str, Any]) -> None:
         | 
| @@ -481,7 +481,6 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = ( | |
| 481 481 | 
             
                            "data": {
         | 
| 482 482 | 
             
                                "col1": "val11b",
         | 
| 483 483 | 
             
                                "col2": "val12b",
         | 
| 484 | 
            -
                                "col3": "val13b",
         | 
| 485 484 | 
             
                                "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
         | 
| 486 485 | 
             
                                "_ab_source_file_url": "b.csv",
         | 
| 487 486 | 
             
                            },
         | 
| @@ -491,7 +490,6 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = ( | |
| 491 490 | 
             
                            "data": {
         | 
| 492 491 | 
             
                                "col1": "val21b",
         | 
| 493 492 | 
             
                                "col2": "val22b",
         | 
| 494 | 
            -
                                "col3": "val23b",
         | 
| 495 493 | 
             
                                "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
         | 
| 496 494 | 
             
                                "_ab_source_file_url": "b.csv",
         | 
| 497 495 | 
             
                            },
         | 
| 
            File without changes
         | 
| 
            File without changes
         |