airbyte-cdk 0.50.0__py3-none-any.whl → 0.50.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/entrypoint.py +7 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3 -3
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +3 -3
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +9 -9
- airbyte_cdk/sources/file_based/config/csv_format.py +42 -6
- airbyte_cdk/sources/file_based/file_based_source.py +4 -5
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +114 -59
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py +2 -2
- airbyte_cdk/sources/file_based/stream/cursor/{file_based_cursor.py → abstract_file_based_cursor.py} +9 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +10 -10
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +15 -2
- {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/RECORD +25 -24
- unit_tests/sources/file_based/config/test_csv_format.py +23 -0
- unit_tests/sources/file_based/file_types/test_csv_parser.py +50 -18
- unit_tests/sources/file_based/helpers.py +5 -0
- unit_tests/sources/file_based/in_memory_files_source.py +11 -3
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +1254 -47
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +6 -5
- unit_tests/sources/file_based/scenarios/scenario_builder.py +8 -7
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +13 -12
- unit_tests/sources/file_based/test_scenarios.py +30 -0
- {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
|
|
1
1
|
airbyte_cdk/__init__.py,sha256=OBQWv5rF_QTRpOiP6J8J8oTU-GGrfi18i1PRFpahKks,262
|
2
2
|
airbyte_cdk/config_observation.py,sha256=3kjxv8xTwCnub2_fTWnMPRx0E7vly1BUeyXOSK15Ql4,3610
|
3
3
|
airbyte_cdk/connector.py,sha256=LtTAmBFV1LBUz_fOEbQ_EvBhyUsz8AGOlDsvK8QOOo0,4396
|
4
|
-
airbyte_cdk/entrypoint.py,sha256=
|
4
|
+
airbyte_cdk/entrypoint.py,sha256=guPK0UnKWxpN0G3d0etxkaXX7wtgWIymLJ8M1ssKNQc,12393
|
5
5
|
airbyte_cdk/exception_handler.py,sha256=CwkiPdZ1WMOr3CBkvKFyHiyLerXGRqBrVlB4p0OImGI,1125
|
6
6
|
airbyte_cdk/logger.py,sha256=4Mi2MEQi1uh59BP9Dxw_UEbZuxaJewqK_jvEU2b10nk,3985
|
7
7
|
airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -23,7 +23,7 @@ airbyte_cdk/sources/http_logger.py,sha256=v0kkpDtA0GUOgj6_3AayrYaBrSHBqG4t3MGbrt
|
|
23
23
|
airbyte_cdk/sources/source.py,sha256=N3vHZzdUsBETFsql-YpO-LcgjolT_jcnAuHBhGD6Hqk,4278
|
24
24
|
airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
25
25
|
airbyte_cdk/sources/declarative/create_partial.py,sha256=sUJOwD8hBzW4pxw2XhYlSTMgl-WMc5WpP5Oq_jo3fHw,3371
|
26
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
26
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=5Z8OMI6UrR6n6CwaklMhm3_z7JsQvhXDVCGb1S6o5iU,83049
|
27
27
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=U2As9PDKmcWDgbsWUo-RetJ9fxQOBlwntWZ0NOgs5Ac,1453
|
28
28
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=3E8I_hsJC9vlh8BgMOAbUaEhQVvlzSeDCo5nEkCuDzg,5844
|
29
29
|
airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
|
@@ -64,7 +64,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
|
|
64
64
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=Dc0F87nElWsz_Ikj938eQ9uqZvyqgFhZ8Dqf_-hvndc,4800
|
65
65
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
|
66
66
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
|
67
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
67
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=HTK0MrQpNV3CfmcCURT8pKCkPuqzJIp5auFmiIIiylo,57104
|
68
68
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
69
69
|
airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=bK4a74opm6WHyV7HqOVws6GE5Z7cLNc5MaTha69abIQ,6086
|
70
70
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
|
@@ -137,18 +137,18 @@ airbyte_cdk/sources/embedded/runner.py,sha256=TykiigEz39Y4QryHTbSm7VGvFOxo7duiCY
|
|
137
137
|
airbyte_cdk/sources/embedded/tools.py,sha256=-Z4tZ4AP1OTi_zrqFM3YV8Rt7c60wvsrv0Dc-rTZ2uw,744
|
138
138
|
airbyte_cdk/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
139
139
|
airbyte_cdk/sources/file_based/exceptions.py,sha256=GKdFFteVMgYS1nzr2SeDXm7DYTHvs1PLoftWoXOl5Vk,3810
|
140
|
-
airbyte_cdk/sources/file_based/file_based_source.py,sha256=
|
140
|
+
airbyte_cdk/sources/file_based/file_based_source.py,sha256=uOxyngmCjuBLniWOHDpivXH8WlYIfzWqQOQSjBlVzvc,6786
|
141
141
|
airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=s2okNhgsI-d6_duhb-9osJ1RioQQI5NmqC0U9kNeYrc,3420
|
142
142
|
airbyte_cdk/sources/file_based/remote_file.py,sha256=s3Qz2N786yqSMXqcWmsTOvYhgs-ry0xFcn5fGyyz7bY,581
|
143
143
|
airbyte_cdk/sources/file_based/schema_helpers.py,sha256=c22G3ukPPayoOioSMUjtWSjd4zXih9X_yKvAPRmogfE,9025
|
144
144
|
airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
|
145
145
|
airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=WiPPuQGfmQlFUMFR5h3ECc-VzBj4vair6_4WAL87AEI,277
|
146
146
|
airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=NeHCiG4FFohzYpQQFfmTL4-5oI0nElHWgXX1xrm8-SU,1269
|
147
|
-
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=
|
147
|
+
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=axNPioHqk-fvQRqQTxjc4wKZeT4LSRA8u7lWG_EiSDk,4611
|
148
148
|
airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
149
149
|
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=CCZKoFMX9-tDEY_RY_ziYil0PSPTeKsz6vuoKv6XqvY,4523
|
150
150
|
airbyte_cdk/sources/file_based/config/avro_format.py,sha256=KkV7i-36aITHnacBztpvllmBDXIXwEfKbgRYEICgs30,565
|
151
|
-
airbyte_cdk/sources/file_based/config/csv_format.py,sha256=
|
151
|
+
airbyte_cdk/sources/file_based/config/csv_format.py,sha256=3Hh2McAIvMT8G6TFCGlq2vd96ox7xcO9K7jbIa-2SPo,5061
|
152
152
|
airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=-iqYoUbC3Nuy97_mKdQMZxhEVlhr3a4aoTqGOmqA0-Y,5878
|
153
153
|
airbyte_cdk/sources/file_based/config/jsonl_format.py,sha256=BI1VHv-0qbbdw8ZH7t9iV4a7JfSWRzPatkeOvLU_GI0,204
|
154
154
|
airbyte_cdk/sources/file_based/config/parquet_format.py,sha256=wqe8iF-QzIto97uoRlDBNI84ED-4H1CZyMy-TSo8Z6Y,573
|
@@ -157,7 +157,7 @@ airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha
|
|
157
157
|
airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=Mx3zT9Dem4uNfaUT0oOtrESsuB1LrGAi5N-uw2swZZA,701
|
158
158
|
airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=N3a8cjXwRUN2__46IJTwrWlsyFiSA1xtSgPcPH28sn0,476
|
159
159
|
airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=2wzcZmQ0_VYyJmRYllGrEEYwrP9hYwXVJKz-DO6JyoE,8079
|
160
|
-
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=
|
160
|
+
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QpFKzvi2cdnScjln9WJdBxkrBFJl79PK1SW0pT7vGrs,10877
|
161
161
|
airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=UgQkC-J7T9u3twlILRCvXXrcHJCwkjQYKhdNvccYhbY,1424
|
162
162
|
airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=dceNH10qb-V3sTHOMcuLy_07nSde1s7l3E7KsQf8pI0,2715
|
163
163
|
airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=BdmcM-9zVdCsuSw_A4GmStQw88IiG9XJwqpEuS3Jud4,8318
|
@@ -166,10 +166,10 @@ airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_valida
|
|
166
166
|
airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py,sha256=ckBp4rv5wiUapA4ZEoe1fc6ILx-LdcRn26W4WJCrt_k,1506
|
167
167
|
airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaFS_3pIwm-chEabZHiPJi0,265
|
168
168
|
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=tvVew6din9y8a3hItzU0PjTQrMxbVI7bK-3pRTvOswg,5810
|
169
|
-
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=
|
170
|
-
airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=
|
171
|
-
airbyte_cdk/sources/file_based/stream/cursor/
|
172
|
-
airbyte_cdk/sources/file_based/stream/cursor/
|
169
|
+
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=K81wNOjd-p69WMoJWShup2wbEumXK5CT311u-J985_w,11687
|
170
|
+
airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
|
171
|
+
airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=i-FPeK8lwCzX34GCcmvL5Yvdh8-uu7FeCVYDoFbD7IY,1920
|
172
|
+
airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=6waK1sNLNlQLxEbMtuJgv0GUl76wPhwanV7cX2DkSK0,6778
|
173
173
|
airbyte_cdk/sources/message/__init__.py,sha256=14ZSLah9uyI_CyK7_jIyq521vlgKAdihe6Ciw6-jLgU,372
|
174
174
|
airbyte_cdk/sources/message/repository.py,sha256=kflbIkUwCWXMKpe6566TD_HRjRqEZKQ0h2RpxzjWLJk,4994
|
175
175
|
airbyte_cdk/sources/singer/__init__.py,sha256=D3zQSiWT0B9t0kKE4JPZjrcDnP2YnFNJ3dfYqSaxo9w,246
|
@@ -305,31 +305,32 @@ unit_tests/sources/declarative/states/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyR
|
|
305
305
|
unit_tests/sources/declarative/stream_slicers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
306
306
|
unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py,sha256=_tEjpxK_7sbfppnYT0IhhBMBuGiEXai1qmfigF6Q9RA,7901
|
307
307
|
unit_tests/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
308
|
-
unit_tests/sources/file_based/helpers.py,sha256
|
309
|
-
unit_tests/sources/file_based/in_memory_files_source.py,sha256=
|
308
|
+
unit_tests/sources/file_based/helpers.py,sha256=-tmNKF0Jbjn8BsBz-Qkdc_3yKvWpVLaauaSwxdMHD3Q,2542
|
309
|
+
unit_tests/sources/file_based/in_memory_files_source.py,sha256=tvt3-fhg_0iN5iCAyxuOjmhmrBLavIcm3mf3Q-P9llE,8062
|
310
310
|
unit_tests/sources/file_based/test_file_based_stream_reader.py,sha256=Im3QmgXTyWXPA2VAtu9uLYas7YXB4NVsZw8zzQYZeeg,5283
|
311
|
-
unit_tests/sources/file_based/test_scenarios.py,sha256=
|
311
|
+
unit_tests/sources/file_based/test_scenarios.py,sha256=hqsHaC2n1H6AwX8W9RufNQwLyWwGacOwm29SZ4yGNSo,17923
|
312
312
|
unit_tests/sources/file_based/test_schema_helpers.py,sha256=XJ27ecw0sjlSnKgQqV1DgnnjKB1TR2btq22OITh1Qdk,12333
|
313
313
|
unit_tests/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
314
314
|
unit_tests/sources/file_based/config/test_abstract_file_based_spec.py,sha256=wmZAC-nBiUedMZi0n4zaC9oiZD9UTuYP5zJC1xxRnME,1216
|
315
|
+
unit_tests/sources/file_based/config/test_csv_format.py,sha256=WoCUSeQdaBgZf8q2Q7GTRStvAwg8A7fl9xv9UJdjv1M,1111
|
315
316
|
unit_tests/sources/file_based/config/test_file_based_stream_config.py,sha256=ISDmD13D3Sr_AlqgH9KJ-YbCogCyYyNTTiicm9NBQNw,4942
|
316
317
|
unit_tests/sources/file_based/file_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
317
318
|
unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=eFsgq9UrIUBxlVqPCcW8cUn3CeF6BNG57kRiIrHFSmg,8646
|
318
|
-
unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=
|
319
|
+
unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=RC3YgHjPhLHQ2yfDGlFL7u1YZXYwqcxPuZI_1CCgP6Y,4602
|
319
320
|
unit_tests/sources/file_based/file_types/test_jsonl_parser.py,sha256=X2iivsiEsG_PnvUo042jgrfLqGJAs-aUtw_kChdROsY,1155
|
320
321
|
unit_tests/sources/file_based/file_types/test_parquet_parser.py,sha256=nI2Vdr88fb9JsRiRgCA8CQMiB6j8cHMRIoodbo4plqY,13517
|
321
322
|
unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
322
323
|
unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=amqovlN0gKFeo_DnzQtHaHEjnmPdws1nFVOB_DeOmwc,30030
|
323
324
|
unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=lIDfVo8clROEyElYZLGi8OViahOCCTCDaIL9acAD9xo,5834
|
324
|
-
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=
|
325
|
-
unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=
|
325
|
+
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=Fg13wx-c6Cy8AleqL1DJajmrLZyh8tBebRXDr99nU34,104467
|
326
|
+
unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=gPBcbnbdlh0995Xvj6aJn9oXzfvpj-4g2LZHlWoAkBE,60710
|
326
327
|
unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=hhf2YT2-N8rg4WwPU4zK2ry0i-mmFPpMppgPEFFupcM,27503
|
327
328
|
unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=ebxJjkkgc55RnSVcr0y7BEM0DvAGLw7h54U_cv5G1xI,22200
|
328
|
-
unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=
|
329
|
+
unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=iktYJ-WuDDLdT1rsWOvx-2z3Nt4VgkrvpzsMlMDfBGA,10177
|
329
330
|
unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=fVaunKXwi-UEaG7gHQHEDAczWh8aj6TCLovY3aLuDa4,28325
|
330
331
|
unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=BPQSFO0jX9bcm2RsvPectGf1x17Fo9QgmrWNkmtX4nA,25224
|
331
332
|
unit_tests/sources/file_based/stream/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
332
|
-
unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=
|
333
|
+
unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=DHMHULhV_8pFFrdE2vaL-hIGumgsMtyKM3_0TCfAw_Y,12459
|
333
334
|
unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=22Rgp1HvZss2WdOcfenbnvx18tfFJ_trPuWp299RW5E,1545
|
334
335
|
unit_tests/sources/fixtures/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
335
336
|
unit_tests/sources/fixtures/source_test_fixture.py,sha256=r-UtR241EGQMZTw1RoKaatrpCGeQn7OIuRPWfG9f7nI,5380
|
@@ -352,8 +353,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
|
|
352
353
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
353
354
|
unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
|
354
355
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
355
|
-
airbyte_cdk-0.50.
|
356
|
-
airbyte_cdk-0.50.
|
357
|
-
airbyte_cdk-0.50.
|
358
|
-
airbyte_cdk-0.50.
|
359
|
-
airbyte_cdk-0.50.
|
356
|
+
airbyte_cdk-0.50.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
357
|
+
airbyte_cdk-0.50.2.dist-info/METADATA,sha256=Trm1tuhElrbiQAKTWMrRR5YUO3gxdU4B0qgfvrGmDYc,9439
|
358
|
+
airbyte_cdk-0.50.2.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
|
359
|
+
airbyte_cdk-0.50.2.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
360
|
+
airbyte_cdk-0.50.2.dist-info/RECORD,,
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import pytest as pytest
|
6
|
+
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
7
|
+
|
8
|
+
|
9
|
+
@pytest.mark.parametrize(
|
10
|
+
"skip_rows_before_header, autogenerate_column_names, expected_error",
|
11
|
+
[
|
12
|
+
pytest.param(1, True, ValueError, id="test_skip_rows_before_header_and_autogenerate_column_names"),
|
13
|
+
pytest.param(1, False, None, id="test_skip_rows_before_header_and_no_autogenerate_column_names"),
|
14
|
+
pytest.param(0, True, None, id="test_no_skip_rows_before_header_and_autogenerate_column_names"),
|
15
|
+
pytest.param(0, False, None, id="test_no_skip_rows_before_header_and_no_autogenerate_column_names"),
|
16
|
+
]
|
17
|
+
)
|
18
|
+
def test_csv_format(skip_rows_before_header, autogenerate_column_names, expected_error):
|
19
|
+
if expected_error:
|
20
|
+
with pytest.raises(expected_error):
|
21
|
+
CsvFormat(skip_rows_before_header=skip_rows_before_header, autogenerate_column_names=autogenerate_column_names)
|
22
|
+
else:
|
23
|
+
CsvFormat(skip_rows_before_header=skip_rows_before_header, autogenerate_column_names=autogenerate_column_names)
|
@@ -3,9 +3,12 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import logging
|
6
|
+
from unittest.mock import MagicMock, Mock
|
6
7
|
|
7
8
|
import pytest
|
8
|
-
from airbyte_cdk.sources.file_based.
|
9
|
+
from airbyte_cdk.sources.file_based.config.csv_format import DEFAULT_FALSE_VALUES, DEFAULT_TRUE_VALUES, CsvFormat
|
10
|
+
from airbyte_cdk.sources.file_based.exceptions import RecordParseError
|
11
|
+
from airbyte_cdk.sources.file_based.file_types.csv_parser import CsvParser, _cast_types
|
9
12
|
|
10
13
|
PROPERTY_TYPES = {
|
11
14
|
"col1": "null",
|
@@ -23,7 +26,7 @@ logger = logging.getLogger()
|
|
23
26
|
|
24
27
|
|
25
28
|
@pytest.mark.parametrize(
|
26
|
-
"row,expected_output",
|
29
|
+
"row, true_values, false_values, expected_output",
|
27
30
|
[
|
28
31
|
pytest.param(
|
29
32
|
{
|
@@ -36,7 +39,10 @@ logger = logging.getLogger()
|
|
36
39
|
"col7": '[1, 2]',
|
37
40
|
"col8": '["1", "2"]',
|
38
41
|
"col9": '[{"a": "b"}, {"a": "c"}]',
|
39
|
-
},
|
42
|
+
},
|
43
|
+
DEFAULT_TRUE_VALUES,
|
44
|
+
DEFAULT_FALSE_VALUES,
|
45
|
+
{
|
40
46
|
"col1": None,
|
41
47
|
"col2": True,
|
42
48
|
"col3": 1,
|
@@ -47,20 +53,46 @@ logger = logging.getLogger()
|
|
47
53
|
"col8": ["1", "2"],
|
48
54
|
"col9": [{"a": "b"}, {"a": "c"}],
|
49
55
|
}, id="cast-all-cols"),
|
50
|
-
pytest.param({"col1": "1"}, {"col1": "1"}, id="cannot-cast-to-null"),
|
51
|
-
pytest.param({"col2": "1"}, {"col2": True}, id="cast-1-to-bool"),
|
52
|
-
pytest.param({"col2": "0"}, {"col2": False}, id="cast-0-to-bool"),
|
53
|
-
pytest.param({"col2": "yes"}, {"col2": True}, id="cast-yes-to-bool"),
|
54
|
-
pytest.param({"col2": "
|
55
|
-
pytest.param({"col2": "
|
56
|
-
pytest.param({"
|
57
|
-
pytest.param({"
|
58
|
-
pytest.param({"
|
59
|
-
pytest.param({"
|
60
|
-
pytest.param({"
|
61
|
-
pytest.param({"
|
62
|
-
pytest.param({"
|
56
|
+
pytest.param({"col1": "1"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col1": "1"}, id="cannot-cast-to-null"),
|
57
|
+
pytest.param({"col2": "1"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": True}, id="cast-1-to-bool"),
|
58
|
+
pytest.param({"col2": "0"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": False}, id="cast-0-to-bool"),
|
59
|
+
pytest.param({"col2": "yes"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": True}, id="cast-yes-to-bool"),
|
60
|
+
pytest.param({"col2": "this_is_a_true_value"}, ["this_is_a_true_value"], DEFAULT_FALSE_VALUES, {"col2": True}, id="cast-custom-true-value-to-bool"),
|
61
|
+
pytest.param({"col2": "this_is_a_false_value"}, DEFAULT_TRUE_VALUES, ["this_is_a_false_value"], {"col2": False}, id="cast-custom-false-value-to-bool"),
|
62
|
+
pytest.param({"col2": "no"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": False}, id="cast-no-to-bool"),
|
63
|
+
pytest.param({"col2": "10"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": "10"}, id="cannot-cast-to-bool"),
|
64
|
+
pytest.param({"col3": "1.1"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col3": "1.1"}, id="cannot-cast-to-int"),
|
65
|
+
pytest.param({"col4": "asdf"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col4": "asdf"}, id="cannot-cast-to-float"),
|
66
|
+
pytest.param({"col6": "{'a': 'b'}"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col6": "{'a': 'b'}"}, id="cannot-cast-to-dict"),
|
67
|
+
pytest.param({"col7": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col7": "['a', 'b']"}, id="cannot-cast-to-list-of-ints"),
|
68
|
+
pytest.param({"col8": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col8": "['a', 'b']"}, id="cannot-cast-to-list-of-strings"),
|
69
|
+
pytest.param({"col9": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col9": "['a', 'b']"}, id="cannot-cast-to-list-of-objects"),
|
70
|
+
pytest.param({"col10": "x"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col10": "x"}, id="item-not-in-props-doesn't-error"),
|
71
|
+
]
|
72
|
+
)
|
73
|
+
def test_cast_to_python_type(row, true_values, false_values, expected_output):
|
74
|
+
csv_format = CsvFormat(true_values=true_values, false_values=false_values)
|
75
|
+
assert _cast_types(row, PROPERTY_TYPES, csv_format, logger) == expected_output
|
76
|
+
|
77
|
+
|
78
|
+
@pytest.mark.parametrize(
|
79
|
+
"reader_values, expected_rows", [
|
80
|
+
pytest.param([{"col1": "1", "col2": None}], None, id="raise_exception_if_any_value_is_none"),
|
81
|
+
pytest.param([{"col1": "1", "col2": "2"}], [{"col1": "1", "col2": "2"}], id="read_no_cast"),
|
63
82
|
]
|
64
83
|
)
|
65
|
-
def
|
66
|
-
|
84
|
+
def test_read_and_cast_types(reader_values, expected_rows):
|
85
|
+
reader = MagicMock()
|
86
|
+
reader.__iter__.return_value = reader_values
|
87
|
+
schema = {}
|
88
|
+
config_format = CsvFormat()
|
89
|
+
logger = Mock()
|
90
|
+
|
91
|
+
parser = CsvParser()
|
92
|
+
|
93
|
+
expected_rows = expected_rows
|
94
|
+
if expected_rows is None:
|
95
|
+
with pytest.raises(RecordParseError):
|
96
|
+
list(parser._read_and_cast_types(reader, schema, config_format, logger))
|
97
|
+
else:
|
98
|
+
assert expected_rows == list(parser._read_and_cast_types(reader, schema, config_format, logger))
|
@@ -14,6 +14,7 @@ from airbyte_cdk.sources.file_based.file_types.csv_parser import CsvParser
|
|
14
14
|
from airbyte_cdk.sources.file_based.file_types.jsonl_parser import JsonlParser
|
15
15
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
16
16
|
from airbyte_cdk.sources.file_based.schema_validation_policies import AbstractSchemaValidationPolicy
|
17
|
+
from airbyte_cdk.sources.file_based.stream.cursor import DefaultFileBasedCursor
|
17
18
|
from unit_tests.sources.file_based.in_memory_files_source import InMemoryFilesStreamReader
|
18
19
|
|
19
20
|
|
@@ -54,6 +55,10 @@ class FailingSchemaValidationPolicy(AbstractSchemaValidationPolicy):
|
|
54
55
|
return False
|
55
56
|
|
56
57
|
|
58
|
+
class LowHistoryLimitCursor(DefaultFileBasedCursor):
|
59
|
+
DEFAULT_MAX_HISTORY_SIZE = 3
|
60
|
+
|
61
|
+
|
57
62
|
def make_remote_files(files: List[str]) -> List[RemoteFile]:
|
58
63
|
return [
|
59
64
|
RemoteFile(uri=f, last_modified=datetime.strptime("2023-06-05T03:54:07.000Z", "%Y-%m-%dT%H:%M:%S.%fZ"))
|
@@ -20,11 +20,12 @@ from airbyte_cdk.models import ConfiguredAirbyteCatalog
|
|
20
20
|
from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy, DefaultFileBasedAvailabilityStrategy
|
21
21
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
|
22
22
|
from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy, DefaultDiscoveryPolicy
|
23
|
-
from airbyte_cdk.sources.file_based.file_based_source import
|
23
|
+
from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
|
24
24
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
25
25
|
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
|
26
26
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
27
27
|
from airbyte_cdk.sources.file_based.schema_validation_policies import DEFAULT_SCHEMA_VALIDATION_POLICIES, AbstractSchemaValidationPolicy
|
28
|
+
from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor, DefaultFileBasedCursor
|
28
29
|
from avro import datafile
|
29
30
|
from pydantic import AnyUrl, Field
|
30
31
|
|
@@ -41,7 +42,7 @@ class InMemoryFilesSource(FileBasedSource):
|
|
41
42
|
stream_reader: Optional[AbstractFileBasedStreamReader],
|
42
43
|
catalog: Optional[Mapping[str, Any]],
|
43
44
|
file_write_options: Mapping[str, Any],
|
44
|
-
|
45
|
+
cursor_cls: Optional[AbstractFileBasedCursor],
|
45
46
|
):
|
46
47
|
# Attributes required for test purposes
|
47
48
|
self.files = files
|
@@ -59,7 +60,7 @@ class InMemoryFilesSource(FileBasedSource):
|
|
59
60
|
discovery_policy=discovery_policy or DefaultDiscoveryPolicy(),
|
60
61
|
parsers=parsers,
|
61
62
|
validation_policies=validation_policies or DEFAULT_SCHEMA_VALIDATION_POLICIES,
|
62
|
-
|
63
|
+
cursor_cls=cursor_cls or DefaultFileBasedCursor,
|
63
64
|
)
|
64
65
|
|
65
66
|
def read_catalog(self, catalog_path: str) -> ConfiguredAirbyteCatalog:
|
@@ -100,7 +101,14 @@ class InMemoryFilesStreamReader(AbstractFileBasedStreamReader):
|
|
100
101
|
raise NotImplementedError(f"No implementation for file type: {self.file_type}")
|
101
102
|
|
102
103
|
def _make_csv_file_contents(self, file_name: str) -> IOBase:
|
104
|
+
|
105
|
+
# Some tests define the csv as an array of strings to make it easier to validate the handling
|
106
|
+
# of quotes, delimiter, and escape chars.
|
107
|
+
if isinstance(self.files[file_name]["contents"][0], str):
|
108
|
+
return io.StringIO("\n".join([s.strip() for s in self.files[file_name]["contents"]]))
|
109
|
+
|
103
110
|
fh = io.StringIO()
|
111
|
+
|
104
112
|
if self.file_write_options:
|
105
113
|
csv.register_dialect("in_memory_dialect", **self.file_write_options)
|
106
114
|
writer = csv.writer(fh, dialect="in_memory_dialect")
|