airbyte-cdk 0.50.0__py3-none-any.whl → 0.50.2__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/entrypoint.py +7 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3 -3
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +3 -3
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +9 -9
- airbyte_cdk/sources/file_based/config/csv_format.py +42 -6
- airbyte_cdk/sources/file_based/file_based_source.py +4 -5
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +114 -59
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py +2 -2
- airbyte_cdk/sources/file_based/stream/cursor/{file_based_cursor.py → abstract_file_based_cursor.py} +9 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +10 -10
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +15 -2
- {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/RECORD +25 -24
- unit_tests/sources/file_based/config/test_csv_format.py +23 -0
- unit_tests/sources/file_based/file_types/test_csv_parser.py +50 -18
- unit_tests/sources/file_based/helpers.py +5 -0
- unit_tests/sources/file_based/in_memory_files_source.py +11 -3
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +1254 -47
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +6 -5
- unit_tests/sources/file_based/scenarios/scenario_builder.py +8 -7
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +13 -12
- unit_tests/sources/file_based/test_scenarios.py +30 -0
- {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
|
|
1
1
|
airbyte_cdk/__init__.py,sha256=OBQWv5rF_QTRpOiP6J8J8oTU-GGrfi18i1PRFpahKks,262
|
2
2
|
airbyte_cdk/config_observation.py,sha256=3kjxv8xTwCnub2_fTWnMPRx0E7vly1BUeyXOSK15Ql4,3610
|
3
3
|
airbyte_cdk/connector.py,sha256=LtTAmBFV1LBUz_fOEbQ_EvBhyUsz8AGOlDsvK8QOOo0,4396
|
4
|
-
airbyte_cdk/entrypoint.py,sha256=
|
4
|
+
airbyte_cdk/entrypoint.py,sha256=guPK0UnKWxpN0G3d0etxkaXX7wtgWIymLJ8M1ssKNQc,12393
|
5
5
|
airbyte_cdk/exception_handler.py,sha256=CwkiPdZ1WMOr3CBkvKFyHiyLerXGRqBrVlB4p0OImGI,1125
|
6
6
|
airbyte_cdk/logger.py,sha256=4Mi2MEQi1uh59BP9Dxw_UEbZuxaJewqK_jvEU2b10nk,3985
|
7
7
|
airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -23,7 +23,7 @@ airbyte_cdk/sources/http_logger.py,sha256=v0kkpDtA0GUOgj6_3AayrYaBrSHBqG4t3MGbrt
|
|
23
23
|
airbyte_cdk/sources/source.py,sha256=N3vHZzdUsBETFsql-YpO-LcgjolT_jcnAuHBhGD6Hqk,4278
|
24
24
|
airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
25
25
|
airbyte_cdk/sources/declarative/create_partial.py,sha256=sUJOwD8hBzW4pxw2XhYlSTMgl-WMc5WpP5Oq_jo3fHw,3371
|
26
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
26
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=5Z8OMI6UrR6n6CwaklMhm3_z7JsQvhXDVCGb1S6o5iU,83049
|
27
27
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=U2As9PDKmcWDgbsWUo-RetJ9fxQOBlwntWZ0NOgs5Ac,1453
|
28
28
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=3E8I_hsJC9vlh8BgMOAbUaEhQVvlzSeDCo5nEkCuDzg,5844
|
29
29
|
airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
|
@@ -64,7 +64,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
|
|
64
64
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=Dc0F87nElWsz_Ikj938eQ9uqZvyqgFhZ8Dqf_-hvndc,4800
|
65
65
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
|
66
66
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
|
67
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
67
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=HTK0MrQpNV3CfmcCURT8pKCkPuqzJIp5auFmiIIiylo,57104
|
68
68
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
69
69
|
airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=bK4a74opm6WHyV7HqOVws6GE5Z7cLNc5MaTha69abIQ,6086
|
70
70
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
|
@@ -137,18 +137,18 @@ airbyte_cdk/sources/embedded/runner.py,sha256=TykiigEz39Y4QryHTbSm7VGvFOxo7duiCY
|
|
137
137
|
airbyte_cdk/sources/embedded/tools.py,sha256=-Z4tZ4AP1OTi_zrqFM3YV8Rt7c60wvsrv0Dc-rTZ2uw,744
|
138
138
|
airbyte_cdk/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
139
139
|
airbyte_cdk/sources/file_based/exceptions.py,sha256=GKdFFteVMgYS1nzr2SeDXm7DYTHvs1PLoftWoXOl5Vk,3810
|
140
|
-
airbyte_cdk/sources/file_based/file_based_source.py,sha256=
|
140
|
+
airbyte_cdk/sources/file_based/file_based_source.py,sha256=uOxyngmCjuBLniWOHDpivXH8WlYIfzWqQOQSjBlVzvc,6786
|
141
141
|
airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=s2okNhgsI-d6_duhb-9osJ1RioQQI5NmqC0U9kNeYrc,3420
|
142
142
|
airbyte_cdk/sources/file_based/remote_file.py,sha256=s3Qz2N786yqSMXqcWmsTOvYhgs-ry0xFcn5fGyyz7bY,581
|
143
143
|
airbyte_cdk/sources/file_based/schema_helpers.py,sha256=c22G3ukPPayoOioSMUjtWSjd4zXih9X_yKvAPRmogfE,9025
|
144
144
|
airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
|
145
145
|
airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=WiPPuQGfmQlFUMFR5h3ECc-VzBj4vair6_4WAL87AEI,277
|
146
146
|
airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=NeHCiG4FFohzYpQQFfmTL4-5oI0nElHWgXX1xrm8-SU,1269
|
147
|
-
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=
|
147
|
+
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=axNPioHqk-fvQRqQTxjc4wKZeT4LSRA8u7lWG_EiSDk,4611
|
148
148
|
airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
149
149
|
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=CCZKoFMX9-tDEY_RY_ziYil0PSPTeKsz6vuoKv6XqvY,4523
|
150
150
|
airbyte_cdk/sources/file_based/config/avro_format.py,sha256=KkV7i-36aITHnacBztpvllmBDXIXwEfKbgRYEICgs30,565
|
151
|
-
airbyte_cdk/sources/file_based/config/csv_format.py,sha256=
|
151
|
+
airbyte_cdk/sources/file_based/config/csv_format.py,sha256=3Hh2McAIvMT8G6TFCGlq2vd96ox7xcO9K7jbIa-2SPo,5061
|
152
152
|
airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=-iqYoUbC3Nuy97_mKdQMZxhEVlhr3a4aoTqGOmqA0-Y,5878
|
153
153
|
airbyte_cdk/sources/file_based/config/jsonl_format.py,sha256=BI1VHv-0qbbdw8ZH7t9iV4a7JfSWRzPatkeOvLU_GI0,204
|
154
154
|
airbyte_cdk/sources/file_based/config/parquet_format.py,sha256=wqe8iF-QzIto97uoRlDBNI84ED-4H1CZyMy-TSo8Z6Y,573
|
@@ -157,7 +157,7 @@ airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha
|
|
157
157
|
airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=Mx3zT9Dem4uNfaUT0oOtrESsuB1LrGAi5N-uw2swZZA,701
|
158
158
|
airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=N3a8cjXwRUN2__46IJTwrWlsyFiSA1xtSgPcPH28sn0,476
|
159
159
|
airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=2wzcZmQ0_VYyJmRYllGrEEYwrP9hYwXVJKz-DO6JyoE,8079
|
160
|
-
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=
|
160
|
+
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QpFKzvi2cdnScjln9WJdBxkrBFJl79PK1SW0pT7vGrs,10877
|
161
161
|
airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=UgQkC-J7T9u3twlILRCvXXrcHJCwkjQYKhdNvccYhbY,1424
|
162
162
|
airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=dceNH10qb-V3sTHOMcuLy_07nSde1s7l3E7KsQf8pI0,2715
|
163
163
|
airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=BdmcM-9zVdCsuSw_A4GmStQw88IiG9XJwqpEuS3Jud4,8318
|
@@ -166,10 +166,10 @@ airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_valida
|
|
166
166
|
airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py,sha256=ckBp4rv5wiUapA4ZEoe1fc6ILx-LdcRn26W4WJCrt_k,1506
|
167
167
|
airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaFS_3pIwm-chEabZHiPJi0,265
|
168
168
|
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=tvVew6din9y8a3hItzU0PjTQrMxbVI7bK-3pRTvOswg,5810
|
169
|
-
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=
|
170
|
-
airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=
|
171
|
-
airbyte_cdk/sources/file_based/stream/cursor/
|
172
|
-
airbyte_cdk/sources/file_based/stream/cursor/
|
169
|
+
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=K81wNOjd-p69WMoJWShup2wbEumXK5CT311u-J985_w,11687
|
170
|
+
airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
|
171
|
+
airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=i-FPeK8lwCzX34GCcmvL5Yvdh8-uu7FeCVYDoFbD7IY,1920
|
172
|
+
airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=6waK1sNLNlQLxEbMtuJgv0GUl76wPhwanV7cX2DkSK0,6778
|
173
173
|
airbyte_cdk/sources/message/__init__.py,sha256=14ZSLah9uyI_CyK7_jIyq521vlgKAdihe6Ciw6-jLgU,372
|
174
174
|
airbyte_cdk/sources/message/repository.py,sha256=kflbIkUwCWXMKpe6566TD_HRjRqEZKQ0h2RpxzjWLJk,4994
|
175
175
|
airbyte_cdk/sources/singer/__init__.py,sha256=D3zQSiWT0B9t0kKE4JPZjrcDnP2YnFNJ3dfYqSaxo9w,246
|
@@ -305,31 +305,32 @@ unit_tests/sources/declarative/states/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyR
|
|
305
305
|
unit_tests/sources/declarative/stream_slicers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
306
306
|
unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py,sha256=_tEjpxK_7sbfppnYT0IhhBMBuGiEXai1qmfigF6Q9RA,7901
|
307
307
|
unit_tests/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
308
|
-
unit_tests/sources/file_based/helpers.py,sha256
|
309
|
-
unit_tests/sources/file_based/in_memory_files_source.py,sha256=
|
308
|
+
unit_tests/sources/file_based/helpers.py,sha256=-tmNKF0Jbjn8BsBz-Qkdc_3yKvWpVLaauaSwxdMHD3Q,2542
|
309
|
+
unit_tests/sources/file_based/in_memory_files_source.py,sha256=tvt3-fhg_0iN5iCAyxuOjmhmrBLavIcm3mf3Q-P9llE,8062
|
310
310
|
unit_tests/sources/file_based/test_file_based_stream_reader.py,sha256=Im3QmgXTyWXPA2VAtu9uLYas7YXB4NVsZw8zzQYZeeg,5283
|
311
|
-
unit_tests/sources/file_based/test_scenarios.py,sha256=
|
311
|
+
unit_tests/sources/file_based/test_scenarios.py,sha256=hqsHaC2n1H6AwX8W9RufNQwLyWwGacOwm29SZ4yGNSo,17923
|
312
312
|
unit_tests/sources/file_based/test_schema_helpers.py,sha256=XJ27ecw0sjlSnKgQqV1DgnnjKB1TR2btq22OITh1Qdk,12333
|
313
313
|
unit_tests/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
314
314
|
unit_tests/sources/file_based/config/test_abstract_file_based_spec.py,sha256=wmZAC-nBiUedMZi0n4zaC9oiZD9UTuYP5zJC1xxRnME,1216
|
315
|
+
unit_tests/sources/file_based/config/test_csv_format.py,sha256=WoCUSeQdaBgZf8q2Q7GTRStvAwg8A7fl9xv9UJdjv1M,1111
|
315
316
|
unit_tests/sources/file_based/config/test_file_based_stream_config.py,sha256=ISDmD13D3Sr_AlqgH9KJ-YbCogCyYyNTTiicm9NBQNw,4942
|
316
317
|
unit_tests/sources/file_based/file_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
317
318
|
unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=eFsgq9UrIUBxlVqPCcW8cUn3CeF6BNG57kRiIrHFSmg,8646
|
318
|
-
unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=
|
319
|
+
unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=RC3YgHjPhLHQ2yfDGlFL7u1YZXYwqcxPuZI_1CCgP6Y,4602
|
319
320
|
unit_tests/sources/file_based/file_types/test_jsonl_parser.py,sha256=X2iivsiEsG_PnvUo042jgrfLqGJAs-aUtw_kChdROsY,1155
|
320
321
|
unit_tests/sources/file_based/file_types/test_parquet_parser.py,sha256=nI2Vdr88fb9JsRiRgCA8CQMiB6j8cHMRIoodbo4plqY,13517
|
321
322
|
unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
322
323
|
unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=amqovlN0gKFeo_DnzQtHaHEjnmPdws1nFVOB_DeOmwc,30030
|
323
324
|
unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=lIDfVo8clROEyElYZLGi8OViahOCCTCDaIL9acAD9xo,5834
|
324
|
-
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=
|
325
|
-
unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=
|
325
|
+
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=Fg13wx-c6Cy8AleqL1DJajmrLZyh8tBebRXDr99nU34,104467
|
326
|
+
unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=gPBcbnbdlh0995Xvj6aJn9oXzfvpj-4g2LZHlWoAkBE,60710
|
326
327
|
unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=hhf2YT2-N8rg4WwPU4zK2ry0i-mmFPpMppgPEFFupcM,27503
|
327
328
|
unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=ebxJjkkgc55RnSVcr0y7BEM0DvAGLw7h54U_cv5G1xI,22200
|
328
|
-
unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=
|
329
|
+
unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=iktYJ-WuDDLdT1rsWOvx-2z3Nt4VgkrvpzsMlMDfBGA,10177
|
329
330
|
unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=fVaunKXwi-UEaG7gHQHEDAczWh8aj6TCLovY3aLuDa4,28325
|
330
331
|
unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=BPQSFO0jX9bcm2RsvPectGf1x17Fo9QgmrWNkmtX4nA,25224
|
331
332
|
unit_tests/sources/file_based/stream/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
332
|
-
unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=
|
333
|
+
unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=DHMHULhV_8pFFrdE2vaL-hIGumgsMtyKM3_0TCfAw_Y,12459
|
333
334
|
unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=22Rgp1HvZss2WdOcfenbnvx18tfFJ_trPuWp299RW5E,1545
|
334
335
|
unit_tests/sources/fixtures/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
335
336
|
unit_tests/sources/fixtures/source_test_fixture.py,sha256=r-UtR241EGQMZTw1RoKaatrpCGeQn7OIuRPWfG9f7nI,5380
|
@@ -352,8 +353,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
|
|
352
353
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
353
354
|
unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
|
354
355
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
355
|
-
airbyte_cdk-0.50.
|
356
|
-
airbyte_cdk-0.50.
|
357
|
-
airbyte_cdk-0.50.
|
358
|
-
airbyte_cdk-0.50.
|
359
|
-
airbyte_cdk-0.50.
|
356
|
+
airbyte_cdk-0.50.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
357
|
+
airbyte_cdk-0.50.2.dist-info/METADATA,sha256=Trm1tuhElrbiQAKTWMrRR5YUO3gxdU4B0qgfvrGmDYc,9439
|
358
|
+
airbyte_cdk-0.50.2.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
|
359
|
+
airbyte_cdk-0.50.2.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
360
|
+
airbyte_cdk-0.50.2.dist-info/RECORD,,
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import pytest as pytest
|
6
|
+
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
7
|
+
|
8
|
+
|
9
|
+
@pytest.mark.parametrize(
|
10
|
+
"skip_rows_before_header, autogenerate_column_names, expected_error",
|
11
|
+
[
|
12
|
+
pytest.param(1, True, ValueError, id="test_skip_rows_before_header_and_autogenerate_column_names"),
|
13
|
+
pytest.param(1, False, None, id="test_skip_rows_before_header_and_no_autogenerate_column_names"),
|
14
|
+
pytest.param(0, True, None, id="test_no_skip_rows_before_header_and_autogenerate_column_names"),
|
15
|
+
pytest.param(0, False, None, id="test_no_skip_rows_before_header_and_no_autogenerate_column_names"),
|
16
|
+
]
|
17
|
+
)
|
18
|
+
def test_csv_format(skip_rows_before_header, autogenerate_column_names, expected_error):
|
19
|
+
if expected_error:
|
20
|
+
with pytest.raises(expected_error):
|
21
|
+
CsvFormat(skip_rows_before_header=skip_rows_before_header, autogenerate_column_names=autogenerate_column_names)
|
22
|
+
else:
|
23
|
+
CsvFormat(skip_rows_before_header=skip_rows_before_header, autogenerate_column_names=autogenerate_column_names)
|
@@ -3,9 +3,12 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import logging
|
6
|
+
from unittest.mock import MagicMock, Mock
|
6
7
|
|
7
8
|
import pytest
|
8
|
-
from airbyte_cdk.sources.file_based.
|
9
|
+
from airbyte_cdk.sources.file_based.config.csv_format import DEFAULT_FALSE_VALUES, DEFAULT_TRUE_VALUES, CsvFormat
|
10
|
+
from airbyte_cdk.sources.file_based.exceptions import RecordParseError
|
11
|
+
from airbyte_cdk.sources.file_based.file_types.csv_parser import CsvParser, _cast_types
|
9
12
|
|
10
13
|
PROPERTY_TYPES = {
|
11
14
|
"col1": "null",
|
@@ -23,7 +26,7 @@ logger = logging.getLogger()
|
|
23
26
|
|
24
27
|
|
25
28
|
@pytest.mark.parametrize(
|
26
|
-
"row,expected_output",
|
29
|
+
"row, true_values, false_values, expected_output",
|
27
30
|
[
|
28
31
|
pytest.param(
|
29
32
|
{
|
@@ -36,7 +39,10 @@ logger = logging.getLogger()
|
|
36
39
|
"col7": '[1, 2]',
|
37
40
|
"col8": '["1", "2"]',
|
38
41
|
"col9": '[{"a": "b"}, {"a": "c"}]',
|
39
|
-
},
|
42
|
+
},
|
43
|
+
DEFAULT_TRUE_VALUES,
|
44
|
+
DEFAULT_FALSE_VALUES,
|
45
|
+
{
|
40
46
|
"col1": None,
|
41
47
|
"col2": True,
|
42
48
|
"col3": 1,
|
@@ -47,20 +53,46 @@ logger = logging.getLogger()
|
|
47
53
|
"col8": ["1", "2"],
|
48
54
|
"col9": [{"a": "b"}, {"a": "c"}],
|
49
55
|
}, id="cast-all-cols"),
|
50
|
-
pytest.param({"col1": "1"}, {"col1": "1"}, id="cannot-cast-to-null"),
|
51
|
-
pytest.param({"col2": "1"}, {"col2": True}, id="cast-1-to-bool"),
|
52
|
-
pytest.param({"col2": "0"}, {"col2": False}, id="cast-0-to-bool"),
|
53
|
-
pytest.param({"col2": "yes"}, {"col2": True}, id="cast-yes-to-bool"),
|
54
|
-
pytest.param({"col2": "
|
55
|
-
pytest.param({"col2": "
|
56
|
-
pytest.param({"
|
57
|
-
pytest.param({"
|
58
|
-
pytest.param({"
|
59
|
-
pytest.param({"
|
60
|
-
pytest.param({"
|
61
|
-
pytest.param({"
|
62
|
-
pytest.param({"
|
56
|
+
pytest.param({"col1": "1"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col1": "1"}, id="cannot-cast-to-null"),
|
57
|
+
pytest.param({"col2": "1"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": True}, id="cast-1-to-bool"),
|
58
|
+
pytest.param({"col2": "0"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": False}, id="cast-0-to-bool"),
|
59
|
+
pytest.param({"col2": "yes"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": True}, id="cast-yes-to-bool"),
|
60
|
+
pytest.param({"col2": "this_is_a_true_value"}, ["this_is_a_true_value"], DEFAULT_FALSE_VALUES, {"col2": True}, id="cast-custom-true-value-to-bool"),
|
61
|
+
pytest.param({"col2": "this_is_a_false_value"}, DEFAULT_TRUE_VALUES, ["this_is_a_false_value"], {"col2": False}, id="cast-custom-false-value-to-bool"),
|
62
|
+
pytest.param({"col2": "no"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": False}, id="cast-no-to-bool"),
|
63
|
+
pytest.param({"col2": "10"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": "10"}, id="cannot-cast-to-bool"),
|
64
|
+
pytest.param({"col3": "1.1"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col3": "1.1"}, id="cannot-cast-to-int"),
|
65
|
+
pytest.param({"col4": "asdf"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col4": "asdf"}, id="cannot-cast-to-float"),
|
66
|
+
pytest.param({"col6": "{'a': 'b'}"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col6": "{'a': 'b'}"}, id="cannot-cast-to-dict"),
|
67
|
+
pytest.param({"col7": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col7": "['a', 'b']"}, id="cannot-cast-to-list-of-ints"),
|
68
|
+
pytest.param({"col8": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col8": "['a', 'b']"}, id="cannot-cast-to-list-of-strings"),
|
69
|
+
pytest.param({"col9": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col9": "['a', 'b']"}, id="cannot-cast-to-list-of-objects"),
|
70
|
+
pytest.param({"col10": "x"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col10": "x"}, id="item-not-in-props-doesn't-error"),
|
71
|
+
]
|
72
|
+
)
|
73
|
+
def test_cast_to_python_type(row, true_values, false_values, expected_output):
|
74
|
+
csv_format = CsvFormat(true_values=true_values, false_values=false_values)
|
75
|
+
assert _cast_types(row, PROPERTY_TYPES, csv_format, logger) == expected_output
|
76
|
+
|
77
|
+
|
78
|
+
@pytest.mark.parametrize(
|
79
|
+
"reader_values, expected_rows", [
|
80
|
+
pytest.param([{"col1": "1", "col2": None}], None, id="raise_exception_if_any_value_is_none"),
|
81
|
+
pytest.param([{"col1": "1", "col2": "2"}], [{"col1": "1", "col2": "2"}], id="read_no_cast"),
|
63
82
|
]
|
64
83
|
)
|
65
|
-
def
|
66
|
-
|
84
|
+
def test_read_and_cast_types(reader_values, expected_rows):
|
85
|
+
reader = MagicMock()
|
86
|
+
reader.__iter__.return_value = reader_values
|
87
|
+
schema = {}
|
88
|
+
config_format = CsvFormat()
|
89
|
+
logger = Mock()
|
90
|
+
|
91
|
+
parser = CsvParser()
|
92
|
+
|
93
|
+
expected_rows = expected_rows
|
94
|
+
if expected_rows is None:
|
95
|
+
with pytest.raises(RecordParseError):
|
96
|
+
list(parser._read_and_cast_types(reader, schema, config_format, logger))
|
97
|
+
else:
|
98
|
+
assert expected_rows == list(parser._read_and_cast_types(reader, schema, config_format, logger))
|
@@ -14,6 +14,7 @@ from airbyte_cdk.sources.file_based.file_types.csv_parser import CsvParser
|
|
14
14
|
from airbyte_cdk.sources.file_based.file_types.jsonl_parser import JsonlParser
|
15
15
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
16
16
|
from airbyte_cdk.sources.file_based.schema_validation_policies import AbstractSchemaValidationPolicy
|
17
|
+
from airbyte_cdk.sources.file_based.stream.cursor import DefaultFileBasedCursor
|
17
18
|
from unit_tests.sources.file_based.in_memory_files_source import InMemoryFilesStreamReader
|
18
19
|
|
19
20
|
|
@@ -54,6 +55,10 @@ class FailingSchemaValidationPolicy(AbstractSchemaValidationPolicy):
|
|
54
55
|
return False
|
55
56
|
|
56
57
|
|
58
|
+
class LowHistoryLimitCursor(DefaultFileBasedCursor):
|
59
|
+
DEFAULT_MAX_HISTORY_SIZE = 3
|
60
|
+
|
61
|
+
|
57
62
|
def make_remote_files(files: List[str]) -> List[RemoteFile]:
|
58
63
|
return [
|
59
64
|
RemoteFile(uri=f, last_modified=datetime.strptime("2023-06-05T03:54:07.000Z", "%Y-%m-%dT%H:%M:%S.%fZ"))
|
@@ -20,11 +20,12 @@ from airbyte_cdk.models import ConfiguredAirbyteCatalog
|
|
20
20
|
from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy, DefaultFileBasedAvailabilityStrategy
|
21
21
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
|
22
22
|
from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy, DefaultDiscoveryPolicy
|
23
|
-
from airbyte_cdk.sources.file_based.file_based_source import
|
23
|
+
from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
|
24
24
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
25
25
|
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
|
26
26
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
27
27
|
from airbyte_cdk.sources.file_based.schema_validation_policies import DEFAULT_SCHEMA_VALIDATION_POLICIES, AbstractSchemaValidationPolicy
|
28
|
+
from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor, DefaultFileBasedCursor
|
28
29
|
from avro import datafile
|
29
30
|
from pydantic import AnyUrl, Field
|
30
31
|
|
@@ -41,7 +42,7 @@ class InMemoryFilesSource(FileBasedSource):
|
|
41
42
|
stream_reader: Optional[AbstractFileBasedStreamReader],
|
42
43
|
catalog: Optional[Mapping[str, Any]],
|
43
44
|
file_write_options: Mapping[str, Any],
|
44
|
-
|
45
|
+
cursor_cls: Optional[AbstractFileBasedCursor],
|
45
46
|
):
|
46
47
|
# Attributes required for test purposes
|
47
48
|
self.files = files
|
@@ -59,7 +60,7 @@ class InMemoryFilesSource(FileBasedSource):
|
|
59
60
|
discovery_policy=discovery_policy or DefaultDiscoveryPolicy(),
|
60
61
|
parsers=parsers,
|
61
62
|
validation_policies=validation_policies or DEFAULT_SCHEMA_VALIDATION_POLICIES,
|
62
|
-
|
63
|
+
cursor_cls=cursor_cls or DefaultFileBasedCursor,
|
63
64
|
)
|
64
65
|
|
65
66
|
def read_catalog(self, catalog_path: str) -> ConfiguredAirbyteCatalog:
|
@@ -100,7 +101,14 @@ class InMemoryFilesStreamReader(AbstractFileBasedStreamReader):
|
|
100
101
|
raise NotImplementedError(f"No implementation for file type: {self.file_type}")
|
101
102
|
|
102
103
|
def _make_csv_file_contents(self, file_name: str) -> IOBase:
|
104
|
+
|
105
|
+
# Some tests define the csv as an array of strings to make it easier to validate the handling
|
106
|
+
# of quotes, delimiter, and escpare chars.
|
107
|
+
if isinstance(self.files[file_name]["contents"][0], str):
|
108
|
+
return io.StringIO("\n".join([s.strip() for s in self.files[file_name]["contents"]]))
|
109
|
+
|
103
110
|
fh = io.StringIO()
|
111
|
+
|
104
112
|
if self.file_write_options:
|
105
113
|
csv.register_dialect("in_memory_dialect", **self.file_write_options)
|
106
114
|
writer = csv.writer(fh, dialect="in_memory_dialect")
|