airbyte-cdk 0.50.0__py3-none-any.whl → 0.50.2__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (25) hide show
  1. airbyte_cdk/entrypoint.py +7 -0
  2. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3 -3
  3. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +3 -3
  4. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +9 -9
  5. airbyte_cdk/sources/file_based/config/csv_format.py +42 -6
  6. airbyte_cdk/sources/file_based/file_based_source.py +4 -5
  7. airbyte_cdk/sources/file_based/file_types/csv_parser.py +114 -59
  8. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +2 -2
  9. airbyte_cdk/sources/file_based/stream/cursor/{file_based_cursor.py → abstract_file_based_cursor.py} +9 -1
  10. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +10 -10
  11. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +15 -2
  12. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/RECORD +25 -24
  14. unit_tests/sources/file_based/config/test_csv_format.py +23 -0
  15. unit_tests/sources/file_based/file_types/test_csv_parser.py +50 -18
  16. unit_tests/sources/file_based/helpers.py +5 -0
  17. unit_tests/sources/file_based/in_memory_files_source.py +11 -3
  18. unit_tests/sources/file_based/scenarios/csv_scenarios.py +1254 -47
  19. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +6 -5
  20. unit_tests/sources/file_based/scenarios/scenario_builder.py +8 -7
  21. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +13 -12
  22. unit_tests/sources/file_based/test_scenarios.py +30 -0
  23. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/LICENSE.txt +0 -0
  24. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/WHEEL +0 -0
  25. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  airbyte_cdk/__init__.py,sha256=OBQWv5rF_QTRpOiP6J8J8oTU-GGrfi18i1PRFpahKks,262
2
2
  airbyte_cdk/config_observation.py,sha256=3kjxv8xTwCnub2_fTWnMPRx0E7vly1BUeyXOSK15Ql4,3610
3
3
  airbyte_cdk/connector.py,sha256=LtTAmBFV1LBUz_fOEbQ_EvBhyUsz8AGOlDsvK8QOOo0,4396
4
- airbyte_cdk/entrypoint.py,sha256=oerYF7ZZCbq5taSU6YDdXNinlZ1ZNYMd_kAVU77ZDD0,12168
4
+ airbyte_cdk/entrypoint.py,sha256=guPK0UnKWxpN0G3d0etxkaXX7wtgWIymLJ8M1ssKNQc,12393
5
5
  airbyte_cdk/exception_handler.py,sha256=CwkiPdZ1WMOr3CBkvKFyHiyLerXGRqBrVlB4p0OImGI,1125
6
6
  airbyte_cdk/logger.py,sha256=4Mi2MEQi1uh59BP9Dxw_UEbZuxaJewqK_jvEU2b10nk,3985
7
7
  airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -23,7 +23,7 @@ airbyte_cdk/sources/http_logger.py,sha256=v0kkpDtA0GUOgj6_3AayrYaBrSHBqG4t3MGbrt
23
23
  airbyte_cdk/sources/source.py,sha256=N3vHZzdUsBETFsql-YpO-LcgjolT_jcnAuHBhGD6Hqk,4278
24
24
  airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
25
25
  airbyte_cdk/sources/declarative/create_partial.py,sha256=sUJOwD8hBzW4pxw2XhYlSTMgl-WMc5WpP5Oq_jo3fHw,3371
26
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=JB7fX0RV-UZk9eiM1G7EU_6XcB63KQSepBeZbPoZwTs,82835
26
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=5Z8OMI6UrR6n6CwaklMhm3_z7JsQvhXDVCGb1S6o5iU,83049
27
27
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=U2As9PDKmcWDgbsWUo-RetJ9fxQOBlwntWZ0NOgs5Ac,1453
28
28
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=3E8I_hsJC9vlh8BgMOAbUaEhQVvlzSeDCo5nEkCuDzg,5844
29
29
  airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
@@ -64,7 +64,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
64
64
  airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=Dc0F87nElWsz_Ikj938eQ9uqZvyqgFhZ8Dqf_-hvndc,4800
65
65
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
66
66
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
67
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=_VlCO1SaBccp0HsUwVcwvh7cR6CAvFyF20G3rn_7pL8,56890
67
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=HTK0MrQpNV3CfmcCURT8pKCkPuqzJIp5auFmiIIiylo,57104
68
68
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
69
69
  airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=bK4a74opm6WHyV7HqOVws6GE5Z7cLNc5MaTha69abIQ,6086
70
70
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
@@ -137,18 +137,18 @@ airbyte_cdk/sources/embedded/runner.py,sha256=TykiigEz39Y4QryHTbSm7VGvFOxo7duiCY
137
137
  airbyte_cdk/sources/embedded/tools.py,sha256=-Z4tZ4AP1OTi_zrqFM3YV8Rt7c60wvsrv0Dc-rTZ2uw,744
138
138
  airbyte_cdk/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
139
139
  airbyte_cdk/sources/file_based/exceptions.py,sha256=GKdFFteVMgYS1nzr2SeDXm7DYTHvs1PLoftWoXOl5Vk,3810
140
- airbyte_cdk/sources/file_based/file_based_source.py,sha256=yu6RJCm4Zr7iJmC4uLn5_SMd-EFL0rBvnFQm4LAIBYY,6796
140
+ airbyte_cdk/sources/file_based/file_based_source.py,sha256=uOxyngmCjuBLniWOHDpivXH8WlYIfzWqQOQSjBlVzvc,6786
141
141
  airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=s2okNhgsI-d6_duhb-9osJ1RioQQI5NmqC0U9kNeYrc,3420
142
142
  airbyte_cdk/sources/file_based/remote_file.py,sha256=s3Qz2N786yqSMXqcWmsTOvYhgs-ry0xFcn5fGyyz7bY,581
143
143
  airbyte_cdk/sources/file_based/schema_helpers.py,sha256=c22G3ukPPayoOioSMUjtWSjd4zXih9X_yKvAPRmogfE,9025
144
144
  airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
145
145
  airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=WiPPuQGfmQlFUMFR5h3ECc-VzBj4vair6_4WAL87AEI,277
146
146
  airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=NeHCiG4FFohzYpQQFfmTL4-5oI0nElHWgXX1xrm8-SU,1269
147
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=hcVf30sgMySsXosMDOK0AZ8j-LaiVCSjJHQP1b3bJIU,4705
147
+ airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=axNPioHqk-fvQRqQTxjc4wKZeT4LSRA8u7lWG_EiSDk,4611
148
148
  airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
149
  airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=CCZKoFMX9-tDEY_RY_ziYil0PSPTeKsz6vuoKv6XqvY,4523
150
150
  airbyte_cdk/sources/file_based/config/avro_format.py,sha256=KkV7i-36aITHnacBztpvllmBDXIXwEfKbgRYEICgs30,565
151
- airbyte_cdk/sources/file_based/config/csv_format.py,sha256=W9cdGy3F0S2gLFxu2orkuHUGnHYU0lwWRx_BD34uTxw,3246
151
+ airbyte_cdk/sources/file_based/config/csv_format.py,sha256=3Hh2McAIvMT8G6TFCGlq2vd96ox7xcO9K7jbIa-2SPo,5061
152
152
  airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=-iqYoUbC3Nuy97_mKdQMZxhEVlhr3a4aoTqGOmqA0-Y,5878
153
153
  airbyte_cdk/sources/file_based/config/jsonl_format.py,sha256=BI1VHv-0qbbdw8ZH7t9iV4a7JfSWRzPatkeOvLU_GI0,204
154
154
  airbyte_cdk/sources/file_based/config/parquet_format.py,sha256=wqe8iF-QzIto97uoRlDBNI84ED-4H1CZyMy-TSo8Z6Y,573
@@ -157,7 +157,7 @@ airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha
157
157
  airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=Mx3zT9Dem4uNfaUT0oOtrESsuB1LrGAi5N-uw2swZZA,701
158
158
  airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=N3a8cjXwRUN2__46IJTwrWlsyFiSA1xtSgPcPH28sn0,476
159
159
  airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=2wzcZmQ0_VYyJmRYllGrEEYwrP9hYwXVJKz-DO6JyoE,8079
160
- airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=u_4a2tRR8ZZtDCmlPpQjg5lsL3yK-oSKG5SJxGexaLE,8026
160
+ airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QpFKzvi2cdnScjln9WJdBxkrBFJl79PK1SW0pT7vGrs,10877
161
161
  airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=UgQkC-J7T9u3twlILRCvXXrcHJCwkjQYKhdNvccYhbY,1424
162
162
  airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=dceNH10qb-V3sTHOMcuLy_07nSde1s7l3E7KsQf8pI0,2715
163
163
  airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=BdmcM-9zVdCsuSw_A4GmStQw88IiG9XJwqpEuS3Jud4,8318
@@ -166,10 +166,10 @@ airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_valida
166
166
  airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py,sha256=ckBp4rv5wiUapA4ZEoe1fc6ILx-LdcRn26W4WJCrt_k,1506
167
167
  airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaFS_3pIwm-chEabZHiPJi0,265
168
168
  airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=tvVew6din9y8a3hItzU0PjTQrMxbVI7bK-3pRTvOswg,5810
169
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=HbuBRbLbKZKD8DxTtnwv9Z6L2_Ms6m-js6cJzQtJdH4,11056
170
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=OP3ye1-cYMymEyK6FJaQk3xYmMg1pphVdiWO0lBWvRQ,166
171
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=GMNPDee7sN9L1PxG-B6v6eQt__6VcTF1DOIlouKKH88,6765
172
- airbyte_cdk/sources/file_based/stream/cursor/file_based_cursor.py,sha256=aAoWrE_7VQPb7MXljnRZ7MxbdT4HNiejzxm1mFrNajE,1639
169
+ airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=K81wNOjd-p69WMoJWShup2wbEumXK5CT311u-J985_w,11687
170
+ airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
171
+ airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=i-FPeK8lwCzX34GCcmvL5Yvdh8-uu7FeCVYDoFbD7IY,1920
172
+ airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=6waK1sNLNlQLxEbMtuJgv0GUl76wPhwanV7cX2DkSK0,6778
173
173
  airbyte_cdk/sources/message/__init__.py,sha256=14ZSLah9uyI_CyK7_jIyq521vlgKAdihe6Ciw6-jLgU,372
174
174
  airbyte_cdk/sources/message/repository.py,sha256=kflbIkUwCWXMKpe6566TD_HRjRqEZKQ0h2RpxzjWLJk,4994
175
175
  airbyte_cdk/sources/singer/__init__.py,sha256=D3zQSiWT0B9t0kKE4JPZjrcDnP2YnFNJ3dfYqSaxo9w,246
@@ -305,31 +305,32 @@ unit_tests/sources/declarative/states/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyR
305
305
  unit_tests/sources/declarative/stream_slicers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
306
306
  unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py,sha256=_tEjpxK_7sbfppnYT0IhhBMBuGiEXai1qmfigF6Q9RA,7901
307
307
  unit_tests/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
308
- unit_tests/sources/file_based/helpers.py,sha256=p5VDthPcJ_ZoAkbpdAdzKMnG1LbB2vlG4eo9gWwzhWc,2374
309
- unit_tests/sources/file_based/in_memory_files_source.py,sha256=AUtQLwd3bxbKjFY0-yxIdIwluwGjsLXoPP8_FtVBKgE,7653
308
+ unit_tests/sources/file_based/helpers.py,sha256=-tmNKF0Jbjn8BsBz-Qkdc_3yKvWpVLaauaSwxdMHD3Q,2542
309
+ unit_tests/sources/file_based/in_memory_files_source.py,sha256=tvt3-fhg_0iN5iCAyxuOjmhmrBLavIcm3mf3Q-P9llE,8062
310
310
  unit_tests/sources/file_based/test_file_based_stream_reader.py,sha256=Im3QmgXTyWXPA2VAtu9uLYas7YXB4NVsZw8zzQYZeeg,5283
311
- unit_tests/sources/file_based/test_scenarios.py,sha256=ADaCrhTitu5KzIigW24kUppFXR30OSiHJHn-USZuE-Q,16583
311
+ unit_tests/sources/file_based/test_scenarios.py,sha256=hqsHaC2n1H6AwX8W9RufNQwLyWwGacOwm29SZ4yGNSo,17923
312
312
  unit_tests/sources/file_based/test_schema_helpers.py,sha256=XJ27ecw0sjlSnKgQqV1DgnnjKB1TR2btq22OITh1Qdk,12333
313
313
  unit_tests/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
314
314
  unit_tests/sources/file_based/config/test_abstract_file_based_spec.py,sha256=wmZAC-nBiUedMZi0n4zaC9oiZD9UTuYP5zJC1xxRnME,1216
315
+ unit_tests/sources/file_based/config/test_csv_format.py,sha256=WoCUSeQdaBgZf8q2Q7GTRStvAwg8A7fl9xv9UJdjv1M,1111
315
316
  unit_tests/sources/file_based/config/test_file_based_stream_config.py,sha256=ISDmD13D3Sr_AlqgH9KJ-YbCogCyYyNTTiicm9NBQNw,4942
316
317
  unit_tests/sources/file_based/file_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
317
318
  unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=eFsgq9UrIUBxlVqPCcW8cUn3CeF6BNG57kRiIrHFSmg,8646
318
- unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=0z4tCUDV9QNsnuv4_kIas1eKrREpeSI9M795St7RLNY,2445
319
+ unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=RC3YgHjPhLHQ2yfDGlFL7u1YZXYwqcxPuZI_1CCgP6Y,4602
319
320
  unit_tests/sources/file_based/file_types/test_jsonl_parser.py,sha256=X2iivsiEsG_PnvUo042jgrfLqGJAs-aUtw_kChdROsY,1155
320
321
  unit_tests/sources/file_based/file_types/test_parquet_parser.py,sha256=nI2Vdr88fb9JsRiRgCA8CQMiB6j8cHMRIoodbo4plqY,13517
321
322
  unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
322
323
  unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=amqovlN0gKFeo_DnzQtHaHEjnmPdws1nFVOB_DeOmwc,30030
323
324
  unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=lIDfVo8clROEyElYZLGi8OViahOCCTCDaIL9acAD9xo,5834
324
- unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=NDCxdMswFdeYdWaJVXkAl4XwiMN0dpxGtWRGjr7vdDI,60670
325
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=q0gILvjQz6355PSznJzrCudOWQE95fLO44cHnpdVya8,60568
325
+ unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=Fg13wx-c6Cy8AleqL1DJajmrLZyh8tBebRXDr99nU34,104467
326
+ unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=gPBcbnbdlh0995Xvj6aJn9oXzfvpj-4g2LZHlWoAkBE,60710
326
327
  unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=hhf2YT2-N8rg4WwPU4zK2ry0i-mmFPpMppgPEFFupcM,27503
327
328
  unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=ebxJjkkgc55RnSVcr0y7BEM0DvAGLw7h54U_cv5G1xI,22200
328
- unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=oBrxQ_T_4g6wU0ww0s9LqORXTh387ze_f5WYh0DNx8A,10093
329
+ unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=iktYJ-WuDDLdT1rsWOvx-2z3Nt4VgkrvpzsMlMDfBGA,10177
329
330
  unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=fVaunKXwi-UEaG7gHQHEDAczWh8aj6TCLovY3aLuDa4,28325
330
331
  unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=BPQSFO0jX9bcm2RsvPectGf1x17Fo9QgmrWNkmtX4nA,25224
331
332
  unit_tests/sources/file_based/stream/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
332
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=I6XdFyi6nB0VOsoP2NLL4KEwVdv8RGCowk2QvHibsds,12182
333
+ unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=DHMHULhV_8pFFrdE2vaL-hIGumgsMtyKM3_0TCfAw_Y,12459
333
334
  unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=22Rgp1HvZss2WdOcfenbnvx18tfFJ_trPuWp299RW5E,1545
334
335
  unit_tests/sources/fixtures/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
335
336
  unit_tests/sources/fixtures/source_test_fixture.py,sha256=r-UtR241EGQMZTw1RoKaatrpCGeQn7OIuRPWfG9f7nI,5380
@@ -352,8 +353,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
352
353
  unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
353
354
  unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
354
355
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
355
- airbyte_cdk-0.50.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
356
- airbyte_cdk-0.50.0.dist-info/METADATA,sha256=qScuWXxEoi09FciWKDmpWF6x1dgRfCyOrnVaAGgFMZY,9439
357
- airbyte_cdk-0.50.0.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
358
- airbyte_cdk-0.50.0.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
359
- airbyte_cdk-0.50.0.dist-info/RECORD,,
356
+ airbyte_cdk-0.50.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
357
+ airbyte_cdk-0.50.2.dist-info/METADATA,sha256=Trm1tuhElrbiQAKTWMrRR5YUO3gxdU4B0qgfvrGmDYc,9439
358
+ airbyte_cdk-0.50.2.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
359
+ airbyte_cdk-0.50.2.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
360
+ airbyte_cdk-0.50.2.dist-info/RECORD,,
@@ -0,0 +1,23 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import pytest as pytest
6
+ from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
7
+
8
+
9
+ @pytest.mark.parametrize(
10
+ "skip_rows_before_header, autogenerate_column_names, expected_error",
11
+ [
12
+ pytest.param(1, True, ValueError, id="test_skip_rows_before_header_and_autogenerate_column_names"),
13
+ pytest.param(1, False, None, id="test_skip_rows_before_header_and_no_autogenerate_column_names"),
14
+ pytest.param(0, True, None, id="test_no_skip_rows_before_header_and_autogenerate_column_names"),
15
+ pytest.param(0, False, None, id="test_no_skip_rows_before_header_and_no_autogenerate_column_names"),
16
+ ]
17
+ )
18
+ def test_csv_format(skip_rows_before_header, autogenerate_column_names, expected_error):
19
+ if expected_error:
20
+ with pytest.raises(expected_error):
21
+ CsvFormat(skip_rows_before_header=skip_rows_before_header, autogenerate_column_names=autogenerate_column_names)
22
+ else:
23
+ CsvFormat(skip_rows_before_header=skip_rows_before_header, autogenerate_column_names=autogenerate_column_names)
@@ -3,9 +3,12 @@
3
3
  #
4
4
 
5
5
  import logging
6
+ from unittest.mock import MagicMock, Mock
6
7
 
7
8
  import pytest
8
- from airbyte_cdk.sources.file_based.file_types.csv_parser import cast_types
9
+ from airbyte_cdk.sources.file_based.config.csv_format import DEFAULT_FALSE_VALUES, DEFAULT_TRUE_VALUES, CsvFormat
10
+ from airbyte_cdk.sources.file_based.exceptions import RecordParseError
11
+ from airbyte_cdk.sources.file_based.file_types.csv_parser import CsvParser, _cast_types
9
12
 
10
13
  PROPERTY_TYPES = {
11
14
  "col1": "null",
@@ -23,7 +26,7 @@ logger = logging.getLogger()
23
26
 
24
27
 
25
28
  @pytest.mark.parametrize(
26
- "row,expected_output",
29
+ "row, true_values, false_values, expected_output",
27
30
  [
28
31
  pytest.param(
29
32
  {
@@ -36,7 +39,10 @@ logger = logging.getLogger()
36
39
  "col7": '[1, 2]',
37
40
  "col8": '["1", "2"]',
38
41
  "col9": '[{"a": "b"}, {"a": "c"}]',
39
- }, {
42
+ },
43
+ DEFAULT_TRUE_VALUES,
44
+ DEFAULT_FALSE_VALUES,
45
+ {
40
46
  "col1": None,
41
47
  "col2": True,
42
48
  "col3": 1,
@@ -47,20 +53,46 @@ logger = logging.getLogger()
47
53
  "col8": ["1", "2"],
48
54
  "col9": [{"a": "b"}, {"a": "c"}],
49
55
  }, id="cast-all-cols"),
50
- pytest.param({"col1": "1"}, {"col1": "1"}, id="cannot-cast-to-null"),
51
- pytest.param({"col2": "1"}, {"col2": True}, id="cast-1-to-bool"),
52
- pytest.param({"col2": "0"}, {"col2": False}, id="cast-0-to-bool"),
53
- pytest.param({"col2": "yes"}, {"col2": True}, id="cast-yes-to-bool"),
54
- pytest.param({"col2": "no"}, {"col2": False}, id="cast-no-to-bool"),
55
- pytest.param({"col2": "10"}, {"col2": "10"}, id="cannot-cast-to-bool"),
56
- pytest.param({"col3": "1.1"}, {"col3": "1.1"}, id="cannot-cast-to-int"),
57
- pytest.param({"col4": "asdf"}, {"col4": "asdf"}, id="cannot-cast-to-float"),
58
- pytest.param({"col6": "{'a': 'b'}"}, {"col6": "{'a': 'b'}"}, id="cannot-cast-to-dict"),
59
- pytest.param({"col7": "['a', 'b']"}, {"col7": "['a', 'b']"}, id="cannot-cast-to-list-of-ints"),
60
- pytest.param({"col8": "['a', 'b']"}, {"col8": "['a', 'b']"}, id="cannot-cast-to-list-of-strings"),
61
- pytest.param({"col9": "['a', 'b']"}, {"col9": "['a', 'b']"}, id="cannot-cast-to-list-of-objects"),
62
- pytest.param({"col10": "x"}, {"col10": "x"}, id="item-not-in-props-doesn't-error"),
56
+ pytest.param({"col1": "1"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col1": "1"}, id="cannot-cast-to-null"),
57
+ pytest.param({"col2": "1"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": True}, id="cast-1-to-bool"),
58
+ pytest.param({"col2": "0"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": False}, id="cast-0-to-bool"),
59
+ pytest.param({"col2": "yes"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": True}, id="cast-yes-to-bool"),
60
+ pytest.param({"col2": "this_is_a_true_value"}, ["this_is_a_true_value"], DEFAULT_FALSE_VALUES, {"col2": True}, id="cast-custom-true-value-to-bool"),
61
+ pytest.param({"col2": "this_is_a_false_value"}, DEFAULT_TRUE_VALUES, ["this_is_a_false_value"], {"col2": False}, id="cast-custom-false-value-to-bool"),
62
+ pytest.param({"col2": "no"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": False}, id="cast-no-to-bool"),
63
+ pytest.param({"col2": "10"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": "10"}, id="cannot-cast-to-bool"),
64
+ pytest.param({"col3": "1.1"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col3": "1.1"}, id="cannot-cast-to-int"),
65
+ pytest.param({"col4": "asdf"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col4": "asdf"}, id="cannot-cast-to-float"),
66
+ pytest.param({"col6": "{'a': 'b'}"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col6": "{'a': 'b'}"}, id="cannot-cast-to-dict"),
67
+ pytest.param({"col7": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col7": "['a', 'b']"}, id="cannot-cast-to-list-of-ints"),
68
+ pytest.param({"col8": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col8": "['a', 'b']"}, id="cannot-cast-to-list-of-strings"),
69
+ pytest.param({"col9": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col9": "['a', 'b']"}, id="cannot-cast-to-list-of-objects"),
70
+ pytest.param({"col10": "x"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col10": "x"}, id="item-not-in-props-doesn't-error"),
71
+ ]
72
+ )
73
+ def test_cast_to_python_type(row, true_values, false_values, expected_output):
74
+ csv_format = CsvFormat(true_values=true_values, false_values=false_values)
75
+ assert _cast_types(row, PROPERTY_TYPES, csv_format, logger) == expected_output
76
+
77
+
78
+ @pytest.mark.parametrize(
79
+ "reader_values, expected_rows", [
80
+ pytest.param([{"col1": "1", "col2": None}], None, id="raise_exception_if_any_value_is_none"),
81
+ pytest.param([{"col1": "1", "col2": "2"}], [{"col1": "1", "col2": "2"}], id="read_no_cast"),
63
82
  ]
64
83
  )
65
- def test_cast_to_python_type(row, expected_output):
66
- assert cast_types(row, PROPERTY_TYPES, logger) == expected_output
84
+ def test_read_and_cast_types(reader_values, expected_rows):
85
+ reader = MagicMock()
86
+ reader.__iter__.return_value = reader_values
87
+ schema = {}
88
+ config_format = CsvFormat()
89
+ logger = Mock()
90
+
91
+ parser = CsvParser()
92
+
93
+ expected_rows = expected_rows
94
+ if expected_rows is None:
95
+ with pytest.raises(RecordParseError):
96
+ list(parser._read_and_cast_types(reader, schema, config_format, logger))
97
+ else:
98
+ assert expected_rows == list(parser._read_and_cast_types(reader, schema, config_format, logger))
@@ -14,6 +14,7 @@ from airbyte_cdk.sources.file_based.file_types.csv_parser import CsvParser
14
14
  from airbyte_cdk.sources.file_based.file_types.jsonl_parser import JsonlParser
15
15
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
16
16
  from airbyte_cdk.sources.file_based.schema_validation_policies import AbstractSchemaValidationPolicy
17
+ from airbyte_cdk.sources.file_based.stream.cursor import DefaultFileBasedCursor
17
18
  from unit_tests.sources.file_based.in_memory_files_source import InMemoryFilesStreamReader
18
19
 
19
20
 
@@ -54,6 +55,10 @@ class FailingSchemaValidationPolicy(AbstractSchemaValidationPolicy):
54
55
  return False
55
56
 
56
57
 
58
+ class LowHistoryLimitCursor(DefaultFileBasedCursor):
59
+ DEFAULT_MAX_HISTORY_SIZE = 3
60
+
61
+
57
62
  def make_remote_files(files: List[str]) -> List[RemoteFile]:
58
63
  return [
59
64
  RemoteFile(uri=f, last_modified=datetime.strptime("2023-06-05T03:54:07.000Z", "%Y-%m-%dT%H:%M:%S.%fZ"))
@@ -20,11 +20,12 @@ from airbyte_cdk.models import ConfiguredAirbyteCatalog
20
20
  from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy, DefaultFileBasedAvailabilityStrategy
21
21
  from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
22
22
  from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy, DefaultDiscoveryPolicy
23
- from airbyte_cdk.sources.file_based.file_based_source import DEFAULT_MAX_HISTORY_SIZE, FileBasedSource
23
+ from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
24
24
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
25
25
  from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
26
26
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
27
27
  from airbyte_cdk.sources.file_based.schema_validation_policies import DEFAULT_SCHEMA_VALIDATION_POLICIES, AbstractSchemaValidationPolicy
28
+ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor, DefaultFileBasedCursor
28
29
  from avro import datafile
29
30
  from pydantic import AnyUrl, Field
30
31
 
@@ -41,7 +42,7 @@ class InMemoryFilesSource(FileBasedSource):
41
42
  stream_reader: Optional[AbstractFileBasedStreamReader],
42
43
  catalog: Optional[Mapping[str, Any]],
43
44
  file_write_options: Mapping[str, Any],
44
- max_history_size: int,
45
+ cursor_cls: Optional[AbstractFileBasedCursor],
45
46
  ):
46
47
  # Attributes required for test purposes
47
48
  self.files = files
@@ -59,7 +60,7 @@ class InMemoryFilesSource(FileBasedSource):
59
60
  discovery_policy=discovery_policy or DefaultDiscoveryPolicy(),
60
61
  parsers=parsers,
61
62
  validation_policies=validation_policies or DEFAULT_SCHEMA_VALIDATION_POLICIES,
62
- max_history_size=max_history_size or DEFAULT_MAX_HISTORY_SIZE,
63
+ cursor_cls=cursor_cls or DefaultFileBasedCursor,
63
64
  )
64
65
 
65
66
  def read_catalog(self, catalog_path: str) -> ConfiguredAirbyteCatalog:
@@ -100,7 +101,14 @@ class InMemoryFilesStreamReader(AbstractFileBasedStreamReader):
100
101
  raise NotImplementedError(f"No implementation for file type: {self.file_type}")
101
102
 
102
103
  def _make_csv_file_contents(self, file_name: str) -> IOBase:
104
+
105
+ # Some tests define the csv as an array of strings to make it easier to validate the handling
106
+ # of quotes, delimiter, and escape chars.
107
+ if isinstance(self.files[file_name]["contents"][0], str):
108
+ return io.StringIO("\n".join([s.strip() for s in self.files[file_name]["contents"]]))
109
+
103
110
  fh = io.StringIO()
111
+
104
112
  if self.file_write_options:
105
113
  csv.register_dialect("in_memory_dialect", **self.file_write_options)
106
114
  writer = csv.writer(fh, dialect="in_memory_dialect")