airbyte-cdk 0.50.0__py3-none-any.whl → 0.50.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. airbyte_cdk/entrypoint.py +7 -0
  2. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3 -3
  3. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +3 -3
  4. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +9 -9
  5. airbyte_cdk/sources/file_based/config/csv_format.py +42 -6
  6. airbyte_cdk/sources/file_based/file_based_source.py +4 -5
  7. airbyte_cdk/sources/file_based/file_types/csv_parser.py +114 -59
  8. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +2 -2
  9. airbyte_cdk/sources/file_based/stream/cursor/{file_based_cursor.py → abstract_file_based_cursor.py} +9 -1
  10. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +10 -10
  11. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +15 -2
  12. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/RECORD +25 -24
  14. unit_tests/sources/file_based/config/test_csv_format.py +23 -0
  15. unit_tests/sources/file_based/file_types/test_csv_parser.py +50 -18
  16. unit_tests/sources/file_based/helpers.py +5 -0
  17. unit_tests/sources/file_based/in_memory_files_source.py +11 -3
  18. unit_tests/sources/file_based/scenarios/csv_scenarios.py +1254 -47
  19. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +6 -5
  20. unit_tests/sources/file_based/scenarios/scenario_builder.py +8 -7
  21. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +13 -12
  22. unit_tests/sources/file_based/test_scenarios.py +30 -0
  23. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/LICENSE.txt +0 -0
  24. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/WHEEL +0 -0
  25. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  airbyte_cdk/__init__.py,sha256=OBQWv5rF_QTRpOiP6J8J8oTU-GGrfi18i1PRFpahKks,262
2
2
  airbyte_cdk/config_observation.py,sha256=3kjxv8xTwCnub2_fTWnMPRx0E7vly1BUeyXOSK15Ql4,3610
3
3
  airbyte_cdk/connector.py,sha256=LtTAmBFV1LBUz_fOEbQ_EvBhyUsz8AGOlDsvK8QOOo0,4396
4
- airbyte_cdk/entrypoint.py,sha256=oerYF7ZZCbq5taSU6YDdXNinlZ1ZNYMd_kAVU77ZDD0,12168
4
+ airbyte_cdk/entrypoint.py,sha256=guPK0UnKWxpN0G3d0etxkaXX7wtgWIymLJ8M1ssKNQc,12393
5
5
  airbyte_cdk/exception_handler.py,sha256=CwkiPdZ1WMOr3CBkvKFyHiyLerXGRqBrVlB4p0OImGI,1125
6
6
  airbyte_cdk/logger.py,sha256=4Mi2MEQi1uh59BP9Dxw_UEbZuxaJewqK_jvEU2b10nk,3985
7
7
  airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -23,7 +23,7 @@ airbyte_cdk/sources/http_logger.py,sha256=v0kkpDtA0GUOgj6_3AayrYaBrSHBqG4t3MGbrt
23
23
  airbyte_cdk/sources/source.py,sha256=N3vHZzdUsBETFsql-YpO-LcgjolT_jcnAuHBhGD6Hqk,4278
24
24
  airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
25
25
  airbyte_cdk/sources/declarative/create_partial.py,sha256=sUJOwD8hBzW4pxw2XhYlSTMgl-WMc5WpP5Oq_jo3fHw,3371
26
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=JB7fX0RV-UZk9eiM1G7EU_6XcB63KQSepBeZbPoZwTs,82835
26
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=5Z8OMI6UrR6n6CwaklMhm3_z7JsQvhXDVCGb1S6o5iU,83049
27
27
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=U2As9PDKmcWDgbsWUo-RetJ9fxQOBlwntWZ0NOgs5Ac,1453
28
28
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=3E8I_hsJC9vlh8BgMOAbUaEhQVvlzSeDCo5nEkCuDzg,5844
29
29
  airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
@@ -64,7 +64,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
64
64
  airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=Dc0F87nElWsz_Ikj938eQ9uqZvyqgFhZ8Dqf_-hvndc,4800
65
65
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
66
66
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
67
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=_VlCO1SaBccp0HsUwVcwvh7cR6CAvFyF20G3rn_7pL8,56890
67
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=HTK0MrQpNV3CfmcCURT8pKCkPuqzJIp5auFmiIIiylo,57104
68
68
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
69
69
  airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=bK4a74opm6WHyV7HqOVws6GE5Z7cLNc5MaTha69abIQ,6086
70
70
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
@@ -137,18 +137,18 @@ airbyte_cdk/sources/embedded/runner.py,sha256=TykiigEz39Y4QryHTbSm7VGvFOxo7duiCY
137
137
  airbyte_cdk/sources/embedded/tools.py,sha256=-Z4tZ4AP1OTi_zrqFM3YV8Rt7c60wvsrv0Dc-rTZ2uw,744
138
138
  airbyte_cdk/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
139
139
  airbyte_cdk/sources/file_based/exceptions.py,sha256=GKdFFteVMgYS1nzr2SeDXm7DYTHvs1PLoftWoXOl5Vk,3810
140
- airbyte_cdk/sources/file_based/file_based_source.py,sha256=yu6RJCm4Zr7iJmC4uLn5_SMd-EFL0rBvnFQm4LAIBYY,6796
140
+ airbyte_cdk/sources/file_based/file_based_source.py,sha256=uOxyngmCjuBLniWOHDpivXH8WlYIfzWqQOQSjBlVzvc,6786
141
141
  airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=s2okNhgsI-d6_duhb-9osJ1RioQQI5NmqC0U9kNeYrc,3420
142
142
  airbyte_cdk/sources/file_based/remote_file.py,sha256=s3Qz2N786yqSMXqcWmsTOvYhgs-ry0xFcn5fGyyz7bY,581
143
143
  airbyte_cdk/sources/file_based/schema_helpers.py,sha256=c22G3ukPPayoOioSMUjtWSjd4zXih9X_yKvAPRmogfE,9025
144
144
  airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
145
145
  airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=WiPPuQGfmQlFUMFR5h3ECc-VzBj4vair6_4WAL87AEI,277
146
146
  airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=NeHCiG4FFohzYpQQFfmTL4-5oI0nElHWgXX1xrm8-SU,1269
147
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=hcVf30sgMySsXosMDOK0AZ8j-LaiVCSjJHQP1b3bJIU,4705
147
+ airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=axNPioHqk-fvQRqQTxjc4wKZeT4LSRA8u7lWG_EiSDk,4611
148
148
  airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
149
  airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=CCZKoFMX9-tDEY_RY_ziYil0PSPTeKsz6vuoKv6XqvY,4523
150
150
  airbyte_cdk/sources/file_based/config/avro_format.py,sha256=KkV7i-36aITHnacBztpvllmBDXIXwEfKbgRYEICgs30,565
151
- airbyte_cdk/sources/file_based/config/csv_format.py,sha256=W9cdGy3F0S2gLFxu2orkuHUGnHYU0lwWRx_BD34uTxw,3246
151
+ airbyte_cdk/sources/file_based/config/csv_format.py,sha256=3Hh2McAIvMT8G6TFCGlq2vd96ox7xcO9K7jbIa-2SPo,5061
152
152
  airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=-iqYoUbC3Nuy97_mKdQMZxhEVlhr3a4aoTqGOmqA0-Y,5878
153
153
  airbyte_cdk/sources/file_based/config/jsonl_format.py,sha256=BI1VHv-0qbbdw8ZH7t9iV4a7JfSWRzPatkeOvLU_GI0,204
154
154
  airbyte_cdk/sources/file_based/config/parquet_format.py,sha256=wqe8iF-QzIto97uoRlDBNI84ED-4H1CZyMy-TSo8Z6Y,573
@@ -157,7 +157,7 @@ airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha
157
157
  airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=Mx3zT9Dem4uNfaUT0oOtrESsuB1LrGAi5N-uw2swZZA,701
158
158
  airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=N3a8cjXwRUN2__46IJTwrWlsyFiSA1xtSgPcPH28sn0,476
159
159
  airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=2wzcZmQ0_VYyJmRYllGrEEYwrP9hYwXVJKz-DO6JyoE,8079
160
- airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=u_4a2tRR8ZZtDCmlPpQjg5lsL3yK-oSKG5SJxGexaLE,8026
160
+ airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QpFKzvi2cdnScjln9WJdBxkrBFJl79PK1SW0pT7vGrs,10877
161
161
  airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=UgQkC-J7T9u3twlILRCvXXrcHJCwkjQYKhdNvccYhbY,1424
162
162
  airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=dceNH10qb-V3sTHOMcuLy_07nSde1s7l3E7KsQf8pI0,2715
163
163
  airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=BdmcM-9zVdCsuSw_A4GmStQw88IiG9XJwqpEuS3Jud4,8318
@@ -166,10 +166,10 @@ airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_valida
166
166
  airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py,sha256=ckBp4rv5wiUapA4ZEoe1fc6ILx-LdcRn26W4WJCrt_k,1506
167
167
  airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaFS_3pIwm-chEabZHiPJi0,265
168
168
  airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=tvVew6din9y8a3hItzU0PjTQrMxbVI7bK-3pRTvOswg,5810
169
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=HbuBRbLbKZKD8DxTtnwv9Z6L2_Ms6m-js6cJzQtJdH4,11056
170
- airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=OP3ye1-cYMymEyK6FJaQk3xYmMg1pphVdiWO0lBWvRQ,166
171
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=GMNPDee7sN9L1PxG-B6v6eQt__6VcTF1DOIlouKKH88,6765
172
- airbyte_cdk/sources/file_based/stream/cursor/file_based_cursor.py,sha256=aAoWrE_7VQPb7MXljnRZ7MxbdT4HNiejzxm1mFrNajE,1639
169
+ airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=K81wNOjd-p69WMoJWShup2wbEumXK5CT311u-J985_w,11687
170
+ airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
171
+ airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=i-FPeK8lwCzX34GCcmvL5Yvdh8-uu7FeCVYDoFbD7IY,1920
172
+ airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=6waK1sNLNlQLxEbMtuJgv0GUl76wPhwanV7cX2DkSK0,6778
173
173
  airbyte_cdk/sources/message/__init__.py,sha256=14ZSLah9uyI_CyK7_jIyq521vlgKAdihe6Ciw6-jLgU,372
174
174
  airbyte_cdk/sources/message/repository.py,sha256=kflbIkUwCWXMKpe6566TD_HRjRqEZKQ0h2RpxzjWLJk,4994
175
175
  airbyte_cdk/sources/singer/__init__.py,sha256=D3zQSiWT0B9t0kKE4JPZjrcDnP2YnFNJ3dfYqSaxo9w,246
@@ -305,31 +305,32 @@ unit_tests/sources/declarative/states/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyR
305
305
  unit_tests/sources/declarative/stream_slicers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
306
306
  unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py,sha256=_tEjpxK_7sbfppnYT0IhhBMBuGiEXai1qmfigF6Q9RA,7901
307
307
  unit_tests/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
308
- unit_tests/sources/file_based/helpers.py,sha256=p5VDthPcJ_ZoAkbpdAdzKMnG1LbB2vlG4eo9gWwzhWc,2374
309
- unit_tests/sources/file_based/in_memory_files_source.py,sha256=AUtQLwd3bxbKjFY0-yxIdIwluwGjsLXoPP8_FtVBKgE,7653
308
+ unit_tests/sources/file_based/helpers.py,sha256=-tmNKF0Jbjn8BsBz-Qkdc_3yKvWpVLaauaSwxdMHD3Q,2542
309
+ unit_tests/sources/file_based/in_memory_files_source.py,sha256=tvt3-fhg_0iN5iCAyxuOjmhmrBLavIcm3mf3Q-P9llE,8062
310
310
  unit_tests/sources/file_based/test_file_based_stream_reader.py,sha256=Im3QmgXTyWXPA2VAtu9uLYas7YXB4NVsZw8zzQYZeeg,5283
311
- unit_tests/sources/file_based/test_scenarios.py,sha256=ADaCrhTitu5KzIigW24kUppFXR30OSiHJHn-USZuE-Q,16583
311
+ unit_tests/sources/file_based/test_scenarios.py,sha256=hqsHaC2n1H6AwX8W9RufNQwLyWwGacOwm29SZ4yGNSo,17923
312
312
  unit_tests/sources/file_based/test_schema_helpers.py,sha256=XJ27ecw0sjlSnKgQqV1DgnnjKB1TR2btq22OITh1Qdk,12333
313
313
  unit_tests/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
314
314
  unit_tests/sources/file_based/config/test_abstract_file_based_spec.py,sha256=wmZAC-nBiUedMZi0n4zaC9oiZD9UTuYP5zJC1xxRnME,1216
315
+ unit_tests/sources/file_based/config/test_csv_format.py,sha256=WoCUSeQdaBgZf8q2Q7GTRStvAwg8A7fl9xv9UJdjv1M,1111
315
316
  unit_tests/sources/file_based/config/test_file_based_stream_config.py,sha256=ISDmD13D3Sr_AlqgH9KJ-YbCogCyYyNTTiicm9NBQNw,4942
316
317
  unit_tests/sources/file_based/file_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
317
318
  unit_tests/sources/file_based/file_types/test_avro_parser.py,sha256=eFsgq9UrIUBxlVqPCcW8cUn3CeF6BNG57kRiIrHFSmg,8646
318
- unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=0z4tCUDV9QNsnuv4_kIas1eKrREpeSI9M795St7RLNY,2445
319
+ unit_tests/sources/file_based/file_types/test_csv_parser.py,sha256=RC3YgHjPhLHQ2yfDGlFL7u1YZXYwqcxPuZI_1CCgP6Y,4602
319
320
  unit_tests/sources/file_based/file_types/test_jsonl_parser.py,sha256=X2iivsiEsG_PnvUo042jgrfLqGJAs-aUtw_kChdROsY,1155
320
321
  unit_tests/sources/file_based/file_types/test_parquet_parser.py,sha256=nI2Vdr88fb9JsRiRgCA8CQMiB6j8cHMRIoodbo4plqY,13517
321
322
  unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
322
323
  unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=amqovlN0gKFeo_DnzQtHaHEjnmPdws1nFVOB_DeOmwc,30030
323
324
  unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=lIDfVo8clROEyElYZLGi8OViahOCCTCDaIL9acAD9xo,5834
324
- unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=NDCxdMswFdeYdWaJVXkAl4XwiMN0dpxGtWRGjr7vdDI,60670
325
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=q0gILvjQz6355PSznJzrCudOWQE95fLO44cHnpdVya8,60568
325
+ unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=Fg13wx-c6Cy8AleqL1DJajmrLZyh8tBebRXDr99nU34,104467
326
+ unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=gPBcbnbdlh0995Xvj6aJn9oXzfvpj-4g2LZHlWoAkBE,60710
326
327
  unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=hhf2YT2-N8rg4WwPU4zK2ry0i-mmFPpMppgPEFFupcM,27503
327
328
  unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=ebxJjkkgc55RnSVcr0y7BEM0DvAGLw7h54U_cv5G1xI,22200
328
- unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=oBrxQ_T_4g6wU0ww0s9LqORXTh387ze_f5WYh0DNx8A,10093
329
+ unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=iktYJ-WuDDLdT1rsWOvx-2z3Nt4VgkrvpzsMlMDfBGA,10177
329
330
  unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=fVaunKXwi-UEaG7gHQHEDAczWh8aj6TCLovY3aLuDa4,28325
330
331
  unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=BPQSFO0jX9bcm2RsvPectGf1x17Fo9QgmrWNkmtX4nA,25224
331
332
  unit_tests/sources/file_based/stream/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
332
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=I6XdFyi6nB0VOsoP2NLL4KEwVdv8RGCowk2QvHibsds,12182
333
+ unit_tests/sources/file_based/stream/test_default_file_based_cursor.py,sha256=DHMHULhV_8pFFrdE2vaL-hIGumgsMtyKM3_0TCfAw_Y,12459
333
334
  unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=22Rgp1HvZss2WdOcfenbnvx18tfFJ_trPuWp299RW5E,1545
334
335
  unit_tests/sources/fixtures/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
335
336
  unit_tests/sources/fixtures/source_test_fixture.py,sha256=r-UtR241EGQMZTw1RoKaatrpCGeQn7OIuRPWfG9f7nI,5380
@@ -352,8 +353,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
352
353
  unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
353
354
  unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
354
355
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
355
- airbyte_cdk-0.50.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
356
- airbyte_cdk-0.50.0.dist-info/METADATA,sha256=qScuWXxEoi09FciWKDmpWF6x1dgRfCyOrnVaAGgFMZY,9439
357
- airbyte_cdk-0.50.0.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
358
- airbyte_cdk-0.50.0.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
359
- airbyte_cdk-0.50.0.dist-info/RECORD,,
356
+ airbyte_cdk-0.50.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
357
+ airbyte_cdk-0.50.2.dist-info/METADATA,sha256=Trm1tuhElrbiQAKTWMrRR5YUO3gxdU4B0qgfvrGmDYc,9439
358
+ airbyte_cdk-0.50.2.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
359
+ airbyte_cdk-0.50.2.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
360
+ airbyte_cdk-0.50.2.dist-info/RECORD,,
@@ -0,0 +1,23 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import pytest as pytest
6
+ from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
7
+
8
+
9
+ @pytest.mark.parametrize(
10
+ "skip_rows_before_header, autogenerate_column_names, expected_error",
11
+ [
12
+ pytest.param(1, True, ValueError, id="test_skip_rows_before_header_and_autogenerate_column_names"),
13
+ pytest.param(1, False, None, id="test_skip_rows_before_header_and_no_autogenerate_column_names"),
14
+ pytest.param(0, True, None, id="test_no_skip_rows_before_header_and_autogenerate_column_names"),
15
+ pytest.param(0, False, None, id="test_no_skip_rows_before_header_and_no_autogenerate_column_names"),
16
+ ]
17
+ )
18
+ def test_csv_format(skip_rows_before_header, autogenerate_column_names, expected_error):
19
+ if expected_error:
20
+ with pytest.raises(expected_error):
21
+ CsvFormat(skip_rows_before_header=skip_rows_before_header, autogenerate_column_names=autogenerate_column_names)
22
+ else:
23
+ CsvFormat(skip_rows_before_header=skip_rows_before_header, autogenerate_column_names=autogenerate_column_names)
@@ -3,9 +3,12 @@
3
3
  #
4
4
 
5
5
  import logging
6
+ from unittest.mock import MagicMock, Mock
6
7
 
7
8
  import pytest
8
- from airbyte_cdk.sources.file_based.file_types.csv_parser import cast_types
9
+ from airbyte_cdk.sources.file_based.config.csv_format import DEFAULT_FALSE_VALUES, DEFAULT_TRUE_VALUES, CsvFormat
10
+ from airbyte_cdk.sources.file_based.exceptions import RecordParseError
11
+ from airbyte_cdk.sources.file_based.file_types.csv_parser import CsvParser, _cast_types
9
12
 
10
13
  PROPERTY_TYPES = {
11
14
  "col1": "null",
@@ -23,7 +26,7 @@ logger = logging.getLogger()
23
26
 
24
27
 
25
28
  @pytest.mark.parametrize(
26
- "row,expected_output",
29
+ "row, true_values, false_values, expected_output",
27
30
  [
28
31
  pytest.param(
29
32
  {
@@ -36,7 +39,10 @@ logger = logging.getLogger()
36
39
  "col7": '[1, 2]',
37
40
  "col8": '["1", "2"]',
38
41
  "col9": '[{"a": "b"}, {"a": "c"}]',
39
- }, {
42
+ },
43
+ DEFAULT_TRUE_VALUES,
44
+ DEFAULT_FALSE_VALUES,
45
+ {
40
46
  "col1": None,
41
47
  "col2": True,
42
48
  "col3": 1,
@@ -47,20 +53,46 @@ logger = logging.getLogger()
47
53
  "col8": ["1", "2"],
48
54
  "col9": [{"a": "b"}, {"a": "c"}],
49
55
  }, id="cast-all-cols"),
50
- pytest.param({"col1": "1"}, {"col1": "1"}, id="cannot-cast-to-null"),
51
- pytest.param({"col2": "1"}, {"col2": True}, id="cast-1-to-bool"),
52
- pytest.param({"col2": "0"}, {"col2": False}, id="cast-0-to-bool"),
53
- pytest.param({"col2": "yes"}, {"col2": True}, id="cast-yes-to-bool"),
54
- pytest.param({"col2": "no"}, {"col2": False}, id="cast-no-to-bool"),
55
- pytest.param({"col2": "10"}, {"col2": "10"}, id="cannot-cast-to-bool"),
56
- pytest.param({"col3": "1.1"}, {"col3": "1.1"}, id="cannot-cast-to-int"),
57
- pytest.param({"col4": "asdf"}, {"col4": "asdf"}, id="cannot-cast-to-float"),
58
- pytest.param({"col6": "{'a': 'b'}"}, {"col6": "{'a': 'b'}"}, id="cannot-cast-to-dict"),
59
- pytest.param({"col7": "['a', 'b']"}, {"col7": "['a', 'b']"}, id="cannot-cast-to-list-of-ints"),
60
- pytest.param({"col8": "['a', 'b']"}, {"col8": "['a', 'b']"}, id="cannot-cast-to-list-of-strings"),
61
- pytest.param({"col9": "['a', 'b']"}, {"col9": "['a', 'b']"}, id="cannot-cast-to-list-of-objects"),
62
- pytest.param({"col10": "x"}, {"col10": "x"}, id="item-not-in-props-doesn't-error"),
56
+ pytest.param({"col1": "1"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col1": "1"}, id="cannot-cast-to-null"),
57
+ pytest.param({"col2": "1"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": True}, id="cast-1-to-bool"),
58
+ pytest.param({"col2": "0"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": False}, id="cast-0-to-bool"),
59
+ pytest.param({"col2": "yes"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": True}, id="cast-yes-to-bool"),
60
+ pytest.param({"col2": "this_is_a_true_value"}, ["this_is_a_true_value"], DEFAULT_FALSE_VALUES, {"col2": True}, id="cast-custom-true-value-to-bool"),
61
+ pytest.param({"col2": "this_is_a_false_value"}, DEFAULT_TRUE_VALUES, ["this_is_a_false_value"], {"col2": False}, id="cast-custom-false-value-to-bool"),
62
+ pytest.param({"col2": "no"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": False}, id="cast-no-to-bool"),
63
+ pytest.param({"col2": "10"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col2": "10"}, id="cannot-cast-to-bool"),
64
+ pytest.param({"col3": "1.1"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col3": "1.1"}, id="cannot-cast-to-int"),
65
+ pytest.param({"col4": "asdf"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col4": "asdf"}, id="cannot-cast-to-float"),
66
+ pytest.param({"col6": "{'a': 'b'}"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col6": "{'a': 'b'}"}, id="cannot-cast-to-dict"),
67
+ pytest.param({"col7": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col7": "['a', 'b']"}, id="cannot-cast-to-list-of-ints"),
68
+ pytest.param({"col8": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col8": "['a', 'b']"}, id="cannot-cast-to-list-of-strings"),
69
+ pytest.param({"col9": "['a', 'b']"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col9": "['a', 'b']"}, id="cannot-cast-to-list-of-objects"),
70
+ pytest.param({"col10": "x"}, DEFAULT_TRUE_VALUES, DEFAULT_FALSE_VALUES, {"col10": "x"}, id="item-not-in-props-doesn't-error"),
71
+ ]
72
+ )
73
+ def test_cast_to_python_type(row, true_values, false_values, expected_output):
74
+ csv_format = CsvFormat(true_values=true_values, false_values=false_values)
75
+ assert _cast_types(row, PROPERTY_TYPES, csv_format, logger) == expected_output
76
+
77
+
78
+ @pytest.mark.parametrize(
79
+ "reader_values, expected_rows", [
80
+ pytest.param([{"col1": "1", "col2": None}], None, id="raise_exception_if_any_value_is_none"),
81
+ pytest.param([{"col1": "1", "col2": "2"}], [{"col1": "1", "col2": "2"}], id="read_no_cast"),
63
82
  ]
64
83
  )
65
- def test_cast_to_python_type(row, expected_output):
66
- assert cast_types(row, PROPERTY_TYPES, logger) == expected_output
84
+ def test_read_and_cast_types(reader_values, expected_rows):
85
+ reader = MagicMock()
86
+ reader.__iter__.return_value = reader_values
87
+ schema = {}
88
+ config_format = CsvFormat()
89
+ logger = Mock()
90
+
91
+ parser = CsvParser()
92
+
93
+ expected_rows = expected_rows
94
+ if expected_rows is None:
95
+ with pytest.raises(RecordParseError):
96
+ list(parser._read_and_cast_types(reader, schema, config_format, logger))
97
+ else:
98
+ assert expected_rows == list(parser._read_and_cast_types(reader, schema, config_format, logger))
@@ -14,6 +14,7 @@ from airbyte_cdk.sources.file_based.file_types.csv_parser import CsvParser
14
14
  from airbyte_cdk.sources.file_based.file_types.jsonl_parser import JsonlParser
15
15
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
16
16
  from airbyte_cdk.sources.file_based.schema_validation_policies import AbstractSchemaValidationPolicy
17
+ from airbyte_cdk.sources.file_based.stream.cursor import DefaultFileBasedCursor
17
18
  from unit_tests.sources.file_based.in_memory_files_source import InMemoryFilesStreamReader
18
19
 
19
20
 
@@ -54,6 +55,10 @@ class FailingSchemaValidationPolicy(AbstractSchemaValidationPolicy):
54
55
  return False
55
56
 
56
57
 
58
+ class LowHistoryLimitCursor(DefaultFileBasedCursor):
59
+ DEFAULT_MAX_HISTORY_SIZE = 3
60
+
61
+
57
62
  def make_remote_files(files: List[str]) -> List[RemoteFile]:
58
63
  return [
59
64
  RemoteFile(uri=f, last_modified=datetime.strptime("2023-06-05T03:54:07.000Z", "%Y-%m-%dT%H:%M:%S.%fZ"))
@@ -20,11 +20,12 @@ from airbyte_cdk.models import ConfiguredAirbyteCatalog
20
20
  from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy, DefaultFileBasedAvailabilityStrategy
21
21
  from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
22
22
  from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy, DefaultDiscoveryPolicy
23
- from airbyte_cdk.sources.file_based.file_based_source import DEFAULT_MAX_HISTORY_SIZE, FileBasedSource
23
+ from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
24
24
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
25
25
  from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
26
26
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
27
27
  from airbyte_cdk.sources.file_based.schema_validation_policies import DEFAULT_SCHEMA_VALIDATION_POLICIES, AbstractSchemaValidationPolicy
28
+ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor, DefaultFileBasedCursor
28
29
  from avro import datafile
29
30
  from pydantic import AnyUrl, Field
30
31
 
@@ -41,7 +42,7 @@ class InMemoryFilesSource(FileBasedSource):
41
42
  stream_reader: Optional[AbstractFileBasedStreamReader],
42
43
  catalog: Optional[Mapping[str, Any]],
43
44
  file_write_options: Mapping[str, Any],
44
- max_history_size: int,
45
+ cursor_cls: Optional[AbstractFileBasedCursor],
45
46
  ):
46
47
  # Attributes required for test purposes
47
48
  self.files = files
@@ -59,7 +60,7 @@ class InMemoryFilesSource(FileBasedSource):
59
60
  discovery_policy=discovery_policy or DefaultDiscoveryPolicy(),
60
61
  parsers=parsers,
61
62
  validation_policies=validation_policies or DEFAULT_SCHEMA_VALIDATION_POLICIES,
62
- max_history_size=max_history_size or DEFAULT_MAX_HISTORY_SIZE,
63
+ cursor_cls=cursor_cls or DefaultFileBasedCursor,
63
64
  )
64
65
 
65
66
  def read_catalog(self, catalog_path: str) -> ConfiguredAirbyteCatalog:
@@ -100,7 +101,14 @@ class InMemoryFilesStreamReader(AbstractFileBasedStreamReader):
100
101
  raise NotImplementedError(f"No implementation for file type: {self.file_type}")
101
102
 
102
103
  def _make_csv_file_contents(self, file_name: str) -> IOBase:
104
+
105
+ # Some tests define the csv as an array of strings to make it easier to validate the handling
106
+ # of quotes, delimiter, and escape chars.
107
+ if isinstance(self.files[file_name]["contents"][0], str):
108
+ return io.StringIO("\n".join([s.strip() for s in self.files[file_name]["contents"]]))
109
+
103
110
  fh = io.StringIO()
111
+
104
112
  if self.file_write_options:
105
113
  csv.register_dialect("in_memory_dialect", **self.file_write_options)
106
114
  writer = csv.writer(fh, dialect="in_memory_dialect")