airbyte-cdk 0.72.0__py3-none-any.whl → 6.13.1.dev4106__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (517) hide show
  1. airbyte_cdk/__init__.py +355 -6
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +29 -10
  7. airbyte_cdk/connector.py +24 -24
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
  10. airbyte_cdk/connector_builder/main.py +45 -13
  11. airbyte_cdk/connector_builder/message_grouper.py +189 -50
  12. airbyte_cdk/connector_builder/models.py +3 -2
  13. airbyte_cdk/destinations/__init__.py +4 -3
  14. airbyte_cdk/destinations/destination.py +54 -20
  15. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  16. airbyte_cdk/destinations/vector_db_based/config.py +40 -17
  17. airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
  18. airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
  19. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  20. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  21. airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
  22. airbyte_cdk/entrypoint.py +153 -44
  23. airbyte_cdk/exception_handler.py +21 -3
  24. airbyte_cdk/logger.py +30 -44
  25. airbyte_cdk/models/__init__.py +13 -2
  26. airbyte_cdk/models/airbyte_protocol.py +86 -1
  27. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  28. airbyte_cdk/models/file_transfer_record_message.py +13 -0
  29. airbyte_cdk/models/well_known_types.py +1 -1
  30. airbyte_cdk/sources/__init__.py +5 -1
  31. airbyte_cdk/sources/abstract_source.py +125 -79
  32. airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
  33. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
  34. airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
  35. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
  36. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  37. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
  38. airbyte_cdk/sources/config.py +3 -2
  39. airbyte_cdk/sources/connector_state_manager.py +49 -83
  40. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  41. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
  42. airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
  43. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  44. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  45. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  46. airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
  47. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  48. airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
  49. airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
  50. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
  51. airbyte_cdk/sources/declarative/auth/token.py +28 -10
  52. airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
  53. airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
  54. airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
  55. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  56. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  57. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +421 -0
  58. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
  59. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
  60. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1213 -88
  61. airbyte_cdk/sources/declarative/declarative_source.py +5 -2
  62. airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
  63. airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
  64. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
  65. airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
  66. airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
  67. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  68. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  69. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  70. airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
  71. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
  72. airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
  73. airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
  74. airbyte_cdk/sources/declarative/extractors/record_filter.py +65 -8
  75. airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
  76. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
  77. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  78. airbyte_cdk/sources/declarative/incremental/__init__.py +25 -3
  79. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
  80. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  81. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
  82. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +159 -74
  83. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  84. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  85. airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
  86. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
  87. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
  88. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
  89. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
  90. airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
  91. airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
  92. airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
  93. airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
  94. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  95. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  96. airbyte_cdk/sources/declarative/models/__init__.py +1 -1
  97. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1329 -595
  98. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
  99. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
  100. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
  101. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1699 -226
  102. airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
  103. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  104. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  105. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
  106. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  107. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
  108. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
  109. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
  110. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
  111. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
  112. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
  113. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
  114. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
  115. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
  116. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
  117. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
  118. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
  119. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  120. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
  121. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
  122. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
  123. airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
  124. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
  125. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
  126. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
  127. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
  128. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
  129. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
  130. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
  131. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
  132. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
  133. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
  134. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
  135. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
  136. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  137. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
  138. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
  139. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
  140. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
  141. airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
  142. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  143. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  144. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  145. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  146. airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
  147. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
  148. airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
  149. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +228 -72
  150. airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
  151. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
  152. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
  153. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
  154. airbyte_cdk/sources/declarative/spec/spec.py +12 -5
  155. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
  156. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
  157. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
  158. airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
  159. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  160. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  161. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  162. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  163. airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
  164. airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
  165. airbyte_cdk/sources/declarative/types.py +19 -110
  166. airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
  167. airbyte_cdk/sources/embedded/base_integration.py +16 -5
  168. airbyte_cdk/sources/embedded/catalog.py +16 -4
  169. airbyte_cdk/sources/embedded/runner.py +19 -3
  170. airbyte_cdk/sources/embedded/tools.py +5 -2
  171. airbyte_cdk/sources/file_based/README.md +152 -0
  172. airbyte_cdk/sources/file_based/__init__.py +24 -0
  173. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
  174. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
  175. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
  176. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +58 -10
  177. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  178. airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
  179. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  180. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
  181. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  182. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  183. airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
  184. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
  185. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  186. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  187. airbyte_cdk/sources/file_based/exceptions.py +52 -15
  188. airbyte_cdk/sources/file_based/file_based_source.py +163 -33
  189. airbyte_cdk/sources/file_based/file_based_stream_reader.py +83 -5
  190. airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
  191. airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
  192. airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
  193. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  194. airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
  195. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  196. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
  197. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
  198. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +145 -41
  199. airbyte_cdk/sources/file_based/remote_file.py +1 -1
  200. airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
  201. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
  202. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  203. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  204. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
  205. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
  206. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
  207. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
  208. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
  209. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
  210. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  211. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
  212. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +175 -45
  213. airbyte_cdk/sources/http_logger.py +8 -3
  214. airbyte_cdk/sources/message/__init__.py +7 -1
  215. airbyte_cdk/sources/message/repository.py +18 -4
  216. airbyte_cdk/sources/source.py +42 -38
  217. airbyte_cdk/sources/streams/__init__.py +2 -2
  218. airbyte_cdk/sources/streams/availability_strategy.py +54 -3
  219. airbyte_cdk/sources/streams/call_rate.py +64 -21
  220. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  221. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  222. airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
  223. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  224. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  225. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  226. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  227. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
  228. airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
  229. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
  230. airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
  231. airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
  232. airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
  233. airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
  234. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
  235. airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
  236. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
  237. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  238. airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
  239. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
  240. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
  241. airbyte_cdk/sources/streams/core.py +412 -87
  242. airbyte_cdk/sources/streams/http/__init__.py +2 -1
  243. airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
  244. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  245. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  246. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  247. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  248. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  249. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  250. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  251. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  252. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  253. airbyte_cdk/sources/streams/http/exceptions.py +27 -7
  254. airbyte_cdk/sources/streams/http/http.py +369 -246
  255. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  256. airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
  257. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
  258. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
  259. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
  260. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  261. airbyte_cdk/sources/types.py +154 -0
  262. airbyte_cdk/sources/utils/record_helper.py +36 -21
  263. airbyte_cdk/sources/utils/schema_helpers.py +13 -6
  264. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  265. airbyte_cdk/sources/utils/transform.py +54 -20
  266. airbyte_cdk/sql/_util/hashing.py +34 -0
  267. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  268. airbyte_cdk/sql/constants.py +32 -0
  269. airbyte_cdk/sql/exceptions.py +235 -0
  270. airbyte_cdk/sql/secrets.py +123 -0
  271. airbyte_cdk/sql/shared/__init__.py +15 -0
  272. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  273. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  274. airbyte_cdk/sql/types.py +160 -0
  275. airbyte_cdk/test/catalog_builder.py +70 -18
  276. airbyte_cdk/test/entrypoint_wrapper.py +117 -42
  277. airbyte_cdk/test/mock_http/__init__.py +1 -1
  278. airbyte_cdk/test/mock_http/matcher.py +6 -0
  279. airbyte_cdk/test/mock_http/mocker.py +57 -10
  280. airbyte_cdk/test/mock_http/request.py +19 -3
  281. airbyte_cdk/test/mock_http/response.py +3 -1
  282. airbyte_cdk/test/mock_http/response_builder.py +32 -16
  283. airbyte_cdk/test/state_builder.py +18 -10
  284. airbyte_cdk/test/utils/__init__.py +1 -0
  285. airbyte_cdk/test/utils/data.py +24 -0
  286. airbyte_cdk/test/utils/http_mocking.py +16 -0
  287. airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
  288. airbyte_cdk/test/utils/reading.py +26 -0
  289. airbyte_cdk/utils/__init__.py +2 -1
  290. airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
  291. airbyte_cdk/utils/analytics_message.py +10 -2
  292. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  293. airbyte_cdk/utils/event_timing.py +10 -10
  294. airbyte_cdk/utils/mapping_helpers.py +3 -1
  295. airbyte_cdk/utils/message_utils.py +20 -11
  296. airbyte_cdk/utils/print_buffer.py +75 -0
  297. airbyte_cdk/utils/schema_inferrer.py +198 -28
  298. airbyte_cdk/utils/slice_hasher.py +30 -0
  299. airbyte_cdk/utils/spec_schema_transformations.py +6 -3
  300. airbyte_cdk/utils/stream_status_utils.py +8 -1
  301. airbyte_cdk/utils/traced_exception.py +61 -21
  302. airbyte_cdk-6.13.1.dev4106.dist-info/METADATA +109 -0
  303. airbyte_cdk-6.13.1.dev4106.dist-info/RECORD +349 -0
  304. {airbyte_cdk-0.72.0.dist-info → airbyte_cdk-6.13.1.dev4106.dist-info}/WHEEL +1 -2
  305. airbyte_cdk-6.13.1.dev4106.dist-info/entry_points.txt +3 -0
  306. airbyte_cdk/sources/declarative/create_partial.py +0 -92
  307. airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
  308. airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
  309. airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
  310. airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
  311. airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
  312. airbyte_cdk/sources/deprecated/base_source.py +0 -94
  313. airbyte_cdk/sources/deprecated/client.py +0 -99
  314. airbyte_cdk/sources/singer/__init__.py +0 -8
  315. airbyte_cdk/sources/singer/singer_helpers.py +0 -304
  316. airbyte_cdk/sources/singer/source.py +0 -186
  317. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
  318. airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
  319. airbyte_cdk/sources/streams/http/auth/core.py +0 -29
  320. airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
  321. airbyte_cdk/sources/streams/http/auth/token.py +0 -47
  322. airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
  323. airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
  324. airbyte_cdk/sources/utils/schema_models.py +0 -84
  325. airbyte_cdk-0.72.0.dist-info/METADATA +0 -243
  326. airbyte_cdk-0.72.0.dist-info/RECORD +0 -466
  327. airbyte_cdk-0.72.0.dist-info/top_level.txt +0 -3
  328. source_declarative_manifest/main.py +0 -29
  329. unit_tests/connector_builder/__init__.py +0 -3
  330. unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
  331. unit_tests/connector_builder/test_message_grouper.py +0 -713
  332. unit_tests/connector_builder/utils.py +0 -27
  333. unit_tests/destinations/test_destination.py +0 -243
  334. unit_tests/singer/test_singer_helpers.py +0 -56
  335. unit_tests/singer/test_singer_source.py +0 -112
  336. unit_tests/sources/__init__.py +0 -0
  337. unit_tests/sources/concurrent_source/__init__.py +0 -3
  338. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
  339. unit_tests/sources/declarative/__init__.py +0 -3
  340. unit_tests/sources/declarative/auth/__init__.py +0 -3
  341. unit_tests/sources/declarative/auth/test_oauth.py +0 -331
  342. unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
  343. unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
  344. unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
  345. unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
  346. unit_tests/sources/declarative/checks/__init__.py +0 -3
  347. unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
  348. unit_tests/sources/declarative/decoders/__init__.py +0 -0
  349. unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
  350. unit_tests/sources/declarative/external_component.py +0 -13
  351. unit_tests/sources/declarative/extractors/__init__.py +0 -3
  352. unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
  353. unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
  354. unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
  355. unit_tests/sources/declarative/incremental/__init__.py +0 -0
  356. unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
  357. unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
  358. unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
  359. unit_tests/sources/declarative/interpolation/__init__.py +0 -3
  360. unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
  361. unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
  362. unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
  363. unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
  364. unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
  365. unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
  366. unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
  367. unit_tests/sources/declarative/parsers/__init__.py +0 -3
  368. unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
  369. unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
  370. unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1841
  371. unit_tests/sources/declarative/parsers/testing_components.py +0 -36
  372. unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
  373. unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
  374. unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
  375. unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
  376. unit_tests/sources/declarative/requesters/__init__.py +0 -3
  377. unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
  378. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
  379. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
  380. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
  381. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
  382. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
  383. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
  384. unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
  385. unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
  386. unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
  387. unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
  388. unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
  389. unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
  390. unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
  391. unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
  392. unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
  393. unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
  394. unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
  395. unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
  396. unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
  397. unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
  398. unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
  399. unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
  400. unit_tests/sources/declarative/retrievers/__init__.py +0 -3
  401. unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
  402. unit_tests/sources/declarative/schema/__init__.py +0 -6
  403. unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
  404. unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
  405. unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
  406. unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
  407. unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
  408. unit_tests/sources/declarative/states/__init__.py +0 -3
  409. unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
  410. unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
  411. unit_tests/sources/declarative/test_create_partial.py +0 -83
  412. unit_tests/sources/declarative/test_declarative_stream.py +0 -103
  413. unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
  414. unit_tests/sources/declarative/test_types.py +0 -39
  415. unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
  416. unit_tests/sources/file_based/__init__.py +0 -0
  417. unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
  418. unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
  419. unit_tests/sources/file_based/config/__init__.py +0 -0
  420. unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
  421. unit_tests/sources/file_based/config/test_csv_format.py +0 -34
  422. unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
  423. unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
  424. unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
  425. unit_tests/sources/file_based/file_types/__init__.py +0 -0
  426. unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
  427. unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
  428. unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
  429. unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
  430. unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
  431. unit_tests/sources/file_based/helpers.py +0 -70
  432. unit_tests/sources/file_based/in_memory_files_source.py +0 -211
  433. unit_tests/sources/file_based/scenarios/__init__.py +0 -0
  434. unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
  435. unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
  436. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
  437. unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
  438. unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
  439. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
  440. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
  441. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
  442. unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
  443. unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
  444. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
  445. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
  446. unit_tests/sources/file_based/stream/__init__.py +0 -0
  447. unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
  448. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
  449. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
  450. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
  451. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
  452. unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
  453. unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
  454. unit_tests/sources/file_based/test_scenarios.py +0 -253
  455. unit_tests/sources/file_based/test_schema_helpers.py +0 -346
  456. unit_tests/sources/fixtures/__init__.py +0 -3
  457. unit_tests/sources/fixtures/source_test_fixture.py +0 -153
  458. unit_tests/sources/message/__init__.py +0 -0
  459. unit_tests/sources/message/test_repository.py +0 -153
  460. unit_tests/sources/streams/__init__.py +0 -0
  461. unit_tests/sources/streams/concurrent/__init__.py +0 -3
  462. unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
  463. unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
  464. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
  465. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
  466. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
  467. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
  468. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
  469. unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
  470. unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
  471. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
  472. unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
  473. unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
  474. unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
  475. unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
  476. unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
  477. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
  478. unit_tests/sources/streams/http/__init__.py +0 -0
  479. unit_tests/sources/streams/http/auth/__init__.py +0 -0
  480. unit_tests/sources/streams/http/auth/test_auth.py +0 -173
  481. unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
  482. unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
  483. unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
  484. unit_tests/sources/streams/http/test_http.py +0 -635
  485. unit_tests/sources/streams/test_availability_strategy.py +0 -70
  486. unit_tests/sources/streams/test_call_rate.py +0 -300
  487. unit_tests/sources/streams/test_stream_read.py +0 -405
  488. unit_tests/sources/streams/test_streams_core.py +0 -184
  489. unit_tests/sources/test_abstract_source.py +0 -1442
  490. unit_tests/sources/test_concurrent_source.py +0 -112
  491. unit_tests/sources/test_config.py +0 -92
  492. unit_tests/sources/test_connector_state_manager.py +0 -482
  493. unit_tests/sources/test_http_logger.py +0 -252
  494. unit_tests/sources/test_integration_source.py +0 -86
  495. unit_tests/sources/test_source.py +0 -684
  496. unit_tests/sources/test_source_read.py +0 -460
  497. unit_tests/test/__init__.py +0 -0
  498. unit_tests/test/mock_http/__init__.py +0 -0
  499. unit_tests/test/mock_http/test_matcher.py +0 -53
  500. unit_tests/test/mock_http/test_mocker.py +0 -214
  501. unit_tests/test/mock_http/test_request.py +0 -117
  502. unit_tests/test/mock_http/test_response_builder.py +0 -177
  503. unit_tests/test/test_entrypoint_wrapper.py +0 -240
  504. unit_tests/utils/__init__.py +0 -0
  505. unit_tests/utils/test_datetime_format_inferrer.py +0 -60
  506. unit_tests/utils/test_mapping_helpers.py +0 -54
  507. unit_tests/utils/test_message_utils.py +0 -91
  508. unit_tests/utils/test_rate_limiting.py +0 -26
  509. unit_tests/utils/test_schema_inferrer.py +0 -202
  510. unit_tests/utils/test_secret_utils.py +0 -135
  511. unit_tests/utils/test_stream_status_utils.py +0 -61
  512. unit_tests/utils/test_traced_exception.py +0 -107
  513. /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
  514. {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
  515. {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
  516. {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
  517. {airbyte_cdk-0.72.0.dist-info → airbyte_cdk-6.13.1.dev4106.dist-info}/LICENSE.txt +0 -0
@@ -1,2844 +0,0 @@
1
- #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
- from airbyte_cdk.sources.file_based.stream.concurrent.cursor import FileBasedConcurrentCursor
6
- from airbyte_cdk.test.state_builder import StateBuilder
7
- from unit_tests.sources.file_based.helpers import LowHistoryLimitConcurrentCursor
8
- from unit_tests.sources.file_based.scenarios.file_based_source_builder import FileBasedSourceBuilder
9
- from unit_tests.sources.file_based.scenarios.scenario_builder import IncrementalScenarioConfig, TestScenarioBuilder
10
-
11
- single_csv_input_state_is_earlier_scenario_concurrent = (
12
- TestScenarioBuilder()
13
- .set_name("single_csv_input_state_is_earlier_concurrent")
14
- .set_config(
15
- {
16
- "streams": [
17
- {
18
- "name": "stream1",
19
- "format": {"filetype": "csv"},
20
- "globs": ["*.csv"],
21
- "validation_policy": "Emit Record",
22
- }
23
- ]
24
- }
25
- )
26
- .set_source_builder(
27
- FileBasedSourceBuilder()
28
- .set_files(
29
- {
30
- "a.csv": {
31
- "contents": [
32
- ("col1", "col2"),
33
- ("val11", "val12"),
34
- ("val21", "val22"),
35
- ],
36
- "last_modified": "2023-06-05T03:54:07.000Z",
37
- }
38
- }
39
- )
40
- .set_file_type("csv")
41
- .set_cursor_cls(FileBasedConcurrentCursor)
42
- )
43
- .set_incremental_scenario_config(
44
- IncrementalScenarioConfig(
45
- input_state=StateBuilder()
46
- .with_stream_state(
47
- "stream1",
48
- {
49
- "history": {"some_old_file.csv": "2023-06-01T03:54:07.000000Z"},
50
- "_ab_source_file_last_modified": "2023-06-01T03:54:07.000000Z_some_old_file.csv",
51
- },
52
- )
53
- .build(),
54
- )
55
- )
56
- .set_expected_records(
57
- [
58
- {
59
- "data": {
60
- "col1": "val11",
61
- "col2": "val12",
62
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
63
- "_ab_source_file_url": "a.csv",
64
- },
65
- "stream": "stream1",
66
- },
67
- {
68
- "data": {
69
- "col1": "val21",
70
- "col2": "val22",
71
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
72
- "_ab_source_file_url": "a.csv",
73
- },
74
- "stream": "stream1",
75
- },
76
- {
77
- "history": {"some_old_file.csv": "2023-06-01T03:54:07.000000Z", "a.csv": "2023-06-05T03:54:07.000000Z"},
78
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
79
- },
80
- ]
81
- )
82
- .set_expected_catalog(
83
- {
84
- "streams": [
85
- {
86
- "default_cursor_field": ["_ab_source_file_last_modified"],
87
- "source_defined_cursor": True,
88
- "supported_sync_modes": ["full_refresh", "incremental"],
89
- "json_schema": {
90
- "type": "object",
91
- "properties": {
92
- "col1": {
93
- "type": ["null", "string"],
94
- },
95
- "col2": {
96
- "type": ["null", "string"],
97
- },
98
- "_ab_source_file_last_modified": {"type": "string"},
99
- "_ab_source_file_url": {"type": "string"},
100
- },
101
- },
102
- "name": "stream1",
103
- }
104
- ]
105
- }
106
- )
107
- ).build()
108
-
109
- single_csv_file_is_skipped_if_same_modified_at_as_in_history_concurrent = (
110
- TestScenarioBuilder()
111
- .set_name("single_csv_file_is_skipped_if_same_modified_at_as_in_history_concurrent")
112
- .set_config(
113
- {
114
- "streams": [
115
- {
116
- "name": "stream1",
117
- "format": {"filetype": "csv"},
118
- "globs": ["*.csv"],
119
- "validation_policy": "Emit Record",
120
- }
121
- ]
122
- }
123
- )
124
- .set_source_builder(
125
- FileBasedSourceBuilder()
126
- .set_files(
127
- {
128
- "a.csv": {
129
- "contents": [
130
- ("col1", "col2"),
131
- ("val11", "val12"),
132
- ("val21", "val22"),
133
- ],
134
- "last_modified": "2023-06-05T03:54:07.000Z",
135
- }
136
- }
137
- )
138
- .set_file_type("csv")
139
- .set_cursor_cls(FileBasedConcurrentCursor)
140
- )
141
- .set_incremental_scenario_config(
142
- IncrementalScenarioConfig(
143
- input_state=StateBuilder()
144
- .with_stream_state(
145
- "stream1",
146
- {
147
- "history": {"a.csv": "2023-06-05T03:54:07.000000Z"},
148
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
149
- },
150
- )
151
- .build(),
152
- )
153
- )
154
- .set_expected_records(
155
- [
156
- {
157
- "history": {"a.csv": "2023-06-05T03:54:07.000000Z"},
158
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
159
- }
160
- ]
161
- )
162
- .set_expected_catalog(
163
- {
164
- "streams": [
165
- {
166
- "default_cursor_field": ["_ab_source_file_last_modified"],
167
- "source_defined_cursor": True,
168
- "supported_sync_modes": ["full_refresh", "incremental"],
169
- "json_schema": {
170
- "type": "object",
171
- "properties": {
172
- "col1": {
173
- "type": ["null", "string"],
174
- },
175
- "col2": {
176
- "type": ["null", "string"],
177
- },
178
- "_ab_source_file_last_modified": {"type": "string"},
179
- "_ab_source_file_url": {"type": "string"},
180
- },
181
- },
182
- "name": "stream1",
183
- }
184
- ]
185
- }
186
- )
187
- ).build()
188
-
189
- single_csv_file_is_synced_if_modified_at_is_more_recent_than_in_history_concurrent = (
190
- TestScenarioBuilder()
191
- .set_name("single_csv_file_is_synced_if_modified_at_is_more_recent_than_in_history_concurrent")
192
- .set_config(
193
- {
194
- "streams": [
195
- {
196
- "name": "stream1",
197
- "format": {"filetype": "csv"},
198
- "globs": ["*.csv"],
199
- "validation_policy": "Emit Record",
200
- }
201
- ]
202
- }
203
- )
204
- .set_source_builder(
205
- FileBasedSourceBuilder()
206
- .set_files(
207
- {
208
- "a.csv": {
209
- "contents": [
210
- ("col1", "col2"),
211
- ("val11", "val12"),
212
- ("val21", "val22"),
213
- ],
214
- "last_modified": "2023-06-05T03:54:07.000Z",
215
- }
216
- }
217
- )
218
- .set_file_type("csv")
219
- .set_cursor_cls(FileBasedConcurrentCursor)
220
- )
221
- .set_incremental_scenario_config(
222
- IncrementalScenarioConfig(
223
- input_state=StateBuilder()
224
- .with_stream_state(
225
- "stream1",
226
- {
227
- "history": {"a.csv": "2023-06-01T03:54:07.000000Z"},
228
- "_ab_source_file_last_modified": "2023-06-01T03:54:07.000000Z_a.csv",
229
- },
230
- )
231
- .build(),
232
- )
233
- )
234
- .set_expected_records(
235
- [
236
- {
237
- "data": {
238
- "col1": "val11",
239
- "col2": "val12",
240
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
241
- "_ab_source_file_url": "a.csv",
242
- },
243
- "stream": "stream1",
244
- },
245
- {
246
- "data": {
247
- "col1": "val21",
248
- "col2": "val22",
249
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
250
- "_ab_source_file_url": "a.csv",
251
- },
252
- "stream": "stream1",
253
- },
254
- {
255
- "history": {"a.csv": "2023-06-05T03:54:07.000000Z"},
256
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
257
- },
258
- ]
259
- )
260
- .set_expected_catalog(
261
- {
262
- "streams": [
263
- {
264
- "default_cursor_field": ["_ab_source_file_last_modified"],
265
- "source_defined_cursor": True,
266
- "supported_sync_modes": ["full_refresh", "incremental"],
267
- "json_schema": {
268
- "type": "object",
269
- "properties": {
270
- "col1": {
271
- "type": ["null", "string"],
272
- },
273
- "col2": {
274
- "type": ["null", "string"],
275
- },
276
- "_ab_source_file_last_modified": {"type": "string"},
277
- "_ab_source_file_url": {"type": "string"},
278
- },
279
- },
280
- "name": "stream1",
281
- }
282
- ]
283
- }
284
- )
285
- ).build()
286
-
287
- single_csv_no_input_state_scenario_concurrent = (
288
- TestScenarioBuilder()
289
- .set_name("single_csv_input_state_is_earlier_again_concurrent")
290
- .set_config(
291
- {
292
- "streams": [
293
- {
294
- "name": "stream1",
295
- "format": {"filetype": "csv"},
296
- "globs": ["*.csv"],
297
- "validation_policy": "Emit Record",
298
- }
299
- ]
300
- }
301
- )
302
- .set_source_builder(
303
- FileBasedSourceBuilder()
304
- .set_files(
305
- {
306
- "a.csv": {
307
- "contents": [
308
- ("col1", "col2"),
309
- ("val11", "val12"),
310
- ("val21", "val22"),
311
- ],
312
- "last_modified": "2023-06-05T03:54:07.000000Z",
313
- }
314
- }
315
- )
316
- .set_file_type("csv")
317
- .set_cursor_cls(FileBasedConcurrentCursor)
318
- )
319
- .set_expected_catalog(
320
- {
321
- "streams": [
322
- {
323
- "default_cursor_field": ["_ab_source_file_last_modified"],
324
- "source_defined_cursor": True,
325
- "supported_sync_modes": ["full_refresh", "incremental"],
326
- "json_schema": {
327
- "type": "object",
328
- "properties": {
329
- "col1": {
330
- "type": ["null", "string"],
331
- },
332
- "col2": {
333
- "type": ["null", "string"],
334
- },
335
- "_ab_source_file_last_modified": {"type": "string"},
336
- "_ab_source_file_url": {"type": "string"},
337
- },
338
- },
339
- "name": "stream1",
340
- }
341
- ]
342
- }
343
- )
344
- .set_expected_records(
345
- [
346
- {
347
- "data": {
348
- "col1": "val11",
349
- "col2": "val12",
350
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
351
- "_ab_source_file_url": "a.csv",
352
- },
353
- "stream": "stream1",
354
- },
355
- {
356
- "data": {
357
- "col1": "val21",
358
- "col2": "val22",
359
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
360
- "_ab_source_file_url": "a.csv",
361
- },
362
- "stream": "stream1",
363
- },
364
- {
365
- "history": {"a.csv": "2023-06-05T03:54:07.000000Z"},
366
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
367
- },
368
- ]
369
- )
370
- .set_incremental_scenario_config(
371
- IncrementalScenarioConfig(
372
- input_state=[],
373
- )
374
- )
375
- ).build()
376
-
377
- multi_csv_same_timestamp_scenario_concurrent = (
378
- TestScenarioBuilder()
379
- .set_name("multi_csv_same_timestamp_concurrent")
380
- .set_config(
381
- {
382
- "streams": [
383
- {
384
- "name": "stream1",
385
- "format": {"filetype": "csv"},
386
- "globs": ["*.csv"],
387
- "validation_policy": "Emit Record",
388
- }
389
- ]
390
- }
391
- )
392
- .set_source_builder(
393
- FileBasedSourceBuilder()
394
- .set_files(
395
- {
396
- "a.csv": {
397
- "contents": [
398
- ("col1", "col2"),
399
- ("val11a", "val12a"),
400
- ("val21a", "val22a"),
401
- ],
402
- "last_modified": "2023-06-05T03:54:07.000000Z",
403
- },
404
- "b.csv": {
405
- "contents": [
406
- ("col1", "col2", "col3"),
407
- ("val11b", "val12b", "val13b"),
408
- ("val21b", "val22b", "val23b"),
409
- ],
410
- "last_modified": "2023-06-05T03:54:07.000000Z",
411
- },
412
- }
413
- )
414
- .set_file_type("csv")
415
- .set_cursor_cls(FileBasedConcurrentCursor)
416
- )
417
- .set_expected_catalog(
418
- {
419
- "streams": [
420
- {
421
- "default_cursor_field": ["_ab_source_file_last_modified"],
422
- "json_schema": {
423
- "type": "object",
424
- "properties": {
425
- "col1": {
426
- "type": ["null", "string"],
427
- },
428
- "col2": {
429
- "type": ["null", "string"],
430
- },
431
- "col3": {
432
- "type": ["null", "string"],
433
- },
434
- "_ab_source_file_last_modified": {"type": "string"},
435
- "_ab_source_file_url": {"type": "string"},
436
- },
437
- },
438
- "name": "stream1",
439
- "source_defined_cursor": True,
440
- "supported_sync_modes": ["full_refresh", "incremental"],
441
- }
442
- ]
443
- }
444
- )
445
- .set_expected_records(
446
- [
447
- {
448
- "data": {
449
- "col1": "val11a",
450
- "col2": "val12a",
451
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
452
- "_ab_source_file_url": "a.csv",
453
- },
454
- "stream": "stream1",
455
- },
456
- {
457
- "data": {
458
- "col1": "val21a",
459
- "col2": "val22a",
460
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
461
- "_ab_source_file_url": "a.csv",
462
- },
463
- "stream": "stream1",
464
- },
465
- {
466
- "data": {
467
- "col1": "val11b",
468
- "col2": "val12b",
469
- "col3": "val13b",
470
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
471
- "_ab_source_file_url": "b.csv",
472
- },
473
- "stream": "stream1",
474
- },
475
- {
476
- "data": {
477
- "col1": "val21b",
478
- "col2": "val22b",
479
- "col3": "val23b",
480
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
481
- "_ab_source_file_url": "b.csv",
482
- },
483
- "stream": "stream1",
484
- },
485
- {
486
- "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "b.csv": "2023-06-05T03:54:07.000000Z"},
487
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
488
- },
489
- ]
490
- )
491
- .set_incremental_scenario_config(
492
- IncrementalScenarioConfig(
493
- input_state=[],
494
- )
495
- )
496
- ).build()
497
-
498
- single_csv_input_state_is_later_scenario_concurrent = (
499
- TestScenarioBuilder()
500
- .set_name("single_csv_input_state_is_later_concurrent")
501
- .set_config(
502
- {
503
- "streams": [
504
- {
505
- "name": "stream1",
506
- "format": {"filetype": "csv"},
507
- "globs": ["*.csv"],
508
- "validation_policy": "Emit Record",
509
- }
510
- ]
511
- }
512
- )
513
- .set_source_builder(
514
- FileBasedSourceBuilder()
515
- .set_files(
516
- {
517
- "a.csv": {
518
- "contents": [
519
- ("col1", "col2"),
520
- ("val11", "val12"),
521
- ("val21", "val22"),
522
- ],
523
- "last_modified": "2023-06-05T03:54:07.000000Z",
524
- }
525
- }
526
- )
527
- .set_file_type("csv")
528
- .set_cursor_cls(FileBasedConcurrentCursor)
529
- )
530
- .set_expected_catalog(
531
- {
532
- "streams": [
533
- {
534
- "default_cursor_field": ["_ab_source_file_last_modified"],
535
- "source_defined_cursor": True,
536
- "supported_sync_modes": ["full_refresh", "incremental"],
537
- "json_schema": {
538
- "type": "object",
539
- "properties": {
540
- "col1": {
541
- "type": ["null", "string"],
542
- },
543
- "col2": {
544
- "type": ["null", "string"],
545
- },
546
- "_ab_source_file_last_modified": {"type": "string"},
547
- "_ab_source_file_url": {"type": "string"},
548
- },
549
- },
550
- "name": "stream1",
551
- }
552
- ]
553
- }
554
- )
555
- .set_expected_records(
556
- [
557
- {
558
- "data": {
559
- "col1": "val11",
560
- "col2": "val12",
561
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
562
- "_ab_source_file_url": "a.csv",
563
- },
564
- "stream": "stream1",
565
- },
566
- {
567
- "data": {
568
- "col1": "val21",
569
- "col2": "val22",
570
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
571
- "_ab_source_file_url": "a.csv",
572
- },
573
- "stream": "stream1",
574
- },
575
- {
576
- "history": {
577
- "recent_file.csv": "2023-07-15T23:59:59.000000Z",
578
- "a.csv": "2023-06-05T03:54:07.000000Z",
579
- },
580
- "_ab_source_file_last_modified": "2023-07-15T23:59:59.000000Z_recent_file.csv",
581
- },
582
- ]
583
- )
584
- .set_incremental_scenario_config(
585
- IncrementalScenarioConfig(
586
- input_state=StateBuilder()
587
- .with_stream_state(
588
- "stream1",
589
- {
590
- "history": {"recent_file.csv": "2023-07-15T23:59:59.000000Z"},
591
- "_ab_source_file_last_modified": "2023-07-15T23:59:59.000000Z_recent_file.csv",
592
- },
593
- )
594
- .build(),
595
- )
596
- )
597
- ).build()
598
-
599
- multi_csv_different_timestamps_scenario_concurrent = (
600
- TestScenarioBuilder()
601
- .set_name("multi_csv_stream_different_timestamps_concurrent")
602
- .set_config(
603
- {
604
- "streams": [
605
- {
606
- "name": "stream1",
607
- "format": {"filetype": "csv"},
608
- "globs": ["*.csv"],
609
- "validation_policy": "Emit Record",
610
- }
611
- ]
612
- }
613
- )
614
- .set_source_builder(
615
- FileBasedSourceBuilder()
616
- .set_files(
617
- {
618
- "a.csv": {
619
- "contents": [
620
- ("col1", "col2"),
621
- ("val11a", "val12a"),
622
- ("val21a", "val22a"),
623
- ],
624
- "last_modified": "2023-06-04T03:54:07.000000Z",
625
- },
626
- "b.csv": {
627
- "contents": [
628
- ("col1", "col2", "col3"),
629
- ("val11b", "val12b", "val13b"),
630
- ("val21b", "val22b", "val23b"),
631
- ],
632
- "last_modified": "2023-06-05T03:54:07.000000Z",
633
- },
634
- }
635
- )
636
- .set_file_type("csv")
637
- .set_cursor_cls(FileBasedConcurrentCursor)
638
- )
639
- .set_expected_catalog(
640
- {
641
- "streams": [
642
- {
643
- "default_cursor_field": ["_ab_source_file_last_modified"],
644
- "json_schema": {
645
- "type": "object",
646
- "properties": {
647
- "col1": {
648
- "type": ["null", "string"],
649
- },
650
- "col2": {
651
- "type": ["null", "string"],
652
- },
653
- "col3": {
654
- "type": ["null", "string"],
655
- },
656
- "_ab_source_file_last_modified": {"type": "string"},
657
- "_ab_source_file_url": {"type": "string"},
658
- },
659
- },
660
- "name": "stream1",
661
- "source_defined_cursor": True,
662
- "supported_sync_modes": ["full_refresh", "incremental"],
663
- }
664
- ]
665
- }
666
- )
667
- .set_expected_records(
668
- [
669
- {
670
- "data": {
671
- "col1": "val11a",
672
- "col2": "val12a",
673
- "_ab_source_file_last_modified": "2023-06-04T03:54:07.000000Z",
674
- "_ab_source_file_url": "a.csv",
675
- },
676
- "stream": "stream1",
677
- },
678
- {
679
- "data": {
680
- "col1": "val21a",
681
- "col2": "val22a",
682
- "_ab_source_file_last_modified": "2023-06-04T03:54:07.000000Z",
683
- "_ab_source_file_url": "a.csv",
684
- },
685
- "stream": "stream1",
686
- },
687
- {
688
- "history": {
689
- "a.csv": "2023-06-04T03:54:07.000000Z",
690
- },
691
- "_ab_source_file_last_modified": "2023-06-04T03:54:07.000000Z_a.csv",
692
- },
693
- {
694
- "data": {
695
- "col1": "val11b",
696
- "col2": "val12b",
697
- "col3": "val13b",
698
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
699
- "_ab_source_file_url": "b.csv",
700
- },
701
- "stream": "stream1",
702
- },
703
- {
704
- "data": {
705
- "col1": "val21b",
706
- "col2": "val22b",
707
- "col3": "val23b",
708
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
709
- "_ab_source_file_url": "b.csv",
710
- },
711
- "stream": "stream1",
712
- },
713
- {
714
- "history": {"a.csv": "2023-06-04T03:54:07.000000Z", "b.csv": "2023-06-05T03:54:07.000000Z"},
715
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
716
- },
717
- ]
718
- )
719
- .set_incremental_scenario_config(
720
- IncrementalScenarioConfig(
721
- input_state=[],
722
- )
723
- )
724
- ).build()
725
-
726
- multi_csv_per_timestamp_scenario_concurrent = (
727
- TestScenarioBuilder()
728
- .set_name("multi_csv_per_timestamp_concurrent")
729
- .set_config(
730
- {
731
- "streams": [
732
- {
733
- "name": "stream1",
734
- "format": {"filetype": "csv"},
735
- "globs": ["*.csv"],
736
- "validation_policy": "Emit Record",
737
- }
738
- ]
739
- }
740
- )
741
- .set_source_builder(
742
- FileBasedSourceBuilder()
743
- .set_files(
744
- {
745
- "a.csv": {
746
- "contents": [
747
- ("col1", "col2"),
748
- ("val11a", "val12a"),
749
- ("val21a", "val22a"),
750
- ],
751
- "last_modified": "2023-06-05T03:54:07.000000Z",
752
- },
753
- "b.csv": {
754
- "contents": [
755
- ("col1", "col2", "col3"),
756
- ("val11b", "val12b", "val13b"),
757
- ("val21b", "val22b", "val23b"),
758
- ],
759
- "last_modified": "2023-06-05T03:54:07.000000Z",
760
- },
761
- "c.csv": {
762
- "contents": [
763
- ("col1", "col2", "col3"),
764
- ("val11c", "val12c", "val13c"),
765
- ("val21c", "val22c", "val23c"),
766
- ],
767
- "last_modified": "2023-06-06T03:54:07.000000Z",
768
- },
769
- }
770
- )
771
- .set_file_type("csv")
772
- .set_cursor_cls(FileBasedConcurrentCursor)
773
- )
774
- .set_expected_catalog(
775
- {
776
- "streams": [
777
- {
778
- "default_cursor_field": ["_ab_source_file_last_modified"],
779
- "json_schema": {
780
- "type": "object",
781
- "properties": {
782
- "col1": {
783
- "type": ["null", "string"],
784
- },
785
- "col2": {
786
- "type": ["null", "string"],
787
- },
788
- "col3": {
789
- "type": ["null", "string"],
790
- },
791
- "_ab_source_file_last_modified": {"type": "string"},
792
- "_ab_source_file_url": {"type": "string"},
793
- },
794
- },
795
- "name": "stream1",
796
- "source_defined_cursor": True,
797
- "supported_sync_modes": ["full_refresh", "incremental"],
798
- }
799
- ]
800
- }
801
- )
802
- .set_expected_records(
803
- [
804
- {
805
- "data": {
806
- "col1": "val11a",
807
- "col2": "val12a",
808
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
809
- "_ab_source_file_url": "a.csv",
810
- },
811
- "stream": "stream1",
812
- },
813
- {
814
- "data": {
815
- "col1": "val21a",
816
- "col2": "val22a",
817
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
818
- "_ab_source_file_url": "a.csv",
819
- },
820
- "stream": "stream1",
821
- },
822
- {
823
- "data": {
824
- "col1": "val11b",
825
- "col2": "val12b",
826
- "col3": "val13b",
827
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
828
- "_ab_source_file_url": "b.csv",
829
- },
830
- "stream": "stream1",
831
- },
832
- {
833
- "data": {
834
- "col1": "val21b",
835
- "col2": "val22b",
836
- "col3": "val23b",
837
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
838
- "_ab_source_file_url": "b.csv",
839
- },
840
- "stream": "stream1",
841
- },
842
- {
843
- "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "b.csv": "2023-06-05T03:54:07.000000Z"},
844
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
845
- },
846
- {
847
- "data": {
848
- "col1": "val11c",
849
- "col2": "val12c",
850
- "col3": "val13c",
851
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
852
- "_ab_source_file_url": "c.csv",
853
- },
854
- "stream": "stream1",
855
- },
856
- {
857
- "data": {
858
- "col1": "val21c",
859
- "col2": "val22c",
860
- "col3": "val23c",
861
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
862
- "_ab_source_file_url": "c.csv",
863
- },
864
- "stream": "stream1",
865
- },
866
- {
867
- "history": {
868
- "a.csv": "2023-06-05T03:54:07.000000Z",
869
- "b.csv": "2023-06-05T03:54:07.000000Z",
870
- "c.csv": "2023-06-06T03:54:07.000000Z",
871
- },
872
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
873
- },
874
- ]
875
- )
876
- .set_incremental_scenario_config(
877
- IncrementalScenarioConfig(
878
- input_state=[],
879
- )
880
- )
881
- ).build()
882
-
883
- multi_csv_skip_file_if_already_in_history_concurrent = (
884
- TestScenarioBuilder()
885
- .set_name("skip_files_already_in_history_concurrent")
886
- .set_config(
887
- {
888
- "streams": [
889
- {
890
- "name": "stream1",
891
- "format": {"filetype": "csv"},
892
- "globs": ["*.csv"],
893
- "validation_policy": "Emit Record",
894
- }
895
- ]
896
- }
897
- )
898
- .set_source_builder(
899
- FileBasedSourceBuilder()
900
- .set_files(
901
- {
902
- "a.csv": {
903
- "contents": [
904
- ("col1", "col2"),
905
- ("val11a", "val12a"),
906
- ("val21a", "val22a"),
907
- ],
908
- "last_modified": "2023-06-05T03:54:07.000000Z",
909
- },
910
- "b.csv": {
911
- "contents": [
912
- ("col1", "col2", "col3"),
913
- ("val11b", "val12b", "val13b"),
914
- ("val21b", "val22b", "val23b"),
915
- ],
916
- "last_modified": "2023-06-05T03:54:07.000000Z",
917
- },
918
- "c.csv": {
919
- "contents": [
920
- ("col1", "col2", "col3"),
921
- ("val11c", "val12c", "val13c"),
922
- ("val21c", "val22c", "val23c"),
923
- ],
924
- "last_modified": "2023-06-06T03:54:07.000000Z",
925
- },
926
- }
927
- )
928
- .set_file_type("csv")
929
- .set_cursor_cls(FileBasedConcurrentCursor)
930
- )
931
- .set_expected_catalog(
932
- {
933
- "streams": [
934
- {
935
- "default_cursor_field": ["_ab_source_file_last_modified"],
936
- "json_schema": {
937
- "type": "object",
938
- "properties": {
939
- "col1": {
940
- "type": ["null", "string"],
941
- },
942
- "col2": {
943
- "type": ["null", "string"],
944
- },
945
- "col3": {
946
- "type": ["null", "string"],
947
- },
948
- "_ab_source_file_last_modified": {"type": "string"},
949
- "_ab_source_file_url": {"type": "string"},
950
- },
951
- },
952
- "name": "stream1",
953
- "source_defined_cursor": True,
954
- "supported_sync_modes": ["full_refresh", "incremental"],
955
- }
956
- ]
957
- }
958
- )
959
- .set_expected_records(
960
- [
961
- # {"data": {"col1": "val11a", "col2": "val12a"}, "stream": "stream1"}, # this file is skipped
962
- # {"data": {"col1": "val21a", "col2": "val22a"}, "stream": "stream1"}, # this file is skipped
963
- {
964
- "data": {
965
- "col1": "val11b",
966
- "col2": "val12b",
967
- "col3": "val13b",
968
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
969
- "_ab_source_file_url": "b.csv",
970
- },
971
- "stream": "stream1",
972
- },
973
- {
974
- "data": {
975
- "col1": "val21b",
976
- "col2": "val22b",
977
- "col3": "val23b",
978
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
979
- "_ab_source_file_url": "b.csv",
980
- },
981
- "stream": "stream1",
982
- },
983
- {
984
- "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "b.csv": "2023-06-05T03:54:07.000000Z"},
985
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
986
- },
987
- {
988
- "data": {
989
- "col1": "val11c",
990
- "col2": "val12c",
991
- "col3": "val13c",
992
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
993
- "_ab_source_file_url": "c.csv",
994
- },
995
- "stream": "stream1",
996
- },
997
- {
998
- "data": {
999
- "col1": "val21c",
1000
- "col2": "val22c",
1001
- "col3": "val23c",
1002
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
1003
- "_ab_source_file_url": "c.csv",
1004
- },
1005
- "stream": "stream1",
1006
- },
1007
- {
1008
- "history": {
1009
- "a.csv": "2023-06-05T03:54:07.000000Z",
1010
- "b.csv": "2023-06-05T03:54:07.000000Z",
1011
- "c.csv": "2023-06-06T03:54:07.000000Z",
1012
- },
1013
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
1014
- },
1015
- ]
1016
- )
1017
- .set_incremental_scenario_config(
1018
- IncrementalScenarioConfig(
1019
- input_state=StateBuilder()
1020
- .with_stream_state(
1021
- "stream1",
1022
- {"history": {"a.csv": "2023-06-05T03:54:07.000000Z"}, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv"},
1023
- )
1024
- .build(),
1025
- )
1026
- )
1027
- ).build()
1028
-
1029
- multi_csv_include_missing_files_within_history_range_concurrent_cursor_is_newer = (
1030
- TestScenarioBuilder()
1031
- .set_name("multi_csv_include_missing_files_within_history_range_concurrent_cursor_is_newer")
1032
- .set_config(
1033
- {
1034
- "streams": [
1035
- {
1036
- "name": "stream1",
1037
- "format": {"filetype": "csv"},
1038
- "globs": ["*.csv"],
1039
- "validation_policy": "Emit Record",
1040
- }
1041
- ]
1042
- }
1043
- )
1044
- .set_source_builder(
1045
- FileBasedSourceBuilder()
1046
- .set_files(
1047
- {
1048
- "a.csv": {
1049
- "contents": [
1050
- ("col1", "col2"),
1051
- ("val11a", "val12a"),
1052
- ("val21a", "val22a"),
1053
- ],
1054
- "last_modified": "2023-06-05T03:54:07.000000Z",
1055
- },
1056
- "b.csv": {
1057
- "contents": [
1058
- ("col1", "col2", "col3"),
1059
- ("val11b", "val12b", "val13b"),
1060
- ("val21b", "val22b", "val23b"),
1061
- ],
1062
- "last_modified": "2023-06-05T03:54:07.000000Z",
1063
- },
1064
- "c.csv": {
1065
- "contents": [
1066
- ("col1", "col2", "col3"),
1067
- ("val11c", "val12c", "val13c"),
1068
- ("val21c", "val22c", "val23c"),
1069
- ],
1070
- "last_modified": "2023-06-06T03:54:07.000000Z",
1071
- },
1072
- }
1073
- )
1074
- .set_file_type("csv")
1075
- .set_cursor_cls(FileBasedConcurrentCursor)
1076
- )
1077
- .set_expected_catalog(
1078
- {
1079
- "streams": [
1080
- {
1081
- "default_cursor_field": ["_ab_source_file_last_modified"],
1082
- "json_schema": {
1083
- "type": "object",
1084
- "properties": {
1085
- "col1": {
1086
- "type": ["null", "string"],
1087
- },
1088
- "col2": {
1089
- "type": ["null", "string"],
1090
- },
1091
- "col3": {
1092
- "type": ["null", "string"],
1093
- },
1094
- "_ab_source_file_last_modified": {"type": "string"},
1095
- "_ab_source_file_url": {"type": "string"},
1096
- },
1097
- },
1098
- "name": "stream1",
1099
- "source_defined_cursor": True,
1100
- "supported_sync_modes": ["full_refresh", "incremental"],
1101
- }
1102
- ]
1103
- }
1104
- )
1105
- .set_expected_records(
1106
- [
1107
- # {"data": {"col1": "val11a", "col2": "val12a"}, "stream": "stream1"}, # this file is skipped
1108
- # {"data": {"col1": "val21a", "col2": "val22a"}, "stream": "stream1"}, # this file is skipped
1109
- {
1110
- "data": {
1111
- "col1": "val11b",
1112
- "col2": "val12b",
1113
- "col3": "val13b",
1114
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1115
- "_ab_source_file_url": "b.csv",
1116
- },
1117
- "stream": "stream1",
1118
- },
1119
- {
1120
- "data": {
1121
- "col1": "val21b",
1122
- "col2": "val22b",
1123
- "col3": "val23b",
1124
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1125
- "_ab_source_file_url": "b.csv",
1126
- },
1127
- "stream": "stream1",
1128
- },
1129
- # {"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c"}, "stream": "stream1"}, # this file is skipped
1130
- # {"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c"}, "stream": "stream1"}, # this file is skipped
1131
- {
1132
- "history": {
1133
- "a.csv": "2023-06-05T03:54:07.000000Z",
1134
- "b.csv": "2023-06-05T03:54:07.000000Z",
1135
- "c.csv": "2023-06-06T03:54:07.000000Z",
1136
- },
1137
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
1138
- },
1139
- ]
1140
- )
1141
- .set_incremental_scenario_config(
1142
- IncrementalScenarioConfig(
1143
- input_state=StateBuilder()
1144
- .with_stream_state(
1145
- "stream1",
1146
- {
1147
- "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "c.csv": "2023-06-06T03:54:07.000000Z"},
1148
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
1149
- },
1150
- )
1151
- .build(),
1152
- )
1153
- )
1154
- ).build()
1155
-
1156
- multi_csv_include_missing_files_within_history_range_concurrent_cursor_is_older = (
1157
- TestScenarioBuilder()
1158
- .set_name("multi_csv_include_missing_files_within_history_range_concurrent_cursor_is_older")
1159
- .set_config(
1160
- {
1161
- "streams": [
1162
- {
1163
- "name": "stream1",
1164
- "format": {"filetype": "csv"},
1165
- "globs": ["*.csv"],
1166
- "validation_policy": "Emit Record",
1167
- }
1168
- ]
1169
- }
1170
- )
1171
- .set_source_builder(
1172
- FileBasedSourceBuilder()
1173
- .set_files(
1174
- {
1175
- "a.csv": {
1176
- "contents": [
1177
- ("col1", "col2"),
1178
- ("val11a", "val12a"),
1179
- ("val21a", "val22a"),
1180
- ],
1181
- "last_modified": "2023-06-05T03:54:07.000000Z",
1182
- },
1183
- "b.csv": {
1184
- "contents": [
1185
- ("col1", "col2", "col3"),
1186
- ("val11b", "val12b", "val13b"),
1187
- ("val21b", "val22b", "val23b"),
1188
- ],
1189
- "last_modified": "2023-06-05T03:54:07.000000Z",
1190
- },
1191
- "c.csv": {
1192
- "contents": [
1193
- ("col1", "col2", "col3"),
1194
- ("val11c", "val12c", "val13c"),
1195
- ("val21c", "val22c", "val23c"),
1196
- ],
1197
- "last_modified": "2023-06-06T03:54:07.000000Z",
1198
- },
1199
- }
1200
- )
1201
- .set_file_type("csv")
1202
- .set_cursor_cls(FileBasedConcurrentCursor)
1203
- )
1204
- .set_expected_catalog(
1205
- {
1206
- "streams": [
1207
- {
1208
- "default_cursor_field": ["_ab_source_file_last_modified"],
1209
- "json_schema": {
1210
- "type": "object",
1211
- "properties": {
1212
- "col1": {
1213
- "type": ["null", "string"],
1214
- },
1215
- "col2": {
1216
- "type": ["null", "string"],
1217
- },
1218
- "col3": {
1219
- "type": ["null", "string"],
1220
- },
1221
- "_ab_source_file_last_modified": {"type": "string"},
1222
- "_ab_source_file_url": {"type": "string"},
1223
- },
1224
- },
1225
- "name": "stream1",
1226
- "source_defined_cursor": True,
1227
- "supported_sync_modes": ["full_refresh", "incremental"],
1228
- }
1229
- ]
1230
- }
1231
- )
1232
- .set_expected_records(
1233
- [
1234
- # {"data": {"col1": "val11a", "col2": "val12a"}, "stream": "stream1"}, # this file is skipped
1235
- # {"data": {"col1": "val21a", "col2": "val22a"}, "stream": "stream1"}, # this file is skipped
1236
- {
1237
- "data": {
1238
- "col1": "val11b",
1239
- "col2": "val12b",
1240
- "col3": "val13b",
1241
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1242
- "_ab_source_file_url": "b.csv",
1243
- },
1244
- "stream": "stream1",
1245
- },
1246
- {
1247
- "data": {
1248
- "col1": "val21b",
1249
- "col2": "val22b",
1250
- "col3": "val23b",
1251
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1252
- "_ab_source_file_url": "b.csv",
1253
- },
1254
- "stream": "stream1",
1255
- },
1256
- # {"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c"}, "stream": "stream1"}, # this file is skipped
1257
- # {"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c"}, "stream": "stream1"}, # this file is skipped
1258
- {
1259
- "history": {
1260
- "a.csv": "2023-06-05T03:54:07.000000Z",
1261
- "b.csv": "2023-06-05T03:54:07.000000Z",
1262
- "c.csv": "2023-06-06T03:54:07.000000Z",
1263
- },
1264
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
1265
- },
1266
- ]
1267
- )
1268
- .set_incremental_scenario_config(
1269
- IncrementalScenarioConfig(
1270
- input_state=StateBuilder()
1271
- .with_stream_state(
1272
- "stream1",
1273
- {
1274
- "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "c.csv": "2023-06-06T03:54:07.000000Z"},
1275
- "_ab_source_file_last_modified": "2023-06-03T03:54:07.000000Z_x.csv",
1276
- },
1277
- )
1278
- .build()
1279
- )
1280
- )
1281
- ).build()
1282
-
1283
- multi_csv_remove_old_files_if_history_is_full_scenario_concurrent_cursor_is_newer = (
1284
- TestScenarioBuilder()
1285
- .set_name("multi_csv_remove_old_files_if_history_is_full_scenario_concurrent_cursor_is_newer")
1286
- .set_config(
1287
- {
1288
- "streams": [
1289
- {
1290
- "name": "stream1",
1291
- "format": {"filetype": "csv"},
1292
- "globs": ["*.csv"],
1293
- "validation_policy": "Emit Record",
1294
- }
1295
- ]
1296
- }
1297
- )
1298
- .set_source_builder(
1299
- FileBasedSourceBuilder()
1300
- .set_files(
1301
- {
1302
- "a.csv": {
1303
- "contents": [
1304
- ("col1", "col2"),
1305
- ("val11a", "val12a"),
1306
- ("val21a", "val22a"),
1307
- ],
1308
- "last_modified": "2023-06-06T03:54:07.000000Z",
1309
- },
1310
- "b.csv": {
1311
- "contents": [
1312
- ("col1", "col2", "col3"),
1313
- ("val11b", "val12b", "val13b"),
1314
- ("val21b", "val22b", "val23b"),
1315
- ],
1316
- "last_modified": "2023-06-07T03:54:07.000000Z",
1317
- },
1318
- "c.csv": {
1319
- "contents": [
1320
- ("col1", "col2", "col3"),
1321
- ("val11c", "val12c", "val13c"),
1322
- ("val21c", "val22c", "val23c"),
1323
- ],
1324
- "last_modified": "2023-06-10T03:54:07.000000Z",
1325
- },
1326
- }
1327
- )
1328
- .set_file_type("csv")
1329
- .set_cursor_cls(LowHistoryLimitConcurrentCursor)
1330
- )
1331
- .set_expected_catalog(
1332
- {
1333
- "streams": [
1334
- {
1335
- "default_cursor_field": ["_ab_source_file_last_modified"],
1336
- "json_schema": {
1337
- "type": "object",
1338
- "properties": {
1339
- "col1": {
1340
- "type": ["null", "string"],
1341
- },
1342
- "col2": {
1343
- "type": ["null", "string"],
1344
- },
1345
- "col3": {
1346
- "type": ["null", "string"],
1347
- },
1348
- "_ab_source_file_last_modified": {"type": "string"},
1349
- "_ab_source_file_url": {"type": "string"},
1350
- },
1351
- },
1352
- "name": "stream1",
1353
- "source_defined_cursor": True,
1354
- "supported_sync_modes": ["full_refresh", "incremental"],
1355
- }
1356
- ]
1357
- }
1358
- )
1359
- .set_expected_records(
1360
- [
1361
- {
1362
- "data": {
1363
- "col1": "val11a",
1364
- "col2": "val12a",
1365
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
1366
- "_ab_source_file_url": "a.csv",
1367
- },
1368
- "stream": "stream1",
1369
- },
1370
- {
1371
- "data": {
1372
- "col1": "val21a",
1373
- "col2": "val22a",
1374
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
1375
- "_ab_source_file_url": "a.csv",
1376
- },
1377
- "stream": "stream1",
1378
- },
1379
- {
1380
- "history": {
1381
- "very_old_file.csv": "2023-06-02T03:54:07.000000Z",
1382
- "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1383
- "a.csv": "2023-06-06T03:54:07.000000Z",
1384
- },
1385
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_old_file_same_timestamp_as_a.csv",
1386
- },
1387
- {
1388
- "data": {
1389
- "col1": "val11b",
1390
- "col2": "val12b",
1391
- "col3": "val13b",
1392
- "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z",
1393
- "_ab_source_file_url": "b.csv",
1394
- },
1395
- "stream": "stream1",
1396
- },
1397
- {
1398
- "data": {
1399
- "col1": "val21b",
1400
- "col2": "val22b",
1401
- "col3": "val23b",
1402
- "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z",
1403
- "_ab_source_file_url": "b.csv",
1404
- },
1405
- "stream": "stream1",
1406
- },
1407
- {
1408
- "history": {
1409
- "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1410
- "a.csv": "2023-06-06T03:54:07.000000Z",
1411
- "b.csv": "2023-06-07T03:54:07.000000Z",
1412
- },
1413
- "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z_b.csv",
1414
- },
1415
- {
1416
- "data": {
1417
- "col1": "val11c",
1418
- "col2": "val12c",
1419
- "col3": "val13c",
1420
- "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z",
1421
- "_ab_source_file_url": "c.csv",
1422
- },
1423
- "stream": "stream1",
1424
- },
1425
- {
1426
- "data": {
1427
- "col1": "val21c",
1428
- "col2": "val22c",
1429
- "col3": "val23c",
1430
- "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z",
1431
- "_ab_source_file_url": "c.csv",
1432
- },
1433
- "stream": "stream1",
1434
- },
1435
- {
1436
- "history": {
1437
- "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1438
- "b.csv": "2023-06-07T03:54:07.000000Z",
1439
- "c.csv": "2023-06-10T03:54:07.000000Z",
1440
- },
1441
- "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z_c.csv",
1442
- },
1443
- ]
1444
- )
1445
- .set_incremental_scenario_config(
1446
- IncrementalScenarioConfig(
1447
- input_state=StateBuilder()
1448
- .with_stream_state(
1449
- "stream1",
1450
- {
1451
- "history": {
1452
- "very_very_old_file.csv": "2023-06-01T03:54:07.000000Z",
1453
- "very_old_file.csv": "2023-06-02T03:54:07.000000Z",
1454
- "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1455
- },
1456
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_old_file_same_timestamp_as_a.csv",
1457
- },
1458
- )
1459
- .build(),
1460
- )
1461
- )
1462
- ).build()
1463
-
1464
- multi_csv_remove_old_files_if_history_is_full_scenario_concurrent_cursor_is_older = (
1465
- TestScenarioBuilder()
1466
- .set_name("multi_csv_remove_old_files_if_history_is_full_scenario_concurrent_cursor_is_older")
1467
- .set_config(
1468
- {
1469
- "streams": [
1470
- {
1471
- "name": "stream1",
1472
- "format": {"filetype": "csv"},
1473
- "globs": ["*.csv"],
1474
- "validation_policy": "Emit Record",
1475
- }
1476
- ]
1477
- }
1478
- )
1479
- .set_source_builder(
1480
- FileBasedSourceBuilder()
1481
- .set_files(
1482
- {
1483
- "a.csv": {
1484
- "contents": [
1485
- ("col1", "col2"),
1486
- ("val11a", "val12a"),
1487
- ("val21a", "val22a"),
1488
- ],
1489
- "last_modified": "2023-06-06T03:54:07.000000Z",
1490
- },
1491
- "b.csv": {
1492
- "contents": [
1493
- ("col1", "col2", "col3"),
1494
- ("val11b", "val12b", "val13b"),
1495
- ("val21b", "val22b", "val23b"),
1496
- ],
1497
- "last_modified": "2023-06-07T03:54:07.000000Z",
1498
- },
1499
- "c.csv": {
1500
- "contents": [
1501
- ("col1", "col2", "col3"),
1502
- ("val11c", "val12c", "val13c"),
1503
- ("val21c", "val22c", "val23c"),
1504
- ],
1505
- "last_modified": "2023-06-10T03:54:07.000000Z",
1506
- },
1507
- }
1508
- )
1509
- .set_file_type("csv")
1510
- .set_cursor_cls(LowHistoryLimitConcurrentCursor)
1511
- )
1512
- .set_expected_catalog(
1513
- {
1514
- "streams": [
1515
- {
1516
- "default_cursor_field": ["_ab_source_file_last_modified"],
1517
- "json_schema": {
1518
- "type": "object",
1519
- "properties": {
1520
- "col1": {
1521
- "type": ["null", "string"],
1522
- },
1523
- "col2": {
1524
- "type": ["null", "string"],
1525
- },
1526
- "col3": {
1527
- "type": ["null", "string"],
1528
- },
1529
- "_ab_source_file_last_modified": {"type": "string"},
1530
- "_ab_source_file_url": {"type": "string"},
1531
- },
1532
- },
1533
- "name": "stream1",
1534
- "source_defined_cursor": True,
1535
- "supported_sync_modes": ["full_refresh", "incremental"],
1536
- }
1537
- ]
1538
- }
1539
- )
1540
- .set_expected_records(
1541
- [
1542
- {
1543
- "data": {
1544
- "col1": "val11a",
1545
- "col2": "val12a",
1546
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
1547
- "_ab_source_file_url": "a.csv",
1548
- },
1549
- "stream": "stream1",
1550
- },
1551
- {
1552
- "data": {
1553
- "col1": "val21a",
1554
- "col2": "val22a",
1555
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
1556
- "_ab_source_file_url": "a.csv",
1557
- },
1558
- "stream": "stream1",
1559
- },
1560
- {
1561
- "history": {
1562
- "very_old_file.csv": "2023-06-02T03:54:07.000000Z",
1563
- "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1564
- "a.csv": "2023-06-06T03:54:07.000000Z",
1565
- },
1566
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_old_file_same_timestamp_as_a.csv",
1567
- },
1568
- {
1569
- "data": {
1570
- "col1": "val11b",
1571
- "col2": "val12b",
1572
- "col3": "val13b",
1573
- "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z",
1574
- "_ab_source_file_url": "b.csv",
1575
- },
1576
- "stream": "stream1",
1577
- },
1578
- {
1579
- "data": {
1580
- "col1": "val21b",
1581
- "col2": "val22b",
1582
- "col3": "val23b",
1583
- "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z",
1584
- "_ab_source_file_url": "b.csv",
1585
- },
1586
- "stream": "stream1",
1587
- },
1588
- {
1589
- "history": {
1590
- "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1591
- "a.csv": "2023-06-06T03:54:07.000000Z",
1592
- "b.csv": "2023-06-07T03:54:07.000000Z",
1593
- },
1594
- "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z_b.csv",
1595
- },
1596
- {
1597
- "data": {
1598
- "col1": "val11c",
1599
- "col2": "val12c",
1600
- "col3": "val13c",
1601
- "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z",
1602
- "_ab_source_file_url": "c.csv",
1603
- },
1604
- "stream": "stream1",
1605
- },
1606
- {
1607
- "data": {
1608
- "col1": "val21c",
1609
- "col2": "val22c",
1610
- "col3": "val23c",
1611
- "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z",
1612
- "_ab_source_file_url": "c.csv",
1613
- },
1614
- "stream": "stream1",
1615
- },
1616
- {
1617
- "history": {
1618
- "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1619
- "b.csv": "2023-06-07T03:54:07.000000Z",
1620
- "c.csv": "2023-06-10T03:54:07.000000Z",
1621
- },
1622
- "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z_c.csv",
1623
- },
1624
- ]
1625
- )
1626
- .set_incremental_scenario_config(
1627
- IncrementalScenarioConfig(
1628
- input_state=StateBuilder()
1629
- .with_stream_state(
1630
- "stream1",
1631
- {
1632
- "history": {
1633
- "very_very_old_file.csv": "2023-06-01T03:54:07.000000Z",
1634
- "very_old_file.csv": "2023-06-02T03:54:07.000000Z",
1635
- "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1636
- },
1637
- "_ab_source_file_last_modified": "2023-05-01T03:54:07.000000Z_very_very_very_old_file.csv",
1638
- },
1639
- )
1640
- .build(),
1641
- )
1642
- )
1643
- ).build()
1644
-
1645
- multi_csv_same_timestamp_more_files_than_history_size_scenario_concurrent_cursor_is_newer = (
1646
- TestScenarioBuilder()
1647
- .set_name("multi_csv_same_timestamp_more_files_than_history_size_scenario_concurrent_cursor_is_newer")
1648
- .set_config(
1649
- {
1650
- "streams": [
1651
- {
1652
- "name": "stream1",
1653
- "format": {"filetype": "csv"},
1654
- "globs": ["*.csv"],
1655
- "validation_policy": "Emit Record",
1656
- "days_to_sync_if_history_is_full": 3,
1657
- }
1658
- ]
1659
- }
1660
- )
1661
- .set_source_builder(
1662
- FileBasedSourceBuilder()
1663
- .set_files(
1664
- {
1665
- "b.csv": {
1666
- "contents": [
1667
- ("col1", "col2", "col3"),
1668
- ("val11b", "val12b", "val13b"),
1669
- ("val21b", "val22b", "val23b"),
1670
- ],
1671
- "last_modified": "2023-06-05T03:54:07.000000Z",
1672
- },
1673
- "a.csv": {
1674
- "contents": [
1675
- ("col1", "col2"),
1676
- ("val11a", "val12a"),
1677
- ("val21a", "val22a"),
1678
- ],
1679
- "last_modified": "2023-06-05T03:54:07.000000Z",
1680
- },
1681
- "c.csv": {
1682
- "contents": [
1683
- ("col1", "col2", "col3"),
1684
- ("val11c", "val12c", "val13c"),
1685
- ("val21c", "val22c", "val23c"),
1686
- ],
1687
- "last_modified": "2023-06-05T03:54:07.000000Z",
1688
- },
1689
- "d.csv": {
1690
- "contents": [
1691
- ("col1", "col2", "col3"),
1692
- ("val11d", "val12d", "val13d"),
1693
- ("val21d", "val22d", "val23d"),
1694
- ],
1695
- "last_modified": "2023-06-05T03:54:07.000000Z",
1696
- },
1697
- }
1698
- )
1699
- .set_file_type("csv")
1700
- .set_cursor_cls(LowHistoryLimitConcurrentCursor)
1701
- )
1702
- .set_expected_catalog(
1703
- {
1704
- "streams": [
1705
- {
1706
- "default_cursor_field": ["_ab_source_file_last_modified"],
1707
- "json_schema": {
1708
- "type": "object",
1709
- "properties": {
1710
- "col1": {
1711
- "type": ["null", "string"],
1712
- },
1713
- "col2": {
1714
- "type": ["null", "string"],
1715
- },
1716
- "col3": {
1717
- "type": ["null", "string"],
1718
- },
1719
- "_ab_source_file_last_modified": {"type": "string"},
1720
- "_ab_source_file_url": {"type": "string"},
1721
- },
1722
- },
1723
- "name": "stream1",
1724
- "source_defined_cursor": True,
1725
- "supported_sync_modes": ["full_refresh", "incremental"],
1726
- }
1727
- ]
1728
- }
1729
- )
1730
- .set_expected_records(
1731
- [
1732
- {
1733
- "data": {
1734
- "col1": "val11a",
1735
- "col2": "val12a",
1736
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1737
- "_ab_source_file_url": "a.csv",
1738
- },
1739
- "stream": "stream1",
1740
- },
1741
- {
1742
- "data": {
1743
- "col1": "val21a",
1744
- "col2": "val22a",
1745
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1746
- "_ab_source_file_url": "a.csv",
1747
- },
1748
- "stream": "stream1",
1749
- },
1750
- {
1751
- "data": {
1752
- "col1": "val11b",
1753
- "col2": "val12b",
1754
- "col3": "val13b",
1755
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1756
- "_ab_source_file_url": "b.csv",
1757
- },
1758
- "stream": "stream1",
1759
- },
1760
- {
1761
- "data": {
1762
- "col1": "val21b",
1763
- "col2": "val22b",
1764
- "col3": "val23b",
1765
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1766
- "_ab_source_file_url": "b.csv",
1767
- },
1768
- "stream": "stream1",
1769
- },
1770
- {
1771
- "data": {
1772
- "col1": "val11c",
1773
- "col2": "val12c",
1774
- "col3": "val13c",
1775
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1776
- "_ab_source_file_url": "c.csv",
1777
- },
1778
- "stream": "stream1",
1779
- },
1780
- {
1781
- "data": {
1782
- "col1": "val21c",
1783
- "col2": "val22c",
1784
- "col3": "val23c",
1785
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1786
- "_ab_source_file_url": "c.csv",
1787
- },
1788
- "stream": "stream1",
1789
- },
1790
- {
1791
- "data": {
1792
- "col1": "val11d",
1793
- "col2": "val12d",
1794
- "col3": "val13d",
1795
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1796
- "_ab_source_file_url": "d.csv",
1797
- },
1798
- "stream": "stream1",
1799
- },
1800
- {
1801
- "data": {
1802
- "col1": "val21d",
1803
- "col2": "val22d",
1804
- "col3": "val23d",
1805
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1806
- "_ab_source_file_url": "d.csv",
1807
- },
1808
- "stream": "stream1",
1809
- },
1810
- {
1811
- "history": {
1812
- "b.csv": "2023-06-05T03:54:07.000000Z",
1813
- "c.csv": "2023-06-05T03:54:07.000000Z",
1814
- "d.csv": "2023-06-05T03:54:07.000000Z",
1815
- },
1816
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
1817
- },
1818
- ]
1819
- )
1820
- .set_incremental_scenario_config(
1821
- IncrementalScenarioConfig(
1822
- input_state=[],
1823
- )
1824
- )
1825
- ).build()
1826
-
1827
- multi_csv_same_timestamp_more_files_than_history_size_scenario_concurrent_cursor_is_older = (
1828
- TestScenarioBuilder()
1829
- .set_name("multi_csv_same_timestamp_more_files_than_history_size_scenario_concurrent_cursor_is_older")
1830
- .set_config(
1831
- {
1832
- "streams": [
1833
- {
1834
- "name": "stream1",
1835
- "format": {"filetype": "csv"},
1836
- "globs": ["*.csv"],
1837
- "validation_policy": "Emit Record",
1838
- "days_to_sync_if_history_is_full": 3,
1839
- }
1840
- ]
1841
- }
1842
- )
1843
- .set_source_builder(
1844
- FileBasedSourceBuilder()
1845
- .set_files(
1846
- {
1847
- "b.csv": {
1848
- "contents": [
1849
- ("col1", "col2", "col3"),
1850
- ("val11b", "val12b", "val13b"),
1851
- ("val21b", "val22b", "val23b"),
1852
- ],
1853
- "last_modified": "2023-06-05T03:54:07.000000Z",
1854
- },
1855
- "a.csv": {
1856
- "contents": [
1857
- ("col1", "col2"),
1858
- ("val11a", "val12a"),
1859
- ("val21a", "val22a"),
1860
- ],
1861
- "last_modified": "2023-06-05T03:54:07.000000Z",
1862
- },
1863
- "c.csv": {
1864
- "contents": [
1865
- ("col1", "col2", "col3"),
1866
- ("val11c", "val12c", "val13c"),
1867
- ("val21c", "val22c", "val23c"),
1868
- ],
1869
- "last_modified": "2023-06-05T03:54:07.000000Z",
1870
- },
1871
- "d.csv": {
1872
- "contents": [
1873
- ("col1", "col2", "col3"),
1874
- ("val11d", "val12d", "val13d"),
1875
- ("val21d", "val22d", "val23d"),
1876
- ],
1877
- "last_modified": "2023-06-05T03:54:07.000000Z",
1878
- },
1879
- }
1880
- )
1881
- .set_file_type("csv")
1882
- .set_cursor_cls(LowHistoryLimitConcurrentCursor)
1883
- )
1884
- .set_expected_catalog(
1885
- {
1886
- "streams": [
1887
- {
1888
- "default_cursor_field": ["_ab_source_file_last_modified"],
1889
- "json_schema": {
1890
- "type": "object",
1891
- "properties": {
1892
- "col1": {
1893
- "type": ["null", "string"],
1894
- },
1895
- "col2": {
1896
- "type": ["null", "string"],
1897
- },
1898
- "col3": {
1899
- "type": ["null", "string"],
1900
- },
1901
- "_ab_source_file_last_modified": {"type": "string"},
1902
- "_ab_source_file_url": {"type": "string"},
1903
- },
1904
- },
1905
- "name": "stream1",
1906
- "source_defined_cursor": True,
1907
- "supported_sync_modes": ["full_refresh", "incremental"],
1908
- }
1909
- ]
1910
- }
1911
- )
1912
- .set_expected_records(
1913
- [
1914
- {
1915
- "data": {
1916
- "col1": "val11a",
1917
- "col2": "val12a",
1918
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1919
- "_ab_source_file_url": "a.csv",
1920
- },
1921
- "stream": "stream1",
1922
- },
1923
- {
1924
- "data": {
1925
- "col1": "val21a",
1926
- "col2": "val22a",
1927
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1928
- "_ab_source_file_url": "a.csv",
1929
- },
1930
- "stream": "stream1",
1931
- },
1932
- {
1933
- "data": {
1934
- "col1": "val11b",
1935
- "col2": "val12b",
1936
- "col3": "val13b",
1937
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1938
- "_ab_source_file_url": "b.csv",
1939
- },
1940
- "stream": "stream1",
1941
- },
1942
- {
1943
- "data": {
1944
- "col1": "val21b",
1945
- "col2": "val22b",
1946
- "col3": "val23b",
1947
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1948
- "_ab_source_file_url": "b.csv",
1949
- },
1950
- "stream": "stream1",
1951
- },
1952
- {
1953
- "data": {
1954
- "col1": "val11c",
1955
- "col2": "val12c",
1956
- "col3": "val13c",
1957
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1958
- "_ab_source_file_url": "c.csv",
1959
- },
1960
- "stream": "stream1",
1961
- },
1962
- {
1963
- "data": {
1964
- "col1": "val21c",
1965
- "col2": "val22c",
1966
- "col3": "val23c",
1967
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1968
- "_ab_source_file_url": "c.csv",
1969
- },
1970
- "stream": "stream1",
1971
- },
1972
- {
1973
- "data": {
1974
- "col1": "val11d",
1975
- "col2": "val12d",
1976
- "col3": "val13d",
1977
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1978
- "_ab_source_file_url": "d.csv",
1979
- },
1980
- "stream": "stream1",
1981
- },
1982
- {
1983
- "data": {
1984
- "col1": "val21d",
1985
- "col2": "val22d",
1986
- "col3": "val23d",
1987
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1988
- "_ab_source_file_url": "d.csv",
1989
- },
1990
- "stream": "stream1",
1991
- },
1992
- {
1993
- "history": {
1994
- "b.csv": "2023-06-05T03:54:07.000000Z",
1995
- "c.csv": "2023-06-05T03:54:07.000000Z",
1996
- "d.csv": "2023-06-05T03:54:07.000000Z",
1997
- },
1998
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
1999
- },
2000
- ]
2001
- )
2002
- .set_incremental_scenario_config(
2003
- IncrementalScenarioConfig(
2004
- input_state=[],
2005
- )
2006
- )
2007
- ).build()
2008
-
2009
- multi_csv_sync_recent_files_if_history_is_incomplete_scenario_concurrent_cursor_is_older = (
2010
- TestScenarioBuilder()
2011
- .set_name("multi_csv_sync_recent_files_if_history_is_incomplete_scenario_concurrent_cursor_is_older")
2012
- .set_config(
2013
- {
2014
- "streams": [
2015
- {
2016
- "name": "stream1",
2017
- "format": {"filetype": "csv"},
2018
- "globs": ["*.csv"],
2019
- "validation_policy": "Emit Record",
2020
- "days_to_sync_if_history_is_full": 3,
2021
- }
2022
- ]
2023
- }
2024
- )
2025
- .set_source_builder(
2026
- FileBasedSourceBuilder()
2027
- .set_files(
2028
- {
2029
- "a.csv": {
2030
- "contents": [
2031
- ("col1", "col2"),
2032
- ("val11a", "val12a"),
2033
- ("val21a", "val22a"),
2034
- ],
2035
- "last_modified": "2023-06-05T03:54:07.000000Z",
2036
- },
2037
- "b.csv": {
2038
- "contents": [
2039
- ("col1", "col2", "col3"),
2040
- ("val11b", "val12b", "val13b"),
2041
- ("val21b", "val22b", "val23b"),
2042
- ],
2043
- "last_modified": "2023-06-05T03:54:07.000000Z",
2044
- },
2045
- "c.csv": {
2046
- "contents": [
2047
- ("col1", "col2", "col3"),
2048
- ("val11c", "val12c", "val13c"),
2049
- ("val21c", "val22c", "val23c"),
2050
- ],
2051
- "last_modified": "2023-06-05T03:54:07.000000Z",
2052
- },
2053
- "d.csv": {
2054
- "contents": [
2055
- ("col1", "col2", "col3"),
2056
- ("val11d", "val12d", "val13d"),
2057
- ("val21d", "val22d", "val23d"),
2058
- ],
2059
- "last_modified": "2023-06-05T03:54:07.000000Z",
2060
- },
2061
- }
2062
- )
2063
- .set_cursor_cls(LowHistoryLimitConcurrentCursor)
2064
- .set_file_type("csv")
2065
- )
2066
- .set_expected_catalog(
2067
- {
2068
- "streams": [
2069
- {
2070
- "default_cursor_field": ["_ab_source_file_last_modified"],
2071
- "json_schema": {
2072
- "type": "object",
2073
- "properties": {
2074
- "col1": {
2075
- "type": ["null", "string"],
2076
- },
2077
- "col2": {
2078
- "type": ["null", "string"],
2079
- },
2080
- "col3": {
2081
- "type": ["null", "string"],
2082
- },
2083
- "_ab_source_file_last_modified": {"type": "string"},
2084
- "_ab_source_file_url": {"type": "string"},
2085
- },
2086
- },
2087
- "name": "stream1",
2088
- "source_defined_cursor": True,
2089
- "supported_sync_modes": ["full_refresh", "incremental"],
2090
- }
2091
- ]
2092
- }
2093
- )
2094
- .set_expected_records(
2095
- [
2096
- {
2097
- "history": {
2098
- "b.csv": "2023-06-05T03:54:07.000000Z",
2099
- "c.csv": "2023-06-05T03:54:07.000000Z",
2100
- "d.csv": "2023-06-05T03:54:07.000000Z",
2101
- },
2102
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
2103
- }
2104
- ]
2105
- )
2106
- .set_incremental_scenario_config(
2107
- IncrementalScenarioConfig(
2108
- input_state=StateBuilder()
2109
- .with_stream_state(
2110
- "stream1",
2111
- {
2112
- "history": {
2113
- "b.csv": "2023-06-05T03:54:07.000000Z",
2114
- "c.csv": "2023-06-05T03:54:07.000000Z",
2115
- "d.csv": "2023-06-05T03:54:07.000000Z",
2116
- },
2117
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
2118
- },
2119
- )
2120
- .build(),
2121
- )
2122
- )
2123
- ).build()
2124
-
2125
- multi_csv_sync_recent_files_if_history_is_incomplete_scenario_concurrent_cursor_is_newer = (
2126
- TestScenarioBuilder()
2127
- .set_name("multi_csv_sync_recent_files_if_history_is_incomplete_scenario_concurrent_cursor_is_newer")
2128
- .set_config(
2129
- {
2130
- "streams": [
2131
- {
2132
- "name": "stream1",
2133
- "format": {"filetype": "csv"},
2134
- "globs": ["*.csv"],
2135
- "validation_policy": "Emit Record",
2136
- "days_to_sync_if_history_is_full": 3,
2137
- }
2138
- ]
2139
- }
2140
- )
2141
- .set_source_builder(
2142
- FileBasedSourceBuilder()
2143
- .set_files(
2144
- {
2145
- "a.csv": {
2146
- "contents": [
2147
- ("col1", "col2"),
2148
- ("val11a", "val12a"),
2149
- ("val21a", "val22a"),
2150
- ],
2151
- "last_modified": "2023-06-05T03:54:07.000000Z",
2152
- },
2153
- "b.csv": {
2154
- "contents": [
2155
- ("col1", "col2", "col3"),
2156
- ("val11b", "val12b", "val13b"),
2157
- ("val21b", "val22b", "val23b"),
2158
- ],
2159
- "last_modified": "2023-06-05T03:54:07.000000Z",
2160
- },
2161
- "c.csv": {
2162
- "contents": [
2163
- ("col1", "col2", "col3"),
2164
- ("val11c", "val12c", "val13c"),
2165
- ("val21c", "val22c", "val23c"),
2166
- ],
2167
- "last_modified": "2023-06-05T03:54:07.000000Z",
2168
- },
2169
- "d.csv": {
2170
- "contents": [
2171
- ("col1", "col2", "col3"),
2172
- ("val11d", "val12d", "val13d"),
2173
- ("val21d", "val22d", "val23d"),
2174
- ],
2175
- "last_modified": "2023-06-05T03:54:07.000000Z",
2176
- },
2177
- }
2178
- )
2179
- .set_cursor_cls(LowHistoryLimitConcurrentCursor)
2180
- .set_file_type("csv")
2181
- )
2182
- .set_expected_catalog(
2183
- {
2184
- "streams": [
2185
- {
2186
- "default_cursor_field": ["_ab_source_file_last_modified"],
2187
- "json_schema": {
2188
- "type": "object",
2189
- "properties": {
2190
- "col1": {
2191
- "type": ["null", "string"],
2192
- },
2193
- "col2": {
2194
- "type": ["null", "string"],
2195
- },
2196
- "col3": {
2197
- "type": ["null", "string"],
2198
- },
2199
- "_ab_source_file_last_modified": {"type": "string"},
2200
- "_ab_source_file_url": {"type": "string"},
2201
- },
2202
- },
2203
- "name": "stream1",
2204
- "source_defined_cursor": True,
2205
- "supported_sync_modes": ["full_refresh", "incremental"],
2206
- }
2207
- ]
2208
- }
2209
- )
2210
- .set_expected_records(
2211
- [
2212
- {
2213
- "history": {
2214
- "b.csv": "2023-06-05T03:54:07.000000Z",
2215
- "c.csv": "2023-06-05T03:54:07.000000Z",
2216
- "d.csv": "2023-06-05T03:54:07.000000Z",
2217
- },
2218
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
2219
- }
2220
- ]
2221
- )
2222
- .set_incremental_scenario_config(
2223
- IncrementalScenarioConfig(
2224
- input_state=StateBuilder()
2225
- .with_stream_state(
2226
- "stream1",
2227
- {
2228
- "history": {
2229
- "b.csv": "2023-06-05T03:54:07.000000Z",
2230
- "c.csv": "2023-06-05T03:54:07.000000Z",
2231
- "d.csv": "2023-06-05T03:54:07.000000Z",
2232
- },
2233
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
2234
- },
2235
- )
2236
- .build(),
2237
- )
2238
- )
2239
- ).build()
2240
-
2241
-
2242
- multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_timestamps_scenario_concurrent_cursor_is_older = (
2243
- TestScenarioBuilder()
2244
- .set_name("multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_timestamps_scenario_concurrent_cursor_is_older")
2245
- .set_config(
2246
- {
2247
- "streams": [
2248
- {
2249
- "name": "stream1",
2250
- "format": {"filetype": "csv"},
2251
- "globs": ["*.csv"],
2252
- "validation_policy": "Emit Record",
2253
- "days_to_sync_if_history_is_full": 3,
2254
- }
2255
- ]
2256
- }
2257
- )
2258
- .set_source_builder(
2259
- FileBasedSourceBuilder()
2260
- .set_files(
2261
- {
2262
- "a.csv": {
2263
- "contents": [
2264
- ("col1", "col2"),
2265
- ("val11a", "val12a"),
2266
- ("val21a", "val22a"),
2267
- ],
2268
- "last_modified": "2023-06-05T03:54:07.000000Z",
2269
- },
2270
- "b.csv": {
2271
- "contents": [
2272
- ("col1", "col2", "col3"),
2273
- ("val11b", "val12b", "val13b"),
2274
- ("val21b", "val22b", "val23b"),
2275
- ],
2276
- "last_modified": "2023-06-06T03:54:07.000000Z",
2277
- },
2278
- "c.csv": {
2279
- "contents": [
2280
- ("col1", "col2", "col3"),
2281
- ("val11c", "val12c", "val13c"),
2282
- ("val21c", "val22c", "val23c"),
2283
- ],
2284
- "last_modified": "2023-06-07T03:54:07.000000Z",
2285
- },
2286
- "d.csv": {
2287
- "contents": [
2288
- ("col1", "col2", "col3"),
2289
- ("val11d", "val12d", "val13d"),
2290
- ("val21d", "val22d", "val23d"),
2291
- ],
2292
- "last_modified": "2023-06-08T03:54:07.000000Z",
2293
- },
2294
- }
2295
- )
2296
- .set_file_type("csv")
2297
- .set_cursor_cls(LowHistoryLimitConcurrentCursor)
2298
- )
2299
- .set_expected_catalog(
2300
- {
2301
- "streams": [
2302
- {
2303
- "default_cursor_field": ["_ab_source_file_last_modified"],
2304
- "json_schema": {
2305
- "type": "object",
2306
- "properties": {
2307
- "col1": {
2308
- "type": ["null", "string"],
2309
- },
2310
- "col2": {
2311
- "type": ["null", "string"],
2312
- },
2313
- "col3": {
2314
- "type": ["null", "string"],
2315
- },
2316
- "_ab_source_file_last_modified": {"type": "string"},
2317
- "_ab_source_file_url": {"type": "string"},
2318
- },
2319
- },
2320
- "name": "stream1",
2321
- "source_defined_cursor": True,
2322
- "supported_sync_modes": ["full_refresh", "incremental"],
2323
- }
2324
- ]
2325
- }
2326
- )
2327
- .set_expected_records(
2328
- [
2329
- # {"data": {"col1": "val11a", "col2": "val12a"}, "stream": "stream1"}, # This file is skipped because it is older than the time_window
2330
- # {"data": {"col1": "val21a", "col2": "val22a"}, "stream": "stream1"},
2331
- {
2332
- "data": {
2333
- "col1": "val11b",
2334
- "col2": "val12b",
2335
- "col3": "val13b",
2336
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2337
- "_ab_source_file_url": "b.csv",
2338
- },
2339
- "stream": "stream1",
2340
- },
2341
- {
2342
- "data": {
2343
- "col1": "val21b",
2344
- "col2": "val22b",
2345
- "col3": "val23b",
2346
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2347
- "_ab_source_file_url": "b.csv",
2348
- },
2349
- "stream": "stream1",
2350
- },
2351
- {
2352
- "history": {
2353
- "c.csv": "2023-06-07T03:54:07.000000Z",
2354
- "d.csv": "2023-06-08T03:54:07.000000Z",
2355
- "e.csv": "2023-06-08T03:54:07.000000Z",
2356
- },
2357
- "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_e.csv",
2358
- },
2359
- ]
2360
- )
2361
- .set_incremental_scenario_config(
2362
- IncrementalScenarioConfig(
2363
- input_state=StateBuilder()
2364
- .with_stream_state(
2365
- "stream1",
2366
- {
2367
- "history": {
2368
- "c.csv": "2023-06-07T03:54:07.000000Z",
2369
- "d.csv": "2023-06-08T03:54:07.000000Z",
2370
- "e.csv": "2023-06-08T03:54:07.000000Z",
2371
- },
2372
- "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_e.csv",
2373
- },
2374
- )
2375
- .build(),
2376
- )
2377
- )
2378
- ).build()
2379
-
2380
- multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_timestamps_scenario_concurrent_cursor_is_newer = (
2381
- TestScenarioBuilder()
2382
- .set_name("multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_timestamps_scenario_concurrent_cursor_is_newer")
2383
- .set_config(
2384
- {
2385
- "streams": [
2386
- {
2387
- "name": "stream1",
2388
- "format": {"filetype": "csv"},
2389
- "globs": ["*.csv"],
2390
- "validation_policy": "Emit Record",
2391
- "days_to_sync_if_history_is_full": 3,
2392
- }
2393
- ]
2394
- }
2395
- )
2396
- .set_source_builder(
2397
- FileBasedSourceBuilder()
2398
- .set_files(
2399
- {
2400
- "a.csv": {
2401
- "contents": [
2402
- ("col1", "col2"),
2403
- ("val11a", "val12a"),
2404
- ("val21a", "val22a"),
2405
- ],
2406
- "last_modified": "2023-06-05T03:54:07.000000Z",
2407
- },
2408
- "b.csv": {
2409
- "contents": [
2410
- ("col1", "col2", "col3"),
2411
- ("val11b", "val12b", "val13b"),
2412
- ("val21b", "val22b", "val23b"),
2413
- ],
2414
- "last_modified": "2023-06-06T03:54:07.000000Z",
2415
- },
2416
- "c.csv": {
2417
- "contents": [
2418
- ("col1", "col2", "col3"),
2419
- ("val11c", "val12c", "val13c"),
2420
- ("val21c", "val22c", "val23c"),
2421
- ],
2422
- "last_modified": "2023-06-07T03:54:07.000000Z",
2423
- },
2424
- "d.csv": {
2425
- "contents": [
2426
- ("col1", "col2", "col3"),
2427
- ("val11d", "val12d", "val13d"),
2428
- ("val21d", "val22d", "val23d"),
2429
- ],
2430
- "last_modified": "2023-06-08T03:54:07.000000Z",
2431
- },
2432
- }
2433
- )
2434
- .set_file_type("csv")
2435
- .set_cursor_cls(LowHistoryLimitConcurrentCursor)
2436
- )
2437
- .set_expected_catalog(
2438
- {
2439
- "streams": [
2440
- {
2441
- "default_cursor_field": ["_ab_source_file_last_modified"],
2442
- "json_schema": {
2443
- "type": "object",
2444
- "properties": {
2445
- "col1": {
2446
- "type": ["null", "string"],
2447
- },
2448
- "col2": {
2449
- "type": ["null", "string"],
2450
- },
2451
- "col3": {
2452
- "type": ["null", "string"],
2453
- },
2454
- "_ab_source_file_last_modified": {"type": "string"},
2455
- "_ab_source_file_url": {"type": "string"},
2456
- },
2457
- },
2458
- "name": "stream1",
2459
- "source_defined_cursor": True,
2460
- "supported_sync_modes": ["full_refresh", "incremental"],
2461
- }
2462
- ]
2463
- }
2464
- )
2465
- .set_expected_records(
2466
- [
2467
- # {"data": {"col1": "val11a", "col2": "val12a"}, "stream": "stream1"}, # This file is skipped because it is older than the time_window
2468
- # {"data": {"col1": "val21a", "col2": "val22a"}, "stream": "stream1"},
2469
- {
2470
- "data": {
2471
- "col1": "val11b",
2472
- "col2": "val12b",
2473
- "col3": "val13b",
2474
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2475
- "_ab_source_file_url": "b.csv",
2476
- },
2477
- "stream": "stream1",
2478
- },
2479
- {
2480
- "data": {
2481
- "col1": "val21b",
2482
- "col2": "val22b",
2483
- "col3": "val23b",
2484
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2485
- "_ab_source_file_url": "b.csv",
2486
- },
2487
- "stream": "stream1",
2488
- },
2489
- {
2490
- "history": {
2491
- "c.csv": "2023-06-07T03:54:07.000000Z",
2492
- "d.csv": "2023-06-08T03:54:07.000000Z",
2493
- "e.csv": "2023-06-08T03:54:07.000000Z",
2494
- },
2495
- "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_e.csv",
2496
- },
2497
- ]
2498
- )
2499
- .set_incremental_scenario_config(
2500
- IncrementalScenarioConfig(
2501
- input_state=StateBuilder()
2502
- .with_stream_state(
2503
- "stream1",
2504
- {
2505
- "history": {
2506
- "c.csv": "2023-06-07T03:54:07.000000Z",
2507
- "d.csv": "2023-06-08T03:54:07.000000Z",
2508
- "e.csv": "2023-06-08T03:54:07.000000Z",
2509
- },
2510
- "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_e.csv",
2511
- },
2512
- )
2513
- .build(),
2514
- )
2515
- )
2516
- ).build()
2517
-
2518
- multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_newer = (
2519
- TestScenarioBuilder()
2520
- .set_name(
2521
- "multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_newer"
2522
- )
2523
- .set_config(
2524
- {
2525
- "streams": [
2526
- {
2527
- "name": "stream1",
2528
- "format": {"filetype": "csv"},
2529
- "globs": ["*.csv"],
2530
- "validation_policy": "Emit Record",
2531
- "days_to_sync_if_history_is_full": 3,
2532
- }
2533
- ]
2534
- }
2535
- )
2536
- .set_source_builder(
2537
- FileBasedSourceBuilder()
2538
- .set_files(
2539
- {
2540
- "a.csv": {
2541
- "contents": [
2542
- ("col1", "col2"),
2543
- ("val11a", "val12a"),
2544
- ("val21a", "val22a"),
2545
- ],
2546
- "last_modified": "2023-06-05T03:54:07.000000Z",
2547
- },
2548
- "b.csv": {
2549
- "contents": [
2550
- ("col1", "col2", "col3"),
2551
- ("val11b", "val12b", "val13b"),
2552
- ("val21b", "val22b", "val23b"),
2553
- ],
2554
- "last_modified": "2023-06-06T03:54:07.000000Z",
2555
- },
2556
- "c.csv": {
2557
- "contents": [
2558
- ("col1", "col2", "col3"),
2559
- ("val11c", "val12c", "val13c"),
2560
- ("val21c", "val22c", "val23c"),
2561
- ],
2562
- "last_modified": "2023-06-07T03:54:07.000000Z",
2563
- },
2564
- "d.csv": {
2565
- "contents": [
2566
- ("col1", "col2", "col3"),
2567
- ("val11d", "val12d", "val13d"),
2568
- ("val21d", "val22d", "val23d"),
2569
- ],
2570
- "last_modified": "2023-06-08T03:54:07.000000Z",
2571
- },
2572
- }
2573
- )
2574
- .set_file_type("csv")
2575
- .set_cursor_cls(LowHistoryLimitConcurrentCursor)
2576
- )
2577
- .set_expected_catalog(
2578
- {
2579
- "streams": [
2580
- {
2581
- "default_cursor_field": ["_ab_source_file_last_modified"],
2582
- "json_schema": {
2583
- "type": "object",
2584
- "properties": {
2585
- "col1": {
2586
- "type": ["null", "string"],
2587
- },
2588
- "col2": {
2589
- "type": ["null", "string"],
2590
- },
2591
- "col3": {
2592
- "type": ["null", "string"],
2593
- },
2594
- "_ab_source_file_last_modified": {"type": "string"},
2595
- "_ab_source_file_url": {"type": "string"},
2596
- },
2597
- },
2598
- "name": "stream1",
2599
- "source_defined_cursor": True,
2600
- "supported_sync_modes": ["full_refresh", "incremental"],
2601
- }
2602
- ]
2603
- }
2604
- )
2605
- .set_expected_records(
2606
- [
2607
- {
2608
- "data": {
2609
- "col1": "val11a",
2610
- "col2": "val12a",
2611
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2612
- "_ab_source_file_url": "a.csv",
2613
- },
2614
- "stream": "stream1",
2615
- },
2616
- {
2617
- "data": {
2618
- "col1": "val21a",
2619
- "col2": "val22a",
2620
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2621
- "_ab_source_file_url": "a.csv",
2622
- },
2623
- "stream": "stream1",
2624
- },
2625
- {
2626
- "history": {
2627
- "a.csv": "2023-06-05T03:54:07.000000Z",
2628
- "c.csv": "2023-06-07T03:54:07.000000Z",
2629
- "d.csv": "2023-06-08T03:54:07.000000Z",
2630
- },
2631
- "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
2632
- },
2633
- {
2634
- "data": {
2635
- "col1": "val11b",
2636
- "col2": "val12b",
2637
- "col3": "val13b",
2638
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2639
- "_ab_source_file_url": "b.csv",
2640
- },
2641
- "stream": "stream1",
2642
- },
2643
- {
2644
- "data": {
2645
- "col1": "val21b",
2646
- "col2": "val22b",
2647
- "col3": "val23b",
2648
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2649
- "_ab_source_file_url": "b.csv",
2650
- },
2651
- "stream": "stream1",
2652
- },
2653
- {
2654
- "history": {
2655
- "b.csv": "2023-06-06T03:54:07.000000Z",
2656
- "c.csv": "2023-06-07T03:54:07.000000Z",
2657
- "d.csv": "2023-06-08T03:54:07.000000Z",
2658
- },
2659
- "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
2660
- },
2661
- ]
2662
- )
2663
- .set_incremental_scenario_config(
2664
- IncrementalScenarioConfig(
2665
- input_state=StateBuilder()
2666
- .with_stream_state(
2667
- "stream1",
2668
- {
2669
- "history": {
2670
- "old_file.csv": "2023-06-05T00:00:00.000000Z",
2671
- "c.csv": "2023-06-07T03:54:07.000000Z",
2672
- "d.csv": "2023-06-08T03:54:07.000000Z",
2673
- },
2674
- "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
2675
- },
2676
- )
2677
- .build(),
2678
- )
2679
- )
2680
- ).build()
2681
-
2682
- multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_older = (
2683
- TestScenarioBuilder()
2684
- .set_name(
2685
- "multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_older"
2686
- )
2687
- .set_config(
2688
- {
2689
- "streams": [
2690
- {
2691
- "name": "stream1",
2692
- "format": {"filetype": "csv"},
2693
- "globs": ["*.csv"],
2694
- "validation_policy": "Emit Record",
2695
- "days_to_sync_if_history_is_full": 3,
2696
- }
2697
- ]
2698
- }
2699
- )
2700
- .set_source_builder(
2701
- FileBasedSourceBuilder()
2702
- .set_files(
2703
- {
2704
- "a.csv": {
2705
- "contents": [
2706
- ("col1", "col2"),
2707
- ("val11a", "val12a"),
2708
- ("val21a", "val22a"),
2709
- ],
2710
- "last_modified": "2023-06-05T03:54:07.000000Z",
2711
- },
2712
- "b.csv": {
2713
- "contents": [
2714
- ("col1", "col2", "col3"),
2715
- ("val11b", "val12b", "val13b"),
2716
- ("val21b", "val22b", "val23b"),
2717
- ],
2718
- "last_modified": "2023-06-06T03:54:07.000000Z",
2719
- },
2720
- "c.csv": {
2721
- "contents": [
2722
- ("col1", "col2", "col3"),
2723
- ("val11c", "val12c", "val13c"),
2724
- ("val21c", "val22c", "val23c"),
2725
- ],
2726
- "last_modified": "2023-06-07T03:54:07.000000Z",
2727
- },
2728
- "d.csv": {
2729
- "contents": [
2730
- ("col1", "col2", "col3"),
2731
- ("val11d", "val12d", "val13d"),
2732
- ("val21d", "val22d", "val23d"),
2733
- ],
2734
- "last_modified": "2023-06-08T03:54:07.000000Z",
2735
- },
2736
- }
2737
- )
2738
- .set_file_type("csv")
2739
- .set_cursor_cls(LowHistoryLimitConcurrentCursor)
2740
- )
2741
- .set_expected_catalog(
2742
- {
2743
- "streams": [
2744
- {
2745
- "default_cursor_field": ["_ab_source_file_last_modified"],
2746
- "json_schema": {
2747
- "type": "object",
2748
- "properties": {
2749
- "col1": {
2750
- "type": ["null", "string"],
2751
- },
2752
- "col2": {
2753
- "type": ["null", "string"],
2754
- },
2755
- "col3": {
2756
- "type": ["null", "string"],
2757
- },
2758
- "_ab_source_file_last_modified": {"type": "string"},
2759
- "_ab_source_file_url": {"type": "string"},
2760
- },
2761
- },
2762
- "name": "stream1",
2763
- "source_defined_cursor": True,
2764
- "supported_sync_modes": ["full_refresh", "incremental"],
2765
- }
2766
- ]
2767
- }
2768
- )
2769
- .set_expected_records(
2770
- [
2771
- {
2772
- "data": {
2773
- "col1": "val11a",
2774
- "col2": "val12a",
2775
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2776
- "_ab_source_file_url": "a.csv",
2777
- },
2778
- "stream": "stream1",
2779
- },
2780
- {
2781
- "data": {
2782
- "col1": "val21a",
2783
- "col2": "val22a",
2784
- "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2785
- "_ab_source_file_url": "a.csv",
2786
- },
2787
- "stream": "stream1",
2788
- },
2789
- {
2790
- "history": {
2791
- "a.csv": "2023-06-05T03:54:07.000000Z",
2792
- "c.csv": "2023-06-07T03:54:07.000000Z",
2793
- "d.csv": "2023-06-08T03:54:07.000000Z",
2794
- },
2795
- "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
2796
- },
2797
- {
2798
- "data": {
2799
- "col1": "val11b",
2800
- "col2": "val12b",
2801
- "col3": "val13b",
2802
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2803
- "_ab_source_file_url": "b.csv",
2804
- },
2805
- "stream": "stream1",
2806
- },
2807
- {
2808
- "data": {
2809
- "col1": "val21b",
2810
- "col2": "val22b",
2811
- "col3": "val23b",
2812
- "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2813
- "_ab_source_file_url": "b.csv",
2814
- },
2815
- "stream": "stream1",
2816
- },
2817
- {
2818
- "history": {
2819
- "b.csv": "2023-06-06T03:54:07.000000Z",
2820
- "c.csv": "2023-06-07T03:54:07.000000Z",
2821
- "d.csv": "2023-06-08T03:54:07.000000Z",
2822
- },
2823
- "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
2824
- },
2825
- ]
2826
- )
2827
- .set_incremental_scenario_config(
2828
- IncrementalScenarioConfig(
2829
- input_state=StateBuilder()
2830
- .with_stream_state(
2831
- "stream1",
2832
- {
2833
- "history": {
2834
- "old_file.csv": "2023-06-05T00:00:00.000000Z",
2835
- "c.csv": "2023-06-07T03:54:07.000000Z",
2836
- "d.csv": "2023-06-08T03:54:07.000000Z",
2837
- },
2838
- "_ab_source_file_last_modified": "2023-06-04T00:00:00.000000Z_very_old_file.csv",
2839
- },
2840
- )
2841
- .build(),
2842
- )
2843
- )
2844
- ).build()