airbyte-cdk 0.72.1__py3-none-any.whl → 6.13.1.dev4107__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (517) hide show
  1. airbyte_cdk/__init__.py +355 -6
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +29 -10
  7. airbyte_cdk/connector.py +24 -24
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
  10. airbyte_cdk/connector_builder/main.py +45 -13
  11. airbyte_cdk/connector_builder/message_grouper.py +189 -50
  12. airbyte_cdk/connector_builder/models.py +3 -2
  13. airbyte_cdk/destinations/__init__.py +4 -3
  14. airbyte_cdk/destinations/destination.py +54 -20
  15. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  16. airbyte_cdk/destinations/vector_db_based/config.py +40 -17
  17. airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
  18. airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
  19. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  20. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  21. airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
  22. airbyte_cdk/entrypoint.py +153 -44
  23. airbyte_cdk/exception_handler.py +21 -3
  24. airbyte_cdk/logger.py +30 -44
  25. airbyte_cdk/models/__init__.py +13 -2
  26. airbyte_cdk/models/airbyte_protocol.py +86 -1
  27. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  28. airbyte_cdk/models/file_transfer_record_message.py +13 -0
  29. airbyte_cdk/models/well_known_types.py +1 -1
  30. airbyte_cdk/sources/__init__.py +5 -1
  31. airbyte_cdk/sources/abstract_source.py +125 -79
  32. airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
  33. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
  34. airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
  35. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
  36. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  37. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
  38. airbyte_cdk/sources/config.py +3 -2
  39. airbyte_cdk/sources/connector_state_manager.py +49 -83
  40. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  41. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
  42. airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
  43. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  44. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  45. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  46. airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
  47. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  48. airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
  49. airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
  50. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
  51. airbyte_cdk/sources/declarative/auth/token.py +28 -10
  52. airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
  53. airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
  54. airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
  55. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  56. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  57. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +421 -0
  58. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
  59. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
  60. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1185 -85
  61. airbyte_cdk/sources/declarative/declarative_source.py +5 -2
  62. airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
  63. airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
  64. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
  65. airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
  66. airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
  67. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  68. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  69. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  70. airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
  71. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
  72. airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
  73. airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
  74. airbyte_cdk/sources/declarative/extractors/record_filter.py +65 -8
  75. airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
  76. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
  77. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  78. airbyte_cdk/sources/declarative/incremental/__init__.py +25 -3
  79. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
  80. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  81. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
  82. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +159 -74
  83. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  84. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  85. airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
  86. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
  87. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
  88. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
  89. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
  90. airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
  91. airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
  92. airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
  93. airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
  94. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  95. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  96. airbyte_cdk/sources/declarative/models/__init__.py +1 -1
  97. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1319 -603
  98. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
  99. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
  100. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
  101. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1695 -225
  102. airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
  103. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  104. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  105. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
  106. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  107. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
  108. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
  109. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
  110. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
  111. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
  112. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
  113. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
  114. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
  115. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
  116. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
  117. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
  118. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
  119. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  120. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
  121. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
  122. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
  123. airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
  124. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
  125. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
  126. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
  127. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
  128. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
  129. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
  130. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
  131. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
  132. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
  133. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
  134. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
  135. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
  136. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  137. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
  138. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
  139. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
  140. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
  141. airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
  142. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  143. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  144. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  145. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  146. airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
  147. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
  148. airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
  149. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +228 -72
  150. airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
  151. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
  152. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
  153. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
  154. airbyte_cdk/sources/declarative/spec/spec.py +12 -5
  155. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
  156. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
  157. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
  158. airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
  159. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  160. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  161. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  162. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  163. airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
  164. airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
  165. airbyte_cdk/sources/declarative/types.py +19 -110
  166. airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
  167. airbyte_cdk/sources/embedded/base_integration.py +16 -5
  168. airbyte_cdk/sources/embedded/catalog.py +16 -4
  169. airbyte_cdk/sources/embedded/runner.py +19 -3
  170. airbyte_cdk/sources/embedded/tools.py +5 -2
  171. airbyte_cdk/sources/file_based/README.md +152 -0
  172. airbyte_cdk/sources/file_based/__init__.py +24 -0
  173. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
  174. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
  175. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
  176. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +58 -10
  177. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  178. airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
  179. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  180. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
  181. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  182. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  183. airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
  184. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
  185. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  186. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  187. airbyte_cdk/sources/file_based/exceptions.py +52 -15
  188. airbyte_cdk/sources/file_based/file_based_source.py +163 -33
  189. airbyte_cdk/sources/file_based/file_based_stream_reader.py +83 -5
  190. airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
  191. airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
  192. airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
  193. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  194. airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
  195. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  196. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
  197. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
  198. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +147 -41
  199. airbyte_cdk/sources/file_based/remote_file.py +1 -1
  200. airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
  201. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
  202. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  203. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  204. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
  205. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
  206. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
  207. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
  208. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
  209. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
  210. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  211. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
  212. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +175 -45
  213. airbyte_cdk/sources/http_logger.py +8 -3
  214. airbyte_cdk/sources/message/__init__.py +7 -1
  215. airbyte_cdk/sources/message/repository.py +18 -4
  216. airbyte_cdk/sources/source.py +42 -38
  217. airbyte_cdk/sources/streams/__init__.py +2 -2
  218. airbyte_cdk/sources/streams/availability_strategy.py +54 -3
  219. airbyte_cdk/sources/streams/call_rate.py +64 -21
  220. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  221. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  222. airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
  223. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  224. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  225. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  226. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  227. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
  228. airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
  229. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
  230. airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
  231. airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
  232. airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
  233. airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
  234. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
  235. airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
  236. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
  237. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  238. airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
  239. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
  240. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
  241. airbyte_cdk/sources/streams/core.py +412 -87
  242. airbyte_cdk/sources/streams/http/__init__.py +2 -1
  243. airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
  244. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  245. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  246. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  247. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  248. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  249. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  250. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  251. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  252. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  253. airbyte_cdk/sources/streams/http/exceptions.py +27 -7
  254. airbyte_cdk/sources/streams/http/http.py +369 -246
  255. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  256. airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
  257. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
  258. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
  259. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
  260. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  261. airbyte_cdk/sources/types.py +154 -0
  262. airbyte_cdk/sources/utils/record_helper.py +36 -21
  263. airbyte_cdk/sources/utils/schema_helpers.py +13 -6
  264. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  265. airbyte_cdk/sources/utils/transform.py +54 -20
  266. airbyte_cdk/sql/_util/hashing.py +34 -0
  267. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  268. airbyte_cdk/sql/constants.py +32 -0
  269. airbyte_cdk/sql/exceptions.py +235 -0
  270. airbyte_cdk/sql/secrets.py +123 -0
  271. airbyte_cdk/sql/shared/__init__.py +15 -0
  272. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  273. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  274. airbyte_cdk/sql/types.py +160 -0
  275. airbyte_cdk/test/catalog_builder.py +70 -18
  276. airbyte_cdk/test/entrypoint_wrapper.py +117 -42
  277. airbyte_cdk/test/mock_http/__init__.py +1 -1
  278. airbyte_cdk/test/mock_http/matcher.py +6 -0
  279. airbyte_cdk/test/mock_http/mocker.py +57 -10
  280. airbyte_cdk/test/mock_http/request.py +19 -3
  281. airbyte_cdk/test/mock_http/response.py +3 -1
  282. airbyte_cdk/test/mock_http/response_builder.py +32 -16
  283. airbyte_cdk/test/state_builder.py +18 -10
  284. airbyte_cdk/test/utils/__init__.py +1 -0
  285. airbyte_cdk/test/utils/data.py +24 -0
  286. airbyte_cdk/test/utils/http_mocking.py +16 -0
  287. airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
  288. airbyte_cdk/test/utils/reading.py +26 -0
  289. airbyte_cdk/utils/__init__.py +2 -1
  290. airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
  291. airbyte_cdk/utils/analytics_message.py +10 -2
  292. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  293. airbyte_cdk/utils/event_timing.py +10 -10
  294. airbyte_cdk/utils/mapping_helpers.py +3 -1
  295. airbyte_cdk/utils/message_utils.py +20 -11
  296. airbyte_cdk/utils/print_buffer.py +75 -0
  297. airbyte_cdk/utils/schema_inferrer.py +198 -28
  298. airbyte_cdk/utils/slice_hasher.py +30 -0
  299. airbyte_cdk/utils/spec_schema_transformations.py +6 -3
  300. airbyte_cdk/utils/stream_status_utils.py +8 -1
  301. airbyte_cdk/utils/traced_exception.py +61 -21
  302. airbyte_cdk-6.13.1.dev4107.dist-info/METADATA +109 -0
  303. airbyte_cdk-6.13.1.dev4107.dist-info/RECORD +349 -0
  304. {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.13.1.dev4107.dist-info}/WHEEL +1 -2
  305. airbyte_cdk-6.13.1.dev4107.dist-info/entry_points.txt +3 -0
  306. airbyte_cdk/sources/declarative/create_partial.py +0 -92
  307. airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
  308. airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
  309. airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
  310. airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
  311. airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
  312. airbyte_cdk/sources/deprecated/base_source.py +0 -94
  313. airbyte_cdk/sources/deprecated/client.py +0 -99
  314. airbyte_cdk/sources/singer/__init__.py +0 -8
  315. airbyte_cdk/sources/singer/singer_helpers.py +0 -304
  316. airbyte_cdk/sources/singer/source.py +0 -186
  317. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
  318. airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
  319. airbyte_cdk/sources/streams/http/auth/core.py +0 -29
  320. airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
  321. airbyte_cdk/sources/streams/http/auth/token.py +0 -47
  322. airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
  323. airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
  324. airbyte_cdk/sources/utils/schema_models.py +0 -84
  325. airbyte_cdk-0.72.1.dist-info/METADATA +0 -243
  326. airbyte_cdk-0.72.1.dist-info/RECORD +0 -466
  327. airbyte_cdk-0.72.1.dist-info/top_level.txt +0 -3
  328. source_declarative_manifest/main.py +0 -29
  329. unit_tests/connector_builder/__init__.py +0 -3
  330. unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
  331. unit_tests/connector_builder/test_message_grouper.py +0 -713
  332. unit_tests/connector_builder/utils.py +0 -27
  333. unit_tests/destinations/test_destination.py +0 -243
  334. unit_tests/singer/test_singer_helpers.py +0 -56
  335. unit_tests/singer/test_singer_source.py +0 -112
  336. unit_tests/sources/__init__.py +0 -0
  337. unit_tests/sources/concurrent_source/__init__.py +0 -3
  338. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
  339. unit_tests/sources/declarative/__init__.py +0 -3
  340. unit_tests/sources/declarative/auth/__init__.py +0 -3
  341. unit_tests/sources/declarative/auth/test_oauth.py +0 -331
  342. unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
  343. unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
  344. unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
  345. unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
  346. unit_tests/sources/declarative/checks/__init__.py +0 -3
  347. unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
  348. unit_tests/sources/declarative/decoders/__init__.py +0 -0
  349. unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
  350. unit_tests/sources/declarative/external_component.py +0 -13
  351. unit_tests/sources/declarative/extractors/__init__.py +0 -3
  352. unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
  353. unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
  354. unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
  355. unit_tests/sources/declarative/incremental/__init__.py +0 -0
  356. unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
  357. unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
  358. unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
  359. unit_tests/sources/declarative/interpolation/__init__.py +0 -3
  360. unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
  361. unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
  362. unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
  363. unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
  364. unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
  365. unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
  366. unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
  367. unit_tests/sources/declarative/parsers/__init__.py +0 -3
  368. unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
  369. unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
  370. unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1847
  371. unit_tests/sources/declarative/parsers/testing_components.py +0 -36
  372. unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
  373. unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
  374. unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
  375. unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
  376. unit_tests/sources/declarative/requesters/__init__.py +0 -3
  377. unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
  378. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
  379. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
  380. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
  381. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
  382. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
  383. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
  384. unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
  385. unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
  386. unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
  387. unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
  388. unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
  389. unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
  390. unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
  391. unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
  392. unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
  393. unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
  394. unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
  395. unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
  396. unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
  397. unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
  398. unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
  399. unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
  400. unit_tests/sources/declarative/retrievers/__init__.py +0 -3
  401. unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
  402. unit_tests/sources/declarative/schema/__init__.py +0 -6
  403. unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
  404. unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
  405. unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
  406. unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
  407. unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
  408. unit_tests/sources/declarative/states/__init__.py +0 -3
  409. unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
  410. unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
  411. unit_tests/sources/declarative/test_create_partial.py +0 -83
  412. unit_tests/sources/declarative/test_declarative_stream.py +0 -103
  413. unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
  414. unit_tests/sources/declarative/test_types.py +0 -39
  415. unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
  416. unit_tests/sources/file_based/__init__.py +0 -0
  417. unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
  418. unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
  419. unit_tests/sources/file_based/config/__init__.py +0 -0
  420. unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
  421. unit_tests/sources/file_based/config/test_csv_format.py +0 -34
  422. unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
  423. unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
  424. unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
  425. unit_tests/sources/file_based/file_types/__init__.py +0 -0
  426. unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
  427. unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
  428. unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
  429. unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
  430. unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
  431. unit_tests/sources/file_based/helpers.py +0 -70
  432. unit_tests/sources/file_based/in_memory_files_source.py +0 -211
  433. unit_tests/sources/file_based/scenarios/__init__.py +0 -0
  434. unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
  435. unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
  436. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
  437. unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
  438. unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
  439. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
  440. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
  441. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
  442. unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
  443. unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
  444. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
  445. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
  446. unit_tests/sources/file_based/stream/__init__.py +0 -0
  447. unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
  448. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
  449. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
  450. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
  451. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
  452. unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
  453. unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
  454. unit_tests/sources/file_based/test_scenarios.py +0 -253
  455. unit_tests/sources/file_based/test_schema_helpers.py +0 -346
  456. unit_tests/sources/fixtures/__init__.py +0 -3
  457. unit_tests/sources/fixtures/source_test_fixture.py +0 -153
  458. unit_tests/sources/message/__init__.py +0 -0
  459. unit_tests/sources/message/test_repository.py +0 -153
  460. unit_tests/sources/streams/__init__.py +0 -0
  461. unit_tests/sources/streams/concurrent/__init__.py +0 -3
  462. unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
  463. unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
  464. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
  465. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
  466. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
  467. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
  468. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
  469. unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
  470. unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
  471. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
  472. unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
  473. unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
  474. unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
  475. unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
  476. unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
  477. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
  478. unit_tests/sources/streams/http/__init__.py +0 -0
  479. unit_tests/sources/streams/http/auth/__init__.py +0 -0
  480. unit_tests/sources/streams/http/auth/test_auth.py +0 -173
  481. unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
  482. unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
  483. unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
  484. unit_tests/sources/streams/http/test_http.py +0 -635
  485. unit_tests/sources/streams/test_availability_strategy.py +0 -70
  486. unit_tests/sources/streams/test_call_rate.py +0 -300
  487. unit_tests/sources/streams/test_stream_read.py +0 -405
  488. unit_tests/sources/streams/test_streams_core.py +0 -184
  489. unit_tests/sources/test_abstract_source.py +0 -1442
  490. unit_tests/sources/test_concurrent_source.py +0 -112
  491. unit_tests/sources/test_config.py +0 -92
  492. unit_tests/sources/test_connector_state_manager.py +0 -482
  493. unit_tests/sources/test_http_logger.py +0 -252
  494. unit_tests/sources/test_integration_source.py +0 -86
  495. unit_tests/sources/test_source.py +0 -684
  496. unit_tests/sources/test_source_read.py +0 -460
  497. unit_tests/test/__init__.py +0 -0
  498. unit_tests/test/mock_http/__init__.py +0 -0
  499. unit_tests/test/mock_http/test_matcher.py +0 -53
  500. unit_tests/test/mock_http/test_mocker.py +0 -214
  501. unit_tests/test/mock_http/test_request.py +0 -117
  502. unit_tests/test/mock_http/test_response_builder.py +0 -177
  503. unit_tests/test/test_entrypoint_wrapper.py +0 -240
  504. unit_tests/utils/__init__.py +0 -0
  505. unit_tests/utils/test_datetime_format_inferrer.py +0 -60
  506. unit_tests/utils/test_mapping_helpers.py +0 -54
  507. unit_tests/utils/test_message_utils.py +0 -91
  508. unit_tests/utils/test_rate_limiting.py +0 -26
  509. unit_tests/utils/test_schema_inferrer.py +0 -202
  510. unit_tests/utils/test_secret_utils.py +0 -135
  511. unit_tests/utils/test_stream_status_utils.py +0 -61
  512. unit_tests/utils/test_traced_exception.py +0 -107
  513. /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
  514. {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
  515. {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
  516. {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
  517. {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.13.1.dev4107.dist-info}/LICENSE.txt +0 -0
@@ -0,0 +1,51 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Optional
5
+
6
+ from airbyte_cdk.sources.streams.checkpoint import Cursor
7
+ from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
8
+
9
+
10
+ @dataclass
11
+ class ResumableFullRefreshCursor(Cursor):
12
+ """
13
+ Cursor that allows for the checkpointing of sync progress according to a synthetic cursor based on the pagination state
14
+ of the stream. Resumable full refresh syncs are only intended to retain state in between sync attempts of the same job
15
+ with the platform responsible for removing said state.
16
+ """
17
+
18
+ def __init__(self) -> None:
19
+ self._cursor: StreamState = {}
20
+
21
+ def get_stream_state(self) -> StreamState:
22
+ return self._cursor
23
+
24
+ def set_initial_state(self, stream_state: StreamState) -> None:
25
+ self._cursor = stream_state
26
+
27
+ def observe(self, stream_slice: StreamSlice, record: Record) -> None:
28
+ """
29
+ Resumable full refresh manages state using a page number so it does not need to update state by observing incoming records.
30
+ """
31
+ pass
32
+
33
+ def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
34
+ self._cursor = stream_slice.cursor_slice
35
+
36
+ def should_be_synced(self, record: Record) -> bool:
37
+ """
38
+ Unlike date-based cursors which filter out records outside slice boundaries, resumable full refresh records exist within pages
39
+ that don't have filterable bounds. We should always return them.
40
+ """
41
+ return True
42
+
43
+ def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
44
+ """
45
+ RFR records don't have an ordering that allows them to be compared with one another.
46
+ """
47
+ return False
48
+
49
+ def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
50
+ # A top-level RFR cursor only manages the state of a single partition
51
+ return self._cursor
@@ -0,0 +1,110 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Mapping, MutableMapping, Optional
5
+
6
+ from airbyte_cdk.models import FailureType
7
+ from airbyte_cdk.sources.streams.checkpoint import Cursor
8
+ from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
9
+ PerPartitionKeySerializer,
10
+ )
11
+ from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
12
+ from airbyte_cdk.utils import AirbyteTracedException
13
+
14
+ FULL_REFRESH_COMPLETE_STATE: Mapping[str, Any] = {"__ab_full_refresh_sync_complete": True}
15
+
16
+
17
+ @dataclass
18
+ class SubstreamResumableFullRefreshCursor(Cursor):
19
+ def __init__(self) -> None:
20
+ self._per_partition_state: MutableMapping[str, StreamState] = {}
21
+ self._partition_serializer = PerPartitionKeySerializer()
22
+
23
+ def get_stream_state(self) -> StreamState:
24
+ return {"states": list(self._per_partition_state.values())}
25
+
26
+ def set_initial_state(self, stream_state: StreamState) -> None:
27
+ """
28
+ Set the initial state for the cursors.
29
+
30
+ This method initializes the state for each partition cursor using the provided stream state.
31
+ If a partition state is provided in the stream state, it will update the corresponding partition cursor with this state.
32
+
33
+ To simplify processing and state management, we do not maintain the checkpointed state of the parent partitions.
34
+ Instead, we are tracking whether a parent has already successfully synced on a prior attempt and skipping over it
35
+ allowing the sync to continue making progress. And this works for RFR because the platform will dispose of this
36
+ state on the next sync job.
37
+
38
+ Args:
39
+ stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
40
+ {
41
+ "states": [
42
+ {
43
+ "partition": {
44
+ "partition_key": "value_0"
45
+ },
46
+ "cursor": {
47
+ "__ab_full_refresh_sync_complete": True
48
+ }
49
+ },
50
+ {
51
+ "partition": {
52
+ "partition_key": "value_1"
53
+ },
54
+ "cursor": {},
55
+ },
56
+ ]
57
+ }
58
+ """
59
+ if not stream_state:
60
+ return
61
+
62
+ if "states" not in stream_state:
63
+ raise AirbyteTracedException(
64
+ internal_message=f"Could not sync parse the following state: {stream_state}",
65
+ message="The state for is format invalid. Validate that the migration steps included a reset and that it was performed "
66
+ "properly. Otherwise, please contact Airbyte support.",
67
+ failure_type=FailureType.config_error,
68
+ )
69
+
70
+ for state in stream_state["states"]:
71
+ self._per_partition_state[self._to_partition_key(state["partition"])] = state
72
+
73
+ def observe(self, stream_slice: StreamSlice, record: Record) -> None:
74
+ """
75
+ Substream resumable full refresh manages state by closing the slice after syncing a parent so observe is not used.
76
+ """
77
+ pass
78
+
79
+ def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
80
+ self._per_partition_state[self._to_partition_key(stream_slice.partition)] = {
81
+ "partition": stream_slice.partition,
82
+ "cursor": FULL_REFRESH_COMPLETE_STATE,
83
+ }
84
+
85
+ def should_be_synced(self, record: Record) -> bool:
86
+ """
87
+ Unlike date-based cursors which filter out records outside slice boundaries, resumable full refresh records exist within pages
88
+ that don't have filterable bounds. We should always return them.
89
+ """
90
+ return True
91
+
92
+ def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
93
+ """
94
+ RFR records don't have an ordering that allows them to be compared with one another.
95
+ """
96
+ return False
97
+
98
+ def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
99
+ if not stream_slice:
100
+ raise ValueError("A partition needs to be provided in order to extract a state")
101
+
102
+ return self._per_partition_state.get(
103
+ self._to_partition_key(stream_slice.partition), {}
104
+ ).get("cursor")
105
+
106
+ def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
107
+ return self._partition_serializer.to_partition_key(partition)
108
+
109
+ def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
110
+ return self._partition_serializer.to_partition(partition_key)
@@ -0,0 +1,7 @@
1
+ ## Breaking Changes & Limitations
2
+
3
+ - [bigger scope than Concurrent CDK] checkpointing state was acting on the number of records per slice. This has been changed to consider the number of records per sync
4
+ - `Source.read_state` and `Source._emit_legacy_state_format` are now classmethods to allow for developers to have access to the state before instantiating the source
5
+ - send_per_stream_state is always True for Concurrent CDK
6
+ - Using stream_state during read_records: The concern is that today, stream_instance.get_updated_state is called on every record and read_records on every slice. The implication is that the argument stream_state passed to read_records will have the value after the last stream_instance.get_updated_state of the previous slice. For Concurrent CDK, this is not possible as slices are processed in an unordered way.
7
+ - Cursor fields can only be date-time formatted as epoch. Eventually, we want to move to ISO 8601 as it provides more flexibility, but for the first iteration on Stripe, it was easier to use the same format that was already used.
@@ -5,14 +5,19 @@
5
5
  from abc import ABC, abstractmethod
6
6
  from typing import Any, Iterable, Mapping, Optional
7
7
 
8
+ from typing_extensions import deprecated
9
+
8
10
  from airbyte_cdk.models import AirbyteStream
11
+ from airbyte_cdk.sources.source import ExperimentalClassWarning
9
12
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability
10
13
  from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
11
14
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
12
- from deprecated.classic import deprecated
13
15
 
14
16
 
15
- @deprecated("This class is experimental. Use at your own risk.")
17
+ @deprecated(
18
+ "This class is experimental. Use at your own risk.",
19
+ category=ExperimentalClassWarning,
20
+ )
16
21
  class AbstractStream(ABC):
17
22
  """
18
23
  AbstractStream is an experimental interface for streams developed as part of the Concurrent CDK.
@@ -8,37 +8,52 @@ import logging
8
8
  from functools import lru_cache
9
9
  from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
10
10
 
11
- from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteStream, ConfiguredAirbyteStream, Level, SyncMode, Type
11
+ from typing_extensions import deprecated
12
+
13
+ from airbyte_cdk.models import (
14
+ AirbyteLogMessage,
15
+ AirbyteMessage,
16
+ AirbyteStream,
17
+ ConfiguredAirbyteStream,
18
+ Level,
19
+ SyncMode,
20
+ Type,
21
+ )
12
22
  from airbyte_cdk.sources import AbstractSource, Source
13
23
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
14
24
  from airbyte_cdk.sources.message import MessageRepository
25
+ from airbyte_cdk.sources.source import ExperimentalClassWarning
15
26
  from airbyte_cdk.sources.streams import Stream
16
27
  from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
17
28
  from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
18
29
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
19
30
  AbstractAvailabilityStrategy,
20
- StreamAvailability,
21
- StreamAvailable,
22
- StreamUnavailable,
31
+ AlwaysAvailableAvailabilityStrategy,
23
32
  )
24
33
  from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
25
34
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
26
35
  from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
27
- from airbyte_cdk.sources.streams.concurrent.helpers import get_cursor_field_from_stream, get_primary_key_from_stream
36
+ from airbyte_cdk.sources.streams.concurrent.helpers import (
37
+ get_cursor_field_from_stream,
38
+ get_primary_key_from_stream,
39
+ )
28
40
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
29
41
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
30
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
31
42
  from airbyte_cdk.sources.streams.core import StreamData
43
+ from airbyte_cdk.sources.types import Record
32
44
  from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
33
45
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
34
- from deprecated.classic import deprecated
46
+ from airbyte_cdk.utils.slice_hasher import SliceHasher
35
47
 
36
48
  """
37
49
  This module contains adapters to help enabling concurrency on Stream objects without needing to migrate to AbstractStream
38
50
  """
39
51
 
40
52
 
41
- @deprecated("This class is experimental. Use at your own risk.")
53
+ @deprecated(
54
+ "This class is experimental. Use at your own risk.",
55
+ category=ExperimentalClassWarning,
56
+ )
42
57
  class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
43
58
  """
44
59
  The StreamFacade is a Stream that wraps an AbstractStream and exposes it as a Stream.
@@ -77,15 +92,16 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
77
92
  partition_generator=StreamPartitionGenerator(
78
93
  stream,
79
94
  message_repository,
80
- SyncMode.full_refresh if isinstance(cursor, FinalStateCursor) else SyncMode.incremental,
95
+ SyncMode.full_refresh
96
+ if isinstance(cursor, FinalStateCursor)
97
+ else SyncMode.incremental,
81
98
  [cursor_field] if cursor_field is not None else None,
82
99
  state,
83
- cursor,
84
100
  ),
85
101
  name=stream.name,
86
102
  namespace=stream.namespace,
87
103
  json_schema=stream.get_json_schema(),
88
- availability_strategy=StreamAvailabilityStrategy(stream, source),
104
+ availability_strategy=AlwaysAvailableAvailabilityStrategy(),
89
105
  primary_key=pk,
90
106
  cursor_field=cursor_field,
91
107
  logger=logger,
@@ -99,14 +115,23 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
99
115
 
100
116
  @property
101
117
  def state(self) -> MutableMapping[str, Any]:
102
- raise NotImplementedError("This should not be called as part of the Concurrent CDK code. Please report the problem to Airbyte")
118
+ raise NotImplementedError(
119
+ "This should not be called as part of the Concurrent CDK code. Please report the problem to Airbyte"
120
+ )
103
121
 
104
122
  @state.setter
105
123
  def state(self, value: Mapping[str, Any]) -> None:
106
124
  if "state" in dir(self._legacy_stream):
107
125
  self._legacy_stream.state = value # type: ignore # validating `state` is attribute of stream using `if` above
108
126
 
109
- def __init__(self, stream: DefaultStream, legacy_stream: Stream, cursor: Cursor, slice_logger: SliceLogger, logger: logging.Logger):
127
+ def __init__(
128
+ self,
129
+ stream: DefaultStream,
130
+ legacy_stream: Stream,
131
+ cursor: Cursor,
132
+ slice_logger: SliceLogger,
133
+ logger: logging.Logger,
134
+ ):
110
135
  """
111
136
  :param stream: The underlying AbstractStream
112
137
  """
@@ -143,7 +168,10 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
143
168
  # This shouldn't happen if the ConcurrentCursor was used
144
169
  state = "unknown; no state attribute was available on the cursor"
145
170
  yield AirbyteMessage(
146
- type=Type.LOG, log=AirbyteLogMessage(level=Level.ERROR, message=f"Cursor State at time of exception: {state}")
171
+ type=Type.LOG,
172
+ log=AirbyteLogMessage(
173
+ level=Level.ERROR, message=f"Cursor State at time of exception: {state}"
174
+ ),
147
175
  )
148
176
  raise exc
149
177
 
@@ -170,6 +198,10 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
170
198
  else:
171
199
  return self._abstract_stream.cursor_field
172
200
 
201
+ @property
202
+ def cursor(self) -> Optional[Cursor]: # type: ignore[override] # StreamFacade expects to use only airbyte_cdk.sources.streams.concurrent.cursor.Cursor
203
+ return self._cursor
204
+
173
205
  @lru_cache(maxsize=None)
174
206
  def get_json_schema(self) -> Mapping[str, Any]:
175
207
  return self._abstract_stream.get_json_schema()
@@ -178,7 +210,9 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
178
210
  def supports_incremental(self) -> bool:
179
211
  return self._legacy_stream.supports_incremental
180
212
 
181
- def check_availability(self, logger: logging.Logger, source: Optional["Source"] = None) -> Tuple[bool, Optional[str]]:
213
+ def check_availability(
214
+ self, logger: logging.Logger, source: Optional["Source"] = None
215
+ ) -> Tuple[bool, Optional[str]]:
182
216
  """
183
217
  Verifies the stream is available. Delegates to the underlying AbstractStream and ignores the parameters
184
218
  :param logger: (ignored)
@@ -198,6 +232,15 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
198
232
  return self._abstract_stream
199
233
 
200
234
 
235
+ class SliceEncoder(json.JSONEncoder):
236
+ def default(self, obj: Any) -> Any:
237
+ if hasattr(obj, "__json_serializable__"):
238
+ return obj.__json_serializable__()
239
+
240
+ # Let the base class default method raise the TypeError
241
+ return super().default(obj)
242
+
243
+
201
244
  class StreamPartition(Partition):
202
245
  """
203
246
  This class acts as an adapter between the new Partition interface and the Stream's stream_slice interface
@@ -216,7 +259,6 @@ class StreamPartition(Partition):
216
259
  sync_mode: SyncMode,
217
260
  cursor_field: Optional[List[str]],
218
261
  state: Optional[MutableMapping[str, Any]],
219
- cursor: Cursor,
220
262
  ):
221
263
  """
222
264
  :param stream: The stream to delegate to
@@ -229,8 +271,7 @@ class StreamPartition(Partition):
229
271
  self._sync_mode = sync_mode
230
272
  self._cursor_field = cursor_field
231
273
  self._state = state
232
- self._cursor = cursor
233
- self._is_closed = False
274
+ self._hash = SliceHasher.hash(self._stream.name, self._slice)
234
275
 
235
276
  def read(self) -> Iterable[Record]:
236
277
  """
@@ -253,10 +294,14 @@ class StreamPartition(Partition):
253
294
  ):
254
295
  if isinstance(record_data, Mapping):
255
296
  data_to_return = dict(record_data)
256
- self._stream.transformer.transform(data_to_return, self._stream.get_json_schema())
257
- record = Record(data_to_return, self._stream.name)
258
- self._cursor.observe(record)
259
- yield Record(data_to_return, self._stream.name)
297
+ self._stream.transformer.transform(
298
+ data_to_return, self._stream.get_json_schema()
299
+ )
300
+ yield Record(
301
+ data=data_to_return,
302
+ stream_name=self.stream_name(),
303
+ associated_slice=self._slice, # type: ignore [arg-type]
304
+ )
260
305
  else:
261
306
  self._message_repository.emit_message(record_data)
262
307
  except Exception as e:
@@ -270,23 +315,11 @@ class StreamPartition(Partition):
270
315
  return self._slice
271
316
 
272
317
  def __hash__(self) -> int:
273
- if self._slice:
274
- # Convert the slice to a string so that it can be hashed
275
- s = json.dumps(self._slice, sort_keys=True)
276
- return hash((self._stream.name, s))
277
- else:
278
- return hash(self._stream.name)
318
+ return self._hash
279
319
 
280
320
  def stream_name(self) -> str:
281
321
  return self._stream.name
282
322
 
283
- def close(self) -> None:
284
- self._cursor.close_partition(self)
285
- self._is_closed = True
286
-
287
- def is_closed(self) -> bool:
288
- return self._is_closed
289
-
290
323
  def __repr__(self) -> str:
291
324
  return f"StreamPartition({self._stream.name}, {self._slice})"
292
325
 
@@ -306,7 +339,6 @@ class StreamPartitionGenerator(PartitionGenerator):
306
339
  sync_mode: SyncMode,
307
340
  cursor_field: Optional[List[str]],
308
341
  state: Optional[MutableMapping[str, Any]],
309
- cursor: Cursor,
310
342
  ):
311
343
  """
312
344
  :param stream: The stream to delegate to
@@ -317,21 +349,32 @@ class StreamPartitionGenerator(PartitionGenerator):
317
349
  self._sync_mode = sync_mode
318
350
  self._cursor_field = cursor_field
319
351
  self._state = state
320
- self._cursor = cursor
321
352
 
322
353
  def generate(self) -> Iterable[Partition]:
323
- for s in self._stream.stream_slices(sync_mode=self._sync_mode, cursor_field=self._cursor_field, stream_state=self._state):
354
+ for s in self._stream.stream_slices(
355
+ sync_mode=self._sync_mode, cursor_field=self._cursor_field, stream_state=self._state
356
+ ):
324
357
  yield StreamPartition(
325
- self._stream, copy.deepcopy(s), self.message_repository, self._sync_mode, self._cursor_field, self._state, self._cursor
358
+ self._stream,
359
+ copy.deepcopy(s),
360
+ self.message_repository,
361
+ self._sync_mode,
362
+ self._cursor_field,
363
+ self._state,
326
364
  )
327
365
 
328
366
 
329
- @deprecated("This class is experimental. Use at your own risk.")
367
+ @deprecated(
368
+ "Availability strategy has been soft deprecated. Do not use. Class is subject to removal",
369
+ category=ExperimentalClassWarning,
370
+ )
330
371
  class AvailabilityStrategyFacade(AvailabilityStrategy):
331
372
  def __init__(self, abstract_availability_strategy: AbstractAvailabilityStrategy):
332
373
  self._abstract_availability_strategy = abstract_availability_strategy
333
374
 
334
- def check_availability(self, stream: Stream, logger: logging.Logger, source: Optional[Source]) -> Tuple[bool, Optional[str]]:
375
+ def check_availability(
376
+ self, stream: Stream, logger: logging.Logger, source: Optional["Source"] = None
377
+ ) -> Tuple[bool, Optional[str]]:
335
378
  """
336
379
  Checks stream availability.
337
380
 
@@ -344,37 +387,3 @@ class AvailabilityStrategyFacade(AvailabilityStrategy):
344
387
  """
345
388
  stream_availability = self._abstract_availability_strategy.check_availability(logger)
346
389
  return stream_availability.is_available(), stream_availability.message()
347
-
348
-
349
- class StreamAvailabilityStrategy(AbstractAvailabilityStrategy):
350
- """
351
- This class acts as an adapter between the existing AvailabilityStrategy and the new AbstractAvailabilityStrategy.
352
- StreamAvailabilityStrategy is instantiated with a Stream and a Source to allow the existing AvailabilityStrategy to be used with the new AbstractAvailabilityStrategy interface.
353
-
354
- A more convenient implementation would not depend on the docs URL instead of the Source itself, and would support running on an AbstractStream instead of only on a Stream.
355
-
356
- This class can be used to help enable concurrency on existing connectors without having to rewrite everything as AbstractStream and AbstractAvailabilityStrategy.
357
- In the long-run, it would be preferable to update the connectors, but we don't have the tooling or need to justify the effort at this time.
358
- """
359
-
360
- def __init__(self, stream: Stream, source: Source):
361
- """
362
- :param stream: The stream to delegate to
363
- :param source: The source to delegate to
364
- """
365
- self._stream = stream
366
- self._source = source
367
-
368
- def check_availability(self, logger: logging.Logger) -> StreamAvailability:
369
- try:
370
- available, message = self._stream.check_availability(logger, self._source)
371
- if available:
372
- return StreamAvailable()
373
- else:
374
- return StreamUnavailable(str(message))
375
- except Exception as e:
376
- display_message = self._stream.get_error_display_message(e)
377
- if display_message:
378
- raise ExceptionWithDisplayMessage(display_message)
379
- else:
380
- raise e
@@ -6,7 +6,9 @@ import logging
6
6
  from abc import ABC, abstractmethod
7
7
  from typing import Optional
8
8
 
9
- from deprecated.classic import deprecated
9
+ from typing_extensions import deprecated
10
+
11
+ from airbyte_cdk.sources.source import ExperimentalClassWarning
10
12
 
11
13
 
12
14
  class StreamAvailability(ABC):
@@ -46,7 +48,10 @@ class StreamUnavailable(StreamAvailability):
46
48
  STREAM_AVAILABLE = StreamAvailable()
47
49
 
48
50
 
49
- @deprecated("This class is experimental. Use at your own risk.")
51
+ @deprecated(
52
+ "This class is experimental. Use at your own risk.",
53
+ category=ExperimentalClassWarning,
54
+ )
50
55
  class AbstractAvailabilityStrategy(ABC):
51
56
  """
52
57
  AbstractAvailabilityStrategy is an experimental interface developed as part of the Concurrent CDK.
@@ -64,3 +69,26 @@ class AbstractAvailabilityStrategy(ABC):
64
69
  :param logger: logger object to use
65
70
  :return: A StreamAvailability object describing the stream's availability
66
71
  """
72
+
73
+
74
+ @deprecated(
75
+ "This class is experimental. Use at your own risk.",
76
+ category=ExperimentalClassWarning,
77
+ )
78
+ class AlwaysAvailableAvailabilityStrategy(AbstractAvailabilityStrategy):
79
+ """
80
+ An availability strategy that always indicates a stream is available.
81
+
82
+ This strategy is used to avoid breaking changes and serves as a soft
83
+ deprecation of the availability strategy, allowing a smoother transition
84
+ without disrupting existing functionality.
85
+ """
86
+
87
+ def check_availability(self, logger: logging.Logger) -> StreamAvailability:
88
+ """
89
+ Checks stream availability.
90
+
91
+ :param logger: logger object to use
92
+ :return: A StreamAvailability object describing the stream's availability
93
+ """
94
+ return StreamAvailable()