airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (368) hide show
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,3407 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from __future__ import annotations
6
+
7
+ import datetime
8
+ import importlib
9
+ import inspect
10
+ import re
11
+ from functools import partial
12
+ from typing import (
13
+ Any,
14
+ Callable,
15
+ Dict,
16
+ List,
17
+ Mapping,
18
+ MutableMapping,
19
+ Optional,
20
+ Type,
21
+ Union,
22
+ get_args,
23
+ get_origin,
24
+ get_type_hints,
25
+ )
26
+
27
+ from isodate import parse_duration
28
+ from pydantic.v1 import BaseModel
29
+
30
+ from airbyte_cdk.models import FailureType, Level
31
+ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
32
+ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
33
+ from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
34
+ from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
35
+ from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
36
+ from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator
37
+ from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
38
+ DeclarativeAuthenticator,
39
+ NoAuth,
40
+ )
41
+ from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm
42
+ from airbyte_cdk.sources.declarative.auth.oauth import (
43
+ DeclarativeSingleUseRefreshTokenOauth2Authenticator,
44
+ )
45
+ from airbyte_cdk.sources.declarative.auth.selective_authenticator import SelectiveAuthenticator
46
+ from airbyte_cdk.sources.declarative.auth.token import (
47
+ ApiKeyAuthenticator,
48
+ BasicHttpAuthenticator,
49
+ BearerAuthenticator,
50
+ LegacySessionTokenAuthenticator,
51
+ )
52
+ from airbyte_cdk.sources.declarative.auth.token_provider import (
53
+ InterpolatedStringTokenProvider,
54
+ SessionTokenProvider,
55
+ TokenProvider,
56
+ )
57
+ from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream
58
+ from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
59
+ from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
60
+ from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
61
+ from airbyte_cdk.sources.declarative.decoders import (
62
+ Decoder,
63
+ IterableDecoder,
64
+ JsonDecoder,
65
+ PaginationDecoderDecorator,
66
+ XmlDecoder,
67
+ ZipfileDecoder,
68
+ )
69
+ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
70
+ CompositeRawDecoder,
71
+ CsvParser,
72
+ GzipParser,
73
+ JsonLineParser,
74
+ JsonParser,
75
+ Parser,
76
+ )
77
+ from airbyte_cdk.sources.declarative.extractors import (
78
+ DpathExtractor,
79
+ RecordFilter,
80
+ RecordSelector,
81
+ ResponseToFileExtractor,
82
+ )
83
+ from airbyte_cdk.sources.declarative.extractors.record_filter import (
84
+ ClientSideIncrementalRecordFilterDecorator,
85
+ )
86
+ from airbyte_cdk.sources.declarative.incremental import (
87
+ ChildPartitionResumableFullRefreshCursor,
88
+ ConcurrentCursorFactory,
89
+ ConcurrentPerPartitionCursor,
90
+ CursorFactory,
91
+ DatetimeBasedCursor,
92
+ DeclarativeCursor,
93
+ GlobalSubstreamCursor,
94
+ PerPartitionCursor,
95
+ PerPartitionWithGlobalCursor,
96
+ ResumableFullRefreshCursor,
97
+ )
98
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
99
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
100
+ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
101
+ LegacyToPerPartitionStateMigration,
102
+ )
103
+ from airbyte_cdk.sources.declarative.models import (
104
+ CustomStateMigration,
105
+ )
106
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
107
+ AddedFieldDefinition as AddedFieldDefinitionModel,
108
+ )
109
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
110
+ AddFields as AddFieldsModel,
111
+ )
112
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
113
+ ApiKeyAuthenticator as ApiKeyAuthenticatorModel,
114
+ )
115
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
116
+ AsyncJobStatusMap as AsyncJobStatusMapModel,
117
+ )
118
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
119
+ AsyncRetriever as AsyncRetrieverModel,
120
+ )
121
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
122
+ BasicHttpAuthenticator as BasicHttpAuthenticatorModel,
123
+ )
124
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
125
+ BearerAuthenticator as BearerAuthenticatorModel,
126
+ )
127
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
128
+ CheckDynamicStream as CheckDynamicStreamModel,
129
+ )
130
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
131
+ CheckStream as CheckStreamModel,
132
+ )
133
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
134
+ ComplexFieldType as ComplexFieldTypeModel,
135
+ )
136
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
137
+ ComponentMappingDefinition as ComponentMappingDefinitionModel,
138
+ )
139
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
140
+ CompositeErrorHandler as CompositeErrorHandlerModel,
141
+ )
142
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
143
+ ConcurrencyLevel as ConcurrencyLevelModel,
144
+ )
145
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
146
+ ConfigComponentsResolver as ConfigComponentsResolverModel,
147
+ )
148
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
149
+ ConstantBackoffStrategy as ConstantBackoffStrategyModel,
150
+ )
151
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
152
+ CsvDecoder as CsvDecoderModel,
153
+ )
154
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
155
+ CursorPagination as CursorPaginationModel,
156
+ )
157
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
158
+ CustomAuthenticator as CustomAuthenticatorModel,
159
+ )
160
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
161
+ CustomBackoffStrategy as CustomBackoffStrategyModel,
162
+ )
163
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
164
+ CustomDecoder as CustomDecoderModel,
165
+ )
166
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
167
+ CustomErrorHandler as CustomErrorHandlerModel,
168
+ )
169
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
170
+ CustomIncrementalSync as CustomIncrementalSyncModel,
171
+ )
172
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
173
+ CustomPaginationStrategy as CustomPaginationStrategyModel,
174
+ )
175
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
176
+ CustomPartitionRouter as CustomPartitionRouterModel,
177
+ )
178
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
179
+ CustomRecordExtractor as CustomRecordExtractorModel,
180
+ )
181
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
182
+ CustomRecordFilter as CustomRecordFilterModel,
183
+ )
184
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
185
+ CustomRequester as CustomRequesterModel,
186
+ )
187
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
188
+ CustomRetriever as CustomRetrieverModel,
189
+ )
190
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
191
+ CustomSchemaLoader as CustomSchemaLoader,
192
+ )
193
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
194
+ CustomSchemaNormalization as CustomSchemaNormalizationModel,
195
+ )
196
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
197
+ CustomTransformation as CustomTransformationModel,
198
+ )
199
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
200
+ DatetimeBasedCursor as DatetimeBasedCursorModel,
201
+ )
202
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
203
+ DeclarativeStream as DeclarativeStreamModel,
204
+ )
205
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
206
+ DefaultErrorHandler as DefaultErrorHandlerModel,
207
+ )
208
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
209
+ DefaultPaginator as DefaultPaginatorModel,
210
+ )
211
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
212
+ DpathExtractor as DpathExtractorModel,
213
+ )
214
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
215
+ DpathFlattenFields as DpathFlattenFieldsModel,
216
+ )
217
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
218
+ DynamicSchemaLoader as DynamicSchemaLoaderModel,
219
+ )
220
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
221
+ ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
222
+ )
223
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
224
+ FileUploader as FileUploaderModel,
225
+ )
226
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
227
+ FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
228
+ )
229
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
230
+ FlattenFields as FlattenFieldsModel,
231
+ )
232
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
233
+ GzipDecoder as GzipDecoderModel,
234
+ )
235
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
236
+ HTTPAPIBudget as HTTPAPIBudgetModel,
237
+ )
238
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
239
+ HttpComponentsResolver as HttpComponentsResolverModel,
240
+ )
241
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
242
+ HttpRequester as HttpRequesterModel,
243
+ )
244
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
245
+ HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
246
+ )
247
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
248
+ HttpResponseFilter as HttpResponseFilterModel,
249
+ )
250
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
251
+ IncrementingCountCursor as IncrementingCountCursorModel,
252
+ )
253
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
254
+ InlineSchemaLoader as InlineSchemaLoaderModel,
255
+ )
256
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
257
+ IterableDecoder as IterableDecoderModel,
258
+ )
259
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
260
+ JsonDecoder as JsonDecoderModel,
261
+ )
262
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
263
+ JsonFileSchemaLoader as JsonFileSchemaLoaderModel,
264
+ )
265
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
266
+ JsonlDecoder as JsonlDecoderModel,
267
+ )
268
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
269
+ JwtAuthenticator as JwtAuthenticatorModel,
270
+ )
271
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
272
+ JwtHeaders as JwtHeadersModel,
273
+ )
274
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
275
+ JwtPayload as JwtPayloadModel,
276
+ )
277
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
278
+ KeysReplace as KeysReplaceModel,
279
+ )
280
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
281
+ KeysToLower as KeysToLowerModel,
282
+ )
283
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
284
+ KeysToSnakeCase as KeysToSnakeCaseModel,
285
+ )
286
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
287
+ LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
288
+ )
289
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
290
+ LegacyToPerPartitionStateMigration as LegacyToPerPartitionStateMigrationModel,
291
+ )
292
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
293
+ ListPartitionRouter as ListPartitionRouterModel,
294
+ )
295
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
296
+ MinMaxDatetime as MinMaxDatetimeModel,
297
+ )
298
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
299
+ MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
300
+ )
301
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
302
+ NoAuth as NoAuthModel,
303
+ )
304
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
305
+ NoPagination as NoPaginationModel,
306
+ )
307
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
308
+ OAuthAuthenticator as OAuthAuthenticatorModel,
309
+ )
310
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
311
+ OffsetIncrement as OffsetIncrementModel,
312
+ )
313
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
314
+ PageIncrement as PageIncrementModel,
315
+ )
316
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
317
+ ParentStreamConfig as ParentStreamConfigModel,
318
+ )
319
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
320
+ Rate as RateModel,
321
+ )
322
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
323
+ RecordFilter as RecordFilterModel,
324
+ )
325
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
326
+ RecordSelector as RecordSelectorModel,
327
+ )
328
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
329
+ RemoveFields as RemoveFieldsModel,
330
+ )
331
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
332
+ RequestOption as RequestOptionModel,
333
+ )
334
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
335
+ RequestPath as RequestPathModel,
336
+ )
337
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
338
+ ResponseToFileExtractor as ResponseToFileExtractorModel,
339
+ )
340
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
341
+ SchemaNormalization as SchemaNormalizationModel,
342
+ )
343
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
344
+ SchemaTypeIdentifier as SchemaTypeIdentifierModel,
345
+ )
346
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
347
+ SelectiveAuthenticator as SelectiveAuthenticatorModel,
348
+ )
349
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
350
+ SessionTokenAuthenticator as SessionTokenAuthenticatorModel,
351
+ )
352
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
353
+ SimpleRetriever as SimpleRetrieverModel,
354
+ )
355
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
356
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
357
+ StateDelegatingStream as StateDelegatingStreamModel,
358
+ )
359
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
360
+ StreamConfig as StreamConfigModel,
361
+ )
362
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
363
+ SubstreamPartitionRouter as SubstreamPartitionRouterModel,
364
+ )
365
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
366
+ TypesMap as TypesMapModel,
367
+ )
368
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
369
+ UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
370
+ )
371
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
372
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
373
+ WaitTimeFromHeader as WaitTimeFromHeaderModel,
374
+ )
375
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
376
+ WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel,
377
+ )
378
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
379
+ XmlDecoder as XmlDecoderModel,
380
+ )
381
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
382
+ ZipfileDecoder as ZipfileDecoderModel,
383
+ )
384
+ from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
385
+ COMPONENTS_MODULE_NAME,
386
+ SDM_COMPONENTS_MODULE_NAME,
387
+ )
388
+ from airbyte_cdk.sources.declarative.partition_routers import (
389
+ CartesianProductStreamSlicer,
390
+ ListPartitionRouter,
391
+ PartitionRouter,
392
+ SinglePartitionRouter,
393
+ SubstreamPartitionRouter,
394
+ )
395
+ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
396
+ AsyncJobPartitionRouter,
397
+ )
398
+ from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
399
+ ParentStreamConfig,
400
+ )
401
+ from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption
402
+ from airbyte_cdk.sources.declarative.requesters.error_handlers import (
403
+ CompositeErrorHandler,
404
+ DefaultErrorHandler,
405
+ HttpResponseFilter,
406
+ )
407
+ from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import (
408
+ ConstantBackoffStrategy,
409
+ ExponentialBackoffStrategy,
410
+ WaitTimeFromHeaderBackoffStrategy,
411
+ WaitUntilTimeFromHeaderBackoffStrategy,
412
+ )
413
+ from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository
414
+ from airbyte_cdk.sources.declarative.requesters.paginators import (
415
+ DefaultPaginator,
416
+ NoPagination,
417
+ PaginatorTestReadDecorator,
418
+ )
419
+ from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
420
+ CursorPaginationStrategy,
421
+ CursorStopCondition,
422
+ OffsetIncrement,
423
+ PageIncrement,
424
+ StopConditionPaginationStrategyDecorator,
425
+ )
426
+ from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
427
+ from airbyte_cdk.sources.declarative.requesters.request_options import (
428
+ DatetimeBasedRequestOptionsProvider,
429
+ DefaultRequestOptionsProvider,
430
+ InterpolatedRequestOptionsProvider,
431
+ RequestOptionsProvider,
432
+ )
433
+ from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
434
+ from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
435
+ from airbyte_cdk.sources.declarative.resolvers import (
436
+ ComponentMappingDefinition,
437
+ ConfigComponentsResolver,
438
+ HttpComponentsResolver,
439
+ StreamConfig,
440
+ )
441
+ from airbyte_cdk.sources.declarative.retrievers import (
442
+ AsyncRetriever,
443
+ LazySimpleRetriever,
444
+ SimpleRetriever,
445
+ SimpleRetrieverTestReadDecorator,
446
+ )
447
+ from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
448
+ from airbyte_cdk.sources.declarative.schema import (
449
+ ComplexFieldType,
450
+ DefaultSchemaLoader,
451
+ DynamicSchemaLoader,
452
+ InlineSchemaLoader,
453
+ JsonFileSchemaLoader,
454
+ SchemaTypeIdentifier,
455
+ TypesMap,
456
+ )
457
+ from airbyte_cdk.sources.declarative.spec import Spec
458
+ from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
459
+ from airbyte_cdk.sources.declarative.transformations import (
460
+ AddFields,
461
+ RecordTransformation,
462
+ RemoveFields,
463
+ )
464
+ from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
465
+ from airbyte_cdk.sources.declarative.transformations.dpath_flatten_fields import (
466
+ DpathFlattenFields,
467
+ )
468
+ from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
469
+ FlattenFields,
470
+ )
471
+ from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
472
+ KeysReplaceTransformation,
473
+ )
474
+ from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
475
+ KeysToLowerTransformation,
476
+ )
477
+ from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
478
+ KeysToSnakeCaseTransformation,
479
+ )
480
+ from airbyte_cdk.sources.message import (
481
+ InMemoryMessageRepository,
482
+ LogAppenderMessageRepositoryDecorator,
483
+ MessageRepository,
484
+ NoopMessageRepository,
485
+ )
486
+ from airbyte_cdk.sources.streams.call_rate import (
487
+ APIBudget,
488
+ FixedWindowCallRatePolicy,
489
+ HttpAPIBudget,
490
+ HttpRequestRegexMatcher,
491
+ MovingWindowCallRatePolicy,
492
+ Rate,
493
+ UnlimitedCallRatePolicy,
494
+ )
495
+ from airbyte_cdk.sources.streams.concurrent.clamping import (
496
+ ClampingEndProvider,
497
+ ClampingStrategy,
498
+ DayClampingStrategy,
499
+ MonthClampingStrategy,
500
+ NoClamping,
501
+ WeekClampingStrategy,
502
+ Weekday,
503
+ )
504
+ from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
505
+ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
506
+ CustomFormatConcurrentStreamStateConverter,
507
+ DateTimeStreamStateConverter,
508
+ )
509
+ from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
510
+ IncrementingCountStreamStateConverter,
511
+ )
512
+ from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
513
+ from airbyte_cdk.sources.types import Config
514
+ from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
515
+
516
# Alias for a raw component definition: a string-keyed mapping that has not yet been parsed into a model.
ComponentDefinition = Mapping[str, Any]

# Translates each SchemaNormalization model value into the TransformConfig value it corresponds to.
SCHEMA_TRANSFORMER_TYPE_MAPPING = {
    SchemaNormalizationModel.None_: TransformConfig.NoTransform,
    SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
}
522
+
523
+
524
+ class ModelToComponentFactory:
525
+ EPOCH_DATETIME_FORMAT = "%s"
526
+
527
def __init__(
    self,
    limit_pages_fetched_per_slice: Optional[int] = None,
    limit_slices_fetched: Optional[int] = None,
    emit_connector_builder_messages: bool = False,
    disable_retries: bool = False,
    disable_cache: bool = False,
    disable_resumable_full_refresh: bool = False,
    message_repository: Optional[MessageRepository] = None,
    connector_state_manager: Optional[ConnectorStateManager] = None,
    max_concurrent_async_job_count: Optional[int] = None,
):
    """
    Set up the factory: build the model-to-constructor table and record the options it was given.

    :param limit_pages_fetched_per_slice: stored as-is on the factory
    :param limit_slices_fetched: stored as-is on the factory
    :param emit_connector_builder_messages: stored as-is; also used to pick the log level of the default
        message repository
    :param disable_retries: stored as-is on the factory
    :param disable_cache: stored as-is on the factory
    :param disable_resumable_full_refresh: stored as-is on the factory
    :param message_repository: repository to use; when falsy an InMemoryMessageRepository is created
    :param connector_state_manager: state manager to use; when falsy a new ConnectorStateManager is created
    :param max_concurrent_async_job_count: passed to the JobTracker; a falsy value (None or 0) becomes 1
    """
    # The dispatch table is built before any other attribute is assigned.
    self._init_mappings()

    self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
    self._limit_slices_fetched = limit_slices_fetched
    self._emit_connector_builder_messages = emit_connector_builder_messages
    self._disable_retries = disable_retries
    self._disable_cache = disable_cache
    self._disable_resumable_full_refresh = disable_resumable_full_refresh

    # Fall back to an in-memory repository when the caller did not supply a (truthy) one.
    if message_repository:
        self._message_repository = message_repository
    else:
        self._message_repository = InMemoryMessageRepository(
            self._evaluate_log_level(emit_connector_builder_messages)
        )

    # Same fallback rule for the state manager.
    if connector_state_manager:
        self._connector_state_manager = connector_state_manager
    else:
        self._connector_state_manager = ConnectorStateManager()

    self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None

    # A missing (or zero) job limit is treated as a limit of one.
    job_limit = max_concurrent_async_job_count if max_concurrent_async_job_count else 1
    self._job_tracker: JobTracker = JobTracker(job_limit)
552
+
553
def _init_mappings(self) -> None:
    """
    Build the two lookup tables the factory dispatches on.

    PYDANTIC_MODEL_TO_CONSTRUCTOR maps each supported Pydantic model class to the bound factory method
    that constructs its component. TYPE_NAME_TO_MODEL is derived from it and maps a model class name back
    to the model class.
    """
    self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
        AddedFieldDefinitionModel: self.create_added_field_definition,
        AddFieldsModel: self.create_add_fields,
        ApiKeyAuthenticatorModel: self.create_api_key_authenticator,
        BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
        BearerAuthenticatorModel: self.create_bearer_authenticator,
        CheckStreamModel: self.create_check_stream,
        CheckDynamicStreamModel: self.create_check_dynamic_stream,
        CompositeErrorHandlerModel: self.create_composite_error_handler,
        ConcurrencyLevelModel: self.create_concurrency_level,
        ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
        CsvDecoderModel: self.create_csv_decoder,
        CursorPaginationModel: self.create_cursor_pagination,
        # Every Custom* model is routed to the same generic constructor.
        CustomAuthenticatorModel: self.create_custom_component,
        CustomBackoffStrategyModel: self.create_custom_component,
        CustomDecoderModel: self.create_custom_component,
        CustomErrorHandlerModel: self.create_custom_component,
        CustomIncrementalSyncModel: self.create_custom_component,
        CustomRecordExtractorModel: self.create_custom_component,
        CustomRecordFilterModel: self.create_custom_component,
        CustomRequesterModel: self.create_custom_component,
        CustomRetrieverModel: self.create_custom_component,
        # NOTE: CustomSchemaLoader and CustomStateMigration are model classes too; they are simply
        # imported without the "Model" alias suffix used by the other entries.
        CustomSchemaLoader: self.create_custom_component,
        CustomSchemaNormalizationModel: self.create_custom_component,
        CustomStateMigration: self.create_custom_component,
        CustomPaginationStrategyModel: self.create_custom_component,
        CustomPartitionRouterModel: self.create_custom_component,
        CustomTransformationModel: self.create_custom_component,
        DatetimeBasedCursorModel: self.create_datetime_based_cursor,
        DeclarativeStreamModel: self.create_declarative_stream,
        DefaultErrorHandlerModel: self.create_default_error_handler,
        DefaultPaginatorModel: self.create_default_paginator,
        DpathExtractorModel: self.create_dpath_extractor,
        ResponseToFileExtractorModel: self.create_response_to_file_extractor,
        ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
        SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
        HttpRequesterModel: self.create_http_requester,
        HttpResponseFilterModel: self.create_http_response_filter,
        InlineSchemaLoaderModel: self.create_inline_schema_loader,
        JsonDecoderModel: self.create_json_decoder,
        JsonlDecoderModel: self.create_jsonl_decoder,
        GzipDecoderModel: self.create_gzip_decoder,
        KeysToLowerModel: self.create_keys_to_lower_transformation,
        KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
        KeysReplaceModel: self.create_keys_replace_transformation,
        FlattenFieldsModel: self.create_flatten_fields,
        DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
        IterableDecoderModel: self.create_iterable_decoder,
        IncrementingCountCursorModel: self.create_incrementing_count_cursor,
        XmlDecoderModel: self.create_xml_decoder,
        JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
        DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
        SchemaTypeIdentifierModel: self.create_schema_type_identifier,
        TypesMapModel: self.create_types_map,
        ComplexFieldTypeModel: self.create_complex_field_type,
        JwtAuthenticatorModel: self.create_jwt_authenticator,
        LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
        ListPartitionRouterModel: self.create_list_partition_router,
        MinMaxDatetimeModel: self.create_min_max_datetime,
        NoAuthModel: self.create_no_auth,
        NoPaginationModel: self.create_no_pagination,
        OAuthAuthenticatorModel: self.create_oauth_authenticator,
        OffsetIncrementModel: self.create_offset_increment,
        PageIncrementModel: self.create_page_increment,
        ParentStreamConfigModel: self.create_parent_stream_config,
        RecordFilterModel: self.create_record_filter,
        RecordSelectorModel: self.create_record_selector,
        RemoveFieldsModel: self.create_remove_fields,
        RequestPathModel: self.create_request_path,
        RequestOptionModel: self.create_request_option,
        LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
        SelectiveAuthenticatorModel: self.create_selective_authenticator,
        SimpleRetrieverModel: self.create_simple_retriever,
        StateDelegatingStreamModel: self.create_state_delegating_stream,
        SpecModel: self.create_spec,
        SubstreamPartitionRouterModel: self.create_substream_partition_router,
        WaitTimeFromHeaderModel: self.create_wait_time_from_header,
        WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
        AsyncRetrieverModel: self.create_async_retriever,
        HttpComponentsResolverModel: self.create_http_components_resolver,
        ConfigComponentsResolverModel: self.create_config_components_resolver,
        StreamConfigModel: self.create_stream_config,
        ComponentMappingDefinitionModel: self.create_components_mapping_definition,
        ZipfileDecoderModel: self.create_zipfile_decoder,
        HTTPAPIBudgetModel: self.create_http_api_budget,
        FileUploaderModel: self.create_file_uploader,
        FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
        MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
        UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
        RateModel: self.create_rate,
        HttpRequestRegexMatcherModel: self.create_http_request_matcher,
    }

    # Needed for the case where we need to perform a second parse on the fields of a custom component.
    # Keys are the model classes' own __name__ values, not the "...Model" import aliases used above.
    self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR}
649
+
650
def create_component(
    self,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    config: Config,
    **kwargs: Any,
) -> Any:
    """
    Turn a manifest component definition into the declarative component (and subcomponents) used at runtime.

    The mapping is first parsed into the requested Pydantic model type; the resulting model is then handed
    to the constructor registered for that model class.

    :param model_type: The type of declarative component that is being initialized
    :param component_definition: The mapping that represents a declarative component
    :param config: The connector config that is provided by the customer
    :param kwargs: Extra keyword arguments forwarded unchanged to the component constructor
    :return: The declarative component to be used at runtime
    :raises ValueError: If the definition's "type" differs from the model type's name, or if parsing
        produces an object that is not an instance of the model type
    """
    declared_type = component_definition.get("type")
    expected_name = model_type.__name__

    # Guard: the manifest must declare exactly the type the caller asked for.
    if declared_type != expected_name:
        raise ValueError(
            f"Expected manifest component of type {expected_name}, but received {declared_type} instead"
        )

    parsed_model = model_type.parse_obj(component_definition)

    # Guard: parsing must yield an instance of the requested model type.
    if not isinstance(parsed_model, model_type):
        raise ValueError(
            f"Expected {expected_name} component, but received {parsed_model.__class__.__name__}"
        )

    return self._create_component_from_model(model=parsed_model, config=config, **kwargs)
684
+
685
def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
    """
    Dispatch an already-parsed Pydantic model to the constructor registered for its class.

    :param model: The parsed Pydantic model describing the component
    :param config: The connector config, forwarded to the constructor
    :param kwargs: Extra keyword arguments forwarded unchanged to the constructor
    :return: Whatever the registered constructor returns
    :raises ValueError: If the model's class is not registered, or its registered constructor is falsy
    """
    constructors = self.PYDANTIC_MODEL_TO_CONSTRUCTOR
    model_class = model.__class__

    if model_class not in constructors:
        raise ValueError(
            f"{model.__class__} with attributes {model} is not a valid component type"
        )

    component_constructor = constructors.get(model_class)
    if component_constructor:
        return component_constructor(model=model, config=config, **kwargs)

    # A key can be registered with an empty value; treat that as "no constructor available".
    raise ValueError(f"Could not find constructor for {model.__class__}")
694
+
695
@staticmethod
def create_added_field_definition(
    model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
) -> AddedFieldDefinition:
    """
    Build an AddedFieldDefinition from its model.

    The model's value is wrapped as an interpolated string and its declared value type is translated to a
    Python type. ``config`` and ``kwargs`` are accepted for signature uniformity and are not read here.

    :param model: The parsed AddedFieldDefinition model
    :return: The runtime AddedFieldDefinition
    """
    python_value_type = ModelToComponentFactory._json_schema_type_name_to_type(model.value_type)
    value_template = InterpolatedString.create(model.value, parameters=model.parameters or {})
    return AddedFieldDefinition(
        path=model.path,
        value=value_template,
        value_type=python_value_type,
        parameters=model.parameters or {},
    )
708
+
709
def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
    """
    Build an AddFields transformation, creating one field definition per entry of ``model.fields``.

    :param model: The parsed AddFields model
    :param config: The connector config, forwarded to each field definition constructor
    :return: The runtime AddFields transformation
    """
    added_field_definitions = []
    for field_model in model.fields:
        declared_type = ModelToComponentFactory._json_schema_type_name_to_type(
            field_model.value_type
        )
        added_field_definitions.append(
            self._create_component_from_model(
                model=field_model,
                value_type=declared_type,
                config=config,
            )
        )

    return AddFields(
        fields=added_field_definitions,
        condition=model.condition or "",
        parameters=model.parameters or {},
    )
725
+
726
def create_keys_to_lower_transformation(
    self, model: KeysToLowerModel, config: Config, **kwargs: Any
) -> KeysToLowerTransformation:
    """Return a new KeysToLowerTransformation; none of the arguments are read."""
    transformation = KeysToLowerTransformation()
    return transformation
730
+
731
def create_keys_to_snake_transformation(
    self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
) -> KeysToSnakeCaseTransformation:
    """Return a new KeysToSnakeCaseTransformation; none of the arguments are read."""
    transformation = KeysToSnakeCaseTransformation()
    return transformation
735
+
736
def create_keys_replace_transformation(
    self, model: KeysReplaceModel, config: Config, **kwargs: Any
) -> KeysReplaceTransformation:
    """
    Build a KeysReplaceTransformation from the model's ``old`` and ``new`` values.

    :param model: The parsed KeysReplace model
    :return: The runtime KeysReplaceTransformation
    """
    return KeysReplaceTransformation(
        old=model.old,
        new=model.new,
        parameters=model.parameters or {},
    )
742
+
743
def create_flatten_fields(
    self, model: FlattenFieldsModel, config: Config, **kwargs: Any
) -> FlattenFields:
    """
    Build a FlattenFields transformation.

    :param model: The parsed FlattenFields model; an unset ``flatten_lists`` is treated as True
    :return: The runtime FlattenFields transformation
    """
    flatten_lists = True if model.flatten_lists is None else model.flatten_lists
    return FlattenFields(flatten_lists=flatten_lists)
749
+
750
def create_dpath_flatten_fields(
    self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
) -> DpathFlattenFields:
    """
    Build a DpathFlattenFields transformation.

    Unset ``delete_origin_value`` and ``replace_record`` both default to False.

    :param model: The parsed DpathFlattenFields model
    :param config: The connector config, passed through to the transformation
    :return: The runtime DpathFlattenFields transformation
    """
    model_field_path: List[Union[InterpolatedString, str]] = list(model.field_path)
    delete_origin_value = (
        False if model.delete_origin_value is None else model.delete_origin_value
    )
    replace_record = False if model.replace_record is None else model.replace_record

    return DpathFlattenFields(
        config=config,
        field_path=model_field_path,
        delete_origin_value=delete_origin_value,
        replace_record=replace_record,
        parameters=model.parameters or {},
    )
763
+
764
@staticmethod
def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
    """
    Translate a ValueType member into the matching Python builtin type.

    :param value_type: The declared value type, or a falsy value when none was declared
    :return: ``str``, ``float``, ``int`` or ``bool``; ``None`` when ``value_type`` is falsy
    :raises KeyError: If ``value_type`` is truthy but not one of the four mapped members
    """
    python_types = {
        ValueType.string: str,
        ValueType.number: float,
        ValueType.integer: int,
        ValueType.boolean: bool,
    }
    return python_types[value_type] if value_type else None
775
+
776
def create_api_key_authenticator(
    self,
    model: ApiKeyAuthenticatorModel,
    config: Config,
    token_provider: Optional[TokenProvider] = None,
    **kwargs: Any,
) -> ApiKeyAuthenticator:
    """
    Build an ApiKeyAuthenticator.

    The token is injected where ``inject_into`` says, or into the header named by the deprecated ``header``
    option. When no ``token_provider`` is supplied, one is built from the model's ``api_token``.

    :param model: The parsed ApiKeyAuthenticator model
    :param config: The connector config
    :param token_provider: Optional provider overriding the model's ``api_token``
    :return: The runtime ApiKeyAuthenticator
    :raises ValueError: If neither or both of ``inject_into`` / ``header`` are set, or if a ``token_provider``
        is given together with a non-empty ``api_token``
    """
    has_inject_into = model.inject_into is not None
    has_header = model.header is not None

    if not has_inject_into and not has_header:
        raise ValueError(
            "Expected either inject_into or header to be set for ApiKeyAuthenticator"
        )

    if has_inject_into and has_header:
        raise ValueError(
            "inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option"
        )

    if token_provider is not None and model.api_token != "":
        raise ValueError(
            "If token_provider is set, api_token is ignored and has to be set to empty string."
        )

    if model.inject_into:
        request_option = self._create_component_from_model(
            model.inject_into, config, parameters=model.parameters or {}
        )
    else:
        # Fall back to the deprecated `header` option.
        request_option = RequestOption(
            inject_into=RequestOptionType.header,
            field_name=model.header or "",
            parameters=model.parameters or {},
        )

    if token_provider is not None:
        effective_token_provider = token_provider
    else:
        effective_token_provider = InterpolatedStringTokenProvider(
            api_token=model.api_token or "",
            config=config,
            parameters=model.parameters or {},
        )

    return ApiKeyAuthenticator(
        token_provider=effective_token_provider,
        request_option=request_option,
        config=config,
        parameters=model.parameters or {},
    )
824
+
825
def create_legacy_to_per_partition_state_migration(
    self,
    model: LegacyToPerPartitionStateMigrationModel,
    config: Mapping[str, Any],
    declarative_stream: DeclarativeStreamModel,
) -> LegacyToPerPartitionStateMigration:
    """
    Build a LegacyToPerPartitionStateMigration for a stream, after validating the stream's shape.

    :param model: The parsed migration model (not read here)
    :param config: The connector config, passed to the migration
    :param declarative_stream: The stream model the migration is attached to
    :return: The runtime LegacyToPerPartitionStateMigration
    :raises ValueError: If the retriever is not a SimpleRetriever, the partition router is neither a substream
        nor a custom partition router, or the expected attributes are missing
    """
    supported_router_models = (SubstreamPartitionRouterModel, CustomPartitionRouterModel)

    retriever = declarative_stream.retriever
    if not isinstance(retriever, SimpleRetrieverModel):
        raise ValueError(
            f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}"
        )

    partition_router = retriever.partition_router
    if not isinstance(partition_router, supported_router_models):
        raise ValueError(
            f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
        )

    # NOTE(review): hasattr only checks that the attribute exists, not that it is set; if these models
    # declare the field with a None default the two checks below never fire - confirm intent.
    if not hasattr(partition_router, "parent_stream_configs"):
        raise ValueError(
            "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
        )

    if not hasattr(declarative_stream, "incremental_sync"):
        raise ValueError(
            "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
        )

    incremental_sync = declarative_stream.incremental_sync
    stream_parameters = declarative_stream.parameters
    return LegacyToPerPartitionStateMigration(
        partition_router,  # type: ignore # was already checked above
        incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
        config,
        stream_parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
    )
859
+
860
def create_session_token_authenticator(
    self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
    """
    Build an authenticator whose token comes from a login request.

    A SessionTokenProvider is created around the login requester and then plugged into either a bearer
    authenticator (when ``request_authentication.type`` is "Bearer") or an API-key authenticator.

    :param model: The parsed SessionTokenAuthenticator model
    :param config: The connector config
    :param name: Base name; the login requester is named ``f"{name}_login_requester"``
    :return: A BearerAuthenticator or an ApiKeyAuthenticator backed by the session token provider
    """
    if model.decoder:
        decoder = self._create_component_from_model(model=model.decoder, config=config)
    else:
        decoder = JsonDecoder(parameters={})

    login_requester = self._create_component_from_model(
        model=model.login_requester,
        config=config,
        name=f"{name}_login_requester",
        decoder=decoder,
    )

    expiration_duration = None
    if model.expiration_duration:
        expiration_duration = parse_duration(model.expiration_duration)

    token_provider = SessionTokenProvider(
        login_requester=login_requester,
        session_token_path=model.session_token_path,
        expiration_duration=expiration_duration,
        parameters=model.parameters or {},
        message_repository=self._message_repository,
        decoder=decoder,
    )

    if model.request_authentication.type == "Bearer":
        bearer_model = BearerAuthenticatorModel(type="BearerAuthenticator", api_token="")  # type: ignore # $parameters has a default value
        return ModelToComponentFactory.create_bearer_authenticator(
            bearer_model,
            config,
            token_provider=token_provider,
        )

    api_key_model = ApiKeyAuthenticatorModel(
        type="ApiKeyAuthenticator",
        api_token="",
        inject_into=model.request_authentication.inject_into,
    )  # type: ignore # $parameters and headers default to None
    return self.create_api_key_authenticator(
        api_key_model,
        config=config,
        token_provider=token_provider,
    )
900
+
901
@staticmethod
def create_basic_http_authenticator(
    model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
) -> BasicHttpAuthenticator:
    """
    Build a BasicHttpAuthenticator; a missing password is replaced by an empty string.

    :param model: The parsed BasicHttpAuthenticator model
    :param config: The connector config
    :return: The runtime BasicHttpAuthenticator
    """
    password = model.password or ""
    return BasicHttpAuthenticator(
        password=password,
        username=model.username,
        config=config,
        parameters=model.parameters or {},
    )
911
+
912
@staticmethod
def create_bearer_authenticator(
    model: BearerAuthenticatorModel,
    config: Config,
    token_provider: Optional[TokenProvider] = None,
    **kwargs: Any,
) -> BearerAuthenticator:
    """
    Build a BearerAuthenticator.

    When no ``token_provider`` is supplied, one is built from the model's ``api_token``.

    :param model: The parsed BearerAuthenticator model
    :param config: The connector config
    :param token_provider: Optional provider overriding the model's ``api_token``
    :return: The runtime BearerAuthenticator
    :raises ValueError: If a ``token_provider`` is given together with a non-empty ``api_token``
    """
    if token_provider is not None and model.api_token != "":
        raise ValueError(
            "If token_provider is set, api_token is ignored and has to be set to empty string."
        )

    if token_provider is not None:
        effective_token_provider = token_provider
    else:
        effective_token_provider = InterpolatedStringTokenProvider(
            api_token=model.api_token or "",
            config=config,
            parameters=model.parameters or {},
        )

    return BearerAuthenticator(
        token_provider=effective_token_provider,
        config=config,
        parameters=model.parameters or {},
    )
936
+
937
@staticmethod
def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream:
    """Build a CheckStream for the model's ``stream_names``; always created with empty parameters."""
    stream_names = model.stream_names
    return CheckStream(stream_names=stream_names, parameters={})
940
+
941
@staticmethod
def create_check_dynamic_stream(
    model: CheckDynamicStreamModel, config: Config, **kwargs: Any
) -> CheckDynamicStream:
    """
    Build a CheckDynamicStream from the model's ``stream_count`` and ``use_check_availability``.

    :param model: The parsed CheckDynamicStream model
    :return: The runtime CheckDynamicStream, created with empty parameters
    """
    use_check_availability = model.use_check_availability
    assert use_check_availability is not None  # for mypy

    return CheckDynamicStream(
        stream_count=model.stream_count,
        use_check_availability=use_check_availability,
        parameters={},
    )
954
+
955
def create_composite_error_handler(
    self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
) -> CompositeErrorHandler:
    """
    Build a CompositeErrorHandler wrapping one handler per entry of ``model.error_handlers``.

    :param model: The parsed CompositeErrorHandler model
    :param config: The connector config, forwarded to each nested handler
    :return: The runtime CompositeErrorHandler
    """
    error_handlers = []
    for handler_model in model.error_handlers:
        error_handlers.append(
            self._create_component_from_model(model=handler_model, config=config)
        )

    return CompositeErrorHandler(
        error_handlers=error_handlers,
        parameters=model.parameters or {},
    )
965
+
966
@staticmethod
def create_concurrency_level(
    model: ConcurrencyLevelModel, config: Config, **kwargs: Any
) -> ConcurrencyLevel:
    """
    Build a ConcurrencyLevel from the model's default and maximum concurrency.

    :param model: The parsed ConcurrencyLevel model
    :param config: The connector config
    :return: The runtime ConcurrencyLevel, created with empty parameters
    """
    default_concurrency = model.default_concurrency
    max_concurrency = model.max_concurrency
    return ConcurrencyLevel(
        default_concurrency=default_concurrency,
        max_concurrency=max_concurrency,
        config=config,
        parameters={},
    )
976
+
977
+ @staticmethod
978
+ def apply_stream_state_migrations(
979
+ stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
980
+ ) -> MutableMapping[str, Any]:
981
+ if stream_state_migrations:
982
+ for state_migration in stream_state_migrations:
983
+ if state_migration.should_migrate(stream_state):
984
+ # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
985
+ stream_state = dict(state_migration.migrate(stream_state))
986
+ return stream_state
987
+
988
def create_concurrent_cursor_from_datetime_based_cursor(
    self,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    stream_name: str,
    stream_namespace: Optional[str],
    config: Config,
    message_repository: Optional[MessageRepository] = None,
    runtime_lookback_window: Optional[datetime.timedelta] = None,
    stream_state_migrations: Optional[List[Any]] = None,
    **kwargs: Any,
) -> ConcurrentCursor:
    """
    Parse a DatetimeBasedCursor definition and build the equivalent ConcurrentCursor.

    All interpolated fields (cursor field, partition field names, lookback window, step, clamping target) are
    evaluated here against ``config`` so that the returned cursor receives concrete values.

    :param model_type: The Pydantic model class the definition is parsed with; must parse to a DatetimeBasedCursorModel
    :param component_definition: The manifest mapping describing the cursor
    :param stream_name: Name of the stream the cursor belongs to
    :param stream_namespace: Namespace of the stream, if any
    :param config: The connector config used for interpolation
    :param message_repository: Repository handed to the cursor; falls back to ``self._message_repository``
    :param runtime_lookback_window: When set and the state holds a cursor value, that value is moved back by
        this amount and written back into the stream state
    :param stream_state_migrations: Migrations applied to the stream state before it is used
    :param kwargs: May carry ``stream_state``, which then replaces the state read from the state manager
    :return: The configured ConcurrentCursor
    :raises ValueError: If the definition's type does not match ``model_type``, the parsed model is not a
        DatetimeBasedCursorModel, only one of step / cursor_granularity is defined, or clamping is misconfigured
    """
    # Per-partition incremental streams can dynamically create child cursors which will pass their current
    # state via the stream_state keyword argument. Incremental syncs without parent streams use the
    # incoming state and connector_state_manager that is initialized when the component factory is created
    stream_state = (
        self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
        if "stream_state" not in kwargs
        else kwargs["stream_state"]
    )
    stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)

    # Reject definitions whose declared "type" does not name the requested model before parsing them.
    component_type = component_definition.get("type")
    if component_definition.get("type") != model_type.__name__:
        raise ValueError(
            f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
        )

    datetime_based_cursor_model = model_type.parse_obj(component_definition)

    if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
        raise ValueError(
            f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
        )

    # The cursor field name may itself be an interpolated string; resolve it now.
    interpolated_cursor_field = InterpolatedString.create(
        datetime_based_cursor_model.cursor_field,
        parameters=datetime_based_cursor_model.parameters or {},
    )
    cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

    # Slice boundary field names default to "start_time" / "end_time" when the model leaves them unset.
    interpolated_partition_field_start = InterpolatedString.create(
        datetime_based_cursor_model.partition_field_start or "start_time",
        parameters=datetime_based_cursor_model.parameters or {},
    )
    interpolated_partition_field_end = InterpolatedString.create(
        datetime_based_cursor_model.partition_field_end or "end_time",
        parameters=datetime_based_cursor_model.parameters or {},
    )

    slice_boundary_fields = (
        interpolated_partition_field_start.eval(config=config),
        interpolated_partition_field_end.eval(config=config),
    )

    datetime_format = datetime_based_cursor_model.datetime_format

    cursor_granularity = (
        parse_duration(datetime_based_cursor_model.cursor_granularity)
        if datetime_based_cursor_model.cursor_granularity
        else None
    )

    # The lookback window stays None unless it is defined and evaluates to a non-empty value.
    lookback_window = None
    interpolated_lookback_window = (
        InterpolatedString.create(
            datetime_based_cursor_model.lookback_window,
            parameters=datetime_based_cursor_model.parameters or {},
        )
        if datetime_based_cursor_model.lookback_window
        else None
    )
    if interpolated_lookback_window:
        evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
        if evaluated_lookback_window:
            lookback_window = parse_duration(evaluated_lookback_window)

    connector_state_converter: DateTimeStreamStateConverter
    connector_state_converter = CustomFormatConcurrentStreamStateConverter(
        datetime_format=datetime_format,
        input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
        is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
        cursor_granularity=cursor_granularity,
    )

    # Adjusts the stream state by applying the runtime lookback window.
    # This is used to ensure correct state handling in case of failed partitions.
    stream_state_value = stream_state.get(cursor_field.cursor_field_key)
    if runtime_lookback_window and stream_state_value:
        new_stream_state = (
            connector_state_converter.parse_timestamp(stream_state_value)
            - runtime_lookback_window
        )
        # Written back into stream_state, which is later handed to the ConcurrentCursor.
        stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
            new_stream_state
        )

    # start/end may be given either as a MinMaxDatetime model (built into a component here) or as a plain value.
    start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
    if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
        start_date_runtime_value = self.create_min_max_datetime(
            model=datetime_based_cursor_model.start_datetime, config=config
        )
    else:
        start_date_runtime_value = datetime_based_cursor_model.start_datetime

    end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
    if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
        end_date_runtime_value = self.create_min_max_datetime(
            model=datetime_based_cursor_model.end_datetime, config=config
        )
    else:
        end_date_runtime_value = datetime_based_cursor_model.end_datetime

    interpolated_start_date = MinMaxDatetime.create(
        interpolated_string_or_min_max_datetime=start_date_runtime_value,
        parameters=datetime_based_cursor_model.parameters,
    )
    interpolated_end_date = (
        None
        if not end_date_runtime_value
        else MinMaxDatetime.create(
            end_date_runtime_value, datetime_based_cursor_model.parameters
        )
    )

    # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
    if not interpolated_start_date.datetime_format:
        interpolated_start_date.datetime_format = datetime_format
    if interpolated_end_date and not interpolated_end_date.datetime_format:
        interpolated_end_date.datetime_format = datetime_format

    # The start is resolved once here; the end is kept as a callable so it is resolved when the cursor asks for it.
    start_date = interpolated_start_date.get_datetime(config=config)
    end_date_provider = (
        partial(interpolated_end_date.get_datetime, config)
        if interpolated_end_date
        else connector_state_converter.get_end_provider()
    )

    # step and cursor_granularity must be defined together or not at all.
    if (
        datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
    ) or (
        not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
    ):
        raise ValueError(
            f"If step is defined, cursor_granularity should be as well and vice-versa. "
            f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
        )

    # When step is not defined, default to a step size from the starting date to the present moment
    step_length = datetime.timedelta.max
    interpolated_step = (
        InterpolatedString.create(
            datetime_based_cursor_model.step,
            parameters=datetime_based_cursor_model.parameters or {},
        )
        if datetime_based_cursor_model.step
        else None
    )
    if interpolated_step:
        evaluated_step = interpolated_step.eval(config)
        if evaluated_step:
            step_length = parse_duration(evaluated_step)

    clamping_strategy: ClampingStrategy = NoClamping()
    if datetime_based_cursor_model.clamping:
        # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
        # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime
        # object which we want to keep agnostic of being low-code
        target = InterpolatedString(
            string=datetime_based_cursor_model.clamping.target,
            parameters=datetime_based_cursor_model.parameters or {},
        )
        evaluated_target = target.eval(config=config)
        # Each branch sets the clamping strategy and also wraps the end provider in a ClampingEndProvider
        # built with the matching strategy using is_ceiling=False.
        match evaluated_target:
            case "DAY":
                clamping_strategy = DayClampingStrategy()
                end_date_provider = ClampingEndProvider(
                    DayClampingStrategy(is_ceiling=False),
                    end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                    granularity=cursor_granularity or datetime.timedelta(seconds=1),
                )
            case "WEEK":
                if (
                    not datetime_based_cursor_model.clamping.target_details
                    or "weekday" not in datetime_based_cursor_model.clamping.target_details
                ):
                    raise ValueError(
                        "Given WEEK clamping, weekday needs to be provided as target_details"
                    )
                weekday = self._assemble_weekday(
                    datetime_based_cursor_model.clamping.target_details["weekday"]
                )
                clamping_strategy = WeekClampingStrategy(weekday)
                end_date_provider = ClampingEndProvider(
                    WeekClampingStrategy(weekday, is_ceiling=False),
                    end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                    granularity=cursor_granularity or datetime.timedelta(days=1),
                )
            case "MONTH":
                clamping_strategy = MonthClampingStrategy()
                end_date_provider = ClampingEndProvider(
                    MonthClampingStrategy(is_ceiling=False),
                    end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                    granularity=cursor_granularity or datetime.timedelta(days=1),
                )
            case _:
                raise ValueError(
                    f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
                )

    return ConcurrentCursor(
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        stream_state=stream_state,
        message_repository=message_repository or self._message_repository,
        connector_state_manager=self._connector_state_manager,
        connector_state_converter=connector_state_converter,
        cursor_field=cursor_field,
        slice_boundary_fields=slice_boundary_fields,
        start=start_date,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
        end_provider=end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
        lookback_window=lookback_window,
        slice_range=step_length,
        cursor_granularity=cursor_granularity,
        clamping_strategy=clamping_strategy,
    )
1214
+
1215
def create_concurrent_cursor_from_incrementing_count_cursor(
    self,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    stream_name: str,
    stream_namespace: Optional[str],
    config: Config,
    message_repository: Optional[MessageRepository] = None,
    **kwargs: Any,
) -> ConcurrentCursor:
    """
    Parse an IncrementingCountCursor definition and build the equivalent ConcurrentCursor.

    :param model_type: The Pydantic model class the definition is parsed with; must parse to an
        IncrementingCountCursorModel
    :param component_definition: The manifest mapping describing the cursor
    :param stream_name: Name of the stream the cursor belongs to
    :param stream_namespace: Namespace of the stream, if any
    :param config: The connector config used for interpolation
    :param message_repository: Repository handed to the cursor; falls back to ``self._message_repository``
    :param kwargs: May carry ``stream_state``, which then replaces the state read from the state manager
    :return: The configured ConcurrentCursor, whose ``start`` is an integer (0 when no start value is defined)
    :raises ValueError: If the definition's type does not match ``model_type``, the parsed model is not an
        IncrementingCountCursorModel, or the evaluated start value is not an integer
    """
    # Per-partition incremental streams can dynamically create child cursors which will pass their current
    # state via the stream_state keyword argument. Incremental syncs without parent streams use the
    # incoming state and connector_state_manager that is initialized when the component factory is created
    stream_state = (
        self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
        if "stream_state" not in kwargs
        else kwargs["stream_state"]
    )

    component_type = component_definition.get("type")
    if component_type != model_type.__name__:
        raise ValueError(
            f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
        )

    incrementing_count_cursor_model = model_type.parse_obj(component_definition)

    if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
        raise ValueError(
            f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
        )

    # Resolve the start value here so the cursor is handed a concrete integer rather than an unevaluated
    # InterpolatedString. str() keeps interpolation working whether the model stored the value as an int
    # or as a string.
    start_value = 0
    raw_start_value = incrementing_count_cursor_model.start_value
    if raw_start_value:
        interpolated_start_value = InterpolatedString.create(
            str(raw_start_value),
            parameters=incrementing_count_cursor_model.parameters or {},
        )
        evaluated_start_value = interpolated_start_value.eval(config=config)
        try:
            start_value = int(evaluated_start_value)
        except (TypeError, ValueError) as error:
            raise ValueError(
                f"Expected start_value of IncrementingCountCursor for stream {stream_name} to evaluate to an integer, "
                f"but got `{evaluated_start_value}`"
            ) from error

    interpolated_cursor_field = InterpolatedString.create(
        incrementing_count_cursor_model.cursor_field,
        parameters=incrementing_count_cursor_model.parameters or {},
    )
    cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

    connector_state_converter = IncrementingCountStreamStateConverter(
        is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
    )

    return ConcurrentCursor(
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        stream_state=stream_state,
        message_repository=message_repository or self._message_repository,
        connector_state_manager=self._connector_state_manager,
        connector_state_converter=connector_state_converter,
        cursor_field=cursor_field,
        slice_boundary_fields=None,
        start=start_value,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
        end_provider=connector_state_converter.get_end_provider(),  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
    )
1278
+
1279
def _assemble_weekday(self, weekday: str) -> Weekday:
    """
    Translate an upper-case weekday name into the matching Weekday member.

    :param weekday: One of "MONDAY" through "SUNDAY" (exact, case-sensitive match)
    :return: The corresponding Weekday member
    :raises ValueError: If ``weekday`` is not one of the seven known names
    """
    known_weekdays = (
        ("MONDAY", Weekday.MONDAY),
        ("TUESDAY", Weekday.TUESDAY),
        ("WEDNESDAY", Weekday.WEDNESDAY),
        ("THURSDAY", Weekday.THURSDAY),
        ("FRIDAY", Weekday.FRIDAY),
        ("SATURDAY", Weekday.SATURDAY),
        ("SUNDAY", Weekday.SUNDAY),
    )
    for label, member in known_weekdays:
        if weekday == label:
            return member
    raise ValueError(f"Unknown weekday {weekday}")
1297
+
1298
def create_concurrent_cursor_from_perpartition_cursor(
    self,
    state_manager: ConnectorStateManager,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    stream_name: str,
    stream_namespace: Optional[str],
    config: Config,
    stream_state: MutableMapping[str, Any],
    partition_router: PartitionRouter,
    stream_state_migrations: Optional[List[Any]] = None,
    **kwargs: Any,
) -> ConcurrentPerPartitionCursor:
    """
    Build a ConcurrentPerPartitionCursor from a DatetimeBasedCursor definition.

    The returned cursor is given a factory that creates one datetime-based concurrent cursor per partition,
    using the same definition and a NoopMessageRepository.

    :param state_manager: State manager handed to the per-partition cursor and bound into the child factory
    :param model_type: The Pydantic model class the definition is parsed with; must parse to a DatetimeBasedCursorModel
    :param component_definition: The manifest mapping describing the cursor
    :param stream_name: Name of the stream the cursor belongs to
    :param stream_namespace: Namespace of the stream, if any
    :param config: The connector config used for interpolation
    :param stream_state: The incoming stream state; migrations are applied before it is passed on
    :param partition_router: The router that produces the partitions
    :param stream_state_migrations: Migrations applied to the state here and bound into the child factory
    :return: The configured ConcurrentPerPartitionCursor
    :raises ValueError: If the definition's type does not match ``model_type`` or the parsed model is not a
        DatetimeBasedCursorModel
    """
    component_type = component_definition.get("type")
    if component_type != model_type.__name__:
        raise ValueError(
            f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
        )

    cursor_model = model_type.parse_obj(component_definition)
    if not isinstance(cursor_model, DatetimeBasedCursorModel):
        raise ValueError(
            f"Expected {model_type.__name__} component, but received {cursor_model.__class__.__name__}"
        )

    cursor_field_template = InterpolatedString.create(
        cursor_model.cursor_field,
        parameters=cursor_model.parameters or {},
    )
    cursor_field = CursorField(cursor_field_template.eval(config=config))

    cursor_granularity = None
    if cursor_model.cursor_granularity:
        cursor_granularity = parse_duration(cursor_model.cursor_granularity)

    connector_state_converter: DateTimeStreamStateConverter
    connector_state_converter = CustomFormatConcurrentStreamStateConverter(
        datetime_format=cursor_model.datetime_format,
        input_datetime_formats=cursor_model.cursor_datetime_formats,
        is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
        cursor_granularity=cursor_granularity,
    )

    # Factory used to create the child cursor of each partition.
    build_partition_cursor = partial(
        self.create_concurrent_cursor_from_datetime_based_cursor,
        state_manager=state_manager,
        model_type=model_type,
        component_definition=component_definition,
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        config=config,
        message_repository=NoopMessageRepository(),
        stream_state_migrations=stream_state_migrations,
    )
    cursor_factory = ConcurrentCursorFactory(build_partition_cursor)

    migrated_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)

    return ConcurrentPerPartitionCursor(
        cursor_factory=cursor_factory,
        partition_router=partition_router,
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        stream_state=migrated_state,
        message_repository=self._message_repository,  # type: ignore
        connector_state_manager=state_manager,
        connector_state_converter=connector_state_converter,
        cursor_field=cursor_field,
    )
1374
+
1375
@staticmethod
def create_constant_backoff_strategy(
    model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
) -> ConstantBackoffStrategy:
    """
    Build a ConstantBackoffStrategy from the model's ``backoff_time_in_seconds``.

    :param model: The parsed ConstantBackoffStrategy model
    :param config: The connector config
    :return: The runtime ConstantBackoffStrategy
    """
    backoff_time_in_seconds = model.backoff_time_in_seconds
    return ConstantBackoffStrategy(
        backoff_time_in_seconds=backoff_time_in_seconds,
        config=config,
        parameters=model.parameters or {},
    )
1384
+
1385
def create_cursor_pagination(
    self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
) -> CursorPaginationStrategy:
    """
    Build a CursorPaginationStrategy that decodes responses through a PaginationDecoderDecorator.

    :param model: The parsed CursorPagination model
    :param config: The connector config
    :param decoder: The response decoder; wrapped in a PaginationDecoderDecorator unless it already is one
    :return: The runtime CursorPaginationStrategy
    :raises ValueError: If the underlying (unwrapped) decoder is not supported for pagination
    """
    already_wrapped = isinstance(decoder, PaginationDecoderDecorator)
    inner_decoder = decoder.decoder if already_wrapped else decoder
    pagination_decoder = decoder if already_wrapped else PaginationDecoderDecorator(decoder=decoder)

    # Support is decided on the underlying decoder, not on the decorator around it.
    if not self._is_supported_decoder_for_pagination(inner_decoder):
        raise ValueError(
            self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
        )

    return CursorPaginationStrategy(
        cursor_value=model.cursor_value,
        decoder=pagination_decoder,
        page_size=model.page_size,
        stop_condition=model.stop_condition,
        config=config,
        parameters=model.parameters or {},
    )
1409
+
1410
def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any:
    """
    Instantiate the custom Python class referenced by ``model.class_name``.

    Constructor arguments come from the model's fields, ``config`` and ``kwargs`` (kwargs win on collisions).
    Only arguments whose names appear in the class's type hints are passed on. Values that are component
    definitions (directly, or as list items) are first turned into components.

    :param model: The Pydantic model of the custom component being created
    :param config: The custom defined connector config
    :param kwargs: Arguments supplied by a parent component; they override same-named model fields
    :return: The declarative component built from the Pydantic model to be used at runtime
    """
    custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
    component_fields = get_type_hints(custom_component_class)

    model_args = model.dict()
    model_args["config"] = config
    # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions
    # we defer to these arguments over the component's definition
    model_args.update(kwargs)

    def _fill_missing_type(field_name: str, candidate: Any) -> None:
        # If a custom component field doesn't have a type set, we try to use the type hints to infer the type
        if (
            not isinstance(candidate, dict)
            or "type" in candidate
            or field_name not in component_fields
        ):
            return
        derived_type = self._derive_component_type_from_type_hints(
            component_fields.get(field_name)
        )
        if derived_type:
            candidate["type"] = derived_type

    # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not
    # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to
    # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components
    for model_field, model_value in model_args.items():
        _fill_missing_type(model_field, model_value)

        if self._is_component(model_value):
            model_args[model_field] = self._create_nested_component(
                model, model_field, model_value, config
            )
            continue

        if not isinstance(model_value, list):
            continue

        converted_items = []
        for item in model_value:
            _fill_missing_type(model_field, item)
            if self._is_component(item):
                converted_items.append(
                    self._create_nested_component(model, model_field, item, config)
                )
            else:
                converted_items.append(item)
        model_args[model_field] = converted_items

    constructor_args = {
        class_field: model_args[class_field]
        for class_field in component_fields
        if class_field in model_args
    }
    return custom_component_class(**constructor_args)
1469
+
1470
+ @staticmethod
1471
+ def _get_class_from_fully_qualified_class_name(
1472
+ full_qualified_class_name: str,
1473
+ ) -> Any:
1474
+ """Get a class from its fully qualified name.
1475
+
1476
+ If a custom components module is needed, we assume it is already registered - probably
1477
+ as `source_declarative_manifest.components` or `components`.
1478
+
1479
+ Args:
1480
+ full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").
1481
+
1482
+ Returns:
1483
+ Any: The class object.
1484
+
1485
+ Raises:
1486
+ ValueError: If the class cannot be loaded.
1487
+ """
1488
+ split = full_qualified_class_name.split(".")
1489
+ module_name_full = ".".join(split[:-1])
1490
+ class_name = split[-1]
1491
+
1492
+ try:
1493
+ module_ref = importlib.import_module(module_name_full)
1494
+ except ModuleNotFoundError as e:
1495
+ if split[0] == "source_declarative_manifest":
1496
+ # During testing, the modules containing the custom components are not moved to source_declarative_manifest. In order to run the test, add the source folder to your PYTHONPATH or add it runtime using sys.path.append
1497
+ try:
1498
+ import os
1499
+
1500
+ module_name_with_source_declarative_manifest = ".".join(split[1:-1])
1501
+ module_ref = importlib.import_module(
1502
+ module_name_with_source_declarative_manifest
1503
+ )
1504
+ except ModuleNotFoundError:
1505
+ raise ValueError(f"Could not load module `{module_name_full}`.") from e
1506
+ else:
1507
+ raise ValueError(f"Could not load module `{module_name_full}`.") from e
1508
+
1509
+ try:
1510
+ return getattr(module_ref, class_name)
1511
+ except AttributeError as e:
1512
+ raise ValueError(
1513
+ f"Could not load class `{class_name}` from module `{module_name_full}`.",
1514
+ ) from e
1515
+
1516
+ @staticmethod
1517
+ def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
1518
+ interface = field_type
1519
+ while True:
1520
+ origin = get_origin(interface)
1521
+ if origin:
1522
+ # Unnest types until we reach the raw type
1523
+ # List[T] -> T
1524
+ # Optional[List[T]] -> T
1525
+ args = get_args(interface)
1526
+ interface = args[0]
1527
+ else:
1528
+ break
1529
+ if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface):
1530
+ return interface.__name__
1531
+ return None
1532
+
1533
+ @staticmethod
1534
+ def is_builtin_type(cls: Optional[Type[Any]]) -> bool:
1535
+ if not cls:
1536
+ return False
1537
+ return cls.__module__ == "builtins"
1538
+
1539
+ @staticmethod
1540
+ def _extract_missing_parameters(error: TypeError) -> List[str]:
1541
+ parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error))
1542
+ if parameter_search:
1543
+ return re.findall(r"\'(.+?)\'", parameter_search.group(1))
1544
+ else:
1545
+ return []
1546
+
1547
def _create_nested_component(
    self, model: Any, model_field: str, model_value: Any, config: Config
) -> Any:
    """
    Turn a dict-shaped sub-field of a custom component into a declarative component.

    :param model: The model of the parent custom component (its ``class_name`` is used in error messages)
    :param model_field: The name of the parent's field holding ``model_value``
    :param model_value: The raw mapping describing the sub-component
    :param config: The custom defined connector config
    :return: ``model_value`` unchanged when it has no truthy "type", otherwise the created component
    :raises ValueError: When the type is not in TYPE_NAME_TO_MODEL, or when the constructor reports missing keyword-only arguments
    :raises TypeError: When component creation fails with a TypeError that names no missing keyword-only arguments
    """
    type_name = model_value.get("type", None)
    if not type_name:
        # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent
        return model_value

    model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None)
    if not model_type:
        raise ValueError(
            f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'"
        )

    parsed_model = model_type.parse_obj(model_value)
    try:
        # Built-in components can receive fields from their parent (e.g. the DefaultPaginator referencing the
        # HttpRequester url_base while a SimpleRetriever is constructed). Custom parents are created generically in
        # create_custom_component() and cannot do that, so any keyword-only constructor argument found in the
        # sub-component's $parameters is forwarded here instead.
        model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__)
        keyword_only_names = inspect.getfullargspec(model_constructor).kwonlyargs
        declared_parameters = model_value.get("$parameters", {})
        forwarded_parameters = {
            name: declared_parameters[name]
            for name in keyword_only_names
            if name in declared_parameters
        }
        return self._create_component_from_model(
            model=parsed_model, config=config, **forwarded_parameters
        )
    except TypeError as error:
        missing_parameters = self._extract_missing_parameters(error)
        if not missing_parameters:
            raise TypeError(
                f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}"
            )
        raise ValueError(
            f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide "
            + ", ".join(
                f"{type_name}.$parameters.{parameter}" for parameter in missing_parameters
            )
        )
1595
+
1596
+ @staticmethod
1597
+ def _is_component(model_value: Any) -> bool:
1598
+ return isinstance(model_value, dict) and model_value.get("type") is not None
1599
+
1600
def create_datetime_based_cursor(
    self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any
) -> DatetimeBasedCursor:
    """
    Build a DatetimeBasedCursor from its model.

    String start/end datetimes are passed through as-is; any other value is converted with
    ``create_min_max_datetime``.

    :param model: The DatetimeBasedCursor model
    :param config: The custom defined connector config
    :return: The configured DatetimeBasedCursor
    :raises ValueError: When ``is_data_feed`` is combined with ``end_datetime`` or with ``is_client_side_incremental``
    """
    start_datetime: Union[str, MinMaxDatetime]
    if isinstance(model.start_datetime, str):
        start_datetime = model.start_datetime
    else:
        start_datetime = self.create_min_max_datetime(model.start_datetime, config)

    if model.is_data_feed:
        if model.end_datetime:
            raise ValueError("Data feed does not support end_datetime")
        if model.is_client_side_incremental:
            raise ValueError(
                "`Client side incremental` cannot be applied with `data feed`. Choose only 1 from them."
            )

    end_datetime: Union[str, MinMaxDatetime, None] = None
    if model.end_datetime:
        if isinstance(model.end_datetime, str):
            end_datetime = model.end_datetime
        else:
            end_datetime = self.create_min_max_datetime(model.end_datetime, config)

    def _build_time_option(option_model: Any) -> Any:
        # An absent option stays None; otherwise it is built with the cursor's own parameters.
        if not option_model:
            return None
        return self._create_component_from_model(
            option_model, config, parameters=model.parameters or {}
        )

    end_time_option = _build_time_option(model.end_time_option)
    start_time_option = _build_time_option(model.start_time_option)

    return DatetimeBasedCursor(
        cursor_field=model.cursor_field,
        cursor_datetime_formats=model.cursor_datetime_formats or [],
        cursor_granularity=model.cursor_granularity,
        datetime_format=model.datetime_format,
        end_datetime=end_datetime,
        start_datetime=start_datetime,
        step=model.step,
        end_time_option=end_time_option,
        lookback_window=model.lookback_window,
        start_time_option=start_time_option,
        partition_field_end=model.partition_field_end,
        partition_field_start=model.partition_field_start,
        message_repository=self._message_repository,
        is_compare_strictly=model.is_compare_strictly,
        config=config,
        parameters=model.parameters or {},
    )
1657
+
1658
def create_declarative_stream(
    self, model: DeclarativeStreamModel, config: Config, **kwargs: Any
) -> DeclarativeStream:
    """
    Build a DeclarativeStream (retriever, schema loader, state migrations) from its model.

    :param model: The DeclarativeStream model
    :param config: The custom defined connector config
    :return: The assembled DeclarativeStream
    :raises ValueError: When client-side incremental sync is requested with a slicer type that is not supported
    """
    # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field
    # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the
    # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in
    # the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one.
    combined_slicers = self._merge_stream_slicers(model=model, config=config)

    primary_key = model.primary_key.__root__ if model.primary_key else None
    stop_condition_on_cursor = (
        model.incremental_sync
        and hasattr(model.incremental_sync, "is_data_feed")
        and model.incremental_sync.is_data_feed
    )
    client_side_incremental_sync = None
    if (
        model.incremental_sync
        and hasattr(model.incremental_sync, "is_client_side_incremental")
        and model.incremental_sync.is_client_side_incremental
    ):
        supported_slicers = (
            DatetimeBasedCursor,
            GlobalSubstreamCursor,
            PerPartitionWithGlobalCursor,
        )
        if combined_slicers and not isinstance(combined_slicers, supported_slicers):
            raise ValueError(
                "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
            )
        # Reuse the merged slicer when it is already a per-partition/global cursor; otherwise build a
        # dedicated cursor from the incremental_sync model.
        cursor = (
            combined_slicers
            if isinstance(
                combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
            )
            else self._create_component_from_model(model=model.incremental_sync, config=config)
        )

        client_side_incremental_sync = {"cursor": cursor}

    if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
        cursor_model = model.incremental_sync

        end_time_option = (
            self._create_component_from_model(
                cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
            )
            if cursor_model.end_time_option
            else None
        )
        start_time_option = (
            self._create_component_from_model(
                cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
            )
            if cursor_model.start_time_option
            else None
        )

        request_options_provider = DatetimeBasedRequestOptionsProvider(
            start_time_option=start_time_option,
            end_time_option=end_time_option,
            # The start field must come from partition_field_start (it was previously fed
            # partition_field_end), matching what create_datetime_based_cursor passes to the cursor.
            partition_field_start=cursor_model.partition_field_start,
            partition_field_end=cursor_model.partition_field_end,
            config=config,
            parameters=model.parameters or {},
        )
    elif model.incremental_sync and isinstance(
        model.incremental_sync, IncrementingCountCursorModel
    ):
        cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore

        start_time_option = (
            self._create_component_from_model(
                cursor_model.start_value_option,  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
                config,
                parameters=cursor_model.parameters or {},
            )
            if cursor_model.start_value_option  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
            else None
        )

        # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
        # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
        partition_field_start = "start"

        request_options_provider = DatetimeBasedRequestOptionsProvider(
            start_time_option=start_time_option,
            partition_field_start=partition_field_start,
            config=config,
            parameters=model.parameters or {},
        )
    else:
        request_options_provider = None

    transformations = []
    if model.transformations:
        for transformation_model in model.transformations:
            transformations.append(
                self._create_component_from_model(model=transformation_model, config=config)
            )
    file_uploader = None
    if model.file_uploader:
        file_uploader = self._create_component_from_model(
            model=model.file_uploader, config=config
        )

    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name=model.name,
        primary_key=primary_key,
        stream_slicer=combined_slicers,
        request_options_provider=request_options_provider,
        stop_condition_on_cursor=stop_condition_on_cursor,
        client_side_incremental_sync=client_side_incremental_sync,
        transformations=transformations,
        file_uploader=file_uploader,
        incremental_sync=model.incremental_sync,
    )
    cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None

    if model.state_migrations:
        state_transformations = [
            self._create_component_from_model(state_migration, config, declarative_stream=model)
            for state_migration in model.state_migrations
        ]
    else:
        state_transformations = []

    if model.schema_loader:
        schema_loader = self._create_component_from_model(
            model=model.schema_loader, config=config
        )
    else:
        # NOTE(review): when model.parameters is a non-empty dict, `options` aliases it, so adding
        # "name" below also mutates model.parameters - confirm this is intended.
        options = model.parameters or {}
        if "name" not in options:
            options["name"] = model.name
        schema_loader = DefaultSchemaLoader(config=config, parameters=options)

    return DeclarativeStream(
        name=model.name or "",
        primary_key=primary_key,
        retriever=retriever,
        schema_loader=schema_loader,
        stream_cursor_field=cursor_field or "",
        state_migrations=state_transformations,
        config=config,
        parameters=model.parameters or {},
    )
1807
+
1808
def _build_stream_slicer_from_partition_router(
    self,
    model: Union[
        AsyncRetrieverModel,
        CustomRetrieverModel,
        SimpleRetrieverModel,
    ],
    config: Config,
    stream_name: Optional[str] = None,
) -> Optional[PartitionRouter]:
    """
    Build the partition router(s) declared on a retriever model.

    :param model: The retriever model; only Simple/Async retriever models with a truthy ``partition_router`` produce a result
    :param config: The custom defined connector config
    :param stream_name: Forwarded to each created router (an empty string is used when it is falsy)
    :return: A CartesianProductStreamSlicer for a list of routers, the single created router otherwise, or None
    """
    if not hasattr(model, "partition_router"):
        return None
    if not isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel):
        return None

    router_model = model.partition_router
    if not router_model:
        return None

    if not isinstance(router_model, list):
        return self._create_component_from_model(  # type: ignore[no-any-return] # Will be created PartitionRouter as router_model is model.partition_router
            model=router_model, config=config, stream_name=stream_name or ""
        )

    routers = [
        self._create_component_from_model(
            model=slicer, config=config, stream_name=stream_name or ""
        )
        for slicer in router_model
    ]
    return CartesianProductStreamSlicer(routers, parameters={})
1839
+
1840
def _build_incremental_cursor(
    self,
    model: DeclarativeStreamModel,
    stream_slicer: Optional[PartitionRouter],
    config: Config,
) -> Optional[StreamSlicer]:
    """
    Build the cursor used for incremental sync, combined with the partition router when there is one.

    :param model: The DeclarativeStream model
    :param stream_slicer: The partition router built for the stream's retriever, if any
    :param config: The custom defined connector config
    :return: None when the stream declares no incremental_sync; otherwise the cursor matching the
        retriever type and the presence of a partition router
    """
    incremental_sync_model = model.incremental_sync
    if not incremental_sync_model:
        return None

    is_async_retriever = model.retriever.type == "AsyncRetriever"

    if not stream_slicer:
        if is_async_retriever:
            # type: ignore below - known issue: a ConcurrentCursor is returned although it does not technically
            # implement the (low-code) StreamSlicer; both expose stream_slices(), which is what checkpointing needs.
            return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore
                model_type=DatetimeBasedCursorModel,
                component_definition=incremental_sync_model.__dict__,
                stream_name=model.name or "",
                stream_namespace=None,
                config=config or {},
                stream_state_migrations=model.state_migrations,
            )
        return self._create_component_from_model(model=incremental_sync_model, config=config)  # type: ignore[no-any-return] # Will be created Cursor as the model is model.incremental_sync

    if is_async_retriever:
        # type: ignore below - same known ConcurrentCursor/StreamSlicer mismatch as above.
        return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore
            state_manager=self._connector_state_manager,
            model_type=DatetimeBasedCursorModel,
            component_definition=incremental_sync_model.__dict__,
            stream_name=model.name or "",
            stream_namespace=None,
            config=config or {},
            stream_state={},
            partition_router=stream_slicer,
        )

    cursor_component = self._create_component_from_model(
        model=incremental_sync_model, config=config
    )
    uses_global_cursor = (
        hasattr(incremental_sync_model, "global_substream_cursor")
        and incremental_sync_model.global_substream_cursor
    )
    if uses_global_cursor:
        return GlobalSubstreamCursor(
            stream_cursor=cursor_component, partition_router=stream_slicer
        )

    def _new_partition_cursor() -> Any:
        # Each call builds a fresh cursor from the same incremental_sync model.
        return self._create_component_from_model(model=incremental_sync_model, config=config)

    return PerPartitionWithGlobalCursor(
        cursor_factory=CursorFactory(_new_partition_cursor),
        partition_router=stream_slicer,
        stream_cursor=cursor_component,
    )
1893
+
1894
def _build_resumable_cursor(
    self,
    model: Union[
        AsyncRetrieverModel,
        CustomRetrieverModel,
        SimpleRetrieverModel,
    ],
    stream_slicer: Optional[PartitionRouter],
) -> Optional[StreamSlicer]:
    """
    Pick the resumable full-refresh cursor for a stream without incremental sync.

    :param model: The retriever model
    :param stream_slicer: The partition router built for the retriever, if any
    :return: A PerPartitionCursor when a partition router exists, a ResumableFullRefreshCursor when there is
        no router but the model has a truthy ``paginator``, otherwise None
    """
    if stream_slicer:
        # For the Full-Refresh sub-streams, each partition gets a nested `ChildPartitionResumableFullRefreshCursor`
        child_cursor_factory = CursorFactory(
            create_function=partial(ChildPartitionResumableFullRefreshCursor, {})
        )
        return PerPartitionCursor(
            cursor_factory=child_cursor_factory,
            partition_router=stream_slicer,
        )

    if hasattr(model, "paginator") and model.paginator:
        # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
        return ResumableFullRefreshCursor(parameters={})

    return None
1915
+
1916
def _merge_stream_slicers(
    self, model: DeclarativeStreamModel, config: Config
) -> Optional[StreamSlicer]:
    """
    Combine the retriever's partition router and the stream's incremental_sync into a single slicer.

    :param model: The DeclarativeStream model
    :param config: The custom defined connector config
    :return: The incremental cursor when incremental_sync is declared; otherwise the bare partition router when
        resumable full refresh is disabled, or the resumable cursor built around it
    :raises ValueError: For an AsyncRetriever whose cursor is not a DatetimeBasedCursor, or whose declared
        partition router produced no slicer
    """
    retriever_model = model.retriever

    stream_slicer = self._build_stream_slicer_from_partition_router(
        retriever_model, config, stream_name=model.name
    )

    if retriever_model.type == "AsyncRetriever" and model.incremental_sync:
        uses_other_cursor = model.incremental_sync.type != "DatetimeBasedCursor"
        declares_partition_router = bool(retriever_model.partition_router)

        if uses_other_cursor:
            # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the
            # support of unordered slices (for example, when we trigger reports for January and February, the report
            # in February can be completed first). Once we have support for custom concurrent cursor or have a new
            # implementation available in the CDK, we can enable more cursors here.
            raise ValueError(
                "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
            )

        if declares_partition_router and not stream_slicer:
            # Note that this development is also done in parallel to the per partition development which once merged
            # we could support here by calling create_concurrent_cursor_from_perpartition_cursor
            raise ValueError("Per partition state is not supported yet for AsyncRetriever.")

    if model.incremental_sync:
        return self._build_incremental_cursor(model, stream_slicer, config)

    if self._disable_resumable_full_refresh:
        return stream_slicer
    return self._build_resumable_cursor(retriever_model, stream_slicer)
1957
+
1958
def create_default_error_handler(
    self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
) -> DefaultErrorHandler:
    """
    Build a DefaultErrorHandler with its backoff strategies and response filters.

    :param model: The DefaultErrorHandler model
    :param config: The custom defined connector config
    :return: The configured DefaultErrorHandler
    """
    backoff_strategies = [
        self._create_component_from_model(model=backoff_strategy_model, config=config)
        for backoff_strategy_model in (model.backoff_strategies or [])
    ]

    response_filters = [
        self._create_component_from_model(model=response_filter_model, config=config)
        for response_filter_model in (model.response_filters or [])
    ]
    # A default-configured filter is always added after the user-declared ones.
    response_filters.append(
        HttpResponseFilter(config=config, parameters=model.parameters or {})
    )

    return DefaultErrorHandler(
        backoff_strategies=backoff_strategies,
        max_retries=model.max_retries,
        response_filters=response_filters,
        config=config,
        parameters=model.parameters or {},
    )
1985
+
1986
def create_default_paginator(
    self,
    model: DefaultPaginatorModel,
    config: Config,
    *,
    url_base: str,
    decoder: Optional[Decoder] = None,
    cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
    """
    Build a DefaultPaginator from its model.

    :param model: The DefaultPaginator model
    :param config: The custom defined connector config
    :param url_base: The base URL handed to the paginator
    :param decoder: The response decoder to wrap for pagination; a JsonDecoder is used when it is falsy
    :param cursor_used_for_stop_condition: When set, the pagination strategy is wrapped so this cursor can stop pagination
    :return: The paginator, wrapped in a PaginatorTestReadDecorator when a per-slice page limit is configured
    :raises ValueError: When ``decoder`` is not supported for pagination
    """
    if not decoder:
        decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
    elif self._is_supported_decoder_for_pagination(decoder):
        decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
    else:
        raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))

    page_size_option = None
    if model.page_size_option:
        page_size_option = self._create_component_from_model(
            model=model.page_size_option, config=config
        )

    page_token_option = None
    if model.page_token_option:
        page_token_option = self._create_component_from_model(
            model=model.page_token_option, config=config
        )

    pagination_strategy = self._create_component_from_model(
        model=model.pagination_strategy, config=config, decoder=decoder_to_use
    )
    if cursor_used_for_stop_condition:
        stop_condition = CursorStopCondition(cursor_used_for_stop_condition)
        pagination_strategy = StopConditionPaginationStrategyDecorator(
            pagination_strategy, stop_condition
        )

    paginator = DefaultPaginator(
        decoder=decoder_to_use,
        page_size_option=page_size_option,
        page_token_option=page_token_option,
        pagination_strategy=pagination_strategy,
        url_base=url_base,
        config=config,
        parameters=model.parameters or {},
    )
    if not self._limit_pages_fetched_per_slice:
        return paginator
    return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice)
2031
+
2032
def create_dpath_extractor(
    self,
    model: DpathExtractorModel,
    config: Config,
    decoder: Optional[Decoder] = None,
    **kwargs: Any,
) -> DpathExtractor:
    """
    Build a DpathExtractor from its model.

    :param model: The DpathExtractor model
    :param config: The custom defined connector config
    :param decoder: The decoder to use; a JsonDecoder is created when it is falsy
    :return: The configured DpathExtractor
    """
    decoder_to_use = decoder if decoder else JsonDecoder(parameters={})
    # Copy the path into a fresh list instead of handing over the model's own list.
    model_field_path: List[Union[InterpolatedString, str]] = list(model.field_path)
    return DpathExtractor(
        decoder=decoder_to_use,
        field_path=model_field_path,
        config=config,
        parameters=model.parameters or {},
    )
2050
+
2051
def create_response_to_file_extractor(
    self,
    model: ResponseToFileExtractorModel,
    **kwargs: Any,
) -> ResponseToFileExtractor:
    """
    Build a ResponseToFileExtractor from its model.

    :param model: The ResponseToFileExtractor model; only its parameters are used
    :return: The configured ResponseToFileExtractor
    """
    extractor_parameters = model.parameters or {}
    return ResponseToFileExtractor(parameters=extractor_parameters)
2057
+
2058
@staticmethod
def create_exponential_backoff_strategy(
    model: ExponentialBackoffStrategyModel, config: Config
) -> ExponentialBackoffStrategy:
    """
    Build an ExponentialBackoffStrategy from its model.

    :param model: The ExponentialBackoffStrategy model
    :param config: The custom defined connector config
    :return: The strategy, using a factor of 5 when the model's factor is falsy
    """
    backoff_factor = model.factor or 5
    strategy_parameters = model.parameters or {}
    return ExponentialBackoffStrategy(
        factor=backoff_factor, parameters=strategy_parameters, config=config
    )
2065
+
2066
def create_http_requester(
    self,
    model: HttpRequesterModel,
    config: Config,
    decoder: Decoder = JsonDecoder(parameters={}),
    *,
    name: str,
) -> HttpRequester:
    """
    Build an HttpRequester from its model.

    :param model: The HttpRequester model
    :param config: The custom defined connector config
    :param decoder: The decoder given to the authenticator and the requester
    :param name: The name given to the requester (and forwarded to the authenticator)
    :return: The configured HttpRequester
    """
    if model.authenticator:
        authenticator = self._create_component_from_model(
            model=model.authenticator,
            config=config,
            url_base=model.url_base,
            name=name,
            decoder=decoder,
        )
    else:
        authenticator = None

    if model.error_handler:
        error_handler = self._create_component_from_model(
            model=model.error_handler, config=config
        )
    else:
        # No handler declared: fall back to one without backoff strategies or response filters.
        error_handler = DefaultErrorHandler(
            backoff_strategies=[],
            response_filters=[],
            config=config,
            parameters=model.parameters or {},
        )

    api_budget = self._api_budget

    request_options_provider = InterpolatedRequestOptionsProvider(
        request_body_data=model.request_body_data,
        request_body_json=model.request_body_json,
        request_headers=model.request_headers,
        request_parameters=model.request_parameters,
        config=config,
        parameters=model.parameters or {},
    )

    assert model.use_cache is not None  # for mypy
    assert model.http_method is not None  # for mypy

    # Caching requested by the model is overridden when the factory has caching disabled.
    use_cache = model.use_cache and not self._disable_cache

    if decoder:
        stream_response = decoder.is_stream_response()
    else:
        stream_response = False

    return HttpRequester(
        name=name,
        url_base=model.url_base,
        path=model.path,
        authenticator=authenticator,
        error_handler=error_handler,
        api_budget=api_budget,
        http_method=HttpMethod[model.http_method.value],
        request_options_provider=request_options_provider,
        config=config,
        disable_retries=self._disable_retries,
        parameters=model.parameters or {},
        message_repository=self._message_repository,
        use_cache=use_cache,
        decoder=decoder,
        stream_response=stream_response,
    )
2129
+
2130
@staticmethod
def create_http_response_filter(
    model: HttpResponseFilterModel, config: Config, **kwargs: Any
) -> HttpResponseFilter:
    """
    Build an HttpResponseFilter from its model.

    :param model: The HttpResponseFilter model
    :param config: The custom defined connector config
    :return: The configured HttpResponseFilter; unset text fields become empty strings and unset http_codes an empty set
    """
    action = ResponseAction(model.action.value) if model.action else None

    if model.failure_type:
        failure_type = FailureType(model.failure_type.value)
    else:
        failure_type = None

    # JSON schema notation has no set data type. The schema enforces an array of unique elements
    http_codes = set(model.http_codes) if model.http_codes else set()

    return HttpResponseFilter(
        action=action,
        failure_type=failure_type,
        error_message=model.error_message or "",
        error_message_contains=model.error_message_contains or "",
        http_codes=http_codes,
        predicate=model.predicate or "",
        config=config,
        parameters=model.parameters or {},
    )
2155
+
2156
@staticmethod
def create_inline_schema_loader(
    model: InlineSchemaLoaderModel, config: Config, **kwargs: Any
) -> InlineSchemaLoader:
    """
    Build an InlineSchemaLoader from its model.

    :param model: The InlineSchemaLoader model
    :param config: The custom defined connector config (not used here)
    :return: A loader holding the model's schema, or an empty dict when the schema is falsy
    """
    inline_schema = model.schema_ or {}
    return InlineSchemaLoader(schema=inline_schema, parameters={})
2161
+
2162
def create_complex_field_type(
    self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
) -> ComplexFieldType:
    """
    Build a ComplexFieldType from its model.

    :param model: The ComplexFieldType model
    :param config: The custom defined connector config
    :return: The ComplexFieldType; ``items`` is itself converted when it is a nested ComplexFieldType model
    """
    if isinstance(model.items, ComplexFieldTypeModel):
        items = self._create_component_from_model(model=model.items, config=config)
    else:
        items = model.items

    return ComplexFieldType(field_type=model.field_type, items=items)
2172
+
2173
def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
    """
    Build a TypesMap from its model.

    :param model: The TypesMap model
    :param config: The custom defined connector config
    :return: The TypesMap; a ComplexFieldType model target is converted to a component, and a missing
        condition becomes the string "True"
    """
    if isinstance(model.target_type, ComplexFieldTypeModel):
        target_type = self._create_component_from_model(model=model.target_type, config=config)
    else:
        target_type = model.target_type

    condition = "True" if model.condition is None else model.condition

    return TypesMap(
        target_type=target_type,
        current_type=model.current_type,
        condition=condition,
    )
2185
+
2186
def create_schema_type_identifier(
    self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
) -> SchemaTypeIdentifier:
    """
    Build a SchemaTypeIdentifier from its model.

    :param model: The SchemaTypeIdentifier model
    :param config: The custom defined connector config
    :return: The SchemaTypeIdentifier, with the model's pointers copied into new lists
    """
    types_mapping = [
        self._create_component_from_model(types_map, config=config)
        for types_map in (model.types_mapping or [])
    ]

    model_schema_pointer: List[Union[InterpolatedString, str]] = (
        list(model.schema_pointer) if model.schema_pointer else []
    )
    model_key_pointer: List[Union[InterpolatedString, str]] = list(model.key_pointer)
    # Unlike schema_pointer, a falsy type_pointer stays None rather than becoming an empty list.
    model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
        list(model.type_pointer) if model.type_pointer else None
    )

    return SchemaTypeIdentifier(
        schema_pointer=model_schema_pointer,
        key_pointer=model_key_pointer,
        type_pointer=model_type_pointer,
        types_mapping=types_mapping,
        parameters=model.parameters or {},
    )
2212
+
2213
def create_dynamic_schema_loader(
    self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
) -> DynamicSchemaLoader:
    """
    Build a DynamicSchemaLoader together with the retriever it uses.

    :param model: The DynamicSchemaLoader model
    :param config: The custom defined connector config
    :return: The configured DynamicSchemaLoader
    """
    partition_router = self._build_stream_slicer_from_partition_router(model.retriever, config)
    combined_slicers = self._build_resumable_cursor(model.retriever, partition_router)

    schema_transformations = [
        self._create_component_from_model(model=transformation_model, config=config)
        for transformation_model in (model.schema_transformations or [])
    ]

    # The retriever is created with an empty name, no primary key and no record transformations.
    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="",
        primary_key=None,
        stream_slicer=combined_slicers,
        transformations=[],
    )
    schema_type_identifier = self._create_component_from_model(
        model.schema_type_identifier, config=config, parameters=model.parameters or {}
    )

    return DynamicSchemaLoader(
        retriever=retriever,
        config=config,
        schema_transformations=schema_transformations,
        schema_type_identifier=schema_type_identifier,
        parameters=model.parameters or {},
    )
2244
+
2245
@staticmethod
def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
    """
    Build a JsonDecoder.

    :param model: The JsonDecoder model (not used here)
    :param config: The custom defined connector config (not used here)
    :return: A JsonDecoder created with empty parameters
    """
    json_decoder = JsonDecoder(parameters={})
    return json_decoder
2248
+
2249
def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
    """
    Build a CompositeRawDecoder for CSV responses.

    :param model: The CsvDecoder model
    :param config: The custom defined connector config
    :return: The decoder; responses are streamed unless connector builder messages are being emitted
    """
    csv_parser = ModelToComponentFactory._get_parser(model, config)
    should_stream = not self._emit_connector_builder_messages
    return CompositeRawDecoder(parser=csv_parser, stream_response=should_stream)
2254
+
2255
def create_jsonl_decoder(
    self, model: JsonlDecoderModel, config: Config, **kwargs: Any
) -> Decoder:
    """Build a CompositeRawDecoder around the parser resolved for the JSONL model.

    ``stream_response`` is False when connector-builder messages are being
    emitted and True otherwise.
    """
    jsonl_parser = ModelToComponentFactory._get_parser(model, config)
    should_stream = not self._emit_connector_builder_messages
    return CompositeRawDecoder(parser=jsonl_parser, stream_response=should_stream)
2262
+
2263
def create_gzip_decoder(
    self, model: GzipDecoderModel, config: Config, **kwargs: Any
) -> Decoder:
    """Build the decoder used for (possibly) gzip-compressed responses.

    When connector-builder messages are emitted, the response is not streamed and
    only the gzip parser's inner parser is used. Otherwise a header-driven
    CompositeRawDecoder is returned, with the inner parser as fallback.
    """
    gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config)  # type: ignore  # based on the model, we know this will be a GzipParser

    if self._emit_connector_builder_messages:
        # Surprisingly, when the response is not streamed CompositeRawDecoder reads
        # response.content, and the requests library uncompresses that data itself.
        # response.raw goes through urllib3 directly and is left compressed, so the
        # non-streamed path must skip the gzip layer and use the inner parser.
        return CompositeRawDecoder(gzip_parser.inner_parser, False)

    # Header names and values handed to by_headers together with the gzip parser.
    # NOTE(review): presumably the gzip parser is chosen when one of these headers
    # carries one of these values - confirm against CompositeRawDecoder.by_headers.
    inspected_headers = {"Content-Encoding", "Content-Type"}
    compressed_header_values = {
        "gzip",
        "x-gzip",
        "gzip, deflate",
        "x-gzip, deflate",
        "application/zip",
        "application/gzip",
        "application/x-gzip",
        "application/x-zip-compressed",
    }

    return CompositeRawDecoder.by_headers(
        [(inspected_headers, compressed_header_values, gzip_parser)],
        stream_response=True,
        fallback_parser=gzip_parser.inner_parser,
    )
2290
+
2291
@staticmethod
def create_incrementing_count_cursor(
    model: IncrementingCountCursorModel, config: Config, **kwargs: Any
) -> DatetimeBasedCursor:
    """Return a placeholder DatetimeBasedCursor for an IncrementingCountCursor model.

    There is no runtime implementation of an IncrementingCountCursor, yet models
    are still parsed into components, so this stub exists to pass checks and is
    not expected to be used at runtime. A known and expected issue with the stub:
    running a check with a declared IncrementingCountCursor, because the check is
    run without ConcurrentCursor.
    """
    placeholder_cursor = DatetimeBasedCursor(
        cursor_field=model.cursor_field,
        datetime_format="%Y-%m-%d",
        start_datetime="2024-12-12",
        config=config,
        parameters={},
    )
    return placeholder_cursor
2306
+
2307
@staticmethod
def create_iterable_decoder(
    model: IterableDecoderModel, config: Config, **kwargs: Any
) -> IterableDecoder:
    """Return an IterableDecoder with empty parameters; model and config are not consulted."""
    iterable_decoder = IterableDecoder(parameters={})
    return iterable_decoder
2312
+
2313
@staticmethod
def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
    """Return an XmlDecoder with empty parameters; model and config are not consulted."""
    xml_decoder = XmlDecoder(parameters={})
    return xml_decoder
2316
+
2317
def create_zipfile_decoder(
    self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
) -> ZipfileDecoder:
    """Build a ZipfileDecoder whose parser is resolved from the model's nested decoder."""
    nested_parser = ModelToComponentFactory._get_parser(model.decoder, config)
    return ZipfileDecoder(parser=nested_parser)
2321
+
2322
@staticmethod
def _get_parser(model: BaseModel, config: Config) -> Parser:
    """Return the Parser that corresponds to a decoder model.

    Args:
        model: A decoder model; JSON, JSONL, CSV and gzip models have parsers.
        config: Connector configuration, passed along when resolving a nested decoder.

    Returns:
        The parser for the model. A gzip model wraps the parser of its nested decoder.

    Raises:
        ValueError: If the model is a custom, iterable, XML or zipfile decoder model
            (no parser exists for those) or is not a recognised decoder model.
    """
    if isinstance(model, JsonDecoderModel):
        # Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases
        return JsonParser()
    if isinstance(model, JsonlDecoderModel):
        return JsonLineParser()
    if isinstance(model, CsvDecoderModel):
        return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
    if isinstance(model, GzipDecoderModel):
        # Resolve the nested decoder's parser first, then wrap it.
        nested_parser = ModelToComponentFactory._get_parser(model.decoder, config)
        return GzipParser(inner_parser=nested_parser)

    models_without_parser = (
        CustomDecoderModel,
        IterableDecoderModel,
        XmlDecoderModel,
        ZipfileDecoderModel,
    )
    if isinstance(model, models_without_parser):
        raise ValueError(f"Decoder type {model} does not have parser associated to it")

    raise ValueError(f"Unknown decoder type {model}")
2341
+
2342
@staticmethod
def create_json_file_schema_loader(
    model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
) -> JsonFileSchemaLoader:
    """Build a JsonFileSchemaLoader; an unset file path becomes "" and unset parameters become {}."""
    schema_file_path = model.file_path or ""
    loader_parameters = model.parameters or {}
    return JsonFileSchemaLoader(
        file_path=schema_file_path, config=config, parameters=loader_parameters
    )
2349
+
2350
@staticmethod
def create_jwt_authenticator(
    model: JwtAuthenticatorModel, config: Config, **kwargs: Any
) -> JwtAuthenticator:
    """Build a JwtAuthenticator from its declarative model.

    When the model has no header or payload section, defaults are used instead:
    headers with ``typ="JWT"`` and no ``kid``/``cty``, and a payload with no
    ``iss``/``sub``/``aud``.
    """
    if model.jwt_headers:
        headers = model.jwt_headers
    else:
        headers = JwtHeadersModel(kid=None, typ="JWT", cty=None)

    if model.jwt_payload:
        payload = model.jwt_payload
    else:
        payload = JwtPayloadModel(iss=None, sub=None, aud=None)

    return JwtAuthenticator(
        config=config,
        parameters=model.parameters or {},
        algorithm=JwtAlgorithm(model.algorithm.value),
        secret_key=model.secret_key,
        base64_encode_secret_key=model.base64_encode_secret_key,
        token_duration=model.token_duration,
        header_prefix=model.header_prefix,
        kid=headers.kid,
        typ=headers.typ,
        cty=headers.cty,
        iss=payload.iss,
        sub=payload.sub,
        aud=payload.aud,
        additional_jwt_headers=model.additional_jwt_headers,
        additional_jwt_payload=model.additional_jwt_payload,
    )
2373
+
2374
def create_list_partition_router(
    self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
) -> ListPartitionRouter:
    """Build a ListPartitionRouter; the request option is created only when the model declares one."""
    request_option = None
    if model.request_option:
        request_option = self._create_component_from_model(model.request_option, config)

    return ListPartitionRouter(
        cursor_field=model.cursor_field,
        request_option=request_option,
        values=model.values,
        config=config,
        parameters=model.parameters or {},
    )
2389
+
2390
@staticmethod
def create_min_max_datetime(
    model: MinMaxDatetimeModel, config: Config, **kwargs: Any
) -> MinMaxDatetime:
    """Build a MinMaxDatetime; unset format and bounds become "" and unset parameters become {}."""
    datetime_format = model.datetime_format or ""
    upper_bound = model.max_datetime or ""
    lower_bound = model.min_datetime or ""
    return MinMaxDatetime(
        datetime=model.datetime,
        datetime_format=datetime_format,
        max_datetime=upper_bound,
        min_datetime=lower_bound,
        parameters=model.parameters or {},
    )
2401
+
2402
@staticmethod
def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth:
    """Return a NoAuth carrying the model's parameters, or {} when none are set."""
    auth_parameters = model.parameters or {}
    return NoAuth(parameters=auth_parameters)
2405
+
2406
@staticmethod
def create_no_pagination(
    model: NoPaginationModel, config: Config, **kwargs: Any
) -> NoPagination:
    """Return a NoPagination with empty parameters; model and config are not consulted."""
    no_pagination = NoPagination(parameters={})
    return no_pagination
2411
+
2412
def create_oauth_authenticator(
    self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
) -> DeclarativeOauth2Authenticator:
    """Build the OAuth authenticator described by ``model``.

    When the model declares a ``refresh_token_updater``, a
    DeclarativeSingleUseRefreshTokenOauth2Authenticator is returned and its string
    and mapping settings are interpolated against ``config`` right here. Otherwise
    a DeclarativeOauth2Authenticator receives the raw model values.

    Args:
        model: Declarative OAuth authenticator description.
        config: Connector configuration used for interpolation and passed on.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        The authenticator matching the model.
    """

    def _interpolate(template: Any) -> Any:
        # The ``or {}`` fallback is evaluated on every call, exactly as each
        # original call site did, so no fallback dict is shared between calls.
        return InterpolatedString.create(template, parameters=model.parameters or {}).eval(
            config
        )

    def _interpolate_if_set(value: Any) -> Any:
        # Falsy values (for instance None) are handed through untouched.
        return _interpolate(value) if value else value

    def _interpolate_mapping(mapping: Any) -> Any:
        return InterpolatedMapping(mapping or {}, parameters=model.parameters or {}).eval(
            config
        )

    # Created before branching, although only the second branch uses it.
    profile_assertion = None
    if model.profile_assertion:
        profile_assertion = self._create_component_from_model(
            model.profile_assertion, config=config
        )

    updater = model.refresh_token_updater
    if updater:
        # ignore type error because fixing it would have a lot of dependencies, revisit later
        return DeclarativeSingleUseRefreshTokenOauth2Authenticator(  # type: ignore
            config,
            _interpolate(model.token_refresh_endpoint),  # type: ignore
            access_token_name=_interpolate(model.access_token_name or "access_token"),
            refresh_token_name=updater.refresh_token_name,
            expires_in_name=_interpolate(model.expires_in_name or "expires_in"),
            client_id_name=_interpolate(model.client_id_name or "client_id"),
            client_id=_interpolate_if_set(model.client_id),
            client_secret_name=_interpolate(model.client_secret_name or "client_secret"),
            client_secret=_interpolate_if_set(model.client_secret),
            access_token_config_path=updater.access_token_config_path,
            refresh_token_config_path=updater.refresh_token_config_path,
            token_expiry_date_config_path=updater.token_expiry_date_config_path,
            grant_type_name=_interpolate(model.grant_type_name or "grant_type"),
            grant_type=_interpolate(model.grant_type or "refresh_token"),
            refresh_request_body=_interpolate_mapping(model.refresh_request_body),
            refresh_request_headers=_interpolate_mapping(model.refresh_request_headers),
            scopes=model.scopes,
            token_expiry_date_format=model.token_expiry_date_format,
            message_repository=self._message_repository,
            refresh_token_error_status_codes=updater.refresh_token_error_status_codes,
            refresh_token_error_key=updater.refresh_token_error_key,
            refresh_token_error_values=updater.refresh_token_error_values,
        )

    # ignore type error because fixing it would have a lot of dependencies, revisit later
    return DeclarativeOauth2Authenticator(  # type: ignore
        access_token_name=model.access_token_name or "access_token",
        access_token_value=model.access_token_value,
        client_id_name=model.client_id_name or "client_id",
        client_id=model.client_id,
        client_secret_name=model.client_secret_name or "client_secret",
        client_secret=model.client_secret,
        expires_in_name=model.expires_in_name or "expires_in",
        grant_type_name=model.grant_type_name or "grant_type",
        grant_type=model.grant_type or "refresh_token",
        refresh_request_body=model.refresh_request_body,
        refresh_request_headers=model.refresh_request_headers,
        refresh_token_name=model.refresh_token_name or "refresh_token",
        refresh_token=model.refresh_token,
        scopes=model.scopes,
        token_expiry_date=model.token_expiry_date,
        token_expiry_date_format=model.token_expiry_date_format,
        # True exactly when an expiry date format is configured.
        token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
        token_refresh_endpoint=model.token_refresh_endpoint,
        config=config,
        parameters=model.parameters or {},
        message_repository=self._message_repository,
        profile_assertion=profile_assertion,
        use_profile_assertion=model.use_profile_assertion,
    )
2500
+
2501
def create_offset_increment(
    self, model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
) -> OffsetIncrement:
    """Build an OffsetIncrement pagination strategy.

    The strategy always receives a PaginationDecoderDecorator: the given decoder is
    used as-is when it already is one and is wrapped otherwise. The decoder inside
    that decorator must be supported for pagination.

    Raises:
        ValueError: If the undecorated decoder is not supported for pagination.
    """
    if isinstance(decoder, PaginationDecoderDecorator):
        undecorated_decoder = decoder.decoder
        pagination_decoder = decoder
    else:
        undecorated_decoder = decoder
        pagination_decoder = PaginationDecoderDecorator(decoder=decoder)

    if not self._is_supported_decoder_for_pagination(undecorated_decoder):
        raise ValueError(
            self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(undecorated_decoder))
        )

    return OffsetIncrement(
        page_size=model.page_size,
        config=config,
        decoder=pagination_decoder,
        inject_on_first_request=model.inject_on_first_request or False,
        parameters=model.parameters or {},
    )
2524
+
2525
@staticmethod
def create_page_increment(
    model: PageIncrementModel, config: Config, **kwargs: Any
) -> PageIncrement:
    """Build a PageIncrement; unset options fall back to page 0, no first-request injection and {}."""
    first_page = model.start_from_page or 0
    inject_on_first_request = model.inject_on_first_request or False
    return PageIncrement(
        page_size=model.page_size,
        config=config,
        start_from_page=first_page,
        inject_on_first_request=inject_on_first_request,
        parameters=model.parameters or {},
    )
2536
+
2537
def create_parent_stream_config(
    self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
) -> ParentStreamConfig:
    """Build a ParentStreamConfig, including the parent stream it refers to.

    Args:
        model: Declarative parent stream configuration.
        config: Connector configuration forwarded to sub-components.
        **kwargs: Forwarded to the creation of the parent stream.

    Returns:
        The configured ParentStreamConfig.

    Raises:
        ValueError: If any ``lazy_read_pointer`` segment contains ``*``.
    """
    parent_stream = self._create_component_from_model(model.stream, config=config, **kwargs)

    request_option = None
    if model.request_option:
        request_option = self._create_component_from_model(model.request_option, config=config)

    pointer_segments = model.lazy_read_pointer
    if pointer_segments and any("*" in segment for segment in pointer_segments):
        raise ValueError(
            "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
        )

    # Copy the segments into a fresh list; an unset pointer becomes an empty list.
    lazy_read_pointer: List[Union[InterpolatedString, str]] = (
        list(pointer_segments) if pointer_segments else []
    )

    return ParentStreamConfig(
        parent_key=model.parent_key,
        request_option=request_option,
        stream=parent_stream,
        partition_field=model.partition_field,
        config=config,
        incremental_dependency=model.incremental_dependency or False,
        parameters=model.parameters or {},
        extra_fields=model.extra_fields,
        lazy_read_pointer=lazy_read_pointer,
    )
2569
+
2570
@staticmethod
def create_record_filter(
    model: RecordFilterModel, config: Config, **kwargs: Any
) -> RecordFilter:
    """Build a RecordFilter; an unset condition becomes "" and unset parameters become {}."""
    filter_condition = model.condition or ""
    filter_parameters = model.parameters or {}
    return RecordFilter(condition=filter_condition, config=config, parameters=filter_parameters)
2577
+
2578
@staticmethod
def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath:
    """Return a RequestPath with empty parameters; model and config are not consulted."""
    request_path = RequestPath(parameters={})
    return request_path
2581
+
2582
@staticmethod
def create_request_option(
    model: RequestOptionModel, config: Config, **kwargs: Any
) -> RequestOption:
    """Build a RequestOption from its declarative model.

    ``field_path`` segments and ``field_name`` are turned into InterpolatedString
    objects when present and left as None otherwise. Parameters come from the
    ``parameters`` keyword argument, defaulting to an empty dict.
    """
    inject_into = RequestOptionType(model.inject_into.value)

    field_path: Optional[List[Union[InterpolatedString, str]]] = None
    if model.field_path:
        field_path = [
            InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
            for segment in model.field_path
        ]

    field_name = None
    if model.field_name:
        field_name = InterpolatedString.create(
            model.field_name, parameters=kwargs.get("parameters", {})
        )

    return RequestOption(
        field_name=field_name,
        field_path=field_path,
        inject_into=inject_into,
        parameters=kwargs.get("parameters", {}),
    )
2606
+
2607
def create_record_selector(
    self,
    model: RecordSelectorModel,
    config: Config,
    *,
    name: str,
    transformations: List[RecordTransformation] | None = None,
    decoder: Decoder | None = None,
    client_side_incremental_sync: Dict[str, Any] | None = None,
    file_uploader: Optional[FileUploader] = None,
    **kwargs: Any,
) -> RecordSelector:
    """Build a RecordSelector from its declarative model.

    Args:
        model: Declarative record selector description.
        config: Connector configuration forwarded to sub-components.
        name: Name handed to the RecordSelector.
        transformations: Record transformations; None is treated as an empty list.
        decoder: Decoder handed to the extractor.
        client_side_incremental_sync: When truthy, its entries are expanded as
            keyword arguments of a ClientSideIncrementalRecordFilterDecorator that
            replaces the model's record filter, and transformation before
            filtering is forced on.
        file_uploader: Optional file uploader handed to the RecordSelector.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        The configured RecordSelector.
    """
    extractor = self._create_component_from_model(
        model=model.extractor, decoder=decoder, config=config
    )

    record_filter = None
    if model.record_filter:
        record_filter = self._create_component_from_model(model.record_filter, config=config)

    assert model.transform_before_filtering is not None  # for mypy
    transform_before_filtering = model.transform_before_filtering

    if client_side_incremental_sync:
        if model.record_filter and hasattr(model.record_filter, "condition"):
            declared_condition = model.record_filter.condition
        else:
            declared_condition = None
        # NOTE(review): parameters is passed without the ``or {}`` fallback used by
        # the other factory methods, so None reaches the decorator - confirm intended.
        record_filter = ClientSideIncrementalRecordFilterDecorator(
            config=config,
            parameters=model.parameters,
            condition=declared_condition,
            **client_side_incremental_sync,
        )
        transform_before_filtering = True

    if isinstance(model.schema_normalization, SchemaNormalizationModel):
        schema_normalization = TypeTransformer(
            SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]
        )
    else:
        schema_normalization = self._create_component_from_model(model.schema_normalization, config=config)  # type: ignore[arg-type] # custom normalization model expected here

    return RecordSelector(
        extractor=extractor,
        name=name,
        config=config,
        record_filter=record_filter,
        transformations=transformations or [],
        file_uploader=file_uploader,
        schema_normalization=schema_normalization,
        parameters=model.parameters or {},
        transform_before_filtering=transform_before_filtering,
    )
2659
+
2660
@staticmethod
def create_remove_fields(
    model: RemoveFieldsModel, config: Config, **kwargs: Any
) -> RemoveFields:
    """Build a RemoveFields transformation; an unset condition becomes "" and parameters are empty."""
    removal_condition = model.condition or ""
    return RemoveFields(
        field_pointers=model.field_pointers, condition=removal_condition, parameters={}
    )
2667
+
2668
def create_selective_authenticator(
    self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
) -> DeclarativeAuthenticator:
    """Build every authenticator the model declares and hand them to SelectiveAuthenticator.

    ``**kwargs`` are forwarded to SelectiveAuthenticator unchanged.
    """
    authenticators = {}
    for option_name, authenticator_model in model.authenticators.items():
        authenticators[option_name] = self._create_component_from_model(
            model=authenticator_model, config=config
        )

    # SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error
    return SelectiveAuthenticator(  # type: ignore[abstract]
        config=config,
        authenticators=authenticators,
        authenticator_selection_path=model.authenticator_selection_path,
        **kwargs,
    )
2682
+
2683
@staticmethod
def create_legacy_session_token_authenticator(
    model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any
) -> LegacySessionTokenAuthenticator:
    """Build a LegacySessionTokenAuthenticator that uses ``url_base`` as its API URL.

    Unset password, session token, response key and username become "", and unset
    parameters become {}.
    """
    password = model.password or ""
    session_token = model.session_token or ""
    response_key = model.session_token_response_key or ""
    username = model.username or ""
    return LegacySessionTokenAuthenticator(
        api_url=url_base,
        header=model.header,
        login_url=model.login_url,
        password=password,
        session_token=session_token,
        session_token_response_key=response_key,
        username=username,
        validate_session_url=model.validate_session_url,
        config=config,
        parameters=model.parameters or {},
    )
2699
+
2700
def create_simple_retriever(
    self,
    model: SimpleRetrieverModel,
    config: Config,
    *,
    name: str,
    primary_key: Optional[Union[str, List[str], List[List[str]]]],
    stream_slicer: Optional[StreamSlicer],
    request_options_provider: Optional[RequestOptionsProvider] = None,
    stop_condition_on_cursor: bool = False,
    client_side_incremental_sync: Optional[Dict[str, Any]] = None,
    transformations: List[RecordTransformation],
    file_uploader: Optional[FileUploader] = None,
    incremental_sync: Optional[
        Union[
            IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
        ]
    ] = None,
    **kwargs: Any,
) -> SimpleRetriever:
    """Build the retriever for a stream from its declarative model.

    Depending on the model and factory settings, one of three retrievers is
    returned, all built from the same components:

    * LazySimpleRetriever - when the partition router is a substream router with
      at least one parent config declaring ``lazy_read_pointer`` and the stream
      has no saved state;
    * SimpleRetrieverTestReadDecorator - when slices are limited or
      connector-builder messages are emitted;
    * SimpleRetriever - otherwise.

    Raises:
        ValueError: On the lazy path, if ``incremental_sync`` is not a
            DatetimeBasedCursor, if it defines ``step`` or ``cursor_granularity``,
            or if the model's decoder is not a JsonDecoder.
    """
    if model.decoder:
        decoder = self._create_component_from_model(model=model.decoder, config=config)
    else:
        decoder = JsonDecoder(parameters={})

    requester = self._create_component_from_model(
        model=model.requester, decoder=decoder, config=config, name=name
    )
    record_selector = self._create_component_from_model(
        model=model.record_selector,
        name=name,
        config=config,
        decoder=decoder,
        transformations=transformations,
        client_side_incremental_sync=client_side_incremental_sync,
        file_uploader=file_uploader,
    )

    if hasattr(model.requester, "url_base"):
        url_base = model.requester.url_base
    else:
        url_base = requester.get_url_base()

    # Define cursor only if per partition or common incremental support is needed
    cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None

    # An exact-type test: subclasses of DatetimeBasedCursor (and any other slicer)
    # take the first branch. ``type(x) is T`` already implies ``isinstance(x, T)``,
    # so this single test matches the original two-part condition.
    if type(stream_slicer) is not DatetimeBasedCursor:
        # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
        # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
        # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
        # request_options_provider
        request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
    elif not request_options_provider:
        request_options_provider = DefaultRequestOptionsProvider(parameters={})

    stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})

    if model.paginator:
        paginator = self._create_component_from_model(
            model=model.paginator,
            config=config,
            url_base=url_base,
            decoder=decoder,
            cursor_used_for_stop_condition=cursor if stop_condition_on_cursor else None,
        )
    else:
        paginator = NoPagination(parameters={})

    # Keyword arguments common to all three retriever classes below.
    retriever_kwargs: Dict[str, Any] = {
        "name": name,
        "paginator": paginator,
        "primary_key": primary_key,
        "requester": requester,
        "record_selector": record_selector,
        "stream_slicer": stream_slicer,
        "request_option_provider": request_options_provider,
        "cursor": cursor,
        "config": config,
        "ignore_stream_slicer_parameters_on_paginated_requests": (
            model.ignore_stream_slicer_parameters_on_paginated_requests or False
        ),
        "parameters": model.parameters or {},
    }

    partition_router_model = model.partition_router
    wants_lazy_retriever = (
        partition_router_model
        and isinstance(partition_router_model, SubstreamPartitionRouterModel)
        and not bool(self._connector_state_manager.get_stream_state(name, None))
        and any(
            parent_stream_config.lazy_read_pointer
            for parent_stream_config in partition_router_model.parent_stream_configs
        )
    )
    if wants_lazy_retriever:
        if incremental_sync:
            if incremental_sync.type != "DatetimeBasedCursor":
                raise ValueError(
                    f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}."
                )
            if incremental_sync.step or incremental_sync.cursor_granularity:
                raise ValueError(
                    f"Found more that one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}."
                )

        if model.decoder and model.decoder.type != "JsonDecoder":
            raise ValueError(
                f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
            )

        return LazySimpleRetriever(**retriever_kwargs)

    if self._limit_slices_fetched or self._emit_connector_builder_messages:
        # Falls back to 5 slices when no explicit limit is configured.
        return SimpleRetrieverTestReadDecorator(
            **retriever_kwargs,
            maximum_number_of_slices=self._limit_slices_fetched or 5,
        )

    return SimpleRetriever(**retriever_kwargs)
2844
+
2845
def create_state_delegating_stream(
    self,
    model: StateDelegatingStreamModel,
    config: Config,
    has_parent_state: Optional[bool] = None,
    **kwargs: Any,
) -> DeclarativeStream:
    """Create either the incremental or the full-refresh stream of a delegating model.

    The incremental stream is chosen when the connector state manager returns a
    truthy state for the stream or when ``has_parent_state`` is truthy; otherwise
    the full-refresh stream is built.

    Args:
        model: Model holding both stream definitions under one name.
        config: Connector configuration forwarded to the stream creation.
        has_parent_state: Forces the incremental stream when truthy.
        **kwargs: Forwarded to the stream creation.

    Returns:
        The component created for the selected stream model.

    Raises:
        ValueError: If the three names (model, full-refresh, incremental) differ.
    """
    full_refresh_model = model.full_refresh_stream
    incremental_model = model.incremental_stream

    names_agree = (
        full_refresh_model.name == model.name and model.name == incremental_model.name
    )
    if not names_agree:
        raise ValueError(
            f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
        )

    if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state:
        selected_model = incremental_model
    else:
        selected_model = full_refresh_model

    return self._create_component_from_model(selected_model, config=config, **kwargs)  # type: ignore[no-any-return]  # Will be created DeclarativeStream as stream_model is stream description
2867
+
2868
def _create_async_job_status_mapping(
    self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
) -> Mapping[str, AsyncJobStatus]:
    """Invert the model's "CDK status -> API statuses" lists into "API status -> CDK status".

    Args:
        model: Status map model; ``model.dict()`` supplies one entry per CDK status
            name plus a ``type`` entry that is skipped.
        config: Connector configuration; not used here.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        A mapping from each API status to the value ``_get_async_job_status``
        returns for the CDK status it was listed under.

    Raises:
        ValueError: If an API status is listed more than once. The message names
            the CDK status the API status was first registered under.
    """
    api_status_to_cdk_status = {}
    # Name of the CDK status that first claimed each API status. Kept separately so
    # that a conflict reports the existing owner rather than the status currently
    # being processed (which is what the message used to print).
    first_owner = {}
    for cdk_status, api_statuses in model.dict().items():
        if cdk_status == "type":
            # This is an element of the dict because of the typing of the CDK but it is not a CDK status
            continue

        for status in api_statuses:
            if status in api_status_to_cdk_status:
                raise ValueError(
                    f"API status {status} is already set for CDK status {first_owner[status]}. Please ensure API statuses are only provided once"
                )
            api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status)
            first_owner[status] = cdk_status
    return api_status_to_cdk_status
2884
+
2885
def _get_async_job_status(self, status: str) -> AsyncJobStatus:
    """Translate a CDK status name into its AsyncJobStatus member.

    Raises:
        ValueError: If ``status`` is not "running", "completed", "failed" or "timeout".
    """
    if status == "running":
        return AsyncJobStatus.RUNNING
    if status == "completed":
        return AsyncJobStatus.COMPLETED
    if status == "failed":
        return AsyncJobStatus.FAILED
    if status == "timeout":
        return AsyncJobStatus.TIMED_OUT
    raise ValueError(f"Unsupported CDK status {status}")
2897
+
2898
+ def create_async_retriever(
2899
+ self,
2900
+ model: AsyncRetrieverModel,
2901
+ config: Config,
2902
+ *,
2903
+ name: str,
2904
+ primary_key: Optional[
2905
+ Union[str, List[str], List[List[str]]]
2906
+ ], # this seems to be needed to match create_simple_retriever
2907
+ stream_slicer: Optional[StreamSlicer],
2908
+ client_side_incremental_sync: Optional[Dict[str, Any]] = None,
2909
+ transformations: List[RecordTransformation],
2910
+ **kwargs: Any,
2911
+ ) -> AsyncRetriever:
2912
+ def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever:
2913
+ record_selector = RecordSelector(
2914
+ extractor=download_extractor,
2915
+ name=name,
2916
+ record_filter=None,
2917
+ transformations=transformations,
2918
+ schema_normalization=TypeTransformer(TransformConfig.NoTransform),
2919
+ config=config,
2920
+ parameters={},
2921
+ )
2922
+ paginator = (
2923
+ self._create_component_from_model(
2924
+ model=model.download_paginator,
2925
+ decoder=decoder,
2926
+ config=config,
2927
+ url_base="",
2928
+ )
2929
+ if model.download_paginator
2930
+ else NoPagination(parameters={})
2931
+ )
2932
+ maximum_number_of_slices = self._limit_slices_fetched or 5
2933
+
2934
+ if self._limit_slices_fetched or self._emit_connector_builder_messages:
2935
+ return SimpleRetrieverTestReadDecorator(
2936
+ requester=download_requester,
2937
+ record_selector=record_selector,
2938
+ primary_key=None,
2939
+ name=job_download_components_name,
2940
+ paginator=paginator,
2941
+ config=config,
2942
+ parameters={},
2943
+ maximum_number_of_slices=maximum_number_of_slices,
2944
+ )
2945
+
2946
+ return SimpleRetriever(
2947
+ requester=download_requester,
2948
+ record_selector=record_selector,
2949
+ primary_key=None,
2950
+ name=job_download_components_name,
2951
+ paginator=paginator,
2952
+ config=config,
2953
+ parameters={},
2954
+ )
2955
+
2956
+ def _get_job_timeout() -> datetime.timedelta:
2957
+ user_defined_timeout: Optional[int] = (
2958
+ int(
2959
+ InterpolatedString.create(
2960
+ str(model.polling_job_timeout),
2961
+ parameters={},
2962
+ ).eval(config)
2963
+ )
2964
+ if model.polling_job_timeout
2965
+ else None
2966
+ )
2967
+
2968
+ # check for user defined timeout during the test read or 15 minutes
2969
+ test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15)
2970
+ # default value for non-connector builder is 60 minutes.
2971
+ default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60)
2972
+
2973
+ return (
2974
+ test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout
2975
+ )
2976
+
2977
+ decoder = (
2978
+ self._create_component_from_model(model=model.decoder, config=config)
2979
+ if model.decoder
2980
+ else JsonDecoder(parameters={})
2981
+ )
2982
+ record_selector = self._create_component_from_model(
2983
+ model=model.record_selector,
2984
+ config=config,
2985
+ decoder=decoder,
2986
+ name=name,
2987
+ transformations=transformations,
2988
+ client_side_incremental_sync=client_side_incremental_sync,
2989
+ )
2990
+ stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
2991
+ creation_requester = self._create_component_from_model(
2992
+ model=model.creation_requester,
2993
+ decoder=decoder,
2994
+ config=config,
2995
+ name=f"job creation - {name}",
2996
+ )
2997
+ polling_requester = self._create_component_from_model(
2998
+ model=model.polling_requester,
2999
+ decoder=decoder,
3000
+ config=config,
3001
+ name=f"job polling - {name}",
3002
+ )
3003
+ job_download_components_name = f"job download - {name}"
3004
+ download_decoder = (
3005
+ self._create_component_from_model(model=model.download_decoder, config=config)
3006
+ if model.download_decoder
3007
+ else JsonDecoder(parameters={})
3008
+ )
3009
+ download_extractor = (
3010
+ self._create_component_from_model(
3011
+ model=model.download_extractor,
3012
+ config=config,
3013
+ decoder=download_decoder,
3014
+ parameters=model.parameters,
3015
+ )
3016
+ if model.download_extractor
3017
+ else DpathExtractor(
3018
+ [],
3019
+ config=config,
3020
+ decoder=download_decoder,
3021
+ parameters=model.parameters or {},
3022
+ )
3023
+ )
3024
+ download_requester = self._create_component_from_model(
3025
+ model=model.download_requester,
3026
+ decoder=download_decoder,
3027
+ config=config,
3028
+ name=job_download_components_name,
3029
+ )
3030
+ download_retriever = _get_download_retriever()
3031
+ abort_requester = (
3032
+ self._create_component_from_model(
3033
+ model=model.abort_requester,
3034
+ decoder=decoder,
3035
+ config=config,
3036
+ name=f"job abort - {name}",
3037
+ )
3038
+ if model.abort_requester
3039
+ else None
3040
+ )
3041
+ delete_requester = (
3042
+ self._create_component_from_model(
3043
+ model=model.delete_requester,
3044
+ decoder=decoder,
3045
+ config=config,
3046
+ name=f"job delete - {name}",
3047
+ )
3048
+ if model.delete_requester
3049
+ else None
3050
+ )
3051
+ download_target_requester = (
3052
+ self._create_component_from_model(
3053
+ model=model.download_target_requester,
3054
+ decoder=decoder,
3055
+ config=config,
3056
+ name=f"job extract_url - {name}",
3057
+ )
3058
+ if model.download_target_requester
3059
+ else None
3060
+ )
3061
+ status_extractor = self._create_component_from_model(
3062
+ model=model.status_extractor, decoder=decoder, config=config, name=name
3063
+ )
3064
+ download_target_extractor = self._create_component_from_model(
3065
+ model=model.download_target_extractor,
3066
+ decoder=decoder,
3067
+ config=config,
3068
+ name=name,
3069
+ )
3070
+
3071
+ job_repository: AsyncJobRepository = AsyncHttpJobRepository(
3072
+ creation_requester=creation_requester,
3073
+ polling_requester=polling_requester,
3074
+ download_retriever=download_retriever,
3075
+ download_target_requester=download_target_requester,
3076
+ abort_requester=abort_requester,
3077
+ delete_requester=delete_requester,
3078
+ status_extractor=status_extractor,
3079
+ status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
3080
+ download_target_extractor=download_target_extractor,
3081
+ job_timeout=_get_job_timeout(),
3082
+ )
3083
+
3084
+ async_job_partition_router = AsyncJobPartitionRouter(
3085
+ job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
3086
+ job_repository,
3087
+ stream_slices,
3088
+ self._job_tracker,
3089
+ self._message_repository,
3090
+ has_bulk_parent=False,
3091
+ # FIXME work would need to be done here in order to detect if a stream has a parent stream that is bulk
3092
+ ),
3093
+ stream_slicer=stream_slicer,
3094
+ config=config,
3095
+ parameters=model.parameters or {},
3096
+ )
3097
+
3098
+ return AsyncRetriever(
3099
+ record_selector=record_selector,
3100
+ stream_slicer=async_job_partition_router,
3101
+ config=config,
3102
+ parameters=model.parameters or {},
3103
+ )
3104
+
3105
@staticmethod
def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
    """Build a ``Spec`` component from its declarative model.

    ``config`` and ``kwargs`` are accepted but not read by this method.
    The resulting ``Spec`` always receives an empty ``parameters`` mapping.
    """
    spec_fields = {
        "connection_specification": model.connection_specification,
        "documentation_url": model.documentation_url,
        "advanced_auth": model.advanced_auth,
        "parameters": {},
    }
    return Spec(**spec_fields)
3113
+
3114
def create_substream_partition_router(
    self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any
) -> SubstreamPartitionRouter:
    """Build a ``SubstreamPartitionRouter`` from its declarative model.

    Each entry of ``model.parent_stream_configs`` is turned into a component
    through ``_create_message_repository_substream_wrapper``; ``kwargs`` are
    forwarded to that call. A missing or empty list yields a router with no
    parent stream configs.
    """
    parent_models = model.parent_stream_configs or []
    built_parent_configs = [
        self._create_message_repository_substream_wrapper(
            model=parent_model, config=config, **kwargs
        )
        for parent_model in parent_models
    ]

    return SubstreamPartitionRouter(
        parent_stream_configs=built_parent_configs,
        parameters=model.parameters or {},
        config=config,
    )
3133
+
3134
def _create_message_repository_substream_wrapper(
    self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
) -> Any:
    """Create the component for ``model`` using a dedicated child factory.

    The child ``ModelToComponentFactory`` copies this factory's limits and
    flags, but its message repository is wrapped in a
    ``LogAppenderMessageRepositoryDecorator`` that tags log messages as coming
    from a substream / auxiliary HTTP request.
    """
    log_tags = {
        "airbyte_cdk": {"stream": {"is_substream": True}},
        "http": {"is_auxiliary": True},
    }
    tagged_repository = LogAppenderMessageRepositoryDecorator(
        log_tags,
        self._message_repository,
        self._evaluate_log_level(self._emit_connector_builder_messages),
    )
    substream_factory = ModelToComponentFactory(
        limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
        limit_slices_fetched=self._limit_slices_fetched,
        emit_connector_builder_messages=self._emit_connector_builder_messages,
        disable_retries=self._disable_retries,
        disable_cache=self._disable_cache,
        message_repository=tagged_repository,
    )

    # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
    if model.incremental_dependency:
        stream_state = self._connector_state_manager.get_stream_state(
            kwargs.get("stream_name", ""), None
        )
        has_parent_state = bool(stream_state)
    else:
        has_parent_state = False

    return substream_factory._create_component_from_model(
        model=model, config=config, has_parent_state=has_parent_state, **kwargs
    )
3159
+
3160
@staticmethod
def create_wait_time_from_header(
    model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
) -> WaitTimeFromHeaderBackoffStrategy:
    """Build a ``WaitTimeFromHeaderBackoffStrategy`` from its declarative model.

    ``kwargs`` are accepted but not read by this method.
    """
    # Forwarded unchanged, so a model value of None stays None.
    max_wait_seconds = model.max_waiting_time_in_seconds
    return WaitTimeFromHeaderBackoffStrategy(
        header=model.header,
        parameters=model.parameters or {},
        config=config,
        regex=model.regex,
        max_waiting_time_in_seconds=max_wait_seconds,
    )
3173
+
3174
@staticmethod
def create_wait_until_time_from_header(
    model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any
) -> WaitUntilTimeFromHeaderBackoffStrategy:
    """Build a ``WaitUntilTimeFromHeaderBackoffStrategy`` from its declarative model.

    ``kwargs`` are accepted but not read by this method.
    """
    strategy_args = {
        "header": model.header,
        "parameters": model.parameters or {},
        "config": config,
        "min_wait": model.min_wait,
        "regex": model.regex,
    }
    return WaitUntilTimeFromHeaderBackoffStrategy(**strategy_args)
3185
+
3186
def get_message_repository(self) -> MessageRepository:
    """Return the message repository held by this factory."""
    repository = self._message_repository
    return repository
3188
+
3189
def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level:
    """Return ``Level.DEBUG`` when connector-builder messages are emitted, else ``Level.INFO``."""
    if emit_connector_builder_messages:
        return Level.DEBUG
    return Level.INFO
3191
+
3192
@staticmethod
def create_components_mapping_definition(
    model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
) -> ComponentMappingDefinition:
    """Build a ``ComponentMappingDefinition`` from its declarative model.

    Both the mapped value and every segment of ``field_path`` are wrapped in
    ``InterpolatedString``. ``config`` and ``kwargs`` are accepted but not
    read by this method.
    """
    value_template = InterpolatedString.create(
        model.value, parameters=model.parameters or {}
    )

    path_templates = []
    for segment in model.field_path:
        path_templates.append(
            InterpolatedString.create(segment, parameters=model.parameters or {})
        )

    resolved_value_type = ModelToComponentFactory._json_schema_type_name_to_type(
        model.value_type
    )
    return ComponentMappingDefinition(
        field_path=path_templates,  # type: ignore[arg-type] # field_path can be str and InterpolatedString
        value=value_template,
        value_type=resolved_value_type,
        parameters=model.parameters or {},
    )
3209
+
3210
def create_http_components_resolver(
    self, model: HttpComponentsResolverModel, config: Config
) -> Any:
    """Build an ``HttpComponentsResolver`` from its declarative model.

    The retriever is created with an empty name, no primary key and no
    transformations. The partition-router slicer is used when it is truthy;
    otherwise the result of ``_build_resumable_cursor`` is used.
    """
    stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
    combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
    chosen_slicer = stream_slicer or combined_slicers

    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="",
        primary_key=None,
        stream_slicer=chosen_slicer,
        transformations=[],
    )

    components_mapping = []
    for mapping_model in model.components_mapping:
        mapping_value_type = ModelToComponentFactory._json_schema_type_name_to_type(
            mapping_model.value_type
        )
        components_mapping.append(
            self._create_component_from_model(
                model=mapping_model,
                value_type=mapping_value_type,
                config=config,
            )
        )

    return HttpComponentsResolver(
        retriever=retriever,
        config=config,
        components_mapping=components_mapping,
        parameters=model.parameters or {},
    )
3242
+
3243
@staticmethod
def create_stream_config(
    model: StreamConfigModel, config: Config, **kwargs: Any
) -> StreamConfig:
    """Build a ``StreamConfig`` from its declarative model.

    ``configs_pointer`` is copied into a new list; a missing or empty pointer
    becomes an empty list. ``config`` and ``kwargs`` are accepted but not read
    by this method.
    """
    model_configs_pointer: List[Union[InterpolatedString, str]] = []
    if model.configs_pointer:
        model_configs_pointer = list(model.configs_pointer)

    return StreamConfig(
        configs_pointer=model_configs_pointer,
        parameters=model.parameters or {},
    )
3255
+
3256
def create_config_components_resolver(
    self, model: ConfigComponentsResolverModel, config: Config
) -> Any:
    """Build a ``ConfigComponentsResolver`` from its declarative model.

    The stream config and each components-mapping definition are created
    through ``_create_component_from_model``.
    """
    stream_config = self._create_component_from_model(
        model.stream_config, config=config, parameters=model.parameters or {}
    )

    components_mapping = []
    for mapping_model in model.components_mapping:
        mapping_value_type = ModelToComponentFactory._json_schema_type_name_to_type(
            mapping_model.value_type
        )
        components_mapping.append(
            self._create_component_from_model(
                model=mapping_model,
                value_type=mapping_value_type,
                config=config,
            )
        )

    return ConfigComponentsResolver(
        stream_config=stream_config,
        config=config,
        components_mapping=components_mapping,
        parameters=model.parameters or {},
    )
3280
+
3281
+ _UNSUPPORTED_DECODER_ERROR = (
3282
+ "Specified decoder of {decoder_type} is not supported for pagination."
3283
+ "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead."
3284
+ "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`."
3285
+ )
3286
+
3287
def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool:
    """Return whether ``decoder`` is accepted for pagination.

    ``JsonDecoder`` and ``XmlDecoder`` are always accepted. A
    ``CompositeRawDecoder`` is accepted when its parser passes
    ``_is_supported_parser_for_pagination``. Anything else is rejected.
    """
    if isinstance(decoder, (JsonDecoder, XmlDecoder)):
        return True
    if isinstance(decoder, CompositeRawDecoder):
        return self._is_supported_parser_for_pagination(decoder.parser)
    return False
3294
+
3295
def _is_supported_parser_for_pagination(self, parser: Parser) -> bool:
    """Return whether ``parser`` is accepted for pagination.

    Accepted parsers are a ``JsonParser``, or a ``GzipParser`` whose direct
    ``inner_parser`` is a ``JsonParser``.
    """
    return isinstance(parser, JsonParser) or (
        isinstance(parser, GzipParser) and isinstance(parser.inner_parser, JsonParser)
    )
3302
+
3303
def create_http_api_budget(
    self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
) -> HttpAPIBudget:
    """Build an ``HttpAPIBudget`` from its declarative model.

    Falsy header names fall back to ``"ratelimit-reset"`` and
    ``"ratelimit-remaining"``; a falsy status-code list falls back to ``[429]``.
    ``kwargs`` are accepted but not read by this method.
    """
    policies = []
    for policy_model in model.policies:
        policies.append(self._create_component_from_model(model=policy_model, config=config))

    reset_header = model.ratelimit_reset_header or "ratelimit-reset"
    remaining_header = model.ratelimit_remaining_header or "ratelimit-remaining"
    ratelimit_status_codes = model.status_codes_for_ratelimit_hit or [429]

    return HttpAPIBudget(
        policies=policies,
        ratelimit_reset_header=reset_header,
        ratelimit_remaining_header=remaining_header,
        status_codes_for_ratelimit_hit=ratelimit_status_codes,
    )
3317
+
3318
def create_fixed_window_call_rate_policy(
    self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
) -> FixedWindowCallRatePolicy:
    """Build a ``FixedWindowCallRatePolicy`` from its declarative model.

    ``kwargs`` are accepted but not read by this method.
    """
    matchers = []
    for matcher_model in model.matchers:
        matchers.append(self._create_component_from_model(model=matcher_model, config=config))

    # Set the initial reset timestamp to 10 days from now.
    # This value will be updated by the first request.
    initial_reset_ts = datetime.datetime.now() + datetime.timedelta(days=10)
    window_period = parse_duration(model.period)

    return FixedWindowCallRatePolicy(
        next_reset_ts=initial_reset_ts,
        period=window_period,
        call_limit=model.call_limit,
        matchers=matchers,
    )
3334
+
3335
def create_file_uploader(
    self, model: FileUploaderModel, config: Config, **kwargs: Any
) -> FileUploader:
    """Build a ``FileUploader`` from its declarative model.

    The requester and the download-target extractor are both created under
    the fixed name ``"File Uploader"``; ``kwargs`` are forwarded to both
    creations. A falsy ``filename_extractor`` is passed on as ``None``.
    """
    name = "File Uploader"
    shared_args = {"config": config, "name": name}

    requester = self._create_component_from_model(
        model=model.requester, **shared_args, **kwargs
    )
    download_target_extractor = self._create_component_from_model(
        model=model.download_target_extractor, **shared_args, **kwargs
    )

    return FileUploader(
        requester=requester,
        download_target_extractor=download_target_extractor,
        config=config,
        parameters=model.parameters or {},
        filename_extractor=model.filename_extractor or None,
    )
3358
+
3359
def create_moving_window_call_rate_policy(
    self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
) -> MovingWindowCallRatePolicy:
    """Build a ``MovingWindowCallRatePolicy`` from its declarative model.

    All rates are created first, then all matchers. ``kwargs`` are accepted
    but not read by this method.
    """
    rates = []
    for rate_model in model.rates:
        rates.append(self._create_component_from_model(model=rate_model, config=config))

    matchers = []
    for matcher_model in model.matchers:
        matchers.append(self._create_component_from_model(model=matcher_model, config=config))

    return MovingWindowCallRatePolicy(rates=rates, matchers=matchers)
3373
+
3374
def create_unlimited_call_rate_policy(
    self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
) -> UnlimitedCallRatePolicy:
    """Build an ``UnlimitedCallRatePolicy`` from its declarative model.

    ``kwargs`` are accepted but not read by this method.
    """
    matchers = []
    for matcher_model in model.matchers:
        matchers.append(self._create_component_from_model(model=matcher_model, config=config))

    return UnlimitedCallRatePolicy(matchers=matchers)
3385
+
3386
def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
    """Build a ``Rate`` from its declarative model.

    ``model.limit`` is stringified, evaluated as an ``InterpolatedString``
    against ``config``, and converted to ``int``. ``kwargs`` are accepted but
    not read by this method.
    """
    limit_template = InterpolatedString.create(str(model.limit), parameters={})
    evaluated_limit = int(limit_template.eval(config=config))
    rate_interval = parse_duration(model.interval)
    return Rate(limit=evaluated_limit, interval=rate_interval)
3392
+
3393
def create_http_request_matcher(
    self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
) -> HttpRequestRegexMatcher:
    """Build an ``HttpRequestRegexMatcher`` by copying the model's fields.

    ``config`` and ``kwargs`` are accepted but not read by this method.
    """
    matcher_args = {
        "method": model.method,
        "url_base": model.url_base,
        "url_path_pattern": model.url_path_pattern,
        "params": model.params,
        "headers": model.headers,
    }
    return HttpRequestRegexMatcher(**matcher_args)
3403
+
3404
def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None:
    """Create an API budget from ``component_definition`` and store it on ``self._api_budget``."""
    api_budget = self.create_component(
        model_type=HTTPAPIBudgetModel,
        component_definition=component_definition,
        config=config,
    )
    self._api_budget = api_budget