airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (368) hide show
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,17 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+
6
+ # RecordTransformation is depended upon by every class in this module (since it's the ABC everything implements). For this reason,
7
+ # the order of imports matters, i.e. this file must fully import RecordTransformation before importing anything which depends on RecordTransformation.
8
+ # Otherwise there will be a circular dependency (load order will be __init__.py --> RemoveFields (which tries to import RecordTransformation) -->
9
+ # __init__.py --> circular dep error), since loading this file causes it to try to import itself down the line.
10
+ # So we add the split directive below to tell isort to sort imports while keeping RecordTransformation as the first import.
11
+ from .transformation import RecordTransformation
12
+
13
+ # isort: split
14
+ from .add_fields import AddFields
15
+ from .remove_fields import RemoveFields
16
+
17
+ __all__ = ["AddFields", "RecordTransformation", "RemoveFields"]
@@ -0,0 +1,146 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import InitVar, dataclass, field
6
+ from typing import Any, Dict, List, Mapping, Optional, Type, Union
7
+
8
+ import dpath
9
+
10
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
11
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
12
+ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
13
+ from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState
14
+
15
+
@dataclass(frozen=True)
class AddedFieldDefinition:
    """Immutable, user-facing description of a single field to add to a record.

    The value may still be a plain string at this stage; AddFields converts it
    into an InterpolatedString when it builds its parsed definitions.
    """

    # Path inside the record at which the value is written.
    path: FieldPointer
    # Value to write: an already-built InterpolatedString or a plain string.
    value: Union[InterpolatedString, str]
    # Optional type handed to interpolation as the only valid result type.
    value_type: Optional[Type[Any]]
    # Init-only argument; it is not stored on the instance.
    parameters: InitVar[Mapping[str, Any]]
24
+
25
+
@dataclass(frozen=True)
class ParsedAddFieldDefinition:
    """Immutable, ready-to-evaluate description of a single field to add to a record.

    Unlike AddedFieldDefinition, the value here is always an InterpolatedString,
    so it can be evaluated directly when a record is transformed.
    """

    # Path inside the record at which the evaluated value is written.
    path: FieldPointer
    # Interpolated expression that produces the value to write.
    value: InterpolatedString
    # Optional type handed to interpolation as the only valid result type.
    value_type: Optional[Type[Any]]
    # Init-only argument; it is not stored on the instance.
    parameters: InitVar[Mapping[str, Any]]
34
+
35
+
@dataclass
class AddFields(RecordTransformation):
    """
    Transformation which adds fields to an output record. The path of the added field can be nested. Adding nested fields will create all
    necessary parent objects (like mkdir -p). Adding fields to an array will extend the array to that index (filling intermediate
    indices with null values). So if you add a field at index 5 to the array ["value"], it will become ["value", null, null, null, null,
    "new_value"].

    This transformation passes the following contextual values to interpolation:
        record: the record about to be output by the connector
        config: the input configuration provided to a connector
        stream_slice: the current stream slice being read

    The optional `condition` is additionally given `value`, the evaluated value of the field being added.
    `transform` also accepts `stream_state`, but does not forward it to interpolation.

    Examples of instantiating this transformation via YAML:
    - type: AddFields
      fields:
        # hardcoded constant
        - path: ["path"]
          value: "static_value"

        # nested path
        - path: ["path", "to", "field"]
          value: "static"

        # from config
        - path: ["shop_id"]
          value: "{{ config.shop_id }}"

        # from stream_interval
        # NOTE(review): stream_interval is not among the values this class passes to interpolation - confirm it is supplied elsewhere
        - path: ["date"]
          value: "{{ stream_interval.start_date }}"

        # from record
        - path: ["unnested_value"]
          value: "{{ record.nested.field }}"

        # from stream_slice
        - path: ["start_date"]
          value: "{{ stream_slice.start_date }}"

        # by supplying any valid Jinja template directive or expression https://jinja.palletsprojects.com/en/3.1.x/templates/#
        - path: ["two_times_two"]
          value: "{{ 2 * 2 }}"

    Attributes:
        fields (List[AddedFieldDefinition]): A list of transformations (path and corresponding value) that will be added to the record
        condition (str): Optional interpolated boolean expression; when non-empty, a field is only added if it evaluates to true
    """

    fields: List[AddedFieldDefinition]
    parameters: InitVar[Mapping[str, Any]]
    condition: str = ""
    _parsed_fields: List[ParsedAddFieldDefinition] = field(
        init=False, repr=False, default_factory=list
    )

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """Validate every field definition and pre-build its interpolated value.

        Raises:
            ValueError: if a field definition has an empty path.
            TypeError: if a field value is neither an InterpolatedString nor a str.
        """
        self._filter_interpolator = InterpolatedBoolean(
            condition=self.condition, parameters=parameters
        )

        for add_field in self.fields:
            if len(add_field.path) < 1:
                raise ValueError(
                    f"Expected a non-zero-length path for the AddFields transformation {add_field}"
                )

            if not isinstance(add_field.value, InterpolatedString):
                if not isinstance(add_field.value, str):
                    # Raising a bare string is itself a TypeError in Python 3 ("exceptions must
                    # derive from BaseException") and hides the message; raise a real TypeError
                    # so the exception type stays the same while the message is surfaced.
                    raise TypeError(
                        f"Expected a string value for the AddFields transformation: {add_field}"
                    )
                else:
                    self._parsed_fields.append(
                        ParsedAddFieldDefinition(
                            add_field.path,
                            InterpolatedString.create(add_field.value, parameters=parameters),
                            value_type=add_field.value_type,
                            parameters=parameters,
                        )
                    )
            else:
                self._parsed_fields.append(
                    ParsedAddFieldDefinition(
                        add_field.path,
                        add_field.value,
                        value_type=add_field.value_type,
                        parameters={},
                    )
                )

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """Add every configured field to `record` in place.

        Each field value is evaluated first; it is then written at the field's path unless a
        non-empty `condition` evaluates to false for that value.

        Args:
            record: the record to mutate.
            config: connector configuration; an empty mapping is used when None.
            stream_state: accepted for interface compatibility; not used here.
            stream_slice: the current stream slice, exposed to interpolation.
        """
        if config is None:
            config = {}
        kwargs = {"record": record, "stream_slice": stream_slice}
        # The condition string does not change between fields, so check it once.
        is_empty_condition = not self.condition
        for parsed_field in self._parsed_fields:
            valid_types = (parsed_field.value_type,) if parsed_field.value_type else None
            value = parsed_field.value.eval(config, valid_types=valid_types, **kwargs)
            # Short-circuit keeps the filter from being evaluated when no condition was given.
            if is_empty_condition or self._filter_interpolator.eval(config, value=value, **kwargs):
                dpath.new(record, parsed_field.path, value)

    def __eq__(self, other: Any) -> bool:
        """Compare by instance attributes; objects without a `__dict__` never compare equal."""
        other_attributes = getattr(other, "__dict__", None)
        if other_attributes is None:
            # e.g. None or an int: defer to Python's default handling instead of raising AttributeError.
            return NotImplemented
        return bool(self.__dict__ == other_attributes)
@@ -0,0 +1,61 @@
1
+ from dataclasses import InitVar, dataclass
2
+ from typing import Any, Dict, List, Mapping, Optional, Union
3
+
4
+ import dpath
5
+
6
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
7
+ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
8
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
9
+
10
+
11
@dataclass
class DpathFlattenFields(RecordTransformation):
    """
    Flatten fields only for provided path.

    field_path: List[Union[InterpolatedString, str]] path to the field to flatten.
    delete_origin_value: bool = False whether to delete origin field or keep it. Default is False.
    replace_record: bool = False whether to replace origin record or not. Default is False.

    """

    config: Config
    field_path: List[Union[InterpolatedString, str]]
    parameters: InitVar[Mapping[str, Any]]
    delete_origin_value: bool = False
    replace_record: bool = False

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """
        Turn every segment of ``field_path`` into an ``InterpolatedString``.

        :param parameters: Component parameters forwarded to ``InterpolatedString.create``
        """
        # A single pass is enough: ``create`` is applied to every segment here, so a
        # second loop re-creating the ``str`` segments would only repeat the same work.
        self._field_path = [
            InterpolatedString.create(segment, parameters=parameters)
            for segment in self.field_path
        ]

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Merge the mapping found at ``field_path`` into the top level of ``record``, in place.

        The path is evaluated against ``self.config``; the ``config`` argument of this
        method is not read. If the resolved path contains ``"*"`` only the first match is
        used. Nothing happens when the extracted value is not a dict.

        With ``replace_record`` set and a non-empty extracted dict, the record content is
        replaced by the extracted dict. Otherwise the extracted keys are merged only if
        none of them collides with an existing top-level key, removing the original value
        first when ``delete_origin_value`` is set.

        :param record: The record to mutate
        :param config: Unused; kept for interface compatibility
        :param stream_state: Unused; kept for interface compatibility
        :param stream_slice: Unused; kept for interface compatibility
        """
        path = [segment.eval(self.config) for segment in self._field_path]
        if "*" in path:
            matched = dpath.values(record, path)
            extracted = matched[0] if matched else None
        else:
            # The list default is deliberately not a dict, so a missing path is a no-op.
            extracted = dpath.get(record, path, default=[])

        if isinstance(extracted, dict):
            if self.replace_record and extracted:
                dpath.delete(record, "**")
                record.update(extracted)
            else:
                conflicts = set(extracted.keys()) & set(record.keys())
                if not conflicts:
                    if self.delete_origin_value:
                        dpath.delete(record, path)
                    record.update(extracted)
@@ -0,0 +1,52 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Dict, Optional
7
+
8
+ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
9
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
10
+
11
+
12
@dataclass
class FlattenFields(RecordTransformation):
    """
    Replace a nested record with a single-level mapping of its leaf values.

    flatten_lists: when True (the default) list items are expanded too, each keyed
    as ``<parent key>.<index>``; when False lists are kept as leaf values.
    """

    flatten_lists: bool = True

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Flatten ``record`` in place.

        :param record: The record to mutate
        :param config: Unused; kept for interface compatibility
        :param stream_state: Unused; kept for interface compatibility
        :param stream_slice: Unused; kept for interface compatibility
        """
        flattened = self.flatten_record(record)
        record.clear()
        record.update(flattened)

    def flatten_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """
        Return a new flat dict built from the leaves of ``record``.

        The traversal uses a LIFO stack. A dict entry keeps its own key unless that key
        is already present in the output or a list item has been expanded earlier in the
        traversal; in those cases the key becomes ``<parent key>.<key>`` (the root parent
        key is ``"_"``).

        :param record: The (possibly nested) mapping to flatten
        :return: The flattened mapping
        """
        flat: Dict[str, Any] = {}
        pending = [(record, "_")]
        # Switched on by the first expanded list item and never reset afterwards.
        always_prefix = False

        while pending:
            node, node_key = pending.pop()

            if isinstance(node, dict):
                for child_key, child in node.items():
                    if child_key in flat or always_prefix:
                        pending.append((child, f"{node_key}.{child_key}"))
                    else:
                        pending.append((child, child_key))
                continue

            if isinstance(node, list) and self.flatten_lists:
                for index, element in enumerate(node):
                    always_prefix = True
                    pending.append((element, f"{node_key}.{index}"))
                continue

            flat[node_key] = node

        return flat
@@ -0,0 +1,61 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import InitVar, dataclass
6
+ from typing import Any, Dict, Mapping, Optional
7
+
8
+ from airbyte_cdk import InterpolatedString
9
+ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
10
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
11
+
12
+
13
@dataclass
class KeysReplaceTransformation(RecordTransformation):
    """
    Transformation that applies keys names replacement.

    Example usage:
    - type: KeysReplace
      old: " "
      new: "_"
    Result:
    from: {"created time": ..., "customer id": ..., "user id": ...}
    to: {"created_time": ..., "customer_id": ..., "user_id": ...}
    """

    old: str
    new: str
    parameters: InitVar[Mapping[str, Any]]

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """
        Wrap ``old`` and ``new`` as interpolated strings.

        :param parameters: Component parameters forwarded to ``InterpolatedString.create``
        """
        self._new = InterpolatedString.create(self.new, parameters=parameters)
        self._old = InterpolatedString.create(self.old, parameters=parameters)

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Rename the keys of ``record`` in place, substituting ``old`` with ``new``.

        Nested dict values are renamed recursively; other values (lists included) are
        copied over unchanged.

        :param record: The record to mutate
        :param config: The connector configuration; an empty mapping is used when None
        :param stream_state: Exposed to the interpolation context
        :param stream_slice: Exposed to the interpolation context
        """
        effective_config = config if config is not None else {}
        context = {"record": record, "stream_state": stream_state, "stream_slice": stream_slice}
        search = str(self._old.eval(effective_config, **context))
        replacement = str(self._new.eval(effective_config, **context))

        def _rename(mapping: Dict[str, Any]) -> Dict[str, Any]:
            # Rebuild the mapping with substituted keys, descending into nested dicts.
            return {
                name.replace(search, replacement): (
                    _rename(item) if isinstance(item, dict) else item
                )
                for name, item in mapping.items()
            }

        renamed = _rename(record)
        record.clear()
        record.update(renamed)
@@ -0,0 +1,22 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Dict, Optional
7
+
8
+ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
9
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
10
+
11
+
12
@dataclass
class KeysToLowerTransformation(RecordTransformation):
    """
    Transformation that lower-cases the top-level keys of a record.
    """

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Lower-case every top-level key of ``record`` in place.

        Keys keep their original relative order. If two keys differ only by case, the
        value of the one that appears last in the record is kept.

        :param record: The record to mutate
        :param config: Unused; kept for interface compatibility
        :param stream_state: Unused; kept for interface compatibility
        :param stream_slice: Unused; kept for interface compatibility
        """
        # Rebuild from the ordered items rather than iterating a set of keys: set
        # iteration order varies between runs, which shuffled the record's key order
        # and made the winner of a case collision unpredictable.
        lowered = {key.lower(): value for key, value in record.items()}
        record.clear()
        record.update(lowered)
@@ -0,0 +1,68 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import re
6
+ from dataclasses import dataclass
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ import anyascii
10
+
11
+ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
12
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
13
+
14
+
15
@dataclass
class KeysToSnakeCaseTransformation(RecordTransformation):
    """
    Transformation that rewrites record keys, including those of nested dicts, to snake_case.
    """

    # Alternatives: capitalised/upper-case word, lower-case word, digit run, or a run of
    # any other characters (captured in the "NoToken" group and treated as a separator).
    token_pattern: re.Pattern[str] = re.compile(
        r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)"
    )

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Convert the keys of ``record`` to snake_case in place.

        :param record: The record to mutate
        :param config: Unused; kept for interface compatibility
        :param stream_state: Unused; kept for interface compatibility
        :param stream_slice: Unused; kept for interface compatibility
        """
        snake_cased = self._transform_record(record)
        record.clear()
        record.update(snake_cased)

    def _transform_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``record`` with converted keys, recursing into dict values only."""
        return {
            self.process_key(key): (
                self._transform_record(value) if isinstance(value, dict) else value
            )
            for key, value in record.items()
        }

    def process_key(self, key: str) -> str:
        """Run the full pipeline on one key: normalise, tokenise, filter, join."""
        ascii_key = self.normalize_key(key)
        raw_tokens = self.tokenize_key(ascii_key)
        kept_tokens = self.filter_tokens(raw_tokens)
        return self.tokens_to_snake_case(kept_tokens)

    def normalize_key(self, key: str) -> str:
        """Return ``key`` passed through ``anyascii.anyascii`` and coerced to ``str``."""
        return str(anyascii.anyascii(key))

    def tokenize_key(self, key: str) -> List[str]:
        """Split ``key`` into tokens; every separator run is represented by an empty string."""
        return [
            "" if match.group("NoToken") is not None else match.group(0)
            for match in self.token_pattern.finditer(key)
        ]

    def filter_tokens(self, tokens: List[str]) -> List[str]:
        """
        Drop empty tokens from the interior of the list and guard a leading digit token.

        The first and last tokens are always kept, even when empty. If the first token is
        made of digits, an empty token is inserted in front of it, which yields a leading
        underscore once joined.
        """
        if len(tokens) >= 3:
            first, *middle, last = tokens
            tokens = [first, *(token for token in middle if token), last]
        if tokens and tokens[0].isdigit():
            tokens.insert(0, "")
        return tokens

    def tokens_to_snake_case(self, tokens: List[str]) -> str:
        """Lower-case the tokens and join them with underscores."""
        return "_".join(token.lower() for token in tokens)
@@ -0,0 +1,75 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import InitVar, dataclass
6
+ from typing import Any, Dict, List, Mapping, Optional
7
+
8
+ import dpath
9
+ import dpath.exceptions
10
+
11
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
12
+ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
13
+ from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState
14
+
15
+
16
@dataclass
class RemoveFields(RecordTransformation):
    """
    A transformation which removes fields from a record. The fields removed are designated using FieldPointers.
    During transformation, if a field or any of its parents does not exist in the record, no error is thrown.

    If an input field pointer references an item in a list (e.g: ["k", 0] in the object {"k": ["a", "b", "c"]}) then
    the object at that index is set to None rather than being removed from the list. TODO change this behavior.

    It's possible to remove objects nested in lists e.g: removing [".", 0, "k"] from {".": [{"k": "V"}]} results in {".": [{}]}

    Usage syntax:

    ```yaml
        my_stream:
            <other parameters..>
            transformations:
                - type: RemoveFields
                  field_pointers:
                    - ["path", "to", "field1"]
                    - ["path2"]
    ```

    Attributes:
        field_pointers (List[FieldPointer]): pointers to the fields that should be removed
        condition (str): optional interpolated condition; when non-empty it is passed to
            dpath as a value filter, evaluated with the candidate value exposed as ``property``
    """

    field_pointers: List[FieldPointer]
    parameters: InitVar[Mapping[str, Any]]
    condition: str = ""

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """
        Build the boolean interpolator for ``condition``.

        :param parameters: Component parameters forwarded to ``InterpolatedBoolean``
        """
        self._filter_interpolator = InterpolatedBoolean(
            condition=self.condition, parameters=parameters
        )

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Remove the configured fields from ``record`` in place.

        :param record: The record to be transformed
        :param config: The connector configuration; an empty mapping is used when falsy
        :param stream_state: Unused; kept for interface compatibility
        :param stream_slice: Unused; kept for interface compatibility
        """

        def _matches_condition(candidate: Any) -> Any:
            return self._filter_interpolator.eval(config or {}, property=candidate)

        # No filter at all when the condition is empty.
        value_filter = _matches_condition if self.condition else None

        for pointer in self.field_pointers:
            # the dpath library by default doesn't delete fields from arrays
            try:
                dpath.delete(record, pointer, afilter=value_filter)
            except dpath.exceptions.PathNotFound:
                # if the (potentially nested) property does not exist, silently skip
                continue
@@ -0,0 +1,37 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from abc import abstractmethod
6
+ from dataclasses import dataclass
7
+ from typing import Any, Dict, Optional
8
+
9
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
10
+
11
+
12
@dataclass
class RecordTransformation:
    """
    Implementations of this class define transformations that can be applied to records of a stream.

    Note: the class does not derive from ``abc.ABC``, so the ``@abstractmethod`` marker on
    ``transform`` documents intent but is not enforced at instantiation time.
    """

    @abstractmethod
    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        """
        Transform a record by adding, deleting, or mutating fields directly from the record reference passed in argument.

        :param record: The input record to be transformed
        :param config: The user-provided configuration as specified by the source's spec
        :param stream_state: The stream state
        :param stream_slice: The stream slice
        :return: None; the record is modified in place
        """

    def __eq__(self, other: object) -> bool:
        """
        Compare two objects by their instance attribute dictionaries.

        :param other: The object to compare with
        :return: True when both ``__dict__`` mappings are equal; ``NotImplemented`` when
            ``other`` has no ``__dict__`` (e.g. None or an int), so that Python falls back
            to its default comparison instead of raising AttributeError
        """
        if not hasattr(other, "__dict__"):
            return NotImplemented
        return other.__dict__ == self.__dict__
@@ -0,0 +1,25 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from __future__ import annotations
6
+
7
+ from airbyte_cdk.sources.types import (
8
+ Config,
9
+ ConnectionDefinition,
10
+ FieldPointer,
11
+ Record,
12
+ StreamSlice,
13
+ StreamState,
14
+ )
15
+
16
# Note: This package originally contained class definitions for low-code CDK types, but we promoted them into the Python CDK.
# We've migrated connectors in the repository to reference the new location, but these assignments are used to retain backwards
# compatibility for sources created by OSS customers or on forks. This can be removed when we start bumping major versions.

# Each name is rebound to itself: a runtime no-op that keeps the imported names as
# explicit module-level attributes of this module.
FieldPointer = FieldPointer
Config = Config
ConnectionDefinition = ConnectionDefinition
StreamState = StreamState
Record = Record
StreamSlice = StreamSlice
@@ -0,0 +1,67 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import pkgutil
6
+ from typing import Any, List, Mapping, Optional
7
+
8
+ import yaml
9
+
10
+ from airbyte_cdk.models import AirbyteStateMessage, ConfiguredAirbyteCatalog
11
+ from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
12
+ ConcurrentDeclarativeSource,
13
+ )
14
+ from airbyte_cdk.sources.types import ConnectionDefinition
15
+
16
+
17
class YamlDeclarativeSource(ConcurrentDeclarativeSource[List[AirbyteStateMessage]]):
    """Declarative source defined by a yaml file"""

    def __init__(
        self,
        path_to_yaml: str,
        debug: bool = False,
        catalog: Optional[ConfiguredAirbyteCatalog] = None,
        config: Optional[Mapping[str, Any]] = None,
        state: Optional[List[AirbyteStateMessage]] = None,
    ) -> None:
        """
        :param path_to_yaml: Path to the yaml file describing the source
        :param debug: Accepted for interface compatibility; not read in this constructor
        :param catalog: The configured catalog; an empty catalog is used when omitted
        :param config: The connector configuration; an empty mapping is used when omitted
        :param state: The incoming state messages; an empty list is used when omitted
        """
        # Stored before calling the parent constructor.
        # NOTE(review): presumably the parent may call _emit_manifest_debug_message
        # during its own initialisation - confirm.
        self._path_to_yaml = path_to_yaml
        source_config = self._read_and_parse_yaml_file(path_to_yaml)

        super().__init__(
            catalog=catalog or ConfiguredAirbyteCatalog(streams=[]),
            config=config or {},
            state=state or [],
            source_config=source_config,
        )

    def _read_and_parse_yaml_file(self, path_to_yaml_file: str) -> ConnectionDefinition:
        """
        Load the manifest from the filesystem, falling back to package data.

        :param path_to_yaml_file: Path of the yaml file, or of the resource inside the
            top-level package of this class's module
        :return: The parsed manifest, or an empty mapping when the package resource is
            missing or empty
        """
        try:
            # For testing purposes, we want to allow to just pass a file
            # Read as UTF-8 explicitly so the result does not depend on the platform's
            # locale encoding and matches the UTF-8 decoding of the fallback below.
            with open(path_to_yaml_file, "r", encoding="utf-8") as f:
                return yaml.safe_load(f)  # type: ignore  # we assume the yaml represents a ConnectionDefinition
        except FileNotFoundError:
            # Running inside the container, the working directory during an operation is not structured the same as the static files
            package = self.__class__.__module__.split(".")[0]

            yaml_config = pkgutil.get_data(package, path_to_yaml_file)
            if yaml_config:
                decoded_yaml = yaml_config.decode()
                return self._parse(decoded_yaml)
            return {}

    def _emit_manifest_debug_message(self, extra_args: dict[str, Any]) -> None:
        """
        Record the manifest path in ``extra_args`` (mutated in place).

        :param extra_args: Mapping of debug details to enrich
        """
        extra_args["path_to_yaml"] = self._path_to_yaml

    @staticmethod
    def _parse(connection_definition_str: str) -> ConnectionDefinition:
        """
        Parses a yaml file into a manifest. Component references still exist in the manifest which will be
        resolved during the creating of the DeclarativeSource.
        :param connection_definition_str: yaml string to parse
        :return: The ConnectionDefinition parsed from connection_definition_str
        """
        return yaml.safe_load(connection_definition_str)  # type: ignore # yaml.safe_load doesn't return a type but know it is a Mapping