airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (368) hide show
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,169 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any, ItemsView, Iterator, KeysView, List, Mapping, Optional, ValuesView
8
+
9
+ from airbyte_cdk.models import AirbyteRecordMessageFileReference
10
+ from airbyte_cdk.utils.slice_hasher import SliceHasher
11
+
12
# A FieldPointer designates a path to a field inside a mapping. For example,
# following ["k1", "k1.2"] through the object {"k1": {"k1.2": "hello"}} yields "hello".
FieldPointer = List[str]

# Aliases for string-keyed mappings; each name documents which role the mapping plays.
Config = Mapping[str, Any]
ConnectionDefinition = Mapping[str, Any]
StreamState = Mapping[str, Any]

# The empty string, exposed under a descriptive name.
EmptyString = ""
19
+
20
+
21
class Record(Mapping[str, Any]):
    """Mapping wrapper around one record's data, tagged with its stream name.

    Lookup, iteration, length and membership are delegated to the wrapped
    ``data`` mapping. Equality considers only ``data`` and only holds against
    another ``Record``; the stream name, slice and file reference are ignored.
    Because ``__eq__`` is overridden without ``__hash__``, instances are
    unhashable.
    """

    def __init__(
        self,
        data: Mapping[str, Any],
        stream_name: str,
        associated_slice: Optional[StreamSlice] = None,
        file_reference: Optional[AirbyteRecordMessageFileReference] = None,
    ):
        """
        :param data: The record's fields; all mapping operations read from it.
        :param stream_name: Name of the stream, stored as a public attribute.
        :param associated_slice: Optional slice associated with the record.
        :param file_reference: Optional file reference attached to the record.
        """
        self._data = data
        self._associated_slice = associated_slice
        self.stream_name = stream_name
        self._file_reference = file_reference

    @property
    def data(self) -> Mapping[str, Any]:
        """Returns the wrapped data mapping (the same object, not a copy)."""
        return self._data

    @property
    def associated_slice(self) -> Optional[StreamSlice]:
        """Returns the slice given at construction, or None."""
        return self._associated_slice

    @property
    def file_reference(self) -> Optional[AirbyteRecordMessageFileReference]:
        """Returns the attached file reference, or None when none has been set."""
        # Annotated Optional because __init__ defaults this attribute to None.
        return self._file_reference

    @file_reference.setter
    def file_reference(self, value: Optional[AirbyteRecordMessageFileReference]) -> None:
        self._file_reference = value

    def __repr__(self) -> str:
        # Only the data is shown; stream name and slice are not part of the repr.
        return repr(self._data)

    def __getitem__(self, key: str) -> Any:
        return self._data[key]

    def __len__(self) -> int:
        return len(self._data)

    def __iter__(self) -> Iterator[str]:
        return iter(self._data)

    def __contains__(self, item: object) -> bool:
        return item in self._data

    def __eq__(self, other: object) -> bool:
        """Two records are equal when their data mappings are equal.

        Anything that is not a ``Record`` (including a plain dict with the same
        content) compares unequal.
        """
        if isinstance(other, Record):
            # noinspection PyProtectedMember
            return self._data == other._data
        return False

    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)
73
+
74
+
75
class StreamSlice(Mapping[str, Any]):
    """Mapping that merges a partition with an incremental cursor slice.

    The mapping interface exposes the union of the partition keys and the
    cursor slice keys. Extra fields are stored alongside but are not part of
    the mapping content. Item assignment is rejected with ``ValueError``.
    """

    def __init__(
        self,
        *,
        partition: Mapping[str, Any],
        cursor_slice: Mapping[str, Any],
        extra_fields: Optional[Mapping[str, Any]] = None,
    ) -> None:
        """
        :param partition: The partition keys representing a unique partition in the stream.
        :param cursor_slice: The incremental cursor slice keys, such as dates or pagination tokens.
        :param extra_fields: Additional fields that should not be part of the partition but passed along, such as metadata from the parent stream.
        :raises ValueError: If ``partition`` and ``cursor_slice`` share at least one key.
        """
        self._partition = partition
        self._cursor_slice = cursor_slice
        self._extra_fields = extra_fields if extra_fields else {}

        # A key present on both sides would be ambiguous in the merged view.
        shared_keys = partition.keys() & cursor_slice.keys()
        if shared_keys:
            raise ValueError("Keys for partition and incremental sync cursor should not overlap")

        merged = dict(partition)
        merged.update(dict(cursor_slice))
        self._stream_slice = merged

    @property
    def partition(self) -> Mapping[str, Any]:
        """Returns the partition portion of the stream slice."""
        # Unwrap as long as the stored partition is itself a StreamSlice.
        current = self._partition
        while isinstance(current, StreamSlice):
            current = current.partition
        return current

    @property
    def cursor_slice(self) -> Mapping[str, Any]:
        """Returns the cursor slice portion of the stream slice."""
        # Unwrap as long as the stored cursor slice is itself a StreamSlice.
        current = self._cursor_slice
        while isinstance(current, StreamSlice):
            current = current.cursor_slice
        return current

    @property
    def extra_fields(self) -> Mapping[str, Any]:
        """Returns the extra fields that are not part of the partition."""
        return self._extra_fields

    def __repr__(self) -> str:
        # Shows the merged partition + cursor mapping only.
        return repr(self._stream_slice)

    def __setitem__(self, key: str, value: Any) -> None:
        raise ValueError("StreamSlice is immutable")

    def __getitem__(self, key: str) -> Any:
        return self._stream_slice[key]

    def __len__(self) -> int:
        return len(self._stream_slice)

    def __iter__(self) -> Iterator[str]:
        return iter(self._stream_slice)

    def __contains__(self, item: Any) -> bool:
        return item in self._stream_slice

    def keys(self) -> KeysView[str]:
        return self._stream_slice.keys()

    def items(self) -> ItemsView[str, Any]:
        return self._stream_slice.items()

    def values(self) -> ValuesView[Any]:
        return self._stream_slice.values()

    def get(self, key: str, default: Any = None) -> Optional[Any]:
        return self._stream_slice.get(key, default)

    def __eq__(self, other: Any) -> bool:
        """Compare against a dict (by merged content) or another StreamSlice.

        Two StreamSlices are equal when both their stored partition and their
        stored cursor slice are equal. Any other type compares unequal.
        """
        if isinstance(other, dict):
            return self._stream_slice == other
        if not isinstance(other, StreamSlice):
            return False
        # noinspection PyProtectedMember
        same_partition = self._partition == other._partition
        # noinspection PyProtectedMember
        return same_partition and self._cursor_slice == other._cursor_slice

    def __ne__(self, other: Any) -> bool:
        is_equal = self.__eq__(other)
        return not is_equal

    def __json_serializable__(self) -> Any:
        return self._stream_slice

    def __hash__(self) -> int:
        # no need to provide stream_name here as this is used for slicing the cursor
        return SliceHasher.hash(stream_slice=self._stream_slice)

    def __bool__(self) -> bool:
        # Truthy when either the merged mapping or the extra fields hold anything.
        if self._stream_slice:
            return True
        return bool(self._extra_fields)
@@ -0,0 +1,7 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ # Initialize Utils Package
6
+
7
+ __all__ = ["record_helper"]
@@ -0,0 +1,12 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+
6
+ import re
7
+
8
+
9
+ # https://stackoverflow.com/a/1176023
10
def camel_to_snake(s: str) -> str:
    """Convert a CamelCase / mixedCase identifier into lower snake_case.

    :param s: the identifier to convert
    :return: ``s`` with underscores inserted at word boundaries, lower-cased
    """
    # Pass 1: put an underscore before a capitalised word ("XFoo" -> "X_Foo").
    with_word_breaks = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", s)
    # Pass 2: split a lowercase letter or digit from the capital right after it.
    with_all_breaks = re.sub("([a-z0-9])([A-Z])", r"\1_\2", with_word_breaks)
    return with_all_breaks.lower()
@@ -0,0 +1,15 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+ import os
5
+
6
# Staging directory; can be overridden with the AIRBYTE_STAGING_DIRECTORY environment variable.
AIRBYTE_STAGING_DIRECTORY = os.getenv("AIRBYTE_STAGING_DIRECTORY", "/staging/files")
# Fallback returned when the staging directory does not exist.
DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"


def get_files_directory() -> str:
    """Return the staging directory if it exists on disk, else the local default directory."""
    if os.path.exists(AIRBYTE_STAGING_DIRECTORY):
        return AIRBYTE_STAGING_DIRECTORY
    return DEFAULT_LOCAL_DIRECTORY
@@ -0,0 +1,53 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+ import time
5
+ from collections.abc import Mapping as ABCMapping
6
+ from typing import Any, Mapping, Optional
7
+
8
+ from airbyte_cdk.models import (
9
+ AirbyteLogMessage,
10
+ AirbyteMessage,
11
+ AirbyteRecordMessage,
12
+ AirbyteRecordMessageFileReference,
13
+ AirbyteTraceMessage,
14
+ )
15
+ from airbyte_cdk.models import Type as MessageType
16
+ from airbyte_cdk.sources.streams.core import StreamData
17
+ from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
18
+
19
+
20
def stream_data_to_airbyte_message(
    stream_name: str,
    data_or_message: StreamData,
    transformer: TypeTransformer = TypeTransformer(TransformConfig.NoTransform),
    schema: Optional[Mapping[str, Any]] = None,
    file_reference: Optional[AirbyteRecordMessageFileReference] = None,
) -> AirbyteMessage:
    """Wrap stream data, or an already-built trace/log message, into an AirbyteMessage.

    :param stream_name: name of the stream set on the record message
    :param data_or_message: a mapping (emitted as a RECORD), an AirbyteTraceMessage
        (emitted as a TRACE) or an AirbyteLogMessage (emitted as a LOG)
    :param transformer: applied to the copied record data together with ``schema``
    :param schema: schema handed to the transformer; an empty dict is used when None
    :param file_reference: passed through to the record message
    :return: the wrapping AirbyteMessage
    :raises ValueError: when ``data_or_message`` is none of the supported types
    """
    effective_schema = {} if schema is None else schema

    if isinstance(data_or_message, ABCMapping):
        # Work on a copy so the caller's mapping is not touched by the transformer.
        record_data = dict(data_or_message)
        emitted_at_ms = time.time_ns() // 1_000_000
        # Transform object fields according to config. Most likely you will
        # need it to normalize values against json schema. By default no action
        # taken unless configured. See
        # docs/connector-development/cdk-python/schemas.md for details.
        transformer.transform(record_data, effective_schema)
        record = AirbyteRecordMessage(
            stream=stream_name,
            data=record_data,
            emitted_at=emitted_at_ms,
            file_reference=file_reference,
        )
        return AirbyteMessage(type=MessageType.RECORD, record=record)

    if isinstance(data_or_message, AirbyteTraceMessage):
        return AirbyteMessage(type=MessageType.TRACE, trace=data_or_message)

    if isinstance(data_or_message, AirbyteLogMessage):
        return AirbyteMessage(type=MessageType.LOG, log=data_or_message)

    raise ValueError(
        f"Unexpected type for data_or_message: {type(data_or_message)}: {data_or_message}"
    )
@@ -0,0 +1,230 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+
6
+ import importlib
7
+ import json
8
+ import os
9
+ import pkgutil
10
+ from typing import Any, ClassVar, Dict, List, Mapping, MutableMapping, Optional, Tuple
11
+
12
+ import jsonref
13
+ from jsonschema import RefResolver, validate
14
+ from jsonschema.exceptions import ValidationError
15
+ from pydantic.v1 import BaseModel, Field
16
+
17
+ from airbyte_cdk.models import ConnectorSpecification, FailureType
18
+ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
19
+
20
+
21
class JsonFileLoader:
    """
    JSON file loader that redirects reference URIs into the "shared" directory.

    Needed for compatibility with existing schemas, whose references point to
    shared_schema.json instead of shared/shared_schema.json.
    """

    def __init__(self, uri_base: str, shared: str):
        self.uri_base = uri_base
        self.shared = shared

    def __call__(self, uri: str) -> Dict[str, Any]:
        """Load the JSON document for ``uri`` after rewriting it into the shared directory.

        :param uri: reference URI; each occurrence of ``uri_base`` is replaced
            with ``<uri_base>/<shared>/``
        :return: the parsed JSON object
        :raises ValueError: when the file does not contain a JSON object
        """
        redirected = uri.replace(self.uri_base, f"{self.uri_base}/{self.shared}/")
        with open(redirected) as handle:
            content = json.load(handle)
        if not isinstance(content, dict):
            raise ValueError(f"Expected to read a dictionary from {redirected}. Got: {content}")
        return content
40
+
41
+
42
def resolve_ref_links(obj: Any) -> Any:
    """
    Walk a resolved schema and turn jsonref.JsonRef objects into JSON serializable dicts.

    :param obj - jsonschema object with ref field resolved.
    :return JSON serializable object with references without external dependencies.
    :raises ValueError - when a JsonRef resolves to something other than a dict.
    """
    if isinstance(obj, jsonref.JsonRef):
        target = resolve_ref_links(obj.__subject__)
        if not isinstance(target, dict):
            raise ValueError(f"Expected obj to be a dict. Got {target}")
        # Omit existing definitions for external resource since
        # we dont need it anymore.
        target.pop("definitions", None)
        return target
    if isinstance(obj, dict):
        return {key: resolve_ref_links(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [resolve_ref_links(element) for element in obj]
    return obj
64
+
65
+
66
def _expand_refs(schema: Any, ref_resolver: Optional[RefResolver] = None) -> None:
    """Recursively replace each $ref found in the schema with its definition, in place.

    :param schema: schema that will be patched
    :param ref_resolver: resolver used to look up a $ref; built from ``schema`` when not given
    """
    resolver = ref_resolver or RefResolver.from_schema(schema)

    if isinstance(schema, MutableMapping):
        if "$ref" not in schema:
            for nested in schema.values():
                _expand_refs(nested, ref_resolver=resolver)
            return
        ref_url = schema.pop("$ref")
        _, definition = resolver.resolve(ref_url)
        # expand refs in definitions as well
        _expand_refs(definition, ref_resolver=resolver)
        schema.update(definition)
    elif isinstance(schema, List):
        for element in schema:
            _expand_refs(element, ref_resolver=resolver)
88
+
89
+
90
def expand_refs(schema: Any) -> None:
    """Replace every $ref in the schema with its definition, patching the schema in place.

    :param schema: schema that will be patched
    """
    _expand_refs(schema)
    # remove definitions created by $ref
    schema.pop("definitions", None)
97
+
98
+
99
def rename_key(schema: Any, old_key: str, new_key: str) -> None:
    """Recursively rename a key throughout a nested schema, in place.

    Used to replace anyOf with oneOf. Both nested mappings and lists are
    traversed, so a key sitting inside a list of sub-schemas (for example
    inside the array of another ``anyOf``/``allOf``) is renamed as well.
    Values that are neither a mutable mapping nor a list are left untouched.

    :param schema: schema that will be patched
    :param old_key: name of the key to replace
    :param new_key: new name of the key
    """
    if isinstance(schema, list):
        # Lists hold no keys themselves, but their items may be sub-schemas.
        for item in schema:
            rename_key(item, old_key, new_key)
        return

    if not isinstance(schema, MutableMapping):
        return

    # Patch children first; only nested containers are mutated here, so the
    # mapping being iterated keeps its own key set during the loop.
    for value in schema.values():
        rename_key(value, old_key, new_key)
    if old_key in schema:
        schema[new_key] = schema.pop(old_key)
113
+
114
+
115
class ResourceSchemaLoader:
    """JSONSchema loader from package resources"""

    def __init__(self, package_name: str):
        self.package_name = package_name

    def get_schema(self, name: str) -> dict[str, Any]:
        """
        Load the JSON schema called ``name`` from the package's schemas/ folder.

        All top-level schemas (one per stream) are expected in the "schemas/" folder,
        and any shared $refs in the "schemas/shared/" folder. For example:

        schemas/shared/<shared_definition>.json
        schemas/<name>.json # contains a $ref to shared_definition
        schemas/<name2>.json # contains a $ref to shared_definition

        :param name schema file name without the .json extension.
        :return the schema with its references resolved.
        :raises IOError when the file is missing or empty.
        :raises RuntimeError when the file is not valid JSON.
        """
        schema_filename = f"schemas/{name}.json"
        raw_file = pkgutil.get_data(self.package_name, schema_filename)
        if not raw_file:
            raise IOError(f"Cannot find file {schema_filename}")

        try:
            raw_schema = json.loads(raw_file)
        except ValueError as err:
            raise RuntimeError(f"Invalid JSON file format for file {schema_filename}") from err

        return self._resolve_schema_references(raw_schema)

    def _resolve_schema_references(self, raw_schema: dict[str, Any]) -> dict[str, Any]:
        """
        Resolve the schema's references, loading external ones from "schemas/shared".

        :param raw_schema jsonschema to lookup for external links.
        :return JSON serializable object with references without external dependencies.
        :raises ValueError when the package has no __file__ or the result is not a dict.
        """
        package = importlib.import_module(self.package_name)
        if not package.__file__:
            raise ValueError(f"Package {package} does not have a valid __file__ field")

        # References are resolved relative to the directory holding the package.
        base = os.path.dirname(package.__file__) + "/"
        shared_loader = JsonFileLoader(base, "schemas/shared")
        with_refs = jsonref.JsonRef.replace_refs(raw_schema, loader=shared_loader, base_uri=base)
        resolved = resolve_ref_links(with_refs)
        if not isinstance(resolved, dict):
            raise ValueError(f"Expected resolved to be a dict. Got {resolved}")
        return resolved
166
+
167
+
168
def check_config_against_spec_or_exit(
    config: Mapping[str, Any], spec: ConnectorSpecification
) -> None:
    """
    Validate the config object against the spec's connectionSpecification schema.

    :param config - config loaded from file specified over command line
    :param spec - spec object generated by connector
    :raises AirbyteTracedException - config_error carrying the validation
        message when the config does not validate
    """
    connection_spec = spec.connectionSpecification
    try:
        validate(instance=config, schema=connection_spec)
    except ValidationError as validation_error:
        reason = validation_error.message
        # "from None" is required to prevent logging config secrets from the
        # ValidationError's stacktrace
        raise AirbyteTracedException(
            message="Config validation error: " + reason,
            internal_message=reason,
            failure_type=FailureType.config_error,
        ) from None
187
+
188
+
189
class InternalConfig(BaseModel):
    # Config keys that belong to this model rather than to the user config.
    KEYWORDS: ClassVar[set[str]] = {"_limit", "_page_size"}
    # Populated from the "_limit" and "_page_size" keys through the aliases.
    limit: int = Field(None, alias="_limit")
    page_size: int = Field(None, alias="_page_size")

    def dict(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
        # Always serialize with the aliases and only the fields that were set,
        # overriding whatever the caller passed for these two options.
        kwargs.update(by_alias=True, exclude_unset=True)
        return super().dict(*args, **kwargs)

    def is_limit_reached(self, records_counter: int) -> bool:
        """
        Check if record count reached limit set by internal config.
        :param records_counter - number of records already read
        :return True if limit reached, False otherwise (including when no limit is set)
        """
        return bool(self.limit and records_counter >= self.limit)
209
+
210
+
211
def split_config(config: Mapping[str, Any]) -> Tuple[dict[str, Any], InternalConfig]:
    """
    Split a config mapping into the user defined configuration and the internal
    config, which holds the private keys used for acceptance test configuration.

    :param
     config - Dict object that has been loaded from config file.

    :return tuple of user defined config dict with filtered out internal
     parameters and connector acceptance test internal config object.
    """
    user_entries: dict[str, Any] = {}
    internal_entries: dict[str, Any] = {}
    for key, value in config.items():
        # Route each entry by whether its key is one of the internal keywords.
        bucket = internal_entries if key in InternalConfig.KEYWORDS else user_entries
        bucket[key] = value
    return user_entries, InternalConfig.parse_obj(internal_entries)
@@ -0,0 +1,57 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import json
6
+ import logging
7
+ from abc import ABC, abstractmethod
8
+ from typing import Any, Mapping, Optional
9
+
10
+ from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
11
+ from airbyte_cdk.models import Type as MessageType
12
+
13
+
14
class SliceLogger(ABC):
    """
    Interface for logging slices of data in a uniform way.
    Implementations decide whether a slice should be logged; the log message itself is built here.
    """

    SLICE_LOG_PREFIX = "slice:"

    def create_slice_log_message(self, _slice: Optional[Mapping[str, Any]]) -> AirbyteMessage:
        """
        Build the INFO log message for a slice.

        Mapping is an interface that can be implemented in various ways. However, json.dumps will just do a `str(<object>)` if
        the slice is a class implementing Mapping. Therefore a truthy slice is cast to a dict before being passed to json.dumps.
        """
        printable_slice = dict(_slice) if _slice else _slice
        serialized = json.dumps(printable_slice, default=str)
        log_message = AirbyteLogMessage(
            level=Level.INFO,
            message=f"{SliceLogger.SLICE_LOG_PREFIX}{serialized}",
        )
        return AirbyteMessage(type=MessageType.LOG, log=log_message)

    @abstractmethod
    def should_log_slice_message(self, logger: logging.Logger) -> bool:
        """
        Decide whether slice messages should be logged.

        :param logger: logger the decision may be based on
        :return: True when slice messages should be logged
        """
43
+
44
+
45
class DebugSliceLogger(SliceLogger):
    def should_log_slice_message(self, logger: logging.Logger) -> bool:
        """
        Log slices only when the given logger is enabled for DEBUG.

        :param logger: logger whose level is checked
        :return: True when ``logger`` is enabled for ``logging.DEBUG``
        """
        debug_enabled = logger.isEnabledFor(logging.DEBUG)
        return debug_enabled
53
+
54
+
55
class AlwaysLogSliceLogger(SliceLogger):
    def should_log_slice_message(self, logger: logging.Logger) -> bool:
        """Always log slices; ``logger`` is not consulted."""
        always = True
        return always