airbyte-cdk 0.72.0__py3-none-any.whl → 6.17.1.dev0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (518)
  1. airbyte_cdk/__init__.py +355 -6
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +29 -10
  7. airbyte_cdk/connector.py +24 -24
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
  10. airbyte_cdk/connector_builder/main.py +45 -13
  11. airbyte_cdk/connector_builder/message_grouper.py +189 -50
  12. airbyte_cdk/connector_builder/models.py +3 -2
  13. airbyte_cdk/destinations/__init__.py +4 -3
  14. airbyte_cdk/destinations/destination.py +54 -20
  15. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  16. airbyte_cdk/destinations/vector_db_based/config.py +40 -17
  17. airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
  18. airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
  19. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  20. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  21. airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
  22. airbyte_cdk/entrypoint.py +153 -44
  23. airbyte_cdk/exception_handler.py +21 -3
  24. airbyte_cdk/logger.py +30 -44
  25. airbyte_cdk/models/__init__.py +13 -2
  26. airbyte_cdk/models/airbyte_protocol.py +86 -1
  27. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  28. airbyte_cdk/models/file_transfer_record_message.py +13 -0
  29. airbyte_cdk/models/well_known_types.py +1 -1
  30. airbyte_cdk/sources/__init__.py +5 -1
  31. airbyte_cdk/sources/abstract_source.py +125 -79
  32. airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
  33. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
  34. airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
  35. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
  36. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  37. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
  38. airbyte_cdk/sources/config.py +3 -2
  39. airbyte_cdk/sources/connector_state_manager.py +49 -83
  40. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  41. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
  42. airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
  43. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  44. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  45. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  46. airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
  47. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  48. airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
  49. airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
  50. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
  51. airbyte_cdk/sources/declarative/auth/token.py +28 -10
  52. airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
  53. airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
  54. airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
  55. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  56. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  57. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +490 -0
  58. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
  59. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
  60. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1213 -88
  61. airbyte_cdk/sources/declarative/declarative_source.py +5 -2
  62. airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
  63. airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
  64. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
  65. airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
  66. airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
  67. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  68. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  69. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  70. airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
  71. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
  72. airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
  73. airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
  74. airbyte_cdk/sources/declarative/extractors/record_filter.py +63 -8
  75. airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
  76. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
  77. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  78. airbyte_cdk/sources/declarative/incremental/__init__.py +31 -3
  79. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +346 -0
  80. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
  81. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  82. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
  83. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +173 -74
  84. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  85. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  86. airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
  87. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
  88. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
  89. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
  90. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
  91. airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
  92. airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
  93. airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
  94. airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
  95. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  96. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  97. airbyte_cdk/sources/declarative/models/__init__.py +1 -1
  98. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1329 -595
  99. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
  100. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
  101. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
  102. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1763 -226
  103. airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
  104. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  105. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  106. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
  107. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  108. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
  109. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
  110. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
  111. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
  112. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
  113. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
  114. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
  115. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
  116. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
  117. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
  118. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
  119. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
  120. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  121. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
  122. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
  123. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
  124. airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
  125. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
  126. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
  127. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
  128. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
  129. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
  130. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
  131. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
  132. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
  133. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
  134. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
  135. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
  136. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
  137. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  138. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
  139. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
  140. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
  141. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
  142. airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
  143. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  144. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  145. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  146. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  147. airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
  148. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
  149. airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
  150. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +229 -73
  151. airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
  152. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
  153. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
  154. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
  155. airbyte_cdk/sources/declarative/spec/spec.py +12 -5
  156. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
  157. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
  158. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
  159. airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
  160. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  161. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  162. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  163. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  164. airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
  165. airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
  166. airbyte_cdk/sources/declarative/types.py +19 -110
  167. airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
  168. airbyte_cdk/sources/embedded/base_integration.py +16 -5
  169. airbyte_cdk/sources/embedded/catalog.py +16 -4
  170. airbyte_cdk/sources/embedded/runner.py +19 -3
  171. airbyte_cdk/sources/embedded/tools.py +5 -2
  172. airbyte_cdk/sources/file_based/README.md +152 -0
  173. airbyte_cdk/sources/file_based/__init__.py +24 -0
  174. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
  175. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
  176. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
  177. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +47 -10
  178. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  179. airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
  180. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  181. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
  182. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  183. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  184. airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
  185. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
  186. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  187. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  188. airbyte_cdk/sources/file_based/exceptions.py +18 -15
  189. airbyte_cdk/sources/file_based/file_based_source.py +140 -33
  190. airbyte_cdk/sources/file_based/file_based_stream_reader.py +69 -5
  191. airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
  192. airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
  193. airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
  194. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  195. airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
  196. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  197. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
  198. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
  199. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +141 -41
  200. airbyte_cdk/sources/file_based/remote_file.py +1 -1
  201. airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
  202. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
  203. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  204. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  205. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
  206. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
  207. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
  208. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
  209. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
  210. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
  211. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  212. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
  213. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +147 -45
  214. airbyte_cdk/sources/http_logger.py +8 -3
  215. airbyte_cdk/sources/message/__init__.py +7 -1
  216. airbyte_cdk/sources/message/repository.py +18 -4
  217. airbyte_cdk/sources/source.py +42 -38
  218. airbyte_cdk/sources/streams/__init__.py +2 -2
  219. airbyte_cdk/sources/streams/availability_strategy.py +54 -3
  220. airbyte_cdk/sources/streams/call_rate.py +64 -21
  221. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  222. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  223. airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
  224. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  225. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  226. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  227. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  228. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
  229. airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
  230. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
  231. airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
  232. airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
  233. airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
  234. airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
  235. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
  236. airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
  237. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
  238. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  239. airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
  240. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
  241. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
  242. airbyte_cdk/sources/streams/core.py +412 -87
  243. airbyte_cdk/sources/streams/http/__init__.py +2 -1
  244. airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
  245. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  246. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  247. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  248. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  249. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  250. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  251. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  252. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  253. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  254. airbyte_cdk/sources/streams/http/exceptions.py +27 -7
  255. airbyte_cdk/sources/streams/http/http.py +369 -246
  256. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  257. airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
  258. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
  259. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
  260. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
  261. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  262. airbyte_cdk/sources/types.py +154 -0
  263. airbyte_cdk/sources/utils/record_helper.py +36 -21
  264. airbyte_cdk/sources/utils/schema_helpers.py +13 -6
  265. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  266. airbyte_cdk/sources/utils/transform.py +54 -20
  267. airbyte_cdk/sql/_util/hashing.py +34 -0
  268. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  269. airbyte_cdk/sql/constants.py +32 -0
  270. airbyte_cdk/sql/exceptions.py +235 -0
  271. airbyte_cdk/sql/secrets.py +123 -0
  272. airbyte_cdk/sql/shared/__init__.py +15 -0
  273. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  274. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  275. airbyte_cdk/sql/types.py +160 -0
  276. airbyte_cdk/test/catalog_builder.py +70 -18
  277. airbyte_cdk/test/entrypoint_wrapper.py +117 -42
  278. airbyte_cdk/test/mock_http/__init__.py +1 -1
  279. airbyte_cdk/test/mock_http/matcher.py +6 -0
  280. airbyte_cdk/test/mock_http/mocker.py +57 -10
  281. airbyte_cdk/test/mock_http/request.py +19 -3
  282. airbyte_cdk/test/mock_http/response.py +3 -1
  283. airbyte_cdk/test/mock_http/response_builder.py +32 -16
  284. airbyte_cdk/test/state_builder.py +18 -10
  285. airbyte_cdk/test/utils/__init__.py +1 -0
  286. airbyte_cdk/test/utils/data.py +24 -0
  287. airbyte_cdk/test/utils/http_mocking.py +16 -0
  288. airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
  289. airbyte_cdk/test/utils/reading.py +26 -0
  290. airbyte_cdk/utils/__init__.py +2 -1
  291. airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
  292. airbyte_cdk/utils/analytics_message.py +10 -2
  293. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  294. airbyte_cdk/utils/event_timing.py +10 -10
  295. airbyte_cdk/utils/mapping_helpers.py +3 -1
  296. airbyte_cdk/utils/message_utils.py +20 -11
  297. airbyte_cdk/utils/print_buffer.py +75 -0
  298. airbyte_cdk/utils/schema_inferrer.py +198 -28
  299. airbyte_cdk/utils/slice_hasher.py +30 -0
  300. airbyte_cdk/utils/spec_schema_transformations.py +6 -3
  301. airbyte_cdk/utils/stream_status_utils.py +8 -1
  302. airbyte_cdk/utils/traced_exception.py +61 -21
  303. airbyte_cdk-6.17.1.dev0.dist-info/METADATA +109 -0
  304. airbyte_cdk-6.17.1.dev0.dist-info/RECORD +350 -0
  305. {airbyte_cdk-0.72.0.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/WHEEL +1 -2
  306. airbyte_cdk-6.17.1.dev0.dist-info/entry_points.txt +3 -0
  307. airbyte_cdk/sources/declarative/create_partial.py +0 -92
  308. airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
  309. airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
  310. airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
  311. airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
  312. airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
  313. airbyte_cdk/sources/deprecated/base_source.py +0 -94
  314. airbyte_cdk/sources/deprecated/client.py +0 -99
  315. airbyte_cdk/sources/singer/__init__.py +0 -8
  316. airbyte_cdk/sources/singer/singer_helpers.py +0 -304
  317. airbyte_cdk/sources/singer/source.py +0 -186
  318. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
  319. airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
  320. airbyte_cdk/sources/streams/http/auth/core.py +0 -29
  321. airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
  322. airbyte_cdk/sources/streams/http/auth/token.py +0 -47
  323. airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
  324. airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
  325. airbyte_cdk/sources/utils/schema_models.py +0 -84
  326. airbyte_cdk-0.72.0.dist-info/METADATA +0 -243
  327. airbyte_cdk-0.72.0.dist-info/RECORD +0 -466
  328. airbyte_cdk-0.72.0.dist-info/top_level.txt +0 -3
  329. source_declarative_manifest/main.py +0 -29
  330. unit_tests/connector_builder/__init__.py +0 -3
  331. unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
  332. unit_tests/connector_builder/test_message_grouper.py +0 -713
  333. unit_tests/connector_builder/utils.py +0 -27
  334. unit_tests/destinations/test_destination.py +0 -243
  335. unit_tests/singer/test_singer_helpers.py +0 -56
  336. unit_tests/singer/test_singer_source.py +0 -112
  337. unit_tests/sources/__init__.py +0 -0
  338. unit_tests/sources/concurrent_source/__init__.py +0 -3
  339. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
  340. unit_tests/sources/declarative/__init__.py +0 -3
  341. unit_tests/sources/declarative/auth/__init__.py +0 -3
  342. unit_tests/sources/declarative/auth/test_oauth.py +0 -331
  343. unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
  344. unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
  345. unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
  346. unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
  347. unit_tests/sources/declarative/checks/__init__.py +0 -3
  348. unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
  349. unit_tests/sources/declarative/decoders/__init__.py +0 -0
  350. unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
  351. unit_tests/sources/declarative/external_component.py +0 -13
  352. unit_tests/sources/declarative/extractors/__init__.py +0 -3
  353. unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
  354. unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
  355. unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
  356. unit_tests/sources/declarative/incremental/__init__.py +0 -0
  357. unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
  358. unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
  359. unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
  360. unit_tests/sources/declarative/interpolation/__init__.py +0 -3
  361. unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
  362. unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
  363. unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
  364. unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
  365. unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
  366. unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
  367. unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
  368. unit_tests/sources/declarative/parsers/__init__.py +0 -3
  369. unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
  370. unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
  371. unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1841
  372. unit_tests/sources/declarative/parsers/testing_components.py +0 -36
  373. unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
  374. unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
  375. unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
  376. unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
  377. unit_tests/sources/declarative/requesters/__init__.py +0 -3
  378. unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
  379. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
  380. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
  381. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
  382. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
  383. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
  384. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
  385. unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
  386. unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
  387. unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
  388. unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
  389. unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
  390. unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
  391. unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
  392. unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
  393. unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
  394. unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
  395. unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
  396. unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
  397. unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
  398. unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
  399. unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
  400. unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
  401. unit_tests/sources/declarative/retrievers/__init__.py +0 -3
  402. unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
  403. unit_tests/sources/declarative/schema/__init__.py +0 -6
  404. unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
  405. unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
  406. unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
  407. unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
  408. unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
  409. unit_tests/sources/declarative/states/__init__.py +0 -3
  410. unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
  411. unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
  412. unit_tests/sources/declarative/test_create_partial.py +0 -83
  413. unit_tests/sources/declarative/test_declarative_stream.py +0 -103
  414. unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
  415. unit_tests/sources/declarative/test_types.py +0 -39
  416. unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
  417. unit_tests/sources/file_based/__init__.py +0 -0
  418. unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
  419. unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
  420. unit_tests/sources/file_based/config/__init__.py +0 -0
  421. unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
  422. unit_tests/sources/file_based/config/test_csv_format.py +0 -34
  423. unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
  424. unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
  425. unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
  426. unit_tests/sources/file_based/file_types/__init__.py +0 -0
  427. unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
  428. unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
  429. unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
  430. unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
  431. unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
  432. unit_tests/sources/file_based/helpers.py +0 -70
  433. unit_tests/sources/file_based/in_memory_files_source.py +0 -211
  434. unit_tests/sources/file_based/scenarios/__init__.py +0 -0
  435. unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
  436. unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
  437. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
  438. unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
  439. unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
  440. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
  441. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
  442. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
  443. unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
  444. unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
  445. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
  446. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
  447. unit_tests/sources/file_based/stream/__init__.py +0 -0
  448. unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
  449. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
  450. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
  451. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
  452. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
  453. unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
  454. unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
  455. unit_tests/sources/file_based/test_scenarios.py +0 -253
  456. unit_tests/sources/file_based/test_schema_helpers.py +0 -346
  457. unit_tests/sources/fixtures/__init__.py +0 -3
  458. unit_tests/sources/fixtures/source_test_fixture.py +0 -153
  459. unit_tests/sources/message/__init__.py +0 -0
  460. unit_tests/sources/message/test_repository.py +0 -153
  461. unit_tests/sources/streams/__init__.py +0 -0
  462. unit_tests/sources/streams/concurrent/__init__.py +0 -3
  463. unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
  464. unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
  465. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
  466. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
  467. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
  468. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
  469. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
  470. unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
  471. unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
  472. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
  473. unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
  474. unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
  475. unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
  476. unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
  477. unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
  478. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
  479. unit_tests/sources/streams/http/__init__.py +0 -0
  480. unit_tests/sources/streams/http/auth/__init__.py +0 -0
  481. unit_tests/sources/streams/http/auth/test_auth.py +0 -173
  482. unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
  483. unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
  484. unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
  485. unit_tests/sources/streams/http/test_http.py +0 -635
  486. unit_tests/sources/streams/test_availability_strategy.py +0 -70
  487. unit_tests/sources/streams/test_call_rate.py +0 -300
  488. unit_tests/sources/streams/test_stream_read.py +0 -405
  489. unit_tests/sources/streams/test_streams_core.py +0 -184
  490. unit_tests/sources/test_abstract_source.py +0 -1442
  491. unit_tests/sources/test_concurrent_source.py +0 -112
  492. unit_tests/sources/test_config.py +0 -92
  493. unit_tests/sources/test_connector_state_manager.py +0 -482
  494. unit_tests/sources/test_http_logger.py +0 -252
  495. unit_tests/sources/test_integration_source.py +0 -86
  496. unit_tests/sources/test_source.py +0 -684
  497. unit_tests/sources/test_source_read.py +0 -460
  498. unit_tests/test/__init__.py +0 -0
  499. unit_tests/test/mock_http/__init__.py +0 -0
  500. unit_tests/test/mock_http/test_matcher.py +0 -53
  501. unit_tests/test/mock_http/test_mocker.py +0 -214
  502. unit_tests/test/mock_http/test_request.py +0 -117
  503. unit_tests/test/mock_http/test_response_builder.py +0 -177
  504. unit_tests/test/test_entrypoint_wrapper.py +0 -240
  505. unit_tests/utils/__init__.py +0 -0
  506. unit_tests/utils/test_datetime_format_inferrer.py +0 -60
  507. unit_tests/utils/test_mapping_helpers.py +0 -54
  508. unit_tests/utils/test_message_utils.py +0 -91
  509. unit_tests/utils/test_rate_limiting.py +0 -26
  510. unit_tests/utils/test_schema_inferrer.py +0 -202
  511. unit_tests/utils/test_secret_utils.py +0 -135
  512. unit_tests/utils/test_stream_status_utils.py +0 -61
  513. unit_tests/utils/test_traced_exception.py +0 -107
  514. /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
  515. {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
  516. {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
  517. {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
  518. {airbyte_cdk-0.72.0.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/LICENSE.txt +0 -0
@@ -1,28 +1,42 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
-
5
-
4
+ import copy
6
5
  import inspect
6
+ import itertools
7
7
  import logging
8
- import typing
9
8
  from abc import ABC, abstractmethod
10
- from functools import lru_cache
11
- from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
9
+ from dataclasses import dataclass
10
+ from functools import cached_property, lru_cache
11
+ from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Union
12
+
13
+ from typing_extensions import deprecated
12
14
 
13
15
  import airbyte_cdk.sources.utils.casing as casing
14
- from airbyte_cdk.models import AirbyteMessage, AirbyteStream, ConfiguredAirbyteStream, SyncMode
16
+ from airbyte_cdk.models import (
17
+ AirbyteMessage,
18
+ AirbyteStream,
19
+ ConfiguredAirbyteStream,
20
+ DestinationSyncMode,
21
+ SyncMode,
22
+ )
15
23
  from airbyte_cdk.models import Type as MessageType
24
+ from airbyte_cdk.sources.streams.checkpoint import (
25
+ CheckpointMode,
26
+ CheckpointReader,
27
+ Cursor,
28
+ CursorBasedCheckpointReader,
29
+ FullRefreshCheckpointReader,
30
+ IncrementalCheckpointReader,
31
+ LegacyCursorBasedCheckpointReader,
32
+ ResumableFullRefreshCheckpointReader,
33
+ )
34
+ from airbyte_cdk.sources.types import StreamSlice
16
35
 
17
36
  # list of all possible HTTP methods which can be used for sending of request bodies
18
37
  from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, ResourceSchemaLoader
19
- from airbyte_cdk.sources.utils.slice_logger import SliceLogger
38
+ from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
20
39
  from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
21
- from deprecated.classic import deprecated
22
-
23
- if typing.TYPE_CHECKING:
24
- from airbyte_cdk.sources import Source
25
- from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
26
40
 
27
41
  # A stream's read method can return one of the following types:
28
42
  # Mapping[str, Any]: The content of an AirbyteRecordMessage
@@ -31,9 +45,7 @@ StreamData = Union[Mapping[str, Any], AirbyteMessage]
31
45
 
32
46
  JsonSchema = Mapping[str, Any]
33
47
 
34
- # Streams that only support full refresh don't have a suitable cursor so this sentinel
35
- # value is used to indicate that stream should not load the incoming state value
36
- FULL_REFRESH_SENTINEL_STATE_KEY = "__ab_full_refresh_state_message"
48
+ NO_CURSOR_STATE_KEY = "__ab_no_cursor_state_message"
37
49
 
38
50
 
39
51
  def package_name_from_class(cls: object) -> str:
@@ -45,10 +57,10 @@ def package_name_from_class(cls: object) -> str:
45
57
  raise ValueError(f"Could not find package name for class {cls}")
46
58
 
47
59
 
48
- class IncrementalMixin(ABC):
49
- """Mixin to make stream incremental.
60
+ class CheckpointMixin(ABC):
61
+ """Mixin for a stream that implements reading and writing the internal state used to checkpoint sync progress to the platform
50
62
 
51
- class IncrementalStream(Stream, IncrementalMixin):
63
+ class CheckpointedStream(Stream, CheckpointMixin):
52
64
  @property
53
65
  def state(self):
54
66
  return self._state
@@ -79,11 +91,38 @@ class IncrementalMixin(ABC):
79
91
  """State setter, accept state serialized by state getter."""
80
92
 
81
93
 
94
+ @deprecated(
95
+ "Deprecated as of CDK version 0.87.0. "
96
+ "Deprecated in favor of the `CheckpointMixin` which offers similar functionality."
97
+ )
98
+ class IncrementalMixin(CheckpointMixin, ABC):
99
+ """Mixin to make stream incremental.
100
+
101
+ class IncrementalStream(Stream, IncrementalMixin):
102
+ @property
103
+ def state(self):
104
+ return self._state
105
+
106
+ @state.setter
107
+ def state(self, value):
108
+ self._state[self.cursor_field] = value[self.cursor_field]
109
+ """
110
+
111
+
112
+ @dataclass
113
+ class StreamClassification:
114
+ is_legacy_format: bool
115
+ has_multiple_slices: bool
116
+
117
+
82
118
  class Stream(ABC):
83
119
  """
84
120
  Base abstract class for an Airbyte Stream. Makes no assumption of the Stream's underlying transport protocol.
85
121
  """
86
122
 
123
+ _configured_json_schema: Optional[Dict[str, Any]] = None
124
+ _exit_on_rate_limit: bool = False
125
+
87
126
  # Use self.logger in subclasses to log any messages
88
127
  @property
89
128
  def logger(self) -> logging.Logger:
@@ -92,7 +131,11 @@ class Stream(ABC):
92
131
  # TypeTransformer object to perform output data transformation
93
132
  transformer: TypeTransformer = TypeTransformer(TransformConfig.NoTransform)
94
133
 
95
- @property
134
+ cursor: Optional[Cursor] = None
135
+
136
+ has_multiple_slices = False
137
+
138
+ @cached_property
96
139
  def name(self) -> str:
97
140
  """
98
141
  :return: Stream name. By default this is the implementing class name, but it can be overridden as needed.
@@ -122,62 +165,121 @@ class Stream(ABC):
122
165
  ) -> Iterable[StreamData]:
123
166
  sync_mode = configured_stream.sync_mode
124
167
  cursor_field = configured_stream.cursor_field
168
+ self.configured_json_schema = configured_stream.stream.json_schema
125
169
 
126
- slices = self.stream_slices(
127
- cursor_field=cursor_field,
128
- sync_mode=sync_mode, # todo: change this interface to no longer rely on sync_mode for behavior
129
- stream_state=stream_state,
170
+ # WARNING: When performing a read() that uses incoming stream state, we MUST use the self.state that is defined as
171
+ # opposed to the incoming stream_state value. Because some connectors like ones using the file-based CDK modify
172
+ # state before setting the value on the Stream attribute, the most up-to-date state is derived from Stream.state
173
+ # instead of the stream_state parameter. This does not apply to legacy connectors using get_updated_state().
174
+ try:
175
+ stream_state = self.state # type: ignore # we know the field might not exist...
176
+ except AttributeError:
177
+ pass
178
+
179
+ should_checkpoint = bool(state_manager)
180
+ checkpoint_reader = self._get_checkpoint_reader(
181
+ logger=logger, cursor_field=cursor_field, sync_mode=sync_mode, stream_state=stream_state
130
182
  )
131
- logger.debug(f"Processing stream slices for {self.name} (sync_mode: {sync_mode.name})", extra={"stream_slices": slices})
132
183
 
133
- has_slices = False
184
+ next_slice = checkpoint_reader.next()
134
185
  record_counter = 0
135
- for _slice in slices:
136
- has_slices = True
186
+ stream_state_tracker = copy.deepcopy(stream_state)
187
+ while next_slice is not None:
137
188
  if slice_logger.should_log_slice_message(logger):
138
- yield slice_logger.create_slice_log_message(_slice)
189
+ yield slice_logger.create_slice_log_message(next_slice)
139
190
  records = self.read_records(
140
191
  sync_mode=sync_mode, # todo: change this interface to no longer rely on sync_mode for behavior
141
- stream_slice=_slice,
192
+ stream_slice=next_slice,
142
193
  stream_state=stream_state,
143
194
  cursor_field=cursor_field or None,
144
195
  )
145
196
  for record_data_or_message in records:
146
197
  yield record_data_or_message
147
198
  if isinstance(record_data_or_message, Mapping) or (
148
- hasattr(record_data_or_message, "type") and record_data_or_message.type == MessageType.RECORD
199
+ hasattr(record_data_or_message, "type")
200
+ and record_data_or_message.type == MessageType.RECORD
149
201
  ):
150
- record_data = record_data_or_message if isinstance(record_data_or_message, Mapping) else record_data_or_message.record
151
- stream_state = self.get_updated_state(stream_state, record_data)
202
+ record_data = (
203
+ record_data_or_message
204
+ if isinstance(record_data_or_message, Mapping)
205
+ else record_data_or_message.record
206
+ )
207
+
208
+ # Thanks I hate it. RFR fundamentally doesn't fit with the concept of the legacy Stream.get_updated_state()
209
+ # method because RFR streams rely on pagination as a cursor. Stream.get_updated_state() was designed to make
210
+ # the CDK manage state using specifically the last seen record. don't @ brian.lai
211
+ #
212
+ # Also, because the legacy incremental state case decouples observing incoming records from emitting state, it
213
+ # requires that we separate CheckpointReader.observe() and CheckpointReader.get_checkpoint() which could
214
+ # otherwise be combined.
215
+ if self.cursor_field:
216
+ # Some connectors have streams that implement get_updated_state(), but do not define a cursor_field. This
217
+ # should be fixed on the stream implementation, but we should also protect against this in the CDK as well
218
+ stream_state_tracker = self.get_updated_state(
219
+ stream_state_tracker,
220
+ record_data, # type: ignore [arg-type]
221
+ )
222
+ self._observe_state(checkpoint_reader, stream_state_tracker)
152
223
  record_counter += 1
153
224
 
154
- if sync_mode == SyncMode.incremental:
155
- # Checkpoint intervals are a bit controversial, but see below comment about why we're gating it right now
156
- checkpoint_interval = self.state_checkpoint_interval
157
- if checkpoint_interval and record_counter % checkpoint_interval == 0:
158
- airbyte_state_message = self._checkpoint_state(stream_state, state_manager)
159
- yield airbyte_state_message
225
+ checkpoint_interval = self.state_checkpoint_interval
226
+ checkpoint = checkpoint_reader.get_checkpoint()
227
+ if (
228
+ should_checkpoint
229
+ and checkpoint_interval
230
+ and record_counter % checkpoint_interval == 0
231
+ and checkpoint is not None
232
+ ):
233
+ airbyte_state_message = self._checkpoint_state(
234
+ checkpoint, state_manager=state_manager
235
+ )
236
+ yield airbyte_state_message
160
237
 
161
238
  if internal_config.is_limit_reached(record_counter):
162
239
  break
163
-
164
- if sync_mode == SyncMode.incremental:
165
- # Even though right now, only incremental streams running as incremental mode will emit periodic checkpoints. Rather than
166
- # overhaul how refresh interacts with the platform, this positions the code so that once we want to start emitting
167
- # periodic checkpoints in full refresh mode it can be done here
168
- airbyte_state_message = self._checkpoint_state(stream_state, state_manager)
240
+ self._observe_state(checkpoint_reader)
241
+ checkpoint_state = checkpoint_reader.get_checkpoint()
242
+ if should_checkpoint and checkpoint_state is not None:
243
+ airbyte_state_message = self._checkpoint_state(
244
+ checkpoint_state, state_manager=state_manager
245
+ )
169
246
  yield airbyte_state_message
170
247
 
171
- if not has_slices or sync_mode == SyncMode.full_refresh:
172
- if sync_mode == SyncMode.full_refresh:
173
- # We use a dummy state if there is no suitable value provided by full_refresh streams that do not have a valid cursor.
174
- # Incremental streams running full_refresh mode emit a meaningful state
175
- stream_state = stream_state or {FULL_REFRESH_SENTINEL_STATE_KEY: True}
248
+ next_slice = checkpoint_reader.next()
176
249
 
177
- # We should always emit a final state message for full refresh sync or streams that do not have any slices
178
- airbyte_state_message = self._checkpoint_state(stream_state, state_manager)
250
+ checkpoint = checkpoint_reader.get_checkpoint()
251
+ if should_checkpoint and checkpoint is not None:
252
+ airbyte_state_message = self._checkpoint_state(checkpoint, state_manager=state_manager)
179
253
  yield airbyte_state_message
180
254
 
255
+ def read_only_records(self, state: Optional[Mapping[str, Any]] = None) -> Iterable[StreamData]:
256
+ """
257
+ Helper method that performs a read on a stream with an optional state and emits records. If the parent stream supports
258
+ incremental, this operation does not update the stream's internal state (if it uses the modern state setter/getter)
259
+ or emit state messages.
260
+ """
261
+
262
+ configured_stream = ConfiguredAirbyteStream(
263
+ stream=AirbyteStream(
264
+ name=self.name,
265
+ json_schema={},
266
+ supported_sync_modes=[SyncMode.full_refresh, SyncMode.incremental],
267
+ ),
268
+ sync_mode=SyncMode.incremental if state else SyncMode.full_refresh,
269
+ destination_sync_mode=DestinationSyncMode.append,
270
+ )
271
+
272
+ yield from self.read(
273
+ configured_stream=configured_stream,
274
+ logger=self.logger,
275
+ slice_logger=DebugSliceLogger(),
276
+ stream_state=dict(state)
277
+ if state
278
+ else {}, # read() expects MutableMapping instead of Mapping which is used more often
279
+ state_manager=None,
280
+ internal_config=InternalConfig(), # type: ignore [call-arg]
281
+ )
282
+
181
283
  @abstractmethod
182
284
  def read_records(
183
285
  self,
@@ -202,14 +304,20 @@ class Stream(ABC):
202
304
  return ResourceSchemaLoader(package_name_from_class(self.__class__)).get_schema(self.name)
203
305
 
204
306
  def as_airbyte_stream(self) -> AirbyteStream:
205
- stream = AirbyteStream(name=self.name, json_schema=dict(self.get_json_schema()), supported_sync_modes=[SyncMode.full_refresh])
307
+ stream = AirbyteStream(
308
+ name=self.name,
309
+ json_schema=dict(self.get_json_schema()),
310
+ supported_sync_modes=[SyncMode.full_refresh],
311
+ is_resumable=self.is_resumable,
312
+ )
206
313
 
207
314
  if self.namespace:
208
315
  stream.namespace = self.namespace
209
316
 
317
+ # If we can offer incremental we always should. RFR is always less reliable than incremental which uses a real cursor value
210
318
  if self.supports_incremental:
211
319
  stream.source_defined_cursor = self.source_defined_cursor
212
- stream.supported_sync_modes.append(SyncMode.incremental) # type: ignore
320
+ stream.supported_sync_modes.append(SyncMode.incremental)
213
321
  stream.default_cursor_field = self._wrapped_cursor_field()
214
322
 
215
323
  keys = Stream._wrapped_primary_key(self.primary_key)
@@ -225,6 +333,29 @@ class Stream(ABC):
225
333
  """
226
334
  return len(self._wrapped_cursor_field()) > 0
227
335
 
336
+ @property
337
+ def is_resumable(self) -> bool:
338
+ """
339
+ :return: True if this stream allows the checkpointing of sync progress and can resume from it on subsequent attempts.
340
+ This differs from supports_incremental because certain kinds of streams like those supporting resumable full refresh
341
+ can checkpoint progress in between attempts for improved fault tolerance. However, they will start from the beginning
342
+ on the next sync job.
343
+ """
344
+ if self.supports_incremental:
345
+ return True
346
+ if self.has_multiple_slices:
347
+ # We temporarily gate substreams to not support RFR because it puts a pretty high burden on connector developers
348
+ # to structure stream state in a very specific way. We also can't check for issubclass(HttpSubStream) because
349
+ # not all substreams implement the interface and it would be a circular dependency so we use parent as a surrogate
350
+ return False
351
+ elif hasattr(type(self), "state") and getattr(type(self), "state").fset is not None:
352
+ # Modern case where a stream manages state using getter/setter
353
+ return True
354
+ else:
355
+ # Legacy case where the CDK manages state via the get_updated_state() method. This is determined by checking if
356
+ # the stream's get_updated_state() differs from the Stream class and therefore has been overridden
357
+ return type(self).get_updated_state != Stream.get_updated_state
358
+
228
359
  def _wrapped_cursor_field(self) -> List[str]:
229
360
  return [self.cursor_field] if isinstance(self.cursor_field, str) else self.cursor_field
230
361
 
@@ -251,27 +382,15 @@ class Stream(ABC):
251
382
  """
252
383
  return True
253
384
 
254
- def check_availability(self, logger: logging.Logger, source: Optional["Source"] = None) -> Tuple[bool, Optional[str]]:
255
- """
256
- Checks whether this stream is available.
257
-
258
- :param logger: source logger
259
- :param source: (optional) source
260
- :return: A tuple of (boolean, str). If boolean is true, then this stream
261
- is available, and no str is required. Otherwise, this stream is unavailable
262
- for some reason and the str should describe what went wrong and how to
263
- resolve the unavailability, if possible.
264
- """
265
- if self.availability_strategy:
266
- return self.availability_strategy.check_availability(self, logger, source)
267
- return True, None
268
-
269
385
  @property
270
- def availability_strategy(self) -> Optional["AvailabilityStrategy"]:
271
- """
272
- :return: The AvailabilityStrategy used to check whether this stream is available.
273
- """
274
- return None
386
+ def exit_on_rate_limit(self) -> bool:
387
+ """Exit on rate limit getter, should return bool value. False if the stream will retry endlessly when rate limited."""
388
+ return self._exit_on_rate_limit
389
+
390
+ @exit_on_rate_limit.setter
391
+ def exit_on_rate_limit(self, value: bool) -> None:
392
+ """Exit on rate limit setter, accept bool value."""
393
+ self._exit_on_rate_limit = value
275
394
 
276
395
  @property
277
396
  @abstractmethod
@@ -282,7 +401,11 @@ class Stream(ABC):
282
401
  """
283
402
 
284
403
  def stream_slices(
285
- self, *, sync_mode: SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None
404
+ self,
405
+ *,
406
+ sync_mode: SyncMode,
407
+ cursor_field: Optional[List[str]] = None,
408
+ stream_state: Optional[Mapping[str, Any]] = None,
286
409
  ) -> Iterable[Optional[Mapping[str, Any]]]:
287
410
  """
288
411
  Override to define the slices for this stream. See the stream slicing section of the docs for more information.
@@ -292,7 +415,7 @@ class Stream(ABC):
292
415
  :param stream_state:
293
416
  :return:
294
417
  """
295
- return [None]
418
+ yield StreamSlice(partition={}, cursor_slice={})
296
419
 
297
420
  @property
298
421
  def state_checkpoint_interval(self) -> Optional[int]:
@@ -308,11 +431,18 @@ class Stream(ABC):
308
431
  """
309
432
  return None
310
433
 
311
- @deprecated(version="0.1.49", reason="You should use explicit state property instead, see IncrementalMixin docs.")
434
+ # Commented-out to avoid any runtime penalty, since this is used in a hot per-record codepath.
435
+ # To be evaluated for re-introduction here: https://github.com/airbytehq/airbyte-python-cdk/issues/116
436
+ # @deprecated(
437
+ # "Deprecated method `get_updated_state` as of CDK version 0.1.49. "
438
+ # "Please use explicit state property instead, see `IncrementalMixin` docs."
439
+ # )
312
440
  def get_updated_state(
313
441
  self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]
314
442
  ) -> MutableMapping[str, Any]:
315
- """Override to extract state from the latest record. Needed to implement incremental sync.
443
+ """DEPRECATED. Please use explicit state property instead, see `IncrementalMixin` docs.
444
+
445
+ Override to extract state from the latest record. Needed to implement incremental sync.
316
446
 
317
447
  Inspects the latest record extracted from the data source and the current state object and return an updated state object.
318
448
 
@@ -325,6 +455,138 @@ class Stream(ABC):
325
455
  """
326
456
  return {}
327
457
 
458
+ def get_cursor(self) -> Optional[Cursor]:
459
+ """
460
+ A Cursor is an interface that a stream can implement to manage how its internal state is read and updated while
461
+ reading records. Historically, Python connectors had no concept of a cursor to manage state. Python streams need
462
+ to define a cursor implementation and override this method to manage state through a Cursor.
463
+ """
464
+ return self.cursor
465
+
466
+ def _get_checkpoint_reader(
467
+ self,
468
+ logger: logging.Logger,
469
+ cursor_field: Optional[List[str]],
470
+ sync_mode: SyncMode,
471
+ stream_state: MutableMapping[str, Any],
472
+ ) -> CheckpointReader:
473
+ mappings_or_slices = self.stream_slices(
474
+ cursor_field=cursor_field,
475
+ sync_mode=sync_mode, # todo: change this interface to no longer rely on sync_mode for behavior
476
+ stream_state=stream_state,
477
+ )
478
+
479
+ # Because of poor foresight, we wrote the default Stream.stream_slices() method to return [None] which is confusing and
480
+ # has now normalized this behavior for connector developers. Now some connectors return [None]. This is objectively
481
+ # misleading and a more ideal interface is [{}] to indicate we still want to iterate over one slice, but with no
482
+ # specific slice values. None is bad, and now I feel bad that I have to write this hack.
483
+ if mappings_or_slices == [None]:
484
+ mappings_or_slices = [{}]
485
+
486
+ slices_iterable_copy, iterable_for_detecting_format = itertools.tee(mappings_or_slices, 2)
487
+ stream_classification = self._classify_stream(
488
+ mappings_or_slices=iterable_for_detecting_format
489
+ )
490
+
491
+ # Streams that override has_multiple_slices are explicitly indicating that they will iterate over
492
+ # multiple partitions. Inspecting slices to automatically apply the correct cursor is only needed as
493
+ # a backup. So if this value was already assigned to True by the stream, we don't need to reassign it
494
+ self.has_multiple_slices = (
495
+ self.has_multiple_slices or stream_classification.has_multiple_slices
496
+ )
497
+
498
+ cursor = self.get_cursor()
499
+ if cursor:
500
+ cursor.set_initial_state(stream_state=stream_state)
501
+
502
+ checkpoint_mode = self._checkpoint_mode
503
+
504
+ if cursor and stream_classification.is_legacy_format:
505
+ return LegacyCursorBasedCheckpointReader(
506
+ stream_slices=slices_iterable_copy, cursor=cursor, read_state_from_cursor=True
507
+ )
508
+ elif cursor:
509
+ return CursorBasedCheckpointReader(
510
+ stream_slices=slices_iterable_copy,
511
+ cursor=cursor,
512
+ read_state_from_cursor=checkpoint_mode == CheckpointMode.RESUMABLE_FULL_REFRESH,
513
+ )
514
+ elif checkpoint_mode == CheckpointMode.RESUMABLE_FULL_REFRESH:
515
+ # Resumable full refresh readers rely on the stream state dynamically being updated during pagination and do
516
+ # not iterate over a static set of slices.
517
+ return ResumableFullRefreshCheckpointReader(stream_state=stream_state)
518
+ elif checkpoint_mode == CheckpointMode.INCREMENTAL:
519
+ return IncrementalCheckpointReader(
520
+ stream_slices=slices_iterable_copy, stream_state=stream_state
521
+ )
522
+ else:
523
+ return FullRefreshCheckpointReader(stream_slices=slices_iterable_copy)
524
+
525
+ @property
526
+ def _checkpoint_mode(self) -> CheckpointMode:
527
+ if self.is_resumable and len(self._wrapped_cursor_field()) > 0:
528
+ return CheckpointMode.INCREMENTAL
529
+ elif self.is_resumable:
530
+ return CheckpointMode.RESUMABLE_FULL_REFRESH
531
+ else:
532
+ return CheckpointMode.FULL_REFRESH
533
+
534
+ @staticmethod
535
+ def _classify_stream(
536
+ mappings_or_slices: Iterator[Optional[Union[Mapping[str, Any], StreamSlice]]],
537
+ ) -> StreamClassification:
538
+ """
539
+ This is a bit of a crazy solution, but also the only way we can detect certain attributes about the stream since Python
540
+ streams do not follow consistent implementation patterns. We care about the following two attributes:
541
+ - has_multiple_slices (i.e. the stream behaves like a substream): Helps to incrementally release changes since substreams w/ parents are much more complicated. Also
542
+ helps de-risk the release of changes that might impact all connectors
543
+ - is_legacy_format: Since the checkpoint reader must manage a complex state object, we opted to have it always
544
+ use the structured StreamSlice object. However, this requires backwards compatibility with Python sources that only
545
+ support the legacy mapping object
546
+
547
+ Both attributes can eventually be deprecated, and this method deleted, once substreams have been implemented and
548
+ legacy connectors all adhere to the StreamSlice object.
549
+ """
550
+ if not mappings_or_slices:
551
+ raise ValueError("A stream should always have at least one slice")
552
+ try:
553
+ next_slice = next(mappings_or_slices)
554
+ if isinstance(next_slice, StreamSlice) and next_slice == StreamSlice(
555
+ partition={}, cursor_slice={}
556
+ ):
557
+ is_legacy_format = False
558
+ slice_has_value = False
559
+ elif next_slice == {}:
560
+ is_legacy_format = True
561
+ slice_has_value = False
562
+ elif isinstance(next_slice, StreamSlice):
563
+ is_legacy_format = False
564
+ slice_has_value = True
565
+ else:
566
+ is_legacy_format = True
567
+ slice_has_value = True
568
+ except StopIteration:
569
+ # If the stream has no slices, the format ultimately does not matter since no data will get synced. This is technically
570
+ # a valid case because it is up to the stream to define its slicing behavior
571
+ return StreamClassification(is_legacy_format=False, has_multiple_slices=False)
572
+
573
+ if slice_has_value:
574
+ # If the first slice contained a partition value from the result of stream_slices(), this is a substream that might
575
+ # have multiple parent records to iterate over
576
+ return StreamClassification(
577
+ is_legacy_format=is_legacy_format, has_multiple_slices=slice_has_value
578
+ )
579
+
580
+ try:
581
+ # If stream_slices() returns multiple slices, this is also a substream that can potentially generate empty slices
582
+ next(mappings_or_slices)
583
+ return StreamClassification(is_legacy_format=is_legacy_format, has_multiple_slices=True)
584
+ except StopIteration:
585
+ # If the result of stream_slices() only returns a single empty stream slice, then we know this is a regular stream
586
+ return StreamClassification(
587
+ is_legacy_format=is_legacy_format, has_multiple_slices=False
588
+ )
589
+
328
590
  def log_stream_sync_configuration(self) -> None:
329
591
  """
330
592
  Logs the configuration of this stream.
@@ -338,7 +600,9 @@ class Stream(ABC):
338
600
  )
339
601
 
340
602
  @staticmethod
341
- def _wrapped_primary_key(keys: Optional[Union[str, List[str], List[List[str]]]]) -> Optional[List[List[str]]]:
603
+ def _wrapped_primary_key(
604
+ keys: Optional[Union[str, List[str], List[List[str]]]],
605
+ ) -> Optional[List[List[str]]]:
342
606
  """
343
607
  :return: wrap the primary_key property in a list of list of strings required by the Airbyte Stream object.
344
608
  """
@@ -360,19 +624,80 @@ class Stream(ABC):
360
624
  else:
361
625
  raise ValueError(f"Element must be either list or str. Got: {type(keys)}")
362
626
 
627
def _observe_state(
    self, checkpoint_reader: CheckpointReader, stream_state: Optional[Mapping[str, Any]] = None
) -> None:
    """
    Report the stream's most recent state to the checkpoint reader.

    A non-empty ``stream_state`` is observed as-is. Otherwise, and only when this class does not override
    ``Stream.get_updated_state``, the stream's ``state`` attribute is read and observed if it is non-empty.
    A stream that has no ``state`` attribute is skipped silently.

    :param checkpoint_reader: object whose ``observe()`` method receives the state
    :param stream_state: state supplied by the caller; ``None`` or an empty mapping triggers the fallback to ``self.state``
    """

    # The caller-supplied state takes precedence over the state property. Per the original author's note, HttpStream
    # classes now ship default state getter/setter methods, so checking the property first (the previous behavior)
    # is no longer a reliable way to tell which mechanism a stream actually uses.
    if stream_state:
        checkpoint_reader.observe(stream_state)
        return

    # A stream that overrides the legacy get_updated_state() never falls back to the state property
    overrides_legacy_method = not (type(self).get_updated_state == Stream.get_updated_state)
    if overrides_legacy_method:
        return

    try:
        current_state = self.state  # type: ignore # This will always exist on HttpStreams, but may not for Stream
        if current_state:
            checkpoint_reader.observe(current_state)
    except AttributeError:
        # No state attribute is available on this stream, so there is nothing to observe
        pass
651
+
363
652
  def _checkpoint_state( # type: ignore # ignoring typing for ConnectorStateManager because of circular dependencies
364
653
  self,
365
654
  stream_state: Mapping[str, Any],
366
655
  state_manager,
367
656
  ) -> AirbyteMessage:
368
- # First attempt to retrieve the current state using the stream's state property. We receive an AttributeError if the state
369
- # property is not implemented by the stream instance and as a fallback, use the stream_state retrieved from the stream
370
- # instance's deprecated get_updated_state() method.
371
- try:
372
- state_manager.update_state_for_stream(
373
- self.name, self.namespace, self.state # type: ignore # we know the field might not exist...
657
+ # todo: This can be consolidated into one ConnectorStateManager.update_and_create_state_message() method, but I want
658
+ # to reduce changes right now and this would span concurrent as well
659
+ state_manager.update_state_for_stream(self.name, self.namespace, stream_state)
660
+ return state_manager.create_state_message(self.name, self.namespace) # type: ignore [no-any-return]
661
+
662
@property
def configured_json_schema(self) -> Optional[Dict[str, Any]]:
    """
    JSON schema taken from the configured catalog.

    The original note states that this property is set from the read method.

    :return Optional[Dict]: the stored configured schema if one was provided, otherwise None.
    """
    stored_schema = self._configured_json_schema
    return stored_schema

@configured_json_schema.setter
def configured_json_schema(self, json_schema: Dict[str, Any]) -> None:
    # The schema is passed through _filter_schema_invalid_properties() before it is stored, so the stored value
    # is the filtered schema and not necessarily the object that was assigned
    filtered_schema = self._filter_schema_invalid_properties(json_schema)
    self._configured_json_schema = filtered_schema
674
+
675
def _filter_schema_invalid_properties(
    self, configured_catalog_json_schema: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Remove properties from the configured catalog schema that the stream's own schema does not declare.

    Configured schemas can still carry very old fields, so they are cleaned up here.

    :param configured_catalog_json_schema: JSON schema coming from the configured catalog
    :return: the input object itself when every configured property exists in ``self.get_json_schema()``; otherwise a
        new dict with the same top-level keys whose ``"properties"`` holds only the properties present in both
        schemas. The kept definitions are taken from the stream schema, and they appear in the configured schema's order.
    """
    configured_properties: Any = configured_catalog_json_schema.get("properties", {})
    stream_schema_properties: Any = self.get_json_schema().get("properties", {})

    # Key views support set operations: everything configured but unknown to the stream schema is invalid
    invalid_properties = configured_properties.keys() - stream_schema_properties.keys()
    if not invalid_properties:
        return configured_catalog_json_schema

    self.logger.warning(
        f"Stream {self.name}: the following fields are deprecated and cannot be synced. {invalid_properties}. Refresh the connection's source schema to resolve this warning."
    )

    # Walk the configured properties rather than a set intersection so that the resulting key order is
    # deterministic (set iteration order of strings varies between interpreter runs)
    valid_configured_schema_properties = {
        property_name: stream_schema_properties[property_name]
        for property_name in configured_properties
        if property_name in stream_schema_properties
    }

    return {**configured_catalog_json_schema, "properties": valid_configured_schema_properties}
@@ -5,5 +5,6 @@
5
5
  # Initialize Streams Package
6
6
  from .exceptions import UserDefinedBackoffException
7
7
  from .http import HttpStream, HttpSubStream
8
+ from .http_client import HttpClient
8
9
 
9
- __all__ = ["HttpStream", "HttpSubStream", "UserDefinedBackoffException"]
10
+ __all__ = ["HttpClient", "HttpStream", "HttpSubStream", "UserDefinedBackoffException"]