airbyte-cdk 0.72.1__py3-none-any.whl → 6.17.1.dev0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (518) hide show
  1. airbyte_cdk/__init__.py +355 -6
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +29 -10
  7. airbyte_cdk/connector.py +24 -24
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
  10. airbyte_cdk/connector_builder/main.py +45 -13
  11. airbyte_cdk/connector_builder/message_grouper.py +189 -50
  12. airbyte_cdk/connector_builder/models.py +3 -2
  13. airbyte_cdk/destinations/__init__.py +4 -3
  14. airbyte_cdk/destinations/destination.py +54 -20
  15. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  16. airbyte_cdk/destinations/vector_db_based/config.py +40 -17
  17. airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
  18. airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
  19. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  20. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  21. airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
  22. airbyte_cdk/entrypoint.py +153 -44
  23. airbyte_cdk/exception_handler.py +21 -3
  24. airbyte_cdk/logger.py +30 -44
  25. airbyte_cdk/models/__init__.py +13 -2
  26. airbyte_cdk/models/airbyte_protocol.py +86 -1
  27. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  28. airbyte_cdk/models/file_transfer_record_message.py +13 -0
  29. airbyte_cdk/models/well_known_types.py +1 -1
  30. airbyte_cdk/sources/__init__.py +5 -1
  31. airbyte_cdk/sources/abstract_source.py +125 -79
  32. airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
  33. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
  34. airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
  35. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
  36. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  37. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
  38. airbyte_cdk/sources/config.py +3 -2
  39. airbyte_cdk/sources/connector_state_manager.py +49 -83
  40. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  41. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
  42. airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
  43. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  44. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  45. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  46. airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
  47. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  48. airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
  49. airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
  50. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
  51. airbyte_cdk/sources/declarative/auth/token.py +28 -10
  52. airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
  53. airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
  54. airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
  55. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  56. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  57. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +490 -0
  58. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
  59. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
  60. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1185 -85
  61. airbyte_cdk/sources/declarative/declarative_source.py +5 -2
  62. airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
  63. airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
  64. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
  65. airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
  66. airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
  67. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  68. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  69. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  70. airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
  71. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
  72. airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
  73. airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
  74. airbyte_cdk/sources/declarative/extractors/record_filter.py +63 -8
  75. airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
  76. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
  77. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  78. airbyte_cdk/sources/declarative/incremental/__init__.py +31 -3
  79. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +346 -0
  80. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
  81. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  82. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
  83. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +173 -74
  84. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  85. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  86. airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
  87. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
  88. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
  89. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
  90. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
  91. airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
  92. airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
  93. airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
  94. airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
  95. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  96. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  97. airbyte_cdk/sources/declarative/models/__init__.py +1 -1
  98. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1319 -603
  99. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
  100. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
  101. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
  102. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1759 -225
  103. airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
  104. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  105. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  106. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
  107. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  108. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
  109. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
  110. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
  111. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
  112. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
  113. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
  114. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
  115. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
  116. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
  117. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
  118. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
  119. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
  120. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  121. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
  122. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
  123. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
  124. airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
  125. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
  126. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
  127. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
  128. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
  129. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
  130. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
  131. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
  132. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
  133. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
  134. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
  135. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
  136. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
  137. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  138. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
  139. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
  140. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
  141. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
  142. airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
  143. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  144. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  145. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  146. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  147. airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
  148. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
  149. airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
  150. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +229 -73
  151. airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
  152. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
  153. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
  154. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
  155. airbyte_cdk/sources/declarative/spec/spec.py +12 -5
  156. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
  157. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
  158. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
  159. airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
  160. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  161. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  162. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  163. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  164. airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
  165. airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
  166. airbyte_cdk/sources/declarative/types.py +19 -110
  167. airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
  168. airbyte_cdk/sources/embedded/base_integration.py +16 -5
  169. airbyte_cdk/sources/embedded/catalog.py +16 -4
  170. airbyte_cdk/sources/embedded/runner.py +19 -3
  171. airbyte_cdk/sources/embedded/tools.py +5 -2
  172. airbyte_cdk/sources/file_based/README.md +152 -0
  173. airbyte_cdk/sources/file_based/__init__.py +24 -0
  174. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
  175. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
  176. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
  177. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +47 -10
  178. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  179. airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
  180. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  181. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
  182. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  183. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  184. airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
  185. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
  186. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  187. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  188. airbyte_cdk/sources/file_based/exceptions.py +18 -15
  189. airbyte_cdk/sources/file_based/file_based_source.py +140 -33
  190. airbyte_cdk/sources/file_based/file_based_stream_reader.py +69 -5
  191. airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
  192. airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
  193. airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
  194. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  195. airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
  196. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  197. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
  198. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
  199. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +141 -41
  200. airbyte_cdk/sources/file_based/remote_file.py +1 -1
  201. airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
  202. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
  203. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  204. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  205. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
  206. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
  207. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
  208. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
  209. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
  210. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
  211. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  212. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
  213. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +147 -45
  214. airbyte_cdk/sources/http_logger.py +8 -3
  215. airbyte_cdk/sources/message/__init__.py +7 -1
  216. airbyte_cdk/sources/message/repository.py +18 -4
  217. airbyte_cdk/sources/source.py +42 -38
  218. airbyte_cdk/sources/streams/__init__.py +2 -2
  219. airbyte_cdk/sources/streams/availability_strategy.py +54 -3
  220. airbyte_cdk/sources/streams/call_rate.py +64 -21
  221. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  222. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  223. airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
  224. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  225. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  226. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  227. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  228. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
  229. airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
  230. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
  231. airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
  232. airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
  233. airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
  234. airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
  235. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
  236. airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
  237. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
  238. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  239. airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
  240. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
  241. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
  242. airbyte_cdk/sources/streams/core.py +412 -87
  243. airbyte_cdk/sources/streams/http/__init__.py +2 -1
  244. airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
  245. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  246. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  247. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  248. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  249. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  250. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  251. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  252. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  253. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  254. airbyte_cdk/sources/streams/http/exceptions.py +27 -7
  255. airbyte_cdk/sources/streams/http/http.py +369 -246
  256. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  257. airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
  258. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
  259. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
  260. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
  261. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  262. airbyte_cdk/sources/types.py +154 -0
  263. airbyte_cdk/sources/utils/record_helper.py +36 -21
  264. airbyte_cdk/sources/utils/schema_helpers.py +13 -6
  265. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  266. airbyte_cdk/sources/utils/transform.py +54 -20
  267. airbyte_cdk/sql/_util/hashing.py +34 -0
  268. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  269. airbyte_cdk/sql/constants.py +32 -0
  270. airbyte_cdk/sql/exceptions.py +235 -0
  271. airbyte_cdk/sql/secrets.py +123 -0
  272. airbyte_cdk/sql/shared/__init__.py +15 -0
  273. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  274. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  275. airbyte_cdk/sql/types.py +160 -0
  276. airbyte_cdk/test/catalog_builder.py +70 -18
  277. airbyte_cdk/test/entrypoint_wrapper.py +117 -42
  278. airbyte_cdk/test/mock_http/__init__.py +1 -1
  279. airbyte_cdk/test/mock_http/matcher.py +6 -0
  280. airbyte_cdk/test/mock_http/mocker.py +57 -10
  281. airbyte_cdk/test/mock_http/request.py +19 -3
  282. airbyte_cdk/test/mock_http/response.py +3 -1
  283. airbyte_cdk/test/mock_http/response_builder.py +32 -16
  284. airbyte_cdk/test/state_builder.py +18 -10
  285. airbyte_cdk/test/utils/__init__.py +1 -0
  286. airbyte_cdk/test/utils/data.py +24 -0
  287. airbyte_cdk/test/utils/http_mocking.py +16 -0
  288. airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
  289. airbyte_cdk/test/utils/reading.py +26 -0
  290. airbyte_cdk/utils/__init__.py +2 -1
  291. airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
  292. airbyte_cdk/utils/analytics_message.py +10 -2
  293. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  294. airbyte_cdk/utils/event_timing.py +10 -10
  295. airbyte_cdk/utils/mapping_helpers.py +3 -1
  296. airbyte_cdk/utils/message_utils.py +20 -11
  297. airbyte_cdk/utils/print_buffer.py +75 -0
  298. airbyte_cdk/utils/schema_inferrer.py +198 -28
  299. airbyte_cdk/utils/slice_hasher.py +30 -0
  300. airbyte_cdk/utils/spec_schema_transformations.py +6 -3
  301. airbyte_cdk/utils/stream_status_utils.py +8 -1
  302. airbyte_cdk/utils/traced_exception.py +61 -21
  303. airbyte_cdk-6.17.1.dev0.dist-info/METADATA +109 -0
  304. airbyte_cdk-6.17.1.dev0.dist-info/RECORD +350 -0
  305. {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/WHEEL +1 -2
  306. airbyte_cdk-6.17.1.dev0.dist-info/entry_points.txt +3 -0
  307. airbyte_cdk/sources/declarative/create_partial.py +0 -92
  308. airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
  309. airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
  310. airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
  311. airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
  312. airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
  313. airbyte_cdk/sources/deprecated/base_source.py +0 -94
  314. airbyte_cdk/sources/deprecated/client.py +0 -99
  315. airbyte_cdk/sources/singer/__init__.py +0 -8
  316. airbyte_cdk/sources/singer/singer_helpers.py +0 -304
  317. airbyte_cdk/sources/singer/source.py +0 -186
  318. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
  319. airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
  320. airbyte_cdk/sources/streams/http/auth/core.py +0 -29
  321. airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
  322. airbyte_cdk/sources/streams/http/auth/token.py +0 -47
  323. airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
  324. airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
  325. airbyte_cdk/sources/utils/schema_models.py +0 -84
  326. airbyte_cdk-0.72.1.dist-info/METADATA +0 -243
  327. airbyte_cdk-0.72.1.dist-info/RECORD +0 -466
  328. airbyte_cdk-0.72.1.dist-info/top_level.txt +0 -3
  329. source_declarative_manifest/main.py +0 -29
  330. unit_tests/connector_builder/__init__.py +0 -3
  331. unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
  332. unit_tests/connector_builder/test_message_grouper.py +0 -713
  333. unit_tests/connector_builder/utils.py +0 -27
  334. unit_tests/destinations/test_destination.py +0 -243
  335. unit_tests/singer/test_singer_helpers.py +0 -56
  336. unit_tests/singer/test_singer_source.py +0 -112
  337. unit_tests/sources/__init__.py +0 -0
  338. unit_tests/sources/concurrent_source/__init__.py +0 -3
  339. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
  340. unit_tests/sources/declarative/__init__.py +0 -3
  341. unit_tests/sources/declarative/auth/__init__.py +0 -3
  342. unit_tests/sources/declarative/auth/test_oauth.py +0 -331
  343. unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
  344. unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
  345. unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
  346. unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
  347. unit_tests/sources/declarative/checks/__init__.py +0 -3
  348. unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
  349. unit_tests/sources/declarative/decoders/__init__.py +0 -0
  350. unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
  351. unit_tests/sources/declarative/external_component.py +0 -13
  352. unit_tests/sources/declarative/extractors/__init__.py +0 -3
  353. unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
  354. unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
  355. unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
  356. unit_tests/sources/declarative/incremental/__init__.py +0 -0
  357. unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
  358. unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
  359. unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
  360. unit_tests/sources/declarative/interpolation/__init__.py +0 -3
  361. unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
  362. unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
  363. unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
  364. unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
  365. unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
  366. unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
  367. unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
  368. unit_tests/sources/declarative/parsers/__init__.py +0 -3
  369. unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
  370. unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
  371. unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1847
  372. unit_tests/sources/declarative/parsers/testing_components.py +0 -36
  373. unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
  374. unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
  375. unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
  376. unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
  377. unit_tests/sources/declarative/requesters/__init__.py +0 -3
  378. unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
  379. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
  380. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
  381. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
  382. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
  383. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
  384. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
  385. unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
  386. unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
  387. unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
  388. unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
  389. unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
  390. unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
  391. unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
  392. unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
  393. unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
  394. unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
  395. unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
  396. unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
  397. unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
  398. unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
  399. unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
  400. unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
  401. unit_tests/sources/declarative/retrievers/__init__.py +0 -3
  402. unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
  403. unit_tests/sources/declarative/schema/__init__.py +0 -6
  404. unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
  405. unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
  406. unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
  407. unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
  408. unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
  409. unit_tests/sources/declarative/states/__init__.py +0 -3
  410. unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
  411. unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
  412. unit_tests/sources/declarative/test_create_partial.py +0 -83
  413. unit_tests/sources/declarative/test_declarative_stream.py +0 -103
  414. unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
  415. unit_tests/sources/declarative/test_types.py +0 -39
  416. unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
  417. unit_tests/sources/file_based/__init__.py +0 -0
  418. unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
  419. unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
  420. unit_tests/sources/file_based/config/__init__.py +0 -0
  421. unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
  422. unit_tests/sources/file_based/config/test_csv_format.py +0 -34
  423. unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
  424. unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
  425. unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
  426. unit_tests/sources/file_based/file_types/__init__.py +0 -0
  427. unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
  428. unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
  429. unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
  430. unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
  431. unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
  432. unit_tests/sources/file_based/helpers.py +0 -70
  433. unit_tests/sources/file_based/in_memory_files_source.py +0 -211
  434. unit_tests/sources/file_based/scenarios/__init__.py +0 -0
  435. unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
  436. unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
  437. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
  438. unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
  439. unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
  440. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
  441. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
  442. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
  443. unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
  444. unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
  445. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
  446. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
  447. unit_tests/sources/file_based/stream/__init__.py +0 -0
  448. unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
  449. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
  450. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
  451. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
  452. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
  453. unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
  454. unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
  455. unit_tests/sources/file_based/test_scenarios.py +0 -253
  456. unit_tests/sources/file_based/test_schema_helpers.py +0 -346
  457. unit_tests/sources/fixtures/__init__.py +0 -3
  458. unit_tests/sources/fixtures/source_test_fixture.py +0 -153
  459. unit_tests/sources/message/__init__.py +0 -0
  460. unit_tests/sources/message/test_repository.py +0 -153
  461. unit_tests/sources/streams/__init__.py +0 -0
  462. unit_tests/sources/streams/concurrent/__init__.py +0 -3
  463. unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
  464. unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
  465. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
  466. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
  467. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
  468. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
  469. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
  470. unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
  471. unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
  472. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
  473. unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
  474. unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
  475. unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
  476. unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
  477. unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
  478. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
  479. unit_tests/sources/streams/http/__init__.py +0 -0
  480. unit_tests/sources/streams/http/auth/__init__.py +0 -0
  481. unit_tests/sources/streams/http/auth/test_auth.py +0 -173
  482. unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
  483. unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
  484. unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
  485. unit_tests/sources/streams/http/test_http.py +0 -635
  486. unit_tests/sources/streams/test_availability_strategy.py +0 -70
  487. unit_tests/sources/streams/test_call_rate.py +0 -300
  488. unit_tests/sources/streams/test_stream_read.py +0 -405
  489. unit_tests/sources/streams/test_streams_core.py +0 -184
  490. unit_tests/sources/test_abstract_source.py +0 -1442
  491. unit_tests/sources/test_concurrent_source.py +0 -112
  492. unit_tests/sources/test_config.py +0 -92
  493. unit_tests/sources/test_connector_state_manager.py +0 -482
  494. unit_tests/sources/test_http_logger.py +0 -252
  495. unit_tests/sources/test_integration_source.py +0 -86
  496. unit_tests/sources/test_source.py +0 -684
  497. unit_tests/sources/test_source_read.py +0 -460
  498. unit_tests/test/__init__.py +0 -0
  499. unit_tests/test/mock_http/__init__.py +0 -0
  500. unit_tests/test/mock_http/test_matcher.py +0 -53
  501. unit_tests/test/mock_http/test_mocker.py +0 -214
  502. unit_tests/test/mock_http/test_request.py +0 -117
  503. unit_tests/test/mock_http/test_response_builder.py +0 -177
  504. unit_tests/test/test_entrypoint_wrapper.py +0 -240
  505. unit_tests/utils/__init__.py +0 -0
  506. unit_tests/utils/test_datetime_format_inferrer.py +0 -60
  507. unit_tests/utils/test_mapping_helpers.py +0 -54
  508. unit_tests/utils/test_message_utils.py +0 -91
  509. unit_tests/utils/test_rate_limiting.py +0 -26
  510. unit_tests/utils/test_schema_inferrer.py +0 -202
  511. unit_tests/utils/test_secret_utils.py +0 -135
  512. unit_tests/utils/test_stream_status_utils.py +0 -61
  513. unit_tests/utils/test_traced_exception.py +0 -107
  514. /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
  515. {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
  516. {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
  517. {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
  518. {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/LICENSE.txt +0 -0
@@ -8,12 +8,20 @@ from enum import Enum
8
8
  from functools import total_ordering
9
9
  from typing import Any, Dict, List, Literal, Mapping, Optional, Tuple, Type, Union
10
10
 
11
- from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError, SchemaInferenceError
11
+ from airbyte_cdk.sources.file_based.exceptions import (
12
+ ConfigValidationError,
13
+ FileBasedSourceError,
14
+ SchemaInferenceError,
15
+ )
12
16
 
13
17
  JsonSchemaSupportedType = Union[List[str], Literal["string"], str]
14
18
  SchemaType = Mapping[str, Mapping[str, JsonSchemaSupportedType]]
15
19
 
16
20
  schemaless_schema = {"type": "object", "properties": {"data": {"type": "object"}}}
21
+ file_transfer_schema = {
22
+ "type": "object",
23
+ "properties": {"data": {"type": "object"}, "file": {"type": "object"}},
24
+ }
17
25
 
18
26
 
19
27
  @total_ordering
@@ -117,14 +125,23 @@ def _is_valid_type(t: JsonSchemaSupportedType) -> bool:
117
125
 
118
126
 
119
127
  def _choose_wider_type(key: str, t1: Mapping[str, Any], t2: Mapping[str, Any]) -> Mapping[str, Any]:
120
- if (t1["type"] == "array" or t2["type"] == "array") and t1 != t2:
128
+ t1_type = t1["type"]
129
+ t2_type = t2["type"]
130
+
131
+ if (t1_type == "array" or t2_type == "array") and t1 != t2:
121
132
  raise SchemaInferenceError(
122
133
  FileBasedSourceError.SCHEMA_INFERENCE_ERROR,
123
134
  details="Cannot merge schema for unequal array types.",
124
135
  key=key,
125
136
  detected_types=f"{t1},{t2}",
126
137
  )
127
- elif (t1["type"] == "object" or t2["type"] == "object") and t1 != t2:
138
+ # Schemas can still be merged if a key contains a null value in either t1 or t2, but it is still an object
139
+ elif (
140
+ (t1_type == "object" or t2_type == "object")
141
+ and t1_type != "null"
142
+ and t2_type != "null"
143
+ and t1 != t2
144
+ ):
128
145
  raise SchemaInferenceError(
129
146
  FileBasedSourceError.SCHEMA_INFERENCE_ERROR,
130
147
  details="Cannot merge schema for unequal object types.",
@@ -132,12 +149,19 @@ def _choose_wider_type(key: str, t1: Mapping[str, Any], t2: Mapping[str, Any]) -
132
149
  detected_types=f"{t1},{t2}",
133
150
  )
134
151
  else:
135
- comparable_t1 = get_comparable_type(TYPE_PYTHON_MAPPING[t1["type"]][0]) # accessing the type_mapping value
136
- comparable_t2 = get_comparable_type(TYPE_PYTHON_MAPPING[t2["type"]][0]) # accessing the type_mapping value
152
+ comparable_t1 = get_comparable_type(
153
+ TYPE_PYTHON_MAPPING[t1_type][0]
154
+ ) # accessing the type_mapping value
155
+ comparable_t2 = get_comparable_type(
156
+ TYPE_PYTHON_MAPPING[t2_type][0]
157
+ ) # accessing the type_mapping value
137
158
  if not comparable_t1 and comparable_t2:
138
- raise SchemaInferenceError(FileBasedSourceError.UNRECOGNIZED_TYPE, key=key, detected_types=f"{t1},{t2}")
159
+ raise SchemaInferenceError(
160
+ FileBasedSourceError.UNRECOGNIZED_TYPE, key=key, detected_types=f"{t1},{t2}"
161
+ )
139
162
  return max(
140
- [t1, t2], key=lambda x: ComparableType(get_comparable_type(TYPE_PYTHON_MAPPING[x["type"]][0]))
163
+ [t1, t2],
164
+ key=lambda x: ComparableType(get_comparable_type(TYPE_PYTHON_MAPPING[x["type"]][0])),
141
165
  ) # accessing the type_mapping value
142
166
 
143
167
 
@@ -200,7 +224,8 @@ def _parse_json_input(input_schema: Union[str, Mapping[str, str]]) -> Optional[M
200
224
  schema = input_schema
201
225
  if not all(isinstance(s, str) for s in schema.values()):
202
226
  raise ConfigValidationError(
203
- FileBasedSourceError.ERROR_PARSING_USER_PROVIDED_SCHEMA, details="Invalid input schema; nested schemas are not supported."
227
+ FileBasedSourceError.ERROR_PARSING_USER_PROVIDED_SCHEMA,
228
+ details="Invalid input schema; nested schemas are not supported.",
204
229
  )
205
230
 
206
231
  except json.decoder.JSONDecodeError:
@@ -209,7 +234,9 @@ def _parse_json_input(input_schema: Union[str, Mapping[str, str]]) -> Optional[M
209
234
  return schema
210
235
 
211
236
 
212
- def type_mapping_to_jsonschema(input_schema: Optional[Union[str, Mapping[str, str]]]) -> Optional[Mapping[str, Any]]:
237
+ def type_mapping_to_jsonschema(
238
+ input_schema: Optional[Union[str, Mapping[str, str]]],
239
+ ) -> Optional[Mapping[str, Any]]:
213
240
  """
214
241
  Return the user input schema (type mapping), transformed to JSON Schema format.
215
242
 
@@ -236,7 +263,8 @@ def type_mapping_to_jsonschema(input_schema: Optional[Union[str, Mapping[str, st
236
263
 
237
264
  if not _json_schema_type:
238
265
  raise ConfigValidationError(
239
- FileBasedSourceError.ERROR_PARSING_USER_PROVIDED_SCHEMA, details=f"Invalid type '{type_name}' for property '{col_name}'."
266
+ FileBasedSourceError.ERROR_PARSING_USER_PROVIDED_SCHEMA,
267
+ details=f"Invalid type '{type_name}' for property '{col_name}'.",
240
268
  )
241
269
 
242
270
  json_schema_type = _json_schema_type[0]
@@ -1,4 +1,6 @@
1
- from airbyte_cdk.sources.file_based.schema_validation_policies.abstract_schema_validation_policy import AbstractSchemaValidationPolicy
1
+ from airbyte_cdk.sources.file_based.schema_validation_policies.abstract_schema_validation_policy import (
2
+ AbstractSchemaValidationPolicy,
3
+ )
2
4
  from airbyte_cdk.sources.file_based.schema_validation_policies.default_schema_validation_policies import (
3
5
  DEFAULT_SCHEMA_VALIDATION_POLICIES,
4
6
  EmitRecordPolicy,
@@ -11,7 +11,9 @@ class AbstractSchemaValidationPolicy(ABC):
11
11
  validate_schema_before_sync = False # Whether to verify that records conform to the schema during the stream's availabilty check
12
12
 
13
13
  @abstractmethod
14
- def record_passes_validation_policy(self, record: Mapping[str, Any], schema: Optional[Mapping[str, Any]]) -> bool:
14
+ def record_passes_validation_policy(
15
+ self, record: Mapping[str, Any], schema: Optional[Mapping[str, Any]]
16
+ ) -> bool:
15
17
  """
16
18
  Return True if the record passes the user's validation policy.
17
19
  """
@@ -5,7 +5,10 @@
5
5
  from typing import Any, Mapping, Optional
6
6
 
7
7
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import ValidationPolicy
8
- from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, StopSyncPerValidationPolicy
8
+ from airbyte_cdk.sources.file_based.exceptions import (
9
+ FileBasedSourceError,
10
+ StopSyncPerValidationPolicy,
11
+ )
9
12
  from airbyte_cdk.sources.file_based.schema_helpers import conforms_to_schema
10
13
  from airbyte_cdk.sources.file_based.schema_validation_policies import AbstractSchemaValidationPolicy
11
14
 
@@ -13,14 +16,18 @@ from airbyte_cdk.sources.file_based.schema_validation_policies import AbstractSc
13
16
  class EmitRecordPolicy(AbstractSchemaValidationPolicy):
14
17
  name = "emit_record"
15
18
 
16
- def record_passes_validation_policy(self, record: Mapping[str, Any], schema: Optional[Mapping[str, Any]]) -> bool:
19
+ def record_passes_validation_policy(
20
+ self, record: Mapping[str, Any], schema: Optional[Mapping[str, Any]]
21
+ ) -> bool:
17
22
  return True
18
23
 
19
24
 
20
25
  class SkipRecordPolicy(AbstractSchemaValidationPolicy):
21
26
  name = "skip_record"
22
27
 
23
- def record_passes_validation_policy(self, record: Mapping[str, Any], schema: Optional[Mapping[str, Any]]) -> bool:
28
+ def record_passes_validation_policy(
29
+ self, record: Mapping[str, Any], schema: Optional[Mapping[str, Any]]
30
+ ) -> bool:
24
31
  return schema is not None and conforms_to_schema(record, schema)
25
32
 
26
33
 
@@ -28,9 +35,13 @@ class WaitForDiscoverPolicy(AbstractSchemaValidationPolicy):
28
35
  name = "wait_for_discover"
29
36
  validate_schema_before_sync = True
30
37
 
31
- def record_passes_validation_policy(self, record: Mapping[str, Any], schema: Optional[Mapping[str, Any]]) -> bool:
38
+ def record_passes_validation_policy(
39
+ self, record: Mapping[str, Any], schema: Optional[Mapping[str, Any]]
40
+ ) -> bool:
32
41
  if schema is None or not conforms_to_schema(record, schema):
33
- raise StopSyncPerValidationPolicy(FileBasedSourceError.STOP_SYNC_PER_SCHEMA_VALIDATION_POLICY)
42
+ raise StopSyncPerValidationPolicy(
43
+ FileBasedSourceError.STOP_SYNC_PER_SCHEMA_VALIDATION_POLICY
44
+ )
34
45
  return True
35
46
 
36
47
 
@@ -6,11 +6,24 @@ from abc import abstractmethod
6
6
  from functools import cache, cached_property, lru_cache
7
7
  from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
8
8
 
9
+ from typing_extensions import deprecated
10
+
11
+ from airbyte_cdk import AirbyteMessage
9
12
  from airbyte_cdk.models import SyncMode
10
- from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy
11
- from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig, PrimaryKeyType
13
+ from airbyte_cdk.sources.file_based.availability_strategy import (
14
+ AbstractFileBasedAvailabilityStrategy,
15
+ )
16
+ from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
17
+ FileBasedStreamConfig,
18
+ PrimaryKeyType,
19
+ )
12
20
  from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy
13
- from airbyte_cdk.sources.file_based.exceptions import FileBasedErrorsCollector, FileBasedSourceError, RecordParseError, UndefinedParserError
21
+ from airbyte_cdk.sources.file_based.exceptions import (
22
+ FileBasedErrorsCollector,
23
+ FileBasedSourceError,
24
+ RecordParseError,
25
+ UndefinedParserError,
26
+ )
14
27
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
15
28
  from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
16
29
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
@@ -18,6 +31,7 @@ from airbyte_cdk.sources.file_based.schema_validation_policies import AbstractSc
18
31
  from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
19
32
  from airbyte_cdk.sources.file_based.types import StreamSlice
20
33
  from airbyte_cdk.sources.streams import Stream
34
+ from airbyte_cdk.sources.streams.checkpoint import Cursor
21
35
 
22
36
 
23
37
  class AbstractFileBasedStream(Stream):
@@ -32,7 +46,7 @@ class AbstractFileBasedStream(Stream):
32
46
  files in the stream.
33
47
  - A DiscoveryPolicy that controls the number of concurrent requests sent to the source
34
48
  during discover, and the number of files used for schema discovery.
35
- - A dictionary of FileType:Parser that holds all of the file types that can be handled
49
+ - A dictionary of FileType:Parser that holds all the file types that can be handled
36
50
  by the stream.
37
51
  """
38
52
 
@@ -61,8 +75,7 @@ class AbstractFileBasedStream(Stream):
61
75
 
62
76
  @property
63
77
  @abstractmethod
64
- def primary_key(self) -> PrimaryKeyType:
65
- ...
78
+ def primary_key(self) -> PrimaryKeyType: ...
66
79
 
67
80
  @cache
68
81
  def list_files(self) -> List[RemoteFile]:
@@ -70,7 +83,7 @@ class AbstractFileBasedStream(Stream):
70
83
  List all files that belong to the stream.
71
84
 
72
85
  The output of this method is cached so we don't need to list the files more than once.
73
- This means we won't pick up changes to the files during a sync. This meethod uses the
86
+ This means we won't pick up changes to the files during a sync. This method uses the
74
87
  get_files method which is implemented by the concrete stream class.
75
88
  """
76
89
  return list(self.get_files())
@@ -88,7 +101,7 @@ class AbstractFileBasedStream(Stream):
88
101
  cursor_field: Optional[List[str]] = None,
89
102
  stream_slice: Optional[StreamSlice] = None,
90
103
  stream_state: Optional[Mapping[str, Any]] = None,
91
- ) -> Iterable[Mapping[str, Any]]:
104
+ ) -> Iterable[Mapping[str, Any] | AirbyteMessage]:
92
105
  """
93
106
  Yield all records from all remote files in `list_files_for_this_sync`.
94
107
  This method acts as an adapter between the generic Stream interface and the file-based's
@@ -99,14 +112,20 @@ class AbstractFileBasedStream(Stream):
99
112
  return self.read_records_from_slice(stream_slice)
100
113
 
101
114
  @abstractmethod
102
- def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[Mapping[str, Any]]:
115
+ def read_records_from_slice(
116
+ self, stream_slice: StreamSlice
117
+ ) -> Iterable[Mapping[str, Any] | AirbyteMessage]:
103
118
  """
104
119
  Yield all records from all remote files in `list_files_for_this_sync`.
105
120
  """
106
121
  ...
107
122
 
108
123
  def stream_slices(
109
- self, *, sync_mode: SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None
124
+ self,
125
+ *,
126
+ sync_mode: SyncMode,
127
+ cursor_field: Optional[List[str]] = None,
128
+ stream_state: Optional[Mapping[str, Any]] = None,
110
129
  ) -> Iterable[Optional[Mapping[str, Any]]]:
111
130
  """
112
131
  This method acts as an adapter between the generic Stream interface and the file-based's
@@ -141,20 +160,38 @@ class AbstractFileBasedStream(Stream):
141
160
  try:
142
161
  return self._parsers[type(self.config.format)]
143
162
  except KeyError:
144
- raise UndefinedParserError(FileBasedSourceError.UNDEFINED_PARSER, stream=self.name, format=type(self.config.format))
163
+ raise UndefinedParserError(
164
+ FileBasedSourceError.UNDEFINED_PARSER,
165
+ stream=self.name,
166
+ format=type(self.config.format),
167
+ )
145
168
 
146
169
  def record_passes_validation_policy(self, record: Mapping[str, Any]) -> bool:
147
170
  if self.validation_policy:
148
- return self.validation_policy.record_passes_validation_policy(record=record, schema=self.catalog_schema)
171
+ return self.validation_policy.record_passes_validation_policy(
172
+ record=record, schema=self.catalog_schema
173
+ )
149
174
  else:
150
175
  raise RecordParseError(
151
- FileBasedSourceError.UNDEFINED_VALIDATION_POLICY, stream=self.name, validation_policy=self.config.validation_policy
176
+ FileBasedSourceError.UNDEFINED_VALIDATION_POLICY,
177
+ stream=self.name,
178
+ validation_policy=self.config.validation_policy,
152
179
  )
153
180
 
154
181
  @cached_property
182
+ @deprecated("Deprecated as of CDK version 3.7.0.")
155
183
  def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy:
156
184
  return self._availability_strategy
157
185
 
158
186
  @property
159
187
  def name(self) -> str:
160
188
  return self.config.name
189
+
190
+ def get_cursor(self) -> Optional[Cursor]:
191
+ """
192
+ This is a temporary hack. Because file-based, declarative, and concurrent have _slightly_ different cursor implementations
193
+ the file-based cursor isn't compatible with the cursor-based iteration flow in core.py top-level CDK. By setting this to
194
+ None, we defer to the regular incremental checkpoint flow. Once all cursors are consolidated under a common interface
195
+ then this override can be removed.
196
+ """
197
+ return None
@@ -4,10 +4,19 @@
4
4
 
5
5
  import copy
6
6
  import logging
7
- from functools import lru_cache
7
+ from functools import cache, lru_cache
8
8
  from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
9
9
 
10
- from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, ConfiguredAirbyteStream, Level, SyncMode, Type
10
+ from typing_extensions import deprecated
11
+
12
+ from airbyte_cdk.models import (
13
+ AirbyteLogMessage,
14
+ AirbyteMessage,
15
+ ConfiguredAirbyteStream,
16
+ Level,
17
+ SyncMode,
18
+ Type,
19
+ )
11
20
  from airbyte_cdk.sources import AbstractSource
12
21
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
13
22
  from airbyte_cdk.sources.file_based.availability_strategy import (
@@ -22,27 +31,35 @@ from airbyte_cdk.sources.file_based.stream.concurrent.cursor import FileBasedFin
22
31
  from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
23
32
  from airbyte_cdk.sources.file_based.types import StreamSlice
24
33
  from airbyte_cdk.sources.message import MessageRepository
34
+ from airbyte_cdk.sources.source import ExperimentalClassWarning
25
35
  from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
26
36
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
27
37
  from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
28
- from airbyte_cdk.sources.streams.concurrent.helpers import get_cursor_field_from_stream, get_primary_key_from_stream
38
+ from airbyte_cdk.sources.streams.concurrent.helpers import (
39
+ get_cursor_field_from_stream,
40
+ get_primary_key_from_stream,
41
+ )
29
42
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
30
43
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
31
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
32
44
  from airbyte_cdk.sources.streams.core import StreamData
45
+ from airbyte_cdk.sources.types import Record
33
46
  from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
34
47
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
35
- from deprecated.classic import deprecated
36
48
 
37
49
  if TYPE_CHECKING:
38
- from airbyte_cdk.sources.file_based.stream.concurrent.cursor import AbstractConcurrentFileBasedCursor
50
+ from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
51
+ AbstractConcurrentFileBasedCursor,
52
+ )
39
53
 
40
54
  """
41
55
  This module contains adapters to help enabling concurrency on File-based Stream objects without needing to migrate to AbstractStream
42
56
  """
43
57
 
44
58
 
45
- @deprecated("This class is experimental. Use at your own risk.")
59
+ @deprecated(
60
+ "This class is experimental. Use at your own risk.",
61
+ category=ExperimentalClassWarning,
62
+ )
46
63
  class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBasedStream):
47
64
  @classmethod
48
65
  def create_from_stream(
@@ -71,7 +88,9 @@ class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBas
71
88
  partition_generator=FileBasedStreamPartitionGenerator(
72
89
  stream,
73
90
  message_repository,
74
- SyncMode.full_refresh if isinstance(cursor, FileBasedFinalStateCursor) else SyncMode.incremental,
91
+ SyncMode.full_refresh
92
+ if isinstance(cursor, FileBasedFinalStateCursor)
93
+ else SyncMode.incremental,
75
94
  [cursor_field] if cursor_field is not None else None,
76
95
  state,
77
96
  cursor,
@@ -127,6 +146,7 @@ class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBas
127
146
  return self._legacy_stream.supports_incremental
128
147
 
129
148
  @property
149
+ @deprecated("Deprecated as of CDK version 3.7.0.")
130
150
  def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy:
131
151
  return self._legacy_stream.availability_strategy
132
152
 
@@ -136,7 +156,10 @@ class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBas
136
156
 
137
157
  @property
138
158
  def primary_key(self) -> PrimaryKeyType:
139
- return self._legacy_stream.config.primary_key or self.get_parser().get_parser_defined_primary_key(self._legacy_stream.config)
159
+ return (
160
+ self._legacy_stream.config.primary_key
161
+ or self.get_parser().get_parser_defined_primary_key(self._legacy_stream.config)
162
+ )
140
163
 
141
164
  def get_parser(self) -> FileTypeParser:
142
165
  return self._legacy_stream.get_parser()
@@ -145,7 +168,7 @@ class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBas
145
168
  return self._legacy_stream.get_files()
146
169
 
147
170
  def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[Mapping[str, Any]]:
148
- yield from self._legacy_stream.read_records_from_slice(stream_slice)
171
+ yield from self._legacy_stream.read_records_from_slice(stream_slice) # type: ignore[misc] # Only Mapping[str, Any] is expected for legacy streams, not AirbyteMessage
149
172
 
150
173
  def compute_slices(self) -> Iterable[Optional[StreamSlice]]:
151
174
  return self._legacy_stream.compute_slices()
@@ -183,7 +206,10 @@ class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBas
183
206
  # This shouldn't happen if the ConcurrentCursor was used
184
207
  state = "unknown; no state attribute was available on the cursor"
185
208
  yield AirbyteMessage(
186
- type=Type.LOG, log=AirbyteLogMessage(level=Level.ERROR, message=f"Cursor State at time of exception: {state}")
209
+ type=Type.LOG,
210
+ log=AirbyteLogMessage(
211
+ level=Level.ERROR, message=f"Cursor State at time of exception: {state}"
212
+ ),
187
213
  )
188
214
  raise exc
189
215
 
@@ -204,7 +230,6 @@ class FileBasedStreamPartition(Partition):
204
230
  sync_mode: SyncMode,
205
231
  cursor_field: Optional[List[str]],
206
232
  state: Optional[MutableMapping[str, Any]],
207
- cursor: "AbstractConcurrentFileBasedCursor",
208
233
  ):
209
234
  self._stream = stream
210
235
  self._slice = _slice
@@ -212,8 +237,6 @@ class FileBasedStreamPartition(Partition):
212
237
  self._sync_mode = sync_mode
213
238
  self._cursor_field = cursor_field
214
239
  self._state = state
215
- self._cursor = cursor
216
- self._is_closed = False
217
240
 
218
241
  def read(self) -> Iterable[Record]:
219
242
  try:
@@ -225,11 +248,30 @@ class FileBasedStreamPartition(Partition):
225
248
  ):
226
249
  if isinstance(record_data, Mapping):
227
250
  data_to_return = dict(record_data)
228
- self._stream.transformer.transform(data_to_return, self._stream.get_json_schema())
229
- yield Record(data_to_return, self.stream_name())
230
- elif isinstance(record_data, AirbyteMessage) and record_data.type == Type.RECORD:
251
+ self._stream.transformer.transform(
252
+ data_to_return, self._stream.get_json_schema()
253
+ )
254
+ yield Record(data=data_to_return, stream_name=self.stream_name())
255
+ elif (
256
+ isinstance(record_data, AirbyteMessage)
257
+ and record_data.type == Type.RECORD
258
+ and record_data.record is not None
259
+ ):
231
260
  # `AirbyteMessage`s of type `Record` should also be yielded so they are enqueued
232
- yield Record(record_data.record.data, self.stream_name())
261
+ # If stream is flagged for file_transfer the record should data in file key
262
+ record_message_data = (
263
+ record_data.record.file
264
+ if self._use_file_transfer()
265
+ else record_data.record.data
266
+ )
267
+ if not record_message_data:
268
+ raise ExceptionWithDisplayMessage("A record without data was found")
269
+ else:
270
+ yield Record(
271
+ data=record_message_data,
272
+ stream_name=self.stream_name(),
273
+ is_file_transfer_message=self._use_file_transfer(),
274
+ )
233
275
  else:
234
276
  self._message_repository.emit_message(record_data)
235
277
  except Exception as e:
@@ -248,13 +290,6 @@ class FileBasedStreamPartition(Partition):
248
290
  file = self._slice["files"][0]
249
291
  return {"files": [file]}
250
292
 
251
- def close(self) -> None:
252
- self._cursor.close_partition(self)
253
- self._is_closed = True
254
-
255
- def is_closed(self) -> bool:
256
- return self._is_closed
257
-
258
293
  def __hash__(self) -> int:
259
294
  if self._slice:
260
295
  # Convert the slice to a string so that it can be hashed
@@ -271,6 +306,10 @@ class FileBasedStreamPartition(Partition):
271
306
  def stream_name(self) -> str:
272
307
  return self._stream.name
273
308
 
309
+ @cache
310
+ def _use_file_transfer(self) -> bool:
311
+ return hasattr(self._stream, "use_file_transfer") and self._stream.use_file_transfer
312
+
274
313
  def __repr__(self) -> str:
275
314
  return f"FileBasedStreamPartition({self._stream.name}, {self._slice})"
276
315
 
@@ -294,7 +333,9 @@ class FileBasedStreamPartitionGenerator(PartitionGenerator):
294
333
 
295
334
  def generate(self) -> Iterable[FileBasedStreamPartition]:
296
335
  pending_partitions = []
297
- for _slice in self._stream.stream_slices(sync_mode=self._sync_mode, cursor_field=self._cursor_field, stream_state=self._state):
336
+ for _slice in self._stream.stream_slices(
337
+ sync_mode=self._sync_mode, cursor_field=self._cursor_field, stream_state=self._state
338
+ ):
298
339
  if _slice is not None:
299
340
  for file in _slice.get("files", []):
300
341
  pending_partitions.append(
@@ -305,7 +346,6 @@ class FileBasedStreamPartitionGenerator(PartitionGenerator):
305
346
  self._sync_mode,
306
347
  self._cursor_field,
307
348
  self._state,
308
- self._cursor,
309
349
  )
310
350
  )
311
351
  self._cursor.set_pending_partitions(pending_partitions)
@@ -2,4 +2,8 @@ from .abstract_concurrent_file_based_cursor import AbstractConcurrentFileBasedCu
2
2
  from .file_based_concurrent_cursor import FileBasedConcurrentCursor
3
3
  from .file_based_final_state_cursor import FileBasedFinalStateCursor
4
4
 
5
- __all__ = ["AbstractConcurrentFileBasedCursor", "FileBasedConcurrentCursor", "FileBasedFinalStateCursor"]
5
+ __all__ = [
6
+ "AbstractConcurrentFileBasedCursor",
7
+ "FileBasedConcurrentCursor",
8
+ "FileBasedFinalStateCursor",
9
+ ]
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
12
12
  from airbyte_cdk.sources.file_based.types import StreamState
13
13
  from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
14
14
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
15
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
15
+ from airbyte_cdk.sources.types import Record
16
16
 
17
17
  if TYPE_CHECKING:
18
18
  from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
@@ -24,45 +24,36 @@ class AbstractConcurrentFileBasedCursor(Cursor, AbstractFileBasedCursor, ABC):
24
24
 
25
25
  @property
26
26
  @abstractmethod
27
- def state(self) -> MutableMapping[str, Any]:
28
- ...
27
+ def state(self) -> MutableMapping[str, Any]: ...
29
28
 
30
29
  @abstractmethod
31
- def observe(self, record: Record) -> None:
32
- ...
30
+ def observe(self, record: Record) -> None: ...
33
31
 
34
32
  @abstractmethod
35
- def close_partition(self, partition: Partition) -> None:
36
- ...
33
+ def close_partition(self, partition: Partition) -> None: ...
37
34
 
38
35
  @abstractmethod
39
- def set_pending_partitions(self, partitions: List["FileBasedStreamPartition"]) -> None:
40
- ...
36
+ def set_pending_partitions(self, partitions: List["FileBasedStreamPartition"]) -> None: ...
41
37
 
42
38
  @abstractmethod
43
- def add_file(self, file: RemoteFile) -> None:
44
- ...
39
+ def add_file(self, file: RemoteFile) -> None: ...
45
40
 
46
41
  @abstractmethod
47
- def get_files_to_sync(self, all_files: Iterable[RemoteFile], logger: logging.Logger) -> Iterable[RemoteFile]:
48
- ...
42
+ def get_files_to_sync(
43
+ self, all_files: Iterable[RemoteFile], logger: logging.Logger
44
+ ) -> Iterable[RemoteFile]: ...
49
45
 
50
46
  @abstractmethod
51
- def get_state(self) -> MutableMapping[str, Any]:
52
- ...
47
+ def get_state(self) -> MutableMapping[str, Any]: ...
53
48
 
54
49
  @abstractmethod
55
- def set_initial_state(self, value: StreamState) -> None:
56
- ...
50
+ def set_initial_state(self, value: StreamState) -> None: ...
57
51
 
58
52
  @abstractmethod
59
- def get_start_time(self) -> datetime:
60
- ...
53
+ def get_start_time(self) -> datetime: ...
61
54
 
62
55
  @abstractmethod
63
- def emit_state_message(self) -> None:
64
- ...
56
+ def emit_state_message(self) -> None: ...
65
57
 
66
58
  @abstractmethod
67
- def ensure_at_least_one_state_emitted(self) -> None:
68
- ...
59
+ def ensure_at_least_one_state_emitted(self) -> None: ...