airbyte-cdk: airbyte_cdk-0.72.1-py3-none-any.whl → airbyte_cdk-6.13.1.dev4106-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (517)
  1. airbyte_cdk/__init__.py +355 -6
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +29 -10
  7. airbyte_cdk/connector.py +24 -24
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
  10. airbyte_cdk/connector_builder/main.py +45 -13
  11. airbyte_cdk/connector_builder/message_grouper.py +189 -50
  12. airbyte_cdk/connector_builder/models.py +3 -2
  13. airbyte_cdk/destinations/__init__.py +4 -3
  14. airbyte_cdk/destinations/destination.py +54 -20
  15. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  16. airbyte_cdk/destinations/vector_db_based/config.py +40 -17
  17. airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
  18. airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
  19. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  20. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  21. airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
  22. airbyte_cdk/entrypoint.py +153 -44
  23. airbyte_cdk/exception_handler.py +21 -3
  24. airbyte_cdk/logger.py +30 -44
  25. airbyte_cdk/models/__init__.py +13 -2
  26. airbyte_cdk/models/airbyte_protocol.py +86 -1
  27. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  28. airbyte_cdk/models/file_transfer_record_message.py +13 -0
  29. airbyte_cdk/models/well_known_types.py +1 -1
  30. airbyte_cdk/sources/__init__.py +5 -1
  31. airbyte_cdk/sources/abstract_source.py +125 -79
  32. airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
  33. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
  34. airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
  35. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
  36. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  37. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
  38. airbyte_cdk/sources/config.py +3 -2
  39. airbyte_cdk/sources/connector_state_manager.py +49 -83
  40. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  41. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
  42. airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
  43. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  44. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  45. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  46. airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
  47. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  48. airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
  49. airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
  50. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
  51. airbyte_cdk/sources/declarative/auth/token.py +28 -10
  52. airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
  53. airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
  54. airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
  55. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  56. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  57. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +421 -0
  58. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
  59. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
  60. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1185 -85
  61. airbyte_cdk/sources/declarative/declarative_source.py +5 -2
  62. airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
  63. airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
  64. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
  65. airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
  66. airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
  67. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  68. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  69. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  70. airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
  71. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
  72. airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
  73. airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
  74. airbyte_cdk/sources/declarative/extractors/record_filter.py +65 -8
  75. airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
  76. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
  77. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  78. airbyte_cdk/sources/declarative/incremental/__init__.py +25 -3
  79. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
  80. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  81. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
  82. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +159 -74
  83. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  84. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  85. airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
  86. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
  87. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
  88. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
  89. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
  90. airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
  91. airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
  92. airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
  93. airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
  94. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  95. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  96. airbyte_cdk/sources/declarative/models/__init__.py +1 -1
  97. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1319 -603
  98. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
  99. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
  100. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
  101. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1695 -225
  102. airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
  103. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  104. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  105. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
  106. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  107. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
  108. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
  109. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
  110. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
  111. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
  112. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
  113. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
  114. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
  115. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
  116. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
  117. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
  118. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
  119. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  120. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
  121. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
  122. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
  123. airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
  124. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
  125. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
  126. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
  127. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
  128. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
  129. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
  130. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
  131. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
  132. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
  133. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
  134. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
  135. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
  136. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  137. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
  138. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
  139. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
  140. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
  141. airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
  142. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  143. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  144. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  145. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  146. airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
  147. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
  148. airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
  149. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +228 -72
  150. airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
  151. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
  152. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
  153. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
  154. airbyte_cdk/sources/declarative/spec/spec.py +12 -5
  155. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
  156. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
  157. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
  158. airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
  159. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  160. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  161. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  162. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  163. airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
  164. airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
  165. airbyte_cdk/sources/declarative/types.py +19 -110
  166. airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
  167. airbyte_cdk/sources/embedded/base_integration.py +16 -5
  168. airbyte_cdk/sources/embedded/catalog.py +16 -4
  169. airbyte_cdk/sources/embedded/runner.py +19 -3
  170. airbyte_cdk/sources/embedded/tools.py +5 -2
  171. airbyte_cdk/sources/file_based/README.md +152 -0
  172. airbyte_cdk/sources/file_based/__init__.py +24 -0
  173. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
  174. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
  175. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
  176. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +58 -10
  177. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  178. airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
  179. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  180. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
  181. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  182. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  183. airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
  184. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
  185. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  186. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  187. airbyte_cdk/sources/file_based/exceptions.py +52 -15
  188. airbyte_cdk/sources/file_based/file_based_source.py +163 -33
  189. airbyte_cdk/sources/file_based/file_based_stream_reader.py +83 -5
  190. airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
  191. airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
  192. airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
  193. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  194. airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
  195. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  196. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
  197. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
  198. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +145 -41
  199. airbyte_cdk/sources/file_based/remote_file.py +1 -1
  200. airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
  201. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
  202. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  203. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  204. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
  205. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
  206. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
  207. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
  208. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
  209. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
  210. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  211. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
  212. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +175 -45
  213. airbyte_cdk/sources/http_logger.py +8 -3
  214. airbyte_cdk/sources/message/__init__.py +7 -1
  215. airbyte_cdk/sources/message/repository.py +18 -4
  216. airbyte_cdk/sources/source.py +42 -38
  217. airbyte_cdk/sources/streams/__init__.py +2 -2
  218. airbyte_cdk/sources/streams/availability_strategy.py +54 -3
  219. airbyte_cdk/sources/streams/call_rate.py +64 -21
  220. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  221. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  222. airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
  223. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  224. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  225. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  226. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  227. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
  228. airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
  229. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
  230. airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
  231. airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
  232. airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
  233. airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
  234. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
  235. airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
  236. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
  237. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  238. airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
  239. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
  240. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
  241. airbyte_cdk/sources/streams/core.py +412 -87
  242. airbyte_cdk/sources/streams/http/__init__.py +2 -1
  243. airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
  244. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  245. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  246. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  247. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  248. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  249. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  250. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  251. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  252. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  253. airbyte_cdk/sources/streams/http/exceptions.py +27 -7
  254. airbyte_cdk/sources/streams/http/http.py +369 -246
  255. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  256. airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
  257. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
  258. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
  259. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
  260. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  261. airbyte_cdk/sources/types.py +154 -0
  262. airbyte_cdk/sources/utils/record_helper.py +36 -21
  263. airbyte_cdk/sources/utils/schema_helpers.py +13 -6
  264. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  265. airbyte_cdk/sources/utils/transform.py +54 -20
  266. airbyte_cdk/sql/_util/hashing.py +34 -0
  267. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  268. airbyte_cdk/sql/constants.py +32 -0
  269. airbyte_cdk/sql/exceptions.py +235 -0
  270. airbyte_cdk/sql/secrets.py +123 -0
  271. airbyte_cdk/sql/shared/__init__.py +15 -0
  272. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  273. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  274. airbyte_cdk/sql/types.py +160 -0
  275. airbyte_cdk/test/catalog_builder.py +70 -18
  276. airbyte_cdk/test/entrypoint_wrapper.py +117 -42
  277. airbyte_cdk/test/mock_http/__init__.py +1 -1
  278. airbyte_cdk/test/mock_http/matcher.py +6 -0
  279. airbyte_cdk/test/mock_http/mocker.py +57 -10
  280. airbyte_cdk/test/mock_http/request.py +19 -3
  281. airbyte_cdk/test/mock_http/response.py +3 -1
  282. airbyte_cdk/test/mock_http/response_builder.py +32 -16
  283. airbyte_cdk/test/state_builder.py +18 -10
  284. airbyte_cdk/test/utils/__init__.py +1 -0
  285. airbyte_cdk/test/utils/data.py +24 -0
  286. airbyte_cdk/test/utils/http_mocking.py +16 -0
  287. airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
  288. airbyte_cdk/test/utils/reading.py +26 -0
  289. airbyte_cdk/utils/__init__.py +2 -1
  290. airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
  291. airbyte_cdk/utils/analytics_message.py +10 -2
  292. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  293. airbyte_cdk/utils/event_timing.py +10 -10
  294. airbyte_cdk/utils/mapping_helpers.py +3 -1
  295. airbyte_cdk/utils/message_utils.py +20 -11
  296. airbyte_cdk/utils/print_buffer.py +75 -0
  297. airbyte_cdk/utils/schema_inferrer.py +198 -28
  298. airbyte_cdk/utils/slice_hasher.py +30 -0
  299. airbyte_cdk/utils/spec_schema_transformations.py +6 -3
  300. airbyte_cdk/utils/stream_status_utils.py +8 -1
  301. airbyte_cdk/utils/traced_exception.py +61 -21
  302. airbyte_cdk-6.13.1.dev4106.dist-info/METADATA +109 -0
  303. airbyte_cdk-6.13.1.dev4106.dist-info/RECORD +349 -0
  304. {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.13.1.dev4106.dist-info}/WHEEL +1 -2
  305. airbyte_cdk-6.13.1.dev4106.dist-info/entry_points.txt +3 -0
  306. airbyte_cdk/sources/declarative/create_partial.py +0 -92
  307. airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
  308. airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
  309. airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
  310. airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
  311. airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
  312. airbyte_cdk/sources/deprecated/base_source.py +0 -94
  313. airbyte_cdk/sources/deprecated/client.py +0 -99
  314. airbyte_cdk/sources/singer/__init__.py +0 -8
  315. airbyte_cdk/sources/singer/singer_helpers.py +0 -304
  316. airbyte_cdk/sources/singer/source.py +0 -186
  317. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
  318. airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
  319. airbyte_cdk/sources/streams/http/auth/core.py +0 -29
  320. airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
  321. airbyte_cdk/sources/streams/http/auth/token.py +0 -47
  322. airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
  323. airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
  324. airbyte_cdk/sources/utils/schema_models.py +0 -84
  325. airbyte_cdk-0.72.1.dist-info/METADATA +0 -243
  326. airbyte_cdk-0.72.1.dist-info/RECORD +0 -466
  327. airbyte_cdk-0.72.1.dist-info/top_level.txt +0 -3
  328. source_declarative_manifest/main.py +0 -29
  329. unit_tests/connector_builder/__init__.py +0 -3
  330. unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
  331. unit_tests/connector_builder/test_message_grouper.py +0 -713
  332. unit_tests/connector_builder/utils.py +0 -27
  333. unit_tests/destinations/test_destination.py +0 -243
  334. unit_tests/singer/test_singer_helpers.py +0 -56
  335. unit_tests/singer/test_singer_source.py +0 -112
  336. unit_tests/sources/__init__.py +0 -0
  337. unit_tests/sources/concurrent_source/__init__.py +0 -3
  338. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
  339. unit_tests/sources/declarative/__init__.py +0 -3
  340. unit_tests/sources/declarative/auth/__init__.py +0 -3
  341. unit_tests/sources/declarative/auth/test_oauth.py +0 -331
  342. unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
  343. unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
  344. unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
  345. unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
  346. unit_tests/sources/declarative/checks/__init__.py +0 -3
  347. unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
  348. unit_tests/sources/declarative/decoders/__init__.py +0 -0
  349. unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
  350. unit_tests/sources/declarative/external_component.py +0 -13
  351. unit_tests/sources/declarative/extractors/__init__.py +0 -3
  352. unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
  353. unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
  354. unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
  355. unit_tests/sources/declarative/incremental/__init__.py +0 -0
  356. unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
  357. unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
  358. unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
  359. unit_tests/sources/declarative/interpolation/__init__.py +0 -3
  360. unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
  361. unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
  362. unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
  363. unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
  364. unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
  365. unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
  366. unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
  367. unit_tests/sources/declarative/parsers/__init__.py +0 -3
  368. unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
  369. unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
  370. unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1847
  371. unit_tests/sources/declarative/parsers/testing_components.py +0 -36
  372. unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
  373. unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
  374. unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
  375. unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
  376. unit_tests/sources/declarative/requesters/__init__.py +0 -3
  377. unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
  378. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
  379. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
  380. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
  381. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
  382. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
  383. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
  384. unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
  385. unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
  386. unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
  387. unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
  388. unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
  389. unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
  390. unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
  391. unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
  392. unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
  393. unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
  394. unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
  395. unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
  396. unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
  397. unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
  398. unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
  399. unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
  400. unit_tests/sources/declarative/retrievers/__init__.py +0 -3
  401. unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
  402. unit_tests/sources/declarative/schema/__init__.py +0 -6
  403. unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
  404. unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
  405. unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
  406. unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
  407. unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
  408. unit_tests/sources/declarative/states/__init__.py +0 -3
  409. unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
  410. unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
  411. unit_tests/sources/declarative/test_create_partial.py +0 -83
  412. unit_tests/sources/declarative/test_declarative_stream.py +0 -103
  413. unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
  414. unit_tests/sources/declarative/test_types.py +0 -39
  415. unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
  416. unit_tests/sources/file_based/__init__.py +0 -0
  417. unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
  418. unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
  419. unit_tests/sources/file_based/config/__init__.py +0 -0
  420. unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
  421. unit_tests/sources/file_based/config/test_csv_format.py +0 -34
  422. unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
  423. unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
  424. unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
  425. unit_tests/sources/file_based/file_types/__init__.py +0 -0
  426. unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
  427. unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
  428. unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
  429. unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
  430. unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
  431. unit_tests/sources/file_based/helpers.py +0 -70
  432. unit_tests/sources/file_based/in_memory_files_source.py +0 -211
  433. unit_tests/sources/file_based/scenarios/__init__.py +0 -0
  434. unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
  435. unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
  436. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
  437. unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
  438. unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
  439. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
  440. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
  441. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
  442. unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
  443. unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
  444. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
  445. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
  446. unit_tests/sources/file_based/stream/__init__.py +0 -0
  447. unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
  448. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
  449. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
  450. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
  451. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
  452. unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
  453. unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
  454. unit_tests/sources/file_based/test_scenarios.py +0 -253
  455. unit_tests/sources/file_based/test_schema_helpers.py +0 -346
  456. unit_tests/sources/fixtures/__init__.py +0 -3
  457. unit_tests/sources/fixtures/source_test_fixture.py +0 -153
  458. unit_tests/sources/message/__init__.py +0 -0
  459. unit_tests/sources/message/test_repository.py +0 -153
  460. unit_tests/sources/streams/__init__.py +0 -0
  461. unit_tests/sources/streams/concurrent/__init__.py +0 -3
  462. unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
  463. unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
  464. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
  465. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
  466. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
  467. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
  468. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
  469. unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
  470. unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
  471. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
  472. unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
  473. unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
  474. unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
  475. unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
  476. unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
  477. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
  478. unit_tests/sources/streams/http/__init__.py +0 -0
  479. unit_tests/sources/streams/http/auth/__init__.py +0 -0
  480. unit_tests/sources/streams/http/auth/test_auth.py +0 -173
  481. unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
  482. unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
  483. unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
  484. unit_tests/sources/streams/http/test_http.py +0 -635
  485. unit_tests/sources/streams/test_availability_strategy.py +0 -70
  486. unit_tests/sources/streams/test_call_rate.py +0 -300
  487. unit_tests/sources/streams/test_stream_read.py +0 -405
  488. unit_tests/sources/streams/test_streams_core.py +0 -184
  489. unit_tests/sources/test_abstract_source.py +0 -1442
  490. unit_tests/sources/test_concurrent_source.py +0 -112
  491. unit_tests/sources/test_config.py +0 -92
  492. unit_tests/sources/test_connector_state_manager.py +0 -482
  493. unit_tests/sources/test_http_logger.py +0 -252
  494. unit_tests/sources/test_integration_source.py +0 -86
  495. unit_tests/sources/test_source.py +0 -684
  496. unit_tests/sources/test_source_read.py +0 -460
  497. unit_tests/test/__init__.py +0 -0
  498. unit_tests/test/mock_http/__init__.py +0 -0
  499. unit_tests/test/mock_http/test_matcher.py +0 -53
  500. unit_tests/test/mock_http/test_mocker.py +0 -214
  501. unit_tests/test/mock_http/test_request.py +0 -117
  502. unit_tests/test/mock_http/test_response_builder.py +0 -177
  503. unit_tests/test/test_entrypoint_wrapper.py +0 -240
  504. unit_tests/utils/__init__.py +0 -0
  505. unit_tests/utils/test_datetime_format_inferrer.py +0 -60
  506. unit_tests/utils/test_mapping_helpers.py +0 -54
  507. unit_tests/utils/test_message_utils.py +0 -91
  508. unit_tests/utils/test_rate_limiting.py +0 -26
  509. unit_tests/utils/test_schema_inferrer.py +0 -202
  510. unit_tests/utils/test_secret_utils.py +0 -135
  511. unit_tests/utils/test_stream_status_utils.py +0 -61
  512. unit_tests/utils/test_traced_exception.py +0 -107
  513. /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
  514. {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
  515. {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
  516. {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
  517. {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.13.1.dev4106.dist-info}/LICENSE.txt +0 -0
@@ -0,0 +1,24 @@
1
+ from .config.abstract_file_based_spec import AbstractFileBasedSpec
2
+ from .config.csv_format import CsvFormat
3
+ from .config.file_based_stream_config import FileBasedStreamConfig
4
+ from .config.jsonl_format import JsonlFormat
5
+ from .exceptions import CustomFileBasedException, ErrorListingFiles, FileBasedSourceError
6
+ from .file_based_source import DEFAULT_CONCURRENCY, FileBasedSource
7
+ from .file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
8
+ from .remote_file import RemoteFile
9
+ from .stream.cursor import DefaultFileBasedCursor
10
+
11
+ __all__ = [
12
+ "AbstractFileBasedSpec",
13
+ "AbstractFileBasedStreamReader",
14
+ "CsvFormat",
15
+ "CustomFileBasedException",
16
+ "DefaultFileBasedCursor",
17
+ "ErrorListingFiles",
18
+ "FileBasedSource",
19
+ "FileBasedSourceError",
20
+ "FileBasedStreamConfig",
21
+ "FileReadMode",
22
+ "JsonlFormat",
23
+ "RemoteFile",
24
+ ]
@@ -1,4 +1,11 @@
1
- from .abstract_file_based_availability_strategy import AbstractFileBasedAvailabilityStrategy, AbstractFileBasedAvailabilityStrategyWrapper
1
+ from .abstract_file_based_availability_strategy import (
2
+ AbstractFileBasedAvailabilityStrategy,
3
+ AbstractFileBasedAvailabilityStrategyWrapper,
4
+ )
2
5
  from .default_file_based_availability_strategy import DefaultFileBasedAvailabilityStrategy
3
6
 
4
- __all__ = ["AbstractFileBasedAvailabilityStrategy", "AbstractFileBasedAvailabilityStrategyWrapper", "DefaultFileBasedAvailabilityStrategy"]
7
+ __all__ = [
8
+ "AbstractFileBasedAvailabilityStrategy",
9
+ "AbstractFileBasedAvailabilityStrategyWrapper",
10
+ "DefaultFileBasedAvailabilityStrategy",
11
+ ]
@@ -2,6 +2,8 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ from __future__ import annotations
6
+
5
7
  import logging
6
8
  from abc import abstractmethod
7
9
  from typing import TYPE_CHECKING, Optional, Tuple
@@ -22,7 +24,12 @@ if TYPE_CHECKING:
22
24
 
23
25
  class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
24
26
  @abstractmethod
25
- def check_availability(self, stream: Stream, logger: logging.Logger, _: Optional[Source]) -> Tuple[bool, Optional[str]]:
27
+ def check_availability( # type: ignore[override] # Signature doesn't match base class
28
+ self,
29
+ stream: Stream,
30
+ logger: logging.Logger,
31
+ _: Optional[Source],
32
+ ) -> Tuple[bool, Optional[str]]:
26
33
  """
27
34
  Perform a connection check for the stream.
28
35
 
@@ -32,7 +39,10 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
32
39
 
33
40
  @abstractmethod
34
41
  def check_availability_and_parsability(
35
- self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]
42
+ self,
43
+ stream: AbstractFileBasedStream,
44
+ logger: logging.Logger,
45
+ _: Optional[Source],
36
46
  ) -> Tuple[bool, Optional[str]]:
37
47
  """
38
48
  Performs a connection check for the stream, as well as additional checks that
@@ -44,14 +54,20 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
44
54
 
45
55
 
46
56
  class AbstractFileBasedAvailabilityStrategyWrapper(AbstractAvailabilityStrategy):
47
- def __init__(self, stream: "AbstractFileBasedStream"):
57
+ def __init__(self, stream: AbstractFileBasedStream) -> None:
48
58
  self.stream = stream
49
59
 
50
60
  def check_availability(self, logger: logging.Logger) -> StreamAvailability:
51
- is_available, reason = self.stream.availability_strategy.check_availability(self.stream, logger, None)
61
+ is_available, reason = self.stream.availability_strategy.check_availability(
62
+ self.stream, logger, None
63
+ )
52
64
  if is_available:
53
65
  return StreamAvailable()
54
66
  return StreamUnavailable(reason or "")
55
67
 
56
- def check_availability_and_parsability(self, logger: logging.Logger) -> Tuple[bool, Optional[str]]:
57
- return self.stream.availability_strategy.check_availability_and_parsability(self.stream, logger, None)
68
+ def check_availability_and_parsability(
69
+ self, logger: logging.Logger
70
+ ) -> Tuple[bool, Optional[str]]:
71
+ return self.stream.availability_strategy.check_availability_and_parsability(
72
+ self.stream, logger, None
73
+ )
@@ -2,13 +2,22 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ from __future__ import annotations
6
+
5
7
  import logging
6
8
  import traceback
7
9
  from typing import TYPE_CHECKING, Optional, Tuple
8
10
 
11
+ from airbyte_cdk import AirbyteTracedException
9
12
  from airbyte_cdk.sources import Source
10
- from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy
11
- from airbyte_cdk.sources.file_based.exceptions import CheckAvailabilityError, CustomFileBasedException, FileBasedSourceError
13
+ from airbyte_cdk.sources.file_based.availability_strategy import (
14
+ AbstractFileBasedAvailabilityStrategy,
15
+ )
16
+ from airbyte_cdk.sources.file_based.exceptions import (
17
+ CheckAvailabilityError,
18
+ CustomFileBasedException,
19
+ FileBasedSourceError,
20
+ )
12
21
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
13
22
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
14
23
  from airbyte_cdk.sources.file_based.schema_helpers import conforms_to_schema
@@ -18,10 +27,15 @@ if TYPE_CHECKING:
18
27
 
19
28
 
20
29
  class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy):
21
- def __init__(self, stream_reader: AbstractFileBasedStreamReader):
30
+ def __init__(self, stream_reader: AbstractFileBasedStreamReader) -> None:
22
31
  self.stream_reader = stream_reader
23
32
 
24
- def check_availability(self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]) -> Tuple[bool, Optional[str]]: # type: ignore[override]
33
+ def check_availability( # type: ignore[override] # Signature doesn't match base class
34
+ self,
35
+ stream: AbstractFileBasedStream,
36
+ logger: logging.Logger,
37
+ _: Optional[Source],
38
+ ) -> Tuple[bool, Optional[str]]:
25
39
  """
26
40
  Perform a connection check for the stream (verify that we can list files from the stream).
27
41
 
@@ -35,7 +49,10 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
35
49
  return True, None
36
50
 
37
51
  def check_availability_and_parsability(
38
- self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]
52
+ self,
53
+ stream: AbstractFileBasedStream,
54
+ logger: logging.Logger,
55
+ _: Optional[Source],
39
56
  ) -> Tuple[bool, Optional[str]]:
40
57
  """
41
58
  Perform a connection check for the stream.
@@ -66,12 +83,14 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
66
83
  # If the parser is set to not check parsability, we still want to check that we can open the file.
67
84
  handle = stream.stream_reader.open_file(file, parser.file_read_mode, None, logger)
68
85
  handle.close()
86
+ except AirbyteTracedException as ate:
87
+ raise ate
69
88
  except CheckAvailabilityError:
70
89
  return False, "".join(traceback.format_exc())
71
90
 
72
91
  return True, None
73
92
 
74
- def _check_list_files(self, stream: "AbstractFileBasedStream") -> RemoteFile:
93
+ def _check_list_files(self, stream: AbstractFileBasedStream) -> RemoteFile:
75
94
  """
76
95
  Check that we can list files from the stream.
77
96
 
@@ -84,22 +103,39 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
84
103
  except CustomFileBasedException as exc:
85
104
  raise CheckAvailabilityError(str(exc), stream=stream.name) from exc
86
105
  except Exception as exc:
87
- raise CheckAvailabilityError(FileBasedSourceError.ERROR_LISTING_FILES, stream=stream.name) from exc
106
+ raise CheckAvailabilityError(
107
+ FileBasedSourceError.ERROR_LISTING_FILES, stream=stream.name
108
+ ) from exc
88
109
 
89
110
  return file
90
111
 
91
- def _check_parse_record(self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger) -> None:
112
+ def _check_parse_record(
113
+ self,
114
+ stream: AbstractFileBasedStream,
115
+ file: RemoteFile,
116
+ logger: logging.Logger,
117
+ ) -> None:
92
118
  parser = stream.get_parser()
93
119
 
94
120
  try:
95
- record = next(iter(parser.parse_records(stream.config, file, self.stream_reader, logger, discovered_schema=None)))
121
+ record = next(
122
+ iter(
123
+ parser.parse_records(
124
+ stream.config, file, self.stream_reader, logger, discovered_schema=None
125
+ )
126
+ )
127
+ )
96
128
  except StopIteration:
97
129
  # The file is empty. We've verified that we can open it, so will
98
130
  # consider the connection check successful even though it means
99
131
  # we skip the schema validation check.
100
132
  return
133
+ except AirbyteTracedException as ate:
134
+ raise ate
101
135
  except Exception as exc:
102
- raise CheckAvailabilityError(FileBasedSourceError.ERROR_READING_FILE, stream=stream.name, file=file.uri) from exc
136
+ raise CheckAvailabilityError(
137
+ FileBasedSourceError.ERROR_READING_FILE, stream=stream.name, file=file.uri
138
+ ) from exc
103
139
 
104
140
  schema = stream.catalog_schema or stream.config.input_schema
105
141
  if schema and stream.validation_policy.validate_schema_before_sync:
@@ -4,12 +4,43 @@
4
4
 
5
5
  import copy
6
6
  from abc import abstractmethod
7
- from typing import Any, Dict, List, Optional
7
+ from typing import Any, Dict, List, Literal, Optional, Union
8
8
 
9
- import dpath.util
9
+ import dpath
10
+ from pydantic.v1 import AnyUrl, BaseModel, Field
11
+
12
+ from airbyte_cdk import OneOfOptionConfig
10
13
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
11
14
  from airbyte_cdk.sources.utils import schema_helpers
12
- from pydantic import AnyUrl, BaseModel, Field
15
+
16
+
17
+ class DeliverRecords(BaseModel):
18
+ class Config(OneOfOptionConfig):
19
+ title = "Replicate Records"
20
+ description = "Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination."
21
+ discriminator = "delivery_type"
22
+
23
+ delivery_type: Literal["use_records_transfer"] = Field("use_records_transfer", const=True)
24
+
25
+
26
+ class DeliverRawFiles(BaseModel):
27
+ class Config(OneOfOptionConfig):
28
+ title = "Copy Raw Files"
29
+ description = "Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files."
30
+ discriminator = "delivery_type"
31
+
32
+ delivery_type: Literal["use_file_transfer"] = Field("use_file_transfer", const=True)
33
+
34
+ preserve_directory_structure: bool = Field(
35
+ title="Preserve Sub-Directories in File Paths",
36
+ description=(
37
+ "If enabled, sends subdirectory folder structure "
38
+ "along with source file names to the destination. "
39
+ "Otherwise, files will be synced by their names only. "
40
+ "This option is ignored when file-based replication is not enabled."
41
+ ),
42
+ default=True,
43
+ )
13
44
 
14
45
 
15
46
  class AbstractFileBasedSpec(BaseModel):
@@ -34,6 +65,17 @@ class AbstractFileBasedSpec(BaseModel):
34
65
  order=10,
35
66
  )
36
67
 
68
+ delivery_method: Union[DeliverRecords, DeliverRawFiles] = Field(
69
+ title="Delivery Method",
70
+ discriminator="delivery_type",
71
+ type="object",
72
+ order=7,
73
+ display_type="radio",
74
+ group="advanced",
75
+ default="use_records_transfer",
76
+ airbyte_hidden=True,
77
+ )
78
+
37
79
  @classmethod
38
80
  @abstractmethod
39
81
  def documentation_url(cls) -> AnyUrl:
@@ -57,7 +99,7 @@ class AbstractFileBasedSpec(BaseModel):
57
99
  @staticmethod
58
100
  def remove_discriminator(schema: Dict[str, Any]) -> None:
59
101
  """pydantic adds "discriminator" to the schema for oneOfs, which is not treated right by the platform as we inline all references"""
60
- dpath.util.delete(schema, "properties/**/discriminator")
102
+ dpath.delete(schema, "properties/**/discriminator")
61
103
 
62
104
  @staticmethod
63
105
  def replace_enum_allOf_and_anyOf(schema: Dict[str, Any]) -> Dict[str, Any]:
@@ -77,10 +119,16 @@ class AbstractFileBasedSpec(BaseModel):
77
119
 
78
120
  properties_to_change = ["validation_policy"]
79
121
  for property_to_change in properties_to_change:
80
- property_object = schema["properties"]["streams"]["items"]["properties"][property_to_change]
122
+ property_object = schema["properties"]["streams"]["items"]["properties"][
123
+ property_to_change
124
+ ]
81
125
  if "anyOf" in property_object:
82
- schema["properties"]["streams"]["items"]["properties"][property_to_change]["type"] = "object"
83
- schema["properties"]["streams"]["items"]["properties"][property_to_change]["oneOf"] = property_object.pop("anyOf")
126
+ schema["properties"]["streams"]["items"]["properties"][property_to_change][
127
+ "type"
128
+ ] = "object"
129
+ schema["properties"]["streams"]["items"]["properties"][property_to_change][
130
+ "oneOf"
131
+ ] = property_object.pop("anyOf")
84
132
  AbstractFileBasedSpec.move_enum_to_root(property_object)
85
133
 
86
134
  csv_format_schemas = list(
@@ -91,9 +139,9 @@ class AbstractFileBasedSpec(BaseModel):
91
139
  )
92
140
  if len(csv_format_schemas) != 1:
93
141
  raise ValueError(f"Expecting only one CSV format but got {csv_format_schemas}")
94
- csv_format_schemas[0]["properties"]["header_definition"]["oneOf"] = csv_format_schemas[0]["properties"]["header_definition"].pop(
95
- "anyOf", []
96
- )
142
+ csv_format_schemas[0]["properties"]["header_definition"]["oneOf"] = csv_format_schemas[0][
143
+ "properties"
144
+ ]["header_definition"].pop("anyOf", [])
97
145
  csv_format_schemas[0]["properties"]["header_definition"]["type"] = "object"
98
146
  return schema
99
147
 
@@ -3,8 +3,9 @@
3
3
  #
4
4
 
5
5
 
6
+ from pydantic.v1 import BaseModel, Field
7
+
6
8
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
7
- from pydantic import BaseModel, Field
8
9
 
9
10
 
10
11
  class AvroFormat(BaseModel):
@@ -6,8 +6,10 @@ import codecs
6
6
  from enum import Enum
7
7
  from typing import Any, Dict, List, Optional, Set, Union
8
8
 
9
+ from pydantic.v1 import BaseModel, Field, root_validator, validator
10
+ from pydantic.v1.error_wrappers import ValidationError
11
+
9
12
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
10
- from pydantic import BaseModel, Field, ValidationError, root_validator, validator
11
13
 
12
14
 
13
15
  class InferenceType(Enum):
@@ -69,7 +71,9 @@ class CsvHeaderUserProvided(BaseModel):
69
71
  @validator("column_names")
70
72
  def validate_column_names(cls, v: List[str]) -> List[str]:
71
73
  if not v:
72
- raise ValueError("At least one column name needs to be provided when using user provided headers")
74
+ raise ValueError(
75
+ "At least one column name needs to be provided when using user provided headers"
76
+ )
73
77
  return v
74
78
 
75
79
 
@@ -106,7 +110,9 @@ class CsvFormat(BaseModel):
106
110
  description='The character encoding of the CSV data. Leave blank to default to <strong>UTF8</strong>. See <a href="https://docs.python.org/3/library/codecs.html#standard-encodings" target="_blank">list of python encodings</a> for allowable options.',
107
111
  )
108
112
  double_quote: bool = Field(
109
- title="Double Quote", default=True, description="Whether two quotes in a quoted CSV value denote a single quote in the data."
113
+ title="Double Quote",
114
+ default=True,
115
+ description="Whether two quotes in a quoted CSV value denote a single quote in the data.",
110
116
  )
111
117
  null_values: Set[str] = Field(
112
118
  title="Null Values",
@@ -124,12 +130,16 @@ class CsvFormat(BaseModel):
124
130
  description="The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field.",
125
131
  )
126
132
  skip_rows_after_header: int = Field(
127
- title="Skip Rows After Header", default=0, description="The number of rows to skip after the header row."
133
+ title="Skip Rows After Header",
134
+ default=0,
135
+ description="The number of rows to skip after the header row.",
128
136
  )
129
- header_definition: Union[CsvHeaderFromCsv, CsvHeaderAutogenerated, CsvHeaderUserProvided] = Field(
130
- title="CSV Header Definition",
131
- default=CsvHeaderFromCsv(header_definition_type=CsvHeaderDefinitionType.FROM_CSV.value),
132
- description="How headers will be defined. `User Provided` assumes the CSV does not have a header row and uses the headers provided and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers using for `f{i}` where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows.",
137
+ header_definition: Union[CsvHeaderFromCsv, CsvHeaderAutogenerated, CsvHeaderUserProvided] = (
138
+ Field(
139
+ title="CSV Header Definition",
140
+ default=CsvHeaderFromCsv(header_definition_type=CsvHeaderDefinitionType.FROM_CSV.value),
141
+ description="How headers will be defined. `User Provided` assumes the CSV does not have a header row and uses the headers provided and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers using for `f{i}` where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows.",
142
+ )
133
143
  )
134
144
  true_values: Set[str] = Field(
135
145
  title="True Values",
@@ -147,6 +157,11 @@ class CsvFormat(BaseModel):
147
157
  description="How to infer the types of the columns. If none, inference default to strings.",
148
158
  airbyte_hidden=True,
149
159
  )
160
+ ignore_errors_on_fields_mismatch: bool = Field(
161
+ title="Ignore errors on field mismatch",
162
+ default=False,
163
+ description="Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema.",
164
+ )
150
165
 
151
166
  @validator("delimiter")
152
167
  def validate_delimiter(cls, v: str) -> str:
@@ -183,9 +198,13 @@ class CsvFormat(BaseModel):
183
198
  definition_type = values.get("header_definition_type")
184
199
  column_names = values.get("user_provided_column_names")
185
200
  if definition_type == CsvHeaderDefinitionType.USER_PROVIDED and not column_names:
186
- raise ValidationError("`user_provided_column_names` should be defined if the definition 'User Provided'.", model=CsvFormat)
201
+ raise ValidationError(
202
+ "`user_provided_column_names` should be defined if the definition 'User Provided'.",
203
+ model=CsvFormat,
204
+ )
187
205
  if definition_type != CsvHeaderDefinitionType.USER_PROVIDED and column_names:
188
206
  raise ValidationError(
189
- "`user_provided_column_names` should not be defined if the definition is not 'User Provided'.", model=CsvFormat
207
+ "`user_provided_column_names` should not be defined if the definition is not 'User Provided'.",
208
+ model=CsvFormat,
190
209
  )
191
210
  return values
@@ -0,0 +1,18 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from pydantic.v1 import BaseModel, Field
6
+
7
+ from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
8
+
9
+
10
+ class ExcelFormat(BaseModel):
11
+ class Config(OneOfOptionConfig):
12
+ title = "Excel Format"
13
+ discriminator = "filetype"
14
+
15
+ filetype: str = Field(
16
+ "excel",
17
+ const=True,
18
+ )
@@ -1,18 +1,20 @@
1
1
  #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
5
  from enum import Enum
6
6
  from typing import Any, List, Mapping, Optional, Union
7
7
 
8
+ from pydantic.v1 import BaseModel, Field, validator
9
+
8
10
  from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
9
11
  from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
12
+ from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat
10
13
  from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
11
14
  from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
12
15
  from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat
13
16
  from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError
14
17
  from airbyte_cdk.sources.file_based.schema_helpers import type_mapping_to_jsonschema
15
- from pydantic import BaseModel, Field, validator
16
18
 
17
19
  PrimaryKeyType = Optional[Union[str, List[str]]]
18
20
 
@@ -55,7 +57,9 @@ class FileBasedStreamConfig(BaseModel):
55
57
  description="When the state history of the file store is full, syncs will only read files that were last modified in the provided day range.",
56
58
  default=3,
57
59
  )
58
- format: Union[AvroFormat, CsvFormat, JsonlFormat, ParquetFormat, UnstructuredFormat] = Field(
60
+ format: Union[
61
+ AvroFormat, CsvFormat, JsonlFormat, ParquetFormat, UnstructuredFormat, ExcelFormat
62
+ ] = Field(
59
63
  title="Format",
60
64
  description="The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.",
61
65
  )
@@ -64,6 +68,12 @@ class FileBasedStreamConfig(BaseModel):
64
68
  description="When enabled, syncs will not validate or structure records against the stream's schema.",
65
69
  default=False,
66
70
  )
71
+ recent_n_files_to_read_for_schema_discovery: Optional[int] = Field(
72
+ title="Files To Read For Schema Discover",
73
+ description="The number of resent files which will be used to discover the schema for this stream.",
74
+ default=None,
75
+ gt=0,
76
+ )
67
77
 
68
78
  @validator("input_schema", pre=True)
69
79
  def validate_input_schema(cls, v: Optional[str]) -> Optional[str]:
@@ -82,6 +92,8 @@ class FileBasedStreamConfig(BaseModel):
82
92
  if self.input_schema:
83
93
  schema = type_mapping_to_jsonschema(self.input_schema)
84
94
  if not schema:
85
- raise ValueError(f"Unable to create JSON schema from input schema {self.input_schema}")
95
+ raise ValueError(
96
+ f"Unable to create JSON schema from input schema {self.input_schema}"
97
+ )
86
98
  return schema
87
99
  return None
@@ -2,8 +2,9 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ from pydantic.v1 import BaseModel, Field
6
+
5
7
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
6
- from pydantic import BaseModel, Field
7
8
 
8
9
 
9
10
  class JsonlFormat(BaseModel):
@@ -3,8 +3,9 @@
3
3
  #
4
4
 
5
5
 
6
+ from pydantic.v1 import BaseModel, Field
7
+
6
8
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
7
- from pydantic import BaseModel, Field
8
9
 
9
10
 
10
11
  class ParquetFormat(BaseModel):
@@ -4,8 +4,9 @@
4
4
 
5
5
  from typing import List, Literal, Optional, Union
6
6
 
7
+ from pydantic.v1 import BaseModel, Field
8
+
7
9
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
8
- from pydantic import BaseModel, Field
9
10
 
10
11
 
11
12
  class LocalProcessingConfigModel(BaseModel):
@@ -13,7 +14,9 @@ class LocalProcessingConfigModel(BaseModel):
13
14
 
14
15
  class Config(OneOfOptionConfig):
15
16
  title = "Local"
16
- description = "Process files locally, supporting `fast` and `ocr` modes. This is the default option."
17
+ description = (
18
+ "Process files locally, supporting `fast` and `ocr` modes. This is the default option."
19
+ )
17
20
  discriminator = "mode"
18
21
 
19
22
 
@@ -23,7 +26,9 @@ class APIParameterConfigModel(BaseModel):
23
26
  description="The name of the unstructured API parameter to use",
24
27
  examples=["combine_under_n_chars", "languages"],
25
28
  )
26
- value: str = Field(title="Value", description="The value of the parameter", examples=["true", "hi_res"])
29
+ value: str = Field(
30
+ title="Value", description="The value of the parameter", examples=["true", "hi_res"]
31
+ )
27
32
 
28
33
 
29
34
  class APIProcessingConfigModel(BaseModel):
@@ -60,7 +65,7 @@ class APIProcessingConfigModel(BaseModel):
60
65
 
61
66
  class UnstructuredFormat(BaseModel):
62
67
  class Config(OneOfOptionConfig):
63
- title = "Document File Type Format (Experimental)"
68
+ title = "Unstructured Document Format"
64
69
  description = "Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file."
65
70
  discriminator = "filetype"
66
71
 
@@ -85,7 +90,10 @@ class UnstructuredFormat(BaseModel):
85
90
  description="The strategy used to parse documents. `fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf",
86
91
  )
87
92
 
88
- processing: Union[LocalProcessingConfigModel, APIProcessingConfigModel,] = Field(
93
+ processing: Union[
94
+ LocalProcessingConfigModel,
95
+ APIProcessingConfigModel,
96
+ ] = Field(
89
97
  default=LocalProcessingConfigModel(mode="local"),
90
98
  title="Processing",
91
99
  description="Processing configuration",
@@ -1,4 +1,8 @@
1
- from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import AbstractDiscoveryPolicy
2
- from airbyte_cdk.sources.file_based.discovery_policy.default_discovery_policy import DefaultDiscoveryPolicy
1
+ from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import (
2
+ AbstractDiscoveryPolicy,
3
+ )
4
+ from airbyte_cdk.sources.file_based.discovery_policy.default_discovery_policy import (
5
+ DefaultDiscoveryPolicy,
6
+ )
3
7
 
4
8
  __all__ = ["AbstractDiscoveryPolicy", "DefaultDiscoveryPolicy"]
@@ -15,9 +15,7 @@ class AbstractDiscoveryPolicy(ABC):
15
15
 
16
16
  @property
17
17
  @abstractmethod
18
- def n_concurrent_requests(self) -> int:
19
- ...
18
+ def n_concurrent_requests(self) -> int: ...
20
19
 
21
20
  @abstractmethod
22
- def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int:
23
- ...
21
+ def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int: ...
@@ -2,7 +2,9 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import AbstractDiscoveryPolicy
5
+ from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import (
6
+ AbstractDiscoveryPolicy,
7
+ )
6
8
  from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
7
9
 
8
10
  DEFAULT_N_CONCURRENT_REQUESTS = 10
@@ -23,6 +25,9 @@ class DefaultDiscoveryPolicy(AbstractDiscoveryPolicy):
23
25
  return min(
24
26
  filter(
25
27
  None,
26
- (DEFAULT_MAX_N_FILES_FOR_STREAM_SCHEMA_INFERENCE, parser.parser_max_n_files_for_schema_inference),
28
+ (
29
+ DEFAULT_MAX_N_FILES_FOR_STREAM_SCHEMA_INFERENCE,
30
+ parser.parser_max_n_files_for_schema_inference,
31
+ ),
27
32
  )
28
33
  )