airbyte-cdk 0.72.1__py3-none-any.whl → 6.17.1.dev0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (518) hide show
  1. airbyte_cdk/__init__.py +355 -6
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +29 -10
  7. airbyte_cdk/connector.py +24 -24
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
  10. airbyte_cdk/connector_builder/main.py +45 -13
  11. airbyte_cdk/connector_builder/message_grouper.py +189 -50
  12. airbyte_cdk/connector_builder/models.py +3 -2
  13. airbyte_cdk/destinations/__init__.py +4 -3
  14. airbyte_cdk/destinations/destination.py +54 -20
  15. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  16. airbyte_cdk/destinations/vector_db_based/config.py +40 -17
  17. airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
  18. airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
  19. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  20. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  21. airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
  22. airbyte_cdk/entrypoint.py +153 -44
  23. airbyte_cdk/exception_handler.py +21 -3
  24. airbyte_cdk/logger.py +30 -44
  25. airbyte_cdk/models/__init__.py +13 -2
  26. airbyte_cdk/models/airbyte_protocol.py +86 -1
  27. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  28. airbyte_cdk/models/file_transfer_record_message.py +13 -0
  29. airbyte_cdk/models/well_known_types.py +1 -1
  30. airbyte_cdk/sources/__init__.py +5 -1
  31. airbyte_cdk/sources/abstract_source.py +125 -79
  32. airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
  33. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
  34. airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
  35. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
  36. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  37. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
  38. airbyte_cdk/sources/config.py +3 -2
  39. airbyte_cdk/sources/connector_state_manager.py +49 -83
  40. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  41. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
  42. airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
  43. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  44. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  45. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  46. airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
  47. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  48. airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
  49. airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
  50. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
  51. airbyte_cdk/sources/declarative/auth/token.py +28 -10
  52. airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
  53. airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
  54. airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
  55. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  56. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  57. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +490 -0
  58. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
  59. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
  60. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1185 -85
  61. airbyte_cdk/sources/declarative/declarative_source.py +5 -2
  62. airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
  63. airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
  64. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
  65. airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
  66. airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
  67. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  68. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  69. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  70. airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
  71. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
  72. airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
  73. airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
  74. airbyte_cdk/sources/declarative/extractors/record_filter.py +63 -8
  75. airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
  76. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
  77. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  78. airbyte_cdk/sources/declarative/incremental/__init__.py +31 -3
  79. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +346 -0
  80. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
  81. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  82. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
  83. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +173 -74
  84. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  85. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  86. airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
  87. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
  88. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
  89. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
  90. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
  91. airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
  92. airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
  93. airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
  94. airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
  95. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  96. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  97. airbyte_cdk/sources/declarative/models/__init__.py +1 -1
  98. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1319 -603
  99. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
  100. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
  101. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
  102. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1759 -225
  103. airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
  104. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  105. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  106. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
  107. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  108. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
  109. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
  110. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
  111. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
  112. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
  113. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
  114. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
  115. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
  116. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
  117. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
  118. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
  119. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
  120. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  121. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
  122. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
  123. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
  124. airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
  125. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
  126. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
  127. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
  128. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
  129. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
  130. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
  131. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
  132. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
  133. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
  134. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
  135. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
  136. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
  137. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  138. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
  139. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
  140. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
  141. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
  142. airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
  143. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  144. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  145. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  146. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  147. airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
  148. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
  149. airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
  150. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +229 -73
  151. airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
  152. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
  153. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
  154. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
  155. airbyte_cdk/sources/declarative/spec/spec.py +12 -5
  156. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
  157. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
  158. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
  159. airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
  160. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  161. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  162. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  163. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  164. airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
  165. airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
  166. airbyte_cdk/sources/declarative/types.py +19 -110
  167. airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
  168. airbyte_cdk/sources/embedded/base_integration.py +16 -5
  169. airbyte_cdk/sources/embedded/catalog.py +16 -4
  170. airbyte_cdk/sources/embedded/runner.py +19 -3
  171. airbyte_cdk/sources/embedded/tools.py +5 -2
  172. airbyte_cdk/sources/file_based/README.md +152 -0
  173. airbyte_cdk/sources/file_based/__init__.py +24 -0
  174. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
  175. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
  176. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
  177. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +47 -10
  178. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  179. airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
  180. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  181. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
  182. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  183. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  184. airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
  185. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
  186. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  187. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  188. airbyte_cdk/sources/file_based/exceptions.py +18 -15
  189. airbyte_cdk/sources/file_based/file_based_source.py +140 -33
  190. airbyte_cdk/sources/file_based/file_based_stream_reader.py +69 -5
  191. airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
  192. airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
  193. airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
  194. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  195. airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
  196. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  197. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
  198. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
  199. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +141 -41
  200. airbyte_cdk/sources/file_based/remote_file.py +1 -1
  201. airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
  202. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
  203. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  204. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  205. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
  206. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
  207. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
  208. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
  209. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
  210. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
  211. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  212. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
  213. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +147 -45
  214. airbyte_cdk/sources/http_logger.py +8 -3
  215. airbyte_cdk/sources/message/__init__.py +7 -1
  216. airbyte_cdk/sources/message/repository.py +18 -4
  217. airbyte_cdk/sources/source.py +42 -38
  218. airbyte_cdk/sources/streams/__init__.py +2 -2
  219. airbyte_cdk/sources/streams/availability_strategy.py +54 -3
  220. airbyte_cdk/sources/streams/call_rate.py +64 -21
  221. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  222. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  223. airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
  224. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  225. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  226. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  227. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  228. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
  229. airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
  230. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
  231. airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
  232. airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
  233. airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
  234. airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
  235. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
  236. airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
  237. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
  238. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  239. airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
  240. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
  241. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
  242. airbyte_cdk/sources/streams/core.py +412 -87
  243. airbyte_cdk/sources/streams/http/__init__.py +2 -1
  244. airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
  245. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  246. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  247. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  248. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  249. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  250. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  251. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  252. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  253. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  254. airbyte_cdk/sources/streams/http/exceptions.py +27 -7
  255. airbyte_cdk/sources/streams/http/http.py +369 -246
  256. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  257. airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
  258. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
  259. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
  260. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
  261. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  262. airbyte_cdk/sources/types.py +154 -0
  263. airbyte_cdk/sources/utils/record_helper.py +36 -21
  264. airbyte_cdk/sources/utils/schema_helpers.py +13 -6
  265. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  266. airbyte_cdk/sources/utils/transform.py +54 -20
  267. airbyte_cdk/sql/_util/hashing.py +34 -0
  268. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  269. airbyte_cdk/sql/constants.py +32 -0
  270. airbyte_cdk/sql/exceptions.py +235 -0
  271. airbyte_cdk/sql/secrets.py +123 -0
  272. airbyte_cdk/sql/shared/__init__.py +15 -0
  273. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  274. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  275. airbyte_cdk/sql/types.py +160 -0
  276. airbyte_cdk/test/catalog_builder.py +70 -18
  277. airbyte_cdk/test/entrypoint_wrapper.py +117 -42
  278. airbyte_cdk/test/mock_http/__init__.py +1 -1
  279. airbyte_cdk/test/mock_http/matcher.py +6 -0
  280. airbyte_cdk/test/mock_http/mocker.py +57 -10
  281. airbyte_cdk/test/mock_http/request.py +19 -3
  282. airbyte_cdk/test/mock_http/response.py +3 -1
  283. airbyte_cdk/test/mock_http/response_builder.py +32 -16
  284. airbyte_cdk/test/state_builder.py +18 -10
  285. airbyte_cdk/test/utils/__init__.py +1 -0
  286. airbyte_cdk/test/utils/data.py +24 -0
  287. airbyte_cdk/test/utils/http_mocking.py +16 -0
  288. airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
  289. airbyte_cdk/test/utils/reading.py +26 -0
  290. airbyte_cdk/utils/__init__.py +2 -1
  291. airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
  292. airbyte_cdk/utils/analytics_message.py +10 -2
  293. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  294. airbyte_cdk/utils/event_timing.py +10 -10
  295. airbyte_cdk/utils/mapping_helpers.py +3 -1
  296. airbyte_cdk/utils/message_utils.py +20 -11
  297. airbyte_cdk/utils/print_buffer.py +75 -0
  298. airbyte_cdk/utils/schema_inferrer.py +198 -28
  299. airbyte_cdk/utils/slice_hasher.py +30 -0
  300. airbyte_cdk/utils/spec_schema_transformations.py +6 -3
  301. airbyte_cdk/utils/stream_status_utils.py +8 -1
  302. airbyte_cdk/utils/traced_exception.py +61 -21
  303. airbyte_cdk-6.17.1.dev0.dist-info/METADATA +109 -0
  304. airbyte_cdk-6.17.1.dev0.dist-info/RECORD +350 -0
  305. {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/WHEEL +1 -2
  306. airbyte_cdk-6.17.1.dev0.dist-info/entry_points.txt +3 -0
  307. airbyte_cdk/sources/declarative/create_partial.py +0 -92
  308. airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
  309. airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
  310. airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
  311. airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
  312. airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
  313. airbyte_cdk/sources/deprecated/base_source.py +0 -94
  314. airbyte_cdk/sources/deprecated/client.py +0 -99
  315. airbyte_cdk/sources/singer/__init__.py +0 -8
  316. airbyte_cdk/sources/singer/singer_helpers.py +0 -304
  317. airbyte_cdk/sources/singer/source.py +0 -186
  318. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
  319. airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
  320. airbyte_cdk/sources/streams/http/auth/core.py +0 -29
  321. airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
  322. airbyte_cdk/sources/streams/http/auth/token.py +0 -47
  323. airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
  324. airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
  325. airbyte_cdk/sources/utils/schema_models.py +0 -84
  326. airbyte_cdk-0.72.1.dist-info/METADATA +0 -243
  327. airbyte_cdk-0.72.1.dist-info/RECORD +0 -466
  328. airbyte_cdk-0.72.1.dist-info/top_level.txt +0 -3
  329. source_declarative_manifest/main.py +0 -29
  330. unit_tests/connector_builder/__init__.py +0 -3
  331. unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
  332. unit_tests/connector_builder/test_message_grouper.py +0 -713
  333. unit_tests/connector_builder/utils.py +0 -27
  334. unit_tests/destinations/test_destination.py +0 -243
  335. unit_tests/singer/test_singer_helpers.py +0 -56
  336. unit_tests/singer/test_singer_source.py +0 -112
  337. unit_tests/sources/__init__.py +0 -0
  338. unit_tests/sources/concurrent_source/__init__.py +0 -3
  339. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
  340. unit_tests/sources/declarative/__init__.py +0 -3
  341. unit_tests/sources/declarative/auth/__init__.py +0 -3
  342. unit_tests/sources/declarative/auth/test_oauth.py +0 -331
  343. unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
  344. unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
  345. unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
  346. unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
  347. unit_tests/sources/declarative/checks/__init__.py +0 -3
  348. unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
  349. unit_tests/sources/declarative/decoders/__init__.py +0 -0
  350. unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
  351. unit_tests/sources/declarative/external_component.py +0 -13
  352. unit_tests/sources/declarative/extractors/__init__.py +0 -3
  353. unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
  354. unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
  355. unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
  356. unit_tests/sources/declarative/incremental/__init__.py +0 -0
  357. unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
  358. unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
  359. unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
  360. unit_tests/sources/declarative/interpolation/__init__.py +0 -3
  361. unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
  362. unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
  363. unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
  364. unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
  365. unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
  366. unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
  367. unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
  368. unit_tests/sources/declarative/parsers/__init__.py +0 -3
  369. unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
  370. unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
  371. unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1847
  372. unit_tests/sources/declarative/parsers/testing_components.py +0 -36
  373. unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
  374. unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
  375. unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
  376. unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
  377. unit_tests/sources/declarative/requesters/__init__.py +0 -3
  378. unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
  379. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
  380. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
  381. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
  382. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
  383. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
  384. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
  385. unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
  386. unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
  387. unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
  388. unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
  389. unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
  390. unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
  391. unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
  392. unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
  393. unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
  394. unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
  395. unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
  396. unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
  397. unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
  398. unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
  399. unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
  400. unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
  401. unit_tests/sources/declarative/retrievers/__init__.py +0 -3
  402. unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
  403. unit_tests/sources/declarative/schema/__init__.py +0 -6
  404. unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
  405. unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
  406. unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
  407. unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
  408. unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
  409. unit_tests/sources/declarative/states/__init__.py +0 -3
  410. unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
  411. unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
  412. unit_tests/sources/declarative/test_create_partial.py +0 -83
  413. unit_tests/sources/declarative/test_declarative_stream.py +0 -103
  414. unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
  415. unit_tests/sources/declarative/test_types.py +0 -39
  416. unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
  417. unit_tests/sources/file_based/__init__.py +0 -0
  418. unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
  419. unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
  420. unit_tests/sources/file_based/config/__init__.py +0 -0
  421. unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
  422. unit_tests/sources/file_based/config/test_csv_format.py +0 -34
  423. unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
  424. unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
  425. unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
  426. unit_tests/sources/file_based/file_types/__init__.py +0 -0
  427. unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
  428. unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
  429. unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
  430. unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
  431. unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
  432. unit_tests/sources/file_based/helpers.py +0 -70
  433. unit_tests/sources/file_based/in_memory_files_source.py +0 -211
  434. unit_tests/sources/file_based/scenarios/__init__.py +0 -0
  435. unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
  436. unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
  437. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
  438. unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
  439. unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
  440. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
  441. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
  442. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
  443. unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
  444. unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
  445. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
  446. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
  447. unit_tests/sources/file_based/stream/__init__.py +0 -0
  448. unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
  449. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
  450. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
  451. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
  452. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
  453. unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
  454. unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
  455. unit_tests/sources/file_based/test_scenarios.py +0 -253
  456. unit_tests/sources/file_based/test_schema_helpers.py +0 -346
  457. unit_tests/sources/fixtures/__init__.py +0 -3
  458. unit_tests/sources/fixtures/source_test_fixture.py +0 -153
  459. unit_tests/sources/message/__init__.py +0 -0
  460. unit_tests/sources/message/test_repository.py +0 -153
  461. unit_tests/sources/streams/__init__.py +0 -0
  462. unit_tests/sources/streams/concurrent/__init__.py +0 -3
  463. unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
  464. unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
  465. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
  466. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
  467. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
  468. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
  469. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
  470. unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
  471. unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
  472. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
  473. unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
  474. unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
  475. unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
  476. unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
  477. unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
  478. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
  479. unit_tests/sources/streams/http/__init__.py +0 -0
  480. unit_tests/sources/streams/http/auth/__init__.py +0 -0
  481. unit_tests/sources/streams/http/auth/test_auth.py +0 -173
  482. unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
  483. unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
  484. unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
  485. unit_tests/sources/streams/http/test_http.py +0 -635
  486. unit_tests/sources/streams/test_availability_strategy.py +0 -70
  487. unit_tests/sources/streams/test_call_rate.py +0 -300
  488. unit_tests/sources/streams/test_stream_read.py +0 -405
  489. unit_tests/sources/streams/test_streams_core.py +0 -184
  490. unit_tests/sources/test_abstract_source.py +0 -1442
  491. unit_tests/sources/test_concurrent_source.py +0 -112
  492. unit_tests/sources/test_config.py +0 -92
  493. unit_tests/sources/test_connector_state_manager.py +0 -482
  494. unit_tests/sources/test_http_logger.py +0 -252
  495. unit_tests/sources/test_integration_source.py +0 -86
  496. unit_tests/sources/test_source.py +0 -684
  497. unit_tests/sources/test_source_read.py +0 -460
  498. unit_tests/test/__init__.py +0 -0
  499. unit_tests/test/mock_http/__init__.py +0 -0
  500. unit_tests/test/mock_http/test_matcher.py +0 -53
  501. unit_tests/test/mock_http/test_mocker.py +0 -214
  502. unit_tests/test/mock_http/test_request.py +0 -117
  503. unit_tests/test/mock_http/test_response_builder.py +0 -177
  504. unit_tests/test/test_entrypoint_wrapper.py +0 -240
  505. unit_tests/utils/__init__.py +0 -0
  506. unit_tests/utils/test_datetime_format_inferrer.py +0 -60
  507. unit_tests/utils/test_mapping_helpers.py +0 -54
  508. unit_tests/utils/test_message_utils.py +0 -91
  509. unit_tests/utils/test_rate_limiting.py +0 -26
  510. unit_tests/utils/test_schema_inferrer.py +0 -202
  511. unit_tests/utils/test_secret_utils.py +0 -135
  512. unit_tests/utils/test_stream_status_utils.py +0 -61
  513. unit_tests/utils/test_traced_exception.py +0 -107
  514. /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
  515. {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
  516. {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
  517. {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
  518. {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/LICENSE.txt +0 -0
@@ -0,0 +1,24 @@
1
+ from .config.abstract_file_based_spec import AbstractFileBasedSpec
2
+ from .config.csv_format import CsvFormat
3
+ from .config.file_based_stream_config import FileBasedStreamConfig
4
+ from .config.jsonl_format import JsonlFormat
5
+ from .exceptions import CustomFileBasedException, ErrorListingFiles, FileBasedSourceError
6
+ from .file_based_source import DEFAULT_CONCURRENCY, FileBasedSource
7
+ from .file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
8
+ from .remote_file import RemoteFile
9
+ from .stream.cursor import DefaultFileBasedCursor
10
+
11
+ __all__ = [
12
+ "AbstractFileBasedSpec",
13
+ "AbstractFileBasedStreamReader",
14
+ "CsvFormat",
15
+ "CustomFileBasedException",
16
+ "DefaultFileBasedCursor",
17
+ "ErrorListingFiles",
18
+ "FileBasedSource",
19
+ "FileBasedSourceError",
20
+ "FileBasedStreamConfig",
21
+ "FileReadMode",
22
+ "JsonlFormat",
23
+ "RemoteFile",
24
+ ]
@@ -1,4 +1,11 @@
1
- from .abstract_file_based_availability_strategy import AbstractFileBasedAvailabilityStrategy, AbstractFileBasedAvailabilityStrategyWrapper
1
+ from .abstract_file_based_availability_strategy import (
2
+ AbstractFileBasedAvailabilityStrategy,
3
+ AbstractFileBasedAvailabilityStrategyWrapper,
4
+ )
2
5
  from .default_file_based_availability_strategy import DefaultFileBasedAvailabilityStrategy
3
6
 
4
- __all__ = ["AbstractFileBasedAvailabilityStrategy", "AbstractFileBasedAvailabilityStrategyWrapper", "DefaultFileBasedAvailabilityStrategy"]
7
+ __all__ = [
8
+ "AbstractFileBasedAvailabilityStrategy",
9
+ "AbstractFileBasedAvailabilityStrategyWrapper",
10
+ "DefaultFileBasedAvailabilityStrategy",
11
+ ]
@@ -2,6 +2,8 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ from __future__ import annotations
6
+
5
7
  import logging
6
8
  from abc import abstractmethod
7
9
  from typing import TYPE_CHECKING, Optional, Tuple
@@ -22,7 +24,12 @@ if TYPE_CHECKING:
22
24
 
23
25
  class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
24
26
  @abstractmethod
25
- def check_availability(self, stream: Stream, logger: logging.Logger, _: Optional[Source]) -> Tuple[bool, Optional[str]]:
27
+ def check_availability( # type: ignore[override] # Signature doesn't match base class
28
+ self,
29
+ stream: Stream,
30
+ logger: logging.Logger,
31
+ _: Optional[Source],
32
+ ) -> Tuple[bool, Optional[str]]:
26
33
  """
27
34
  Perform a connection check for the stream.
28
35
 
@@ -32,7 +39,10 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
32
39
 
33
40
  @abstractmethod
34
41
  def check_availability_and_parsability(
35
- self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]
42
+ self,
43
+ stream: AbstractFileBasedStream,
44
+ logger: logging.Logger,
45
+ _: Optional[Source],
36
46
  ) -> Tuple[bool, Optional[str]]:
37
47
  """
38
48
  Performs a connection check for the stream, as well as additional checks that
@@ -44,14 +54,20 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
44
54
 
45
55
 
46
56
  class AbstractFileBasedAvailabilityStrategyWrapper(AbstractAvailabilityStrategy):
47
- def __init__(self, stream: "AbstractFileBasedStream"):
57
+ def __init__(self, stream: AbstractFileBasedStream) -> None:
48
58
  self.stream = stream
49
59
 
50
60
  def check_availability(self, logger: logging.Logger) -> StreamAvailability:
51
- is_available, reason = self.stream.availability_strategy.check_availability(self.stream, logger, None)
61
+ is_available, reason = self.stream.availability_strategy.check_availability(
62
+ self.stream, logger, None
63
+ )
52
64
  if is_available:
53
65
  return StreamAvailable()
54
66
  return StreamUnavailable(reason or "")
55
67
 
56
- def check_availability_and_parsability(self, logger: logging.Logger) -> Tuple[bool, Optional[str]]:
57
- return self.stream.availability_strategy.check_availability_and_parsability(self.stream, logger, None)
68
+ def check_availability_and_parsability(
69
+ self, logger: logging.Logger
70
+ ) -> Tuple[bool, Optional[str]]:
71
+ return self.stream.availability_strategy.check_availability_and_parsability(
72
+ self.stream, logger, None
73
+ )
@@ -2,13 +2,22 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ from __future__ import annotations
6
+
5
7
  import logging
6
8
  import traceback
7
9
  from typing import TYPE_CHECKING, Optional, Tuple
8
10
 
11
+ from airbyte_cdk import AirbyteTracedException
9
12
  from airbyte_cdk.sources import Source
10
- from airbyte_cdk.sources.file_based.availability_strategy import AbstractFileBasedAvailabilityStrategy
11
- from airbyte_cdk.sources.file_based.exceptions import CheckAvailabilityError, CustomFileBasedException, FileBasedSourceError
13
+ from airbyte_cdk.sources.file_based.availability_strategy import (
14
+ AbstractFileBasedAvailabilityStrategy,
15
+ )
16
+ from airbyte_cdk.sources.file_based.exceptions import (
17
+ CheckAvailabilityError,
18
+ CustomFileBasedException,
19
+ FileBasedSourceError,
20
+ )
12
21
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
13
22
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
14
23
  from airbyte_cdk.sources.file_based.schema_helpers import conforms_to_schema
@@ -18,10 +27,15 @@ if TYPE_CHECKING:
18
27
 
19
28
 
20
29
  class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy):
21
- def __init__(self, stream_reader: AbstractFileBasedStreamReader):
30
+ def __init__(self, stream_reader: AbstractFileBasedStreamReader) -> None:
22
31
  self.stream_reader = stream_reader
23
32
 
24
- def check_availability(self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]) -> Tuple[bool, Optional[str]]: # type: ignore[override]
33
+ def check_availability( # type: ignore[override] # Signature doesn't match base class
34
+ self,
35
+ stream: AbstractFileBasedStream,
36
+ logger: logging.Logger,
37
+ _: Optional[Source],
38
+ ) -> Tuple[bool, Optional[str]]:
25
39
  """
26
40
  Perform a connection check for the stream (verify that we can list files from the stream).
27
41
 
@@ -35,7 +49,10 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
35
49
  return True, None
36
50
 
37
51
  def check_availability_and_parsability(
38
- self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]
52
+ self,
53
+ stream: AbstractFileBasedStream,
54
+ logger: logging.Logger,
55
+ _: Optional[Source],
39
56
  ) -> Tuple[bool, Optional[str]]:
40
57
  """
41
58
  Perform a connection check for the stream.
@@ -66,12 +83,14 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
66
83
  # If the parser is set to not check parsability, we still want to check that we can open the file.
67
84
  handle = stream.stream_reader.open_file(file, parser.file_read_mode, None, logger)
68
85
  handle.close()
86
+ except AirbyteTracedException as ate:
87
+ raise ate
69
88
  except CheckAvailabilityError:
70
89
  return False, "".join(traceback.format_exc())
71
90
 
72
91
  return True, None
73
92
 
74
- def _check_list_files(self, stream: "AbstractFileBasedStream") -> RemoteFile:
93
+ def _check_list_files(self, stream: AbstractFileBasedStream) -> RemoteFile:
75
94
  """
76
95
  Check that we can list files from the stream.
77
96
 
@@ -84,22 +103,39 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
84
103
  except CustomFileBasedException as exc:
85
104
  raise CheckAvailabilityError(str(exc), stream=stream.name) from exc
86
105
  except Exception as exc:
87
- raise CheckAvailabilityError(FileBasedSourceError.ERROR_LISTING_FILES, stream=stream.name) from exc
106
+ raise CheckAvailabilityError(
107
+ FileBasedSourceError.ERROR_LISTING_FILES, stream=stream.name
108
+ ) from exc
88
109
 
89
110
  return file
90
111
 
91
- def _check_parse_record(self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger) -> None:
112
+ def _check_parse_record(
113
+ self,
114
+ stream: AbstractFileBasedStream,
115
+ file: RemoteFile,
116
+ logger: logging.Logger,
117
+ ) -> None:
92
118
  parser = stream.get_parser()
93
119
 
94
120
  try:
95
- record = next(iter(parser.parse_records(stream.config, file, self.stream_reader, logger, discovered_schema=None)))
121
+ record = next(
122
+ iter(
123
+ parser.parse_records(
124
+ stream.config, file, self.stream_reader, logger, discovered_schema=None
125
+ )
126
+ )
127
+ )
96
128
  except StopIteration:
97
129
  # The file is empty. We've verified that we can open it, so will
98
130
  # consider the connection check successful even though it means
99
131
  # we skip the schema validation check.
100
132
  return
133
+ except AirbyteTracedException as ate:
134
+ raise ate
101
135
  except Exception as exc:
102
- raise CheckAvailabilityError(FileBasedSourceError.ERROR_READING_FILE, stream=stream.name, file=file.uri) from exc
136
+ raise CheckAvailabilityError(
137
+ FileBasedSourceError.ERROR_READING_FILE, stream=stream.name, file=file.uri
138
+ ) from exc
103
139
 
104
140
  schema = stream.catalog_schema or stream.config.input_schema
105
141
  if schema and stream.validation_policy.validate_schema_before_sync:
@@ -4,12 +4,32 @@
4
4
 
5
5
  import copy
6
6
  from abc import abstractmethod
7
- from typing import Any, Dict, List, Optional
7
+ from typing import Any, Dict, List, Literal, Optional, Union
8
8
 
9
- import dpath.util
9
+ import dpath
10
+ from pydantic.v1 import AnyUrl, BaseModel, Field
11
+
12
+ from airbyte_cdk import OneOfOptionConfig
10
13
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
11
14
  from airbyte_cdk.sources.utils import schema_helpers
12
- from pydantic import AnyUrl, BaseModel, Field
15
+
16
+
17
+ class DeliverRecords(BaseModel):
18
+ class Config(OneOfOptionConfig):
19
+ title = "Replicate Records"
20
+ description = "Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination."
21
+ discriminator = "delivery_type"
22
+
23
+ delivery_type: Literal["use_records_transfer"] = Field("use_records_transfer", const=True)
24
+
25
+
26
+ class DeliverRawFiles(BaseModel):
27
+ class Config(OneOfOptionConfig):
28
+ title = "Copy Raw Files"
29
+ description = "Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files."
30
+ discriminator = "delivery_type"
31
+
32
+ delivery_type: Literal["use_file_transfer"] = Field("use_file_transfer", const=True)
13
33
 
14
34
 
15
35
  class AbstractFileBasedSpec(BaseModel):
@@ -34,6 +54,17 @@ class AbstractFileBasedSpec(BaseModel):
34
54
  order=10,
35
55
  )
36
56
 
57
+ delivery_method: Union[DeliverRecords, DeliverRawFiles] = Field(
58
+ title="Delivery Method",
59
+ discriminator="delivery_type",
60
+ type="object",
61
+ order=7,
62
+ display_type="radio",
63
+ group="advanced",
64
+ default="use_records_transfer",
65
+ airbyte_hidden=True,
66
+ )
67
+
37
68
  @classmethod
38
69
  @abstractmethod
39
70
  def documentation_url(cls) -> AnyUrl:
@@ -57,7 +88,7 @@ class AbstractFileBasedSpec(BaseModel):
57
88
  @staticmethod
58
89
  def remove_discriminator(schema: Dict[str, Any]) -> None:
59
90
  """pydantic adds "discriminator" to the schema for oneOfs, which is not treated right by the platform as we inline all references"""
60
- dpath.util.delete(schema, "properties/**/discriminator")
91
+ dpath.delete(schema, "properties/**/discriminator")
61
92
 
62
93
  @staticmethod
63
94
  def replace_enum_allOf_and_anyOf(schema: Dict[str, Any]) -> Dict[str, Any]:
@@ -77,10 +108,16 @@ class AbstractFileBasedSpec(BaseModel):
77
108
 
78
109
  properties_to_change = ["validation_policy"]
79
110
  for property_to_change in properties_to_change:
80
- property_object = schema["properties"]["streams"]["items"]["properties"][property_to_change]
111
+ property_object = schema["properties"]["streams"]["items"]["properties"][
112
+ property_to_change
113
+ ]
81
114
  if "anyOf" in property_object:
82
- schema["properties"]["streams"]["items"]["properties"][property_to_change]["type"] = "object"
83
- schema["properties"]["streams"]["items"]["properties"][property_to_change]["oneOf"] = property_object.pop("anyOf")
115
+ schema["properties"]["streams"]["items"]["properties"][property_to_change][
116
+ "type"
117
+ ] = "object"
118
+ schema["properties"]["streams"]["items"]["properties"][property_to_change][
119
+ "oneOf"
120
+ ] = property_object.pop("anyOf")
84
121
  AbstractFileBasedSpec.move_enum_to_root(property_object)
85
122
 
86
123
  csv_format_schemas = list(
@@ -91,9 +128,9 @@ class AbstractFileBasedSpec(BaseModel):
91
128
  )
92
129
  if len(csv_format_schemas) != 1:
93
130
  raise ValueError(f"Expecting only one CSV format but got {csv_format_schemas}")
94
- csv_format_schemas[0]["properties"]["header_definition"]["oneOf"] = csv_format_schemas[0]["properties"]["header_definition"].pop(
95
- "anyOf", []
96
- )
131
+ csv_format_schemas[0]["properties"]["header_definition"]["oneOf"] = csv_format_schemas[0][
132
+ "properties"
133
+ ]["header_definition"].pop("anyOf", [])
97
134
  csv_format_schemas[0]["properties"]["header_definition"]["type"] = "object"
98
135
  return schema
99
136
 
@@ -3,8 +3,9 @@
3
3
  #
4
4
 
5
5
 
6
+ from pydantic.v1 import BaseModel, Field
7
+
6
8
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
7
- from pydantic import BaseModel, Field
8
9
 
9
10
 
10
11
  class AvroFormat(BaseModel):
@@ -6,8 +6,10 @@ import codecs
6
6
  from enum import Enum
7
7
  from typing import Any, Dict, List, Optional, Set, Union
8
8
 
9
+ from pydantic.v1 import BaseModel, Field, root_validator, validator
10
+ from pydantic.v1.error_wrappers import ValidationError
11
+
9
12
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
10
- from pydantic import BaseModel, Field, ValidationError, root_validator, validator
11
13
 
12
14
 
13
15
  class InferenceType(Enum):
@@ -69,7 +71,9 @@ class CsvHeaderUserProvided(BaseModel):
69
71
  @validator("column_names")
70
72
  def validate_column_names(cls, v: List[str]) -> List[str]:
71
73
  if not v:
72
- raise ValueError("At least one column name needs to be provided when using user provided headers")
74
+ raise ValueError(
75
+ "At least one column name needs to be provided when using user provided headers"
76
+ )
73
77
  return v
74
78
 
75
79
 
@@ -106,7 +110,9 @@ class CsvFormat(BaseModel):
106
110
  description='The character encoding of the CSV data. Leave blank to default to <strong>UTF8</strong>. See <a href="https://docs.python.org/3/library/codecs.html#standard-encodings" target="_blank">list of python encodings</a> for allowable options.',
107
111
  )
108
112
  double_quote: bool = Field(
109
- title="Double Quote", default=True, description="Whether two quotes in a quoted CSV value denote a single quote in the data."
113
+ title="Double Quote",
114
+ default=True,
115
+ description="Whether two quotes in a quoted CSV value denote a single quote in the data.",
110
116
  )
111
117
  null_values: Set[str] = Field(
112
118
  title="Null Values",
@@ -124,12 +130,16 @@ class CsvFormat(BaseModel):
124
130
  description="The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field.",
125
131
  )
126
132
  skip_rows_after_header: int = Field(
127
- title="Skip Rows After Header", default=0, description="The number of rows to skip after the header row."
133
+ title="Skip Rows After Header",
134
+ default=0,
135
+ description="The number of rows to skip after the header row.",
128
136
  )
129
- header_definition: Union[CsvHeaderFromCsv, CsvHeaderAutogenerated, CsvHeaderUserProvided] = Field(
130
- title="CSV Header Definition",
131
- default=CsvHeaderFromCsv(header_definition_type=CsvHeaderDefinitionType.FROM_CSV.value),
132
- description="How headers will be defined. `User Provided` assumes the CSV does not have a header row and uses the headers provided and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers using for `f{i}` where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows.",
137
+ header_definition: Union[CsvHeaderFromCsv, CsvHeaderAutogenerated, CsvHeaderUserProvided] = (
138
+ Field(
139
+ title="CSV Header Definition",
140
+ default=CsvHeaderFromCsv(header_definition_type=CsvHeaderDefinitionType.FROM_CSV.value),
141
+ description="How headers will be defined. `User Provided` assumes the CSV does not have a header row and uses the headers provided and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers using for `f{i}` where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows.",
142
+ )
133
143
  )
134
144
  true_values: Set[str] = Field(
135
145
  title="True Values",
@@ -147,6 +157,11 @@ class CsvFormat(BaseModel):
147
157
  description="How to infer the types of the columns. If none, inference default to strings.",
148
158
  airbyte_hidden=True,
149
159
  )
160
+ ignore_errors_on_fields_mismatch: bool = Field(
161
+ title="Ignore errors on field mismatch",
162
+ default=False,
163
+ description="Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema.",
164
+ )
150
165
 
151
166
  @validator("delimiter")
152
167
  def validate_delimiter(cls, v: str) -> str:
@@ -183,9 +198,13 @@ class CsvFormat(BaseModel):
183
198
  definition_type = values.get("header_definition_type")
184
199
  column_names = values.get("user_provided_column_names")
185
200
  if definition_type == CsvHeaderDefinitionType.USER_PROVIDED and not column_names:
186
- raise ValidationError("`user_provided_column_names` should be defined if the definition 'User Provided'.", model=CsvFormat)
201
+ raise ValidationError(
202
+ "`user_provided_column_names` should be defined if the definition 'User Provided'.",
203
+ model=CsvFormat,
204
+ )
187
205
  if definition_type != CsvHeaderDefinitionType.USER_PROVIDED and column_names:
188
206
  raise ValidationError(
189
- "`user_provided_column_names` should not be defined if the definition is not 'User Provided'.", model=CsvFormat
207
+ "`user_provided_column_names` should not be defined if the definition is not 'User Provided'.",
208
+ model=CsvFormat,
190
209
  )
191
210
  return values
@@ -0,0 +1,18 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from pydantic.v1 import BaseModel, Field
6
+
7
+ from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
8
+
9
+
10
+ class ExcelFormat(BaseModel):
11
+ class Config(OneOfOptionConfig):
12
+ title = "Excel Format"
13
+ discriminator = "filetype"
14
+
15
+ filetype: str = Field(
16
+ "excel",
17
+ const=True,
18
+ )
@@ -1,18 +1,20 @@
1
1
  #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
5
  from enum import Enum
6
6
  from typing import Any, List, Mapping, Optional, Union
7
7
 
8
+ from pydantic.v1 import BaseModel, Field, validator
9
+
8
10
  from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
9
11
  from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
12
+ from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat
10
13
  from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
11
14
  from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
12
15
  from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat
13
16
  from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError
14
17
  from airbyte_cdk.sources.file_based.schema_helpers import type_mapping_to_jsonschema
15
- from pydantic import BaseModel, Field, validator
16
18
 
17
19
  PrimaryKeyType = Optional[Union[str, List[str]]]
18
20
 
@@ -55,7 +57,9 @@ class FileBasedStreamConfig(BaseModel):
55
57
  description="When the state history of the file store is full, syncs will only read files that were last modified in the provided day range.",
56
58
  default=3,
57
59
  )
58
- format: Union[AvroFormat, CsvFormat, JsonlFormat, ParquetFormat, UnstructuredFormat] = Field(
60
+ format: Union[
61
+ AvroFormat, CsvFormat, JsonlFormat, ParquetFormat, UnstructuredFormat, ExcelFormat
62
+ ] = Field(
59
63
  title="Format",
60
64
  description="The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.",
61
65
  )
@@ -64,6 +68,12 @@ class FileBasedStreamConfig(BaseModel):
64
68
  description="When enabled, syncs will not validate or structure records against the stream's schema.",
65
69
  default=False,
66
70
  )
71
+ recent_n_files_to_read_for_schema_discovery: Optional[int] = Field(
72
+ title="Files To Read For Schema Discover",
73
+ description="The number of resent files which will be used to discover the schema for this stream.",
74
+ default=None,
75
+ gt=0,
76
+ )
67
77
 
68
78
  @validator("input_schema", pre=True)
69
79
  def validate_input_schema(cls, v: Optional[str]) -> Optional[str]:
@@ -82,6 +92,8 @@ class FileBasedStreamConfig(BaseModel):
82
92
  if self.input_schema:
83
93
  schema = type_mapping_to_jsonschema(self.input_schema)
84
94
  if not schema:
85
- raise ValueError(f"Unable to create JSON schema from input schema {self.input_schema}")
95
+ raise ValueError(
96
+ f"Unable to create JSON schema from input schema {self.input_schema}"
97
+ )
86
98
  return schema
87
99
  return None
@@ -2,8 +2,9 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ from pydantic.v1 import BaseModel, Field
6
+
5
7
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
6
- from pydantic import BaseModel, Field
7
8
 
8
9
 
9
10
  class JsonlFormat(BaseModel):
@@ -3,8 +3,9 @@
3
3
  #
4
4
 
5
5
 
6
+ from pydantic.v1 import BaseModel, Field
7
+
6
8
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
7
- from pydantic import BaseModel, Field
8
9
 
9
10
 
10
11
  class ParquetFormat(BaseModel):
@@ -4,8 +4,9 @@
4
4
 
5
5
  from typing import List, Literal, Optional, Union
6
6
 
7
+ from pydantic.v1 import BaseModel, Field
8
+
7
9
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
8
- from pydantic import BaseModel, Field
9
10
 
10
11
 
11
12
  class LocalProcessingConfigModel(BaseModel):
@@ -13,7 +14,9 @@ class LocalProcessingConfigModel(BaseModel):
13
14
 
14
15
  class Config(OneOfOptionConfig):
15
16
  title = "Local"
16
- description = "Process files locally, supporting `fast` and `ocr` modes. This is the default option."
17
+ description = (
18
+ "Process files locally, supporting `fast` and `ocr` modes. This is the default option."
19
+ )
17
20
  discriminator = "mode"
18
21
 
19
22
 
@@ -23,7 +26,9 @@ class APIParameterConfigModel(BaseModel):
23
26
  description="The name of the unstructured API parameter to use",
24
27
  examples=["combine_under_n_chars", "languages"],
25
28
  )
26
- value: str = Field(title="Value", description="The value of the parameter", examples=["true", "hi_res"])
29
+ value: str = Field(
30
+ title="Value", description="The value of the parameter", examples=["true", "hi_res"]
31
+ )
27
32
 
28
33
 
29
34
  class APIProcessingConfigModel(BaseModel):
@@ -60,7 +65,7 @@ class APIProcessingConfigModel(BaseModel):
60
65
 
61
66
  class UnstructuredFormat(BaseModel):
62
67
  class Config(OneOfOptionConfig):
63
- title = "Document File Type Format (Experimental)"
68
+ title = "Unstructured Document Format"
64
69
  description = "Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file."
65
70
  discriminator = "filetype"
66
71
 
@@ -85,7 +90,10 @@ class UnstructuredFormat(BaseModel):
85
90
  description="The strategy used to parse documents. `fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf",
86
91
  )
87
92
 
88
- processing: Union[LocalProcessingConfigModel, APIProcessingConfigModel,] = Field(
93
+ processing: Union[
94
+ LocalProcessingConfigModel,
95
+ APIProcessingConfigModel,
96
+ ] = Field(
89
97
  default=LocalProcessingConfigModel(mode="local"),
90
98
  title="Processing",
91
99
  description="Processing configuration",
@@ -1,4 +1,8 @@
1
- from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import AbstractDiscoveryPolicy
2
- from airbyte_cdk.sources.file_based.discovery_policy.default_discovery_policy import DefaultDiscoveryPolicy
1
+ from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import (
2
+ AbstractDiscoveryPolicy,
3
+ )
4
+ from airbyte_cdk.sources.file_based.discovery_policy.default_discovery_policy import (
5
+ DefaultDiscoveryPolicy,
6
+ )
3
7
 
4
8
  __all__ = ["AbstractDiscoveryPolicy", "DefaultDiscoveryPolicy"]
@@ -15,9 +15,7 @@ class AbstractDiscoveryPolicy(ABC):
15
15
 
16
16
  @property
17
17
  @abstractmethod
18
- def n_concurrent_requests(self) -> int:
19
- ...
18
+ def n_concurrent_requests(self) -> int: ...
20
19
 
21
20
  @abstractmethod
22
- def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int:
23
- ...
21
+ def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int: ...
@@ -2,7 +2,9 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import AbstractDiscoveryPolicy
5
+ from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import (
6
+ AbstractDiscoveryPolicy,
7
+ )
6
8
  from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
7
9
 
8
10
  DEFAULT_N_CONCURRENT_REQUESTS = 10
@@ -23,6 +25,9 @@ class DefaultDiscoveryPolicy(AbstractDiscoveryPolicy):
23
25
  return min(
24
26
  filter(
25
27
  None,
26
- (DEFAULT_MAX_N_FILES_FOR_STREAM_SCHEMA_INFERENCE, parser.parser_max_n_files_for_schema_inference),
28
+ (
29
+ DEFAULT_MAX_N_FILES_FOR_STREAM_SCHEMA_INFERENCE,
30
+ parser.parser_max_n_files_for_schema_inference,
31
+ ),
27
32
  )
28
33
  )