airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198):
  1. airbyte_cdk/__init__.py +17 -2
  2. airbyte_cdk/config_observation.py +10 -3
  3. airbyte_cdk/connector.py +19 -9
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
  5. airbyte_cdk/connector_builder/main.py +26 -6
  6. airbyte_cdk/connector_builder/message_grouper.py +95 -25
  7. airbyte_cdk/destinations/destination.py +47 -14
  8. airbyte_cdk/destinations/vector_db_based/config.py +36 -14
  9. airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
  10. airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
  11. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  12. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  13. airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
  14. airbyte_cdk/entrypoint.py +82 -26
  15. airbyte_cdk/exception_handler.py +13 -3
  16. airbyte_cdk/logger.py +10 -2
  17. airbyte_cdk/models/airbyte_protocol.py +11 -5
  18. airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
  19. airbyte_cdk/models/well_known_types.py +1 -1
  20. airbyte_cdk/sources/abstract_source.py +63 -17
  21. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
  22. airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
  23. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
  24. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
  25. airbyte_cdk/sources/connector_state_manager.py +32 -10
  26. airbyte_cdk/sources/declarative/async_job/job.py +3 -1
  27. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
  28. airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
  29. airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
  30. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  31. airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
  32. airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
  33. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
  34. airbyte_cdk/sources/declarative/auth/token.py +25 -8
  35. airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
  36. airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
  37. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
  38. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
  39. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
  40. airbyte_cdk/sources/declarative/declarative_source.py +3 -1
  41. airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
  42. airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
  43. airbyte_cdk/sources/declarative/decoders/json_decoder.py +3 -1
  44. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
  45. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
  46. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
  47. airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
  48. airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
  49. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
  50. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
  51. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
  52. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
  53. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
  54. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
  55. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
  56. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
  57. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
  58. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
  59. airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
  60. airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
  61. airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
  62. airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
  63. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +7 -2
  64. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +656 -678
  65. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
  66. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
  67. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +782 -232
  68. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
  69. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
  70. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
  71. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
  72. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
  73. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
  74. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
  75. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
  76. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
  77. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
  78. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
  79. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
  80. airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
  81. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
  82. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
  83. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
  84. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
  85. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
  86. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
  87. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
  88. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
  89. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
  90. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
  91. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
  92. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
  93. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
  94. airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
  95. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
  96. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
  97. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
  98. airbyte_cdk/sources/declarative/spec/spec.py +8 -2
  99. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
  100. airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
  101. airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
  102. airbyte_cdk/sources/declarative/types.py +8 -1
  103. airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
  104. airbyte_cdk/sources/embedded/base_integration.py +14 -4
  105. airbyte_cdk/sources/embedded/catalog.py +16 -4
  106. airbyte_cdk/sources/embedded/runner.py +19 -3
  107. airbyte_cdk/sources/embedded/tools.py +3 -1
  108. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
  109. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
  110. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
  111. airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
  112. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
  113. airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
  114. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  115. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  116. airbyte_cdk/sources/file_based/exceptions.py +13 -15
  117. airbyte_cdk/sources/file_based/file_based_source.py +82 -24
  118. airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
  119. airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
  120. airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
  121. airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
  122. airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
  123. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  124. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
  125. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
  126. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
  127. airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
  128. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  129. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  130. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
  131. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
  132. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
  133. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
  134. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
  135. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  136. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
  137. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
  138. airbyte_cdk/sources/http_logger.py +5 -1
  139. airbyte_cdk/sources/message/repository.py +18 -4
  140. airbyte_cdk/sources/source.py +17 -7
  141. airbyte_cdk/sources/streams/availability_strategy.py +9 -3
  142. airbyte_cdk/sources/streams/call_rate.py +63 -19
  143. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
  144. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
  145. airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
  146. airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
  147. airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
  148. airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
  149. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
  150. airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
  151. airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
  152. airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
  153. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
  154. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
  155. airbyte_cdk/sources/streams/core.py +77 -22
  156. airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
  157. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
  158. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
  159. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
  160. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
  161. airbyte_cdk/sources/streams/http/exceptions.py +2 -2
  162. airbyte_cdk/sources/streams/http/http.py +133 -33
  163. airbyte_cdk/sources/streams/http/http_client.py +91 -29
  164. airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
  165. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
  166. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
  167. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  168. airbyte_cdk/sources/types.py +5 -1
  169. airbyte_cdk/sources/utils/record_helper.py +12 -3
  170. airbyte_cdk/sources/utils/schema_helpers.py +9 -3
  171. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  172. airbyte_cdk/sources/utils/transform.py +24 -9
  173. airbyte_cdk/sql/exceptions.py +19 -6
  174. airbyte_cdk/sql/secrets.py +3 -1
  175. airbyte_cdk/sql/shared/catalog_providers.py +13 -4
  176. airbyte_cdk/sql/shared/sql_processor.py +44 -14
  177. airbyte_cdk/test/catalog_builder.py +19 -8
  178. airbyte_cdk/test/entrypoint_wrapper.py +27 -8
  179. airbyte_cdk/test/mock_http/mocker.py +41 -11
  180. airbyte_cdk/test/mock_http/request.py +9 -3
  181. airbyte_cdk/test/mock_http/response.py +3 -1
  182. airbyte_cdk/test/mock_http/response_builder.py +29 -7
  183. airbyte_cdk/test/state_builder.py +10 -2
  184. airbyte_cdk/test/utils/data.py +6 -2
  185. airbyte_cdk/test/utils/http_mocking.py +3 -1
  186. airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
  187. airbyte_cdk/utils/analytics_message.py +10 -2
  188. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  189. airbyte_cdk/utils/mapping_helpers.py +3 -1
  190. airbyte_cdk/utils/message_utils.py +11 -4
  191. airbyte_cdk/utils/print_buffer.py +6 -1
  192. airbyte_cdk/utils/schema_inferrer.py +30 -9
  193. airbyte_cdk/utils/spec_schema_transformations.py +3 -1
  194. airbyte_cdk/utils/traced_exception.py +35 -9
  195. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/METADATA +7 -6
  196. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/RECORD +198 -198
  197. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/LICENSE.txt +0 -0
  198. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/WHEEL +0 -0
@@ -38,7 +38,11 @@ class InterpolatedMapping:
38
38
  valid_value_types = additional_parameters.pop("valid_value_types", None)
39
39
  return {
40
40
  self._interpolation.eval(
41
- name, config, valid_types=valid_key_types, parameters=self._parameters, **additional_parameters
41
+ name,
42
+ config,
43
+ valid_types=valid_key_types,
44
+ parameters=self._parameters,
45
+ **additional_parameters,
42
46
  ): self._eval(value, config, valid_types=valid_value_types, **additional_parameters)
43
47
  for name, value in self.mapping.items()
44
48
  }
@@ -9,7 +9,9 @@ from typing import Any, Mapping, Optional, Union
9
9
  from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation
10
10
  from airbyte_cdk.sources.types import Config
11
11
 
12
- NestedMappingEntry = Union[dict[str, "NestedMapping"], list["NestedMapping"], str, int, float, bool, None]
12
+ NestedMappingEntry = Union[
13
+ dict[str, "NestedMapping"], list["NestedMapping"], str, int, float, bool, None
14
+ ]
13
15
  NestedMapping = Union[dict[str, NestedMappingEntry], str]
14
16
 
15
17
 
@@ -32,12 +34,17 @@ class InterpolatedNestedMapping:
32
34
  def eval(self, config: Config, **additional_parameters: Any) -> Any:
33
35
  return self._eval(self.mapping, config, **additional_parameters)
34
36
 
35
- def _eval(self, value: Union[NestedMapping, NestedMappingEntry], config: Config, **kwargs: Any) -> Any:
37
+ def _eval(
38
+ self, value: Union[NestedMapping, NestedMappingEntry], config: Config, **kwargs: Any
39
+ ) -> Any:
36
40
  # Recursively interpolate dictionaries and lists
37
41
  if isinstance(value, str):
38
42
  return self._interpolation.eval(value, config, parameters=self._parameters, **kwargs)
39
43
  elif isinstance(value, dict):
40
- interpolated_dict = {self._eval(k, config, **kwargs): self._eval(v, config, **kwargs) for k, v in value.items()}
44
+ interpolated_dict = {
45
+ self._eval(k, config, **kwargs): self._eval(v, config, **kwargs)
46
+ for k, v in value.items()
47
+ }
41
48
  return {k: v for k, v in interpolated_dict.items() if v is not None}
42
49
  elif isinstance(value, list):
43
50
  return [self._eval(v, config, **kwargs) for v in value]
@@ -45,10 +45,14 @@ class InterpolatedString:
45
45
  if self._is_plain_string is None:
46
46
  # Let's check whether output from evaluation is the same as input.
47
47
  # This indicates occurrence of a plain string, not a template and we can skip Jinja in subsequent runs.
48
- evaluated = self._interpolation.eval(self.string, config, self.default, parameters=self._parameters, **kwargs)
48
+ evaluated = self._interpolation.eval(
49
+ self.string, config, self.default, parameters=self._parameters, **kwargs
50
+ )
49
51
  self._is_plain_string = self.string == evaluated
50
52
  return evaluated
51
- return self._interpolation.eval(self.string, config, self.default, parameters=self._parameters, **kwargs)
53
+ return self._interpolation.eval(
54
+ self.string, config, self.default, parameters=self._parameters, **kwargs
55
+ )
52
56
 
53
57
  def __eq__(self, other: Any) -> bool:
54
58
  if not isinstance(other, InterpolatedString):
@@ -14,7 +14,13 @@ class Interpolation(ABC):
14
14
  """
15
15
 
16
16
  @abstractmethod
17
- def eval(self, input_str: str, config: Config, default: Optional[str] = None, **additional_options: Any) -> Any:
17
+ def eval(
18
+ self,
19
+ input_str: str,
20
+ config: Config,
21
+ default: Optional[str] = None,
22
+ **additional_options: Any,
23
+ ) -> Any:
18
24
  """
19
25
  Interpolates the input string using the config, and additional options passed as parameter.
20
26
 
@@ -61,7 +61,9 @@ class JinjaInterpolation(Interpolation):
61
61
  # By default, these Python builtin functions are available in the Jinja context.
62
62
  # We explicitely remove them because of the potential security risk.
63
63
  # Please add a unit test to test_jinja.py when adding a restriction.
64
- RESTRICTED_BUILTIN_FUNCTIONS = ["range"] # The range function can cause very expensive computations
64
+ RESTRICTED_BUILTIN_FUNCTIONS = [
65
+ "range"
66
+ ] # The range function can cause very expensive computations
65
67
 
66
68
  def __init__(self) -> None:
67
69
  self._environment = StreamPartitionAccessEnvironment()
@@ -119,7 +121,9 @@ class JinjaInterpolation(Interpolation):
119
121
  undeclared = self._find_undeclared_variables(s)
120
122
  undeclared_not_in_context = {var for var in undeclared if var not in context}
121
123
  if undeclared_not_in_context:
122
- raise ValueError(f"Jinja macro has undeclared variables: {undeclared_not_in_context}. Context: {context}")
124
+ raise ValueError(
125
+ f"Jinja macro has undeclared variables: {undeclared_not_in_context}. Context: {context}"
126
+ )
123
127
  return self._compile(s).render(context) # type: ignore # from_string is able to handle None
124
128
  except TypeError:
125
129
  # The string is a static value, not a jinja template
@@ -104,7 +104,9 @@ def day_delta(num_days: int, format: str = "%Y-%m-%dT%H:%M:%S.%f%z") -> str:
104
104
  :param num_days: number of days to add to current date time
105
105
  :return: datetime formatted as RFC3339
106
106
  """
107
- return (datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=num_days)).strftime(format)
107
+ return (
108
+ datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=num_days)
109
+ ).strftime(format)
108
110
 
109
111
 
110
112
  def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]:
@@ -117,7 +119,9 @@ def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]:
117
119
  return parse_duration(datestring) # type: ignore # mypy thinks this returns Any for some reason
118
120
 
119
121
 
120
- def format_datetime(dt: Union[str, datetime.datetime], format: str, input_format: Optional[str] = None) -> str:
122
+ def format_datetime(
123
+ dt: Union[str, datetime.datetime], format: str, input_format: Optional[str] = None
124
+ ) -> str:
121
125
  """
122
126
  Converts datetime to another format
123
127
 
@@ -130,11 +134,22 @@ def format_datetime(dt: Union[str, datetime.datetime], format: str, input_format
130
134
  """
131
135
  if isinstance(dt, datetime.datetime):
132
136
  return dt.strftime(format)
133
- dt_datetime = datetime.datetime.strptime(dt, input_format) if input_format else _str_to_datetime(dt)
137
+ dt_datetime = (
138
+ datetime.datetime.strptime(dt, input_format) if input_format else _str_to_datetime(dt)
139
+ )
134
140
  if format == "%s":
135
141
  return str(int(dt_datetime.timestamp()))
136
142
  return dt_datetime.strftime(format)
137
143
 
138
144
 
139
- _macros_list = [now_utc, today_utc, timestamp, max, day_delta, duration, format_datetime, today_with_timezone]
145
+ _macros_list = [
146
+ now_utc,
147
+ today_utc,
148
+ timestamp,
149
+ max,
150
+ day_delta,
151
+ duration,
152
+ format_datetime,
153
+ today_with_timezone,
154
+ ]
140
155
  macros = {f.__name__: f for f in _macros_list}
@@ -20,16 +20,30 @@ from airbyte_cdk.models import (
20
20
  )
21
21
  from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
22
22
  from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
23
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import CheckStream as CheckStreamModel
24
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import DeclarativeStream as DeclarativeStreamModel
23
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
24
+ CheckStream as CheckStreamModel,
25
+ )
26
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
27
+ DeclarativeStream as DeclarativeStreamModel,
28
+ )
25
29
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
26
- from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import ManifestComponentTransformer
27
- from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import ManifestReferenceResolver
28
- from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ModelToComponentFactory
30
+ from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import (
31
+ ManifestComponentTransformer,
32
+ )
33
+ from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import (
34
+ ManifestReferenceResolver,
35
+ )
36
+ from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
37
+ ModelToComponentFactory,
38
+ )
29
39
  from airbyte_cdk.sources.message import MessageRepository
30
40
  from airbyte_cdk.sources.streams.core import Stream
31
41
  from airbyte_cdk.sources.types import ConnectionDefinition
32
- from airbyte_cdk.sources.utils.slice_logger import AlwaysLogSliceLogger, DebugSliceLogger, SliceLogger
42
+ from airbyte_cdk.sources.utils.slice_logger import (
43
+ AlwaysLogSliceLogger,
44
+ DebugSliceLogger,
45
+ SliceLogger,
46
+ )
33
47
  from jsonschema.exceptions import ValidationError
34
48
  from jsonschema.validators import validate
35
49
 
@@ -57,13 +71,21 @@ class ManifestDeclarativeSource(DeclarativeSource):
57
71
  manifest["type"] = "DeclarativeSource"
58
72
 
59
73
  resolved_source_config = ManifestReferenceResolver().preprocess_manifest(manifest)
60
- propagated_source_config = ManifestComponentTransformer().propagate_types_and_parameters("", resolved_source_config, {})
74
+ propagated_source_config = ManifestComponentTransformer().propagate_types_and_parameters(
75
+ "", resolved_source_config, {}
76
+ )
61
77
  self._source_config = propagated_source_config
62
78
  self._debug = debug
63
79
  self._emit_connector_builder_messages = emit_connector_builder_messages
64
- self._constructor = component_factory if component_factory else ModelToComponentFactory(emit_connector_builder_messages)
80
+ self._constructor = (
81
+ component_factory
82
+ if component_factory
83
+ else ModelToComponentFactory(emit_connector_builder_messages)
84
+ )
65
85
  self._message_repository = self._constructor.get_message_repository()
66
- self._slice_logger: SliceLogger = AlwaysLogSliceLogger() if emit_connector_builder_messages else DebugSliceLogger()
86
+ self._slice_logger: SliceLogger = (
87
+ AlwaysLogSliceLogger() if emit_connector_builder_messages else DebugSliceLogger()
88
+ )
67
89
 
68
90
  self._validate_source()
69
91
 
@@ -81,20 +103,30 @@ class ManifestDeclarativeSource(DeclarativeSource):
81
103
  if "type" not in check:
82
104
  check["type"] = "CheckStream"
83
105
  check_stream = self._constructor.create_component(
84
- CheckStreamModel, check, dict(), emit_connector_builder_messages=self._emit_connector_builder_messages
106
+ CheckStreamModel,
107
+ check,
108
+ dict(),
109
+ emit_connector_builder_messages=self._emit_connector_builder_messages,
85
110
  )
86
111
  if isinstance(check_stream, ConnectionChecker):
87
112
  return check_stream
88
113
  else:
89
- raise ValueError(f"Expected to generate a ConnectionChecker component, but received {check_stream.__class__}")
114
+ raise ValueError(
115
+ f"Expected to generate a ConnectionChecker component, but received {check_stream.__class__}"
116
+ )
90
117
 
91
118
  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
92
- self._emit_manifest_debug_message(extra_args={"source_name": self.name, "parsed_config": json.dumps(self._source_config)})
119
+ self._emit_manifest_debug_message(
120
+ extra_args={"source_name": self.name, "parsed_config": json.dumps(self._source_config)}
121
+ )
93
122
  stream_configs = self._stream_configs(self._source_config)
94
123
 
95
124
  source_streams = [
96
125
  self._constructor.create_component(
97
- DeclarativeStreamModel, stream_config, config, emit_connector_builder_messages=self._emit_connector_builder_messages
126
+ DeclarativeStreamModel,
127
+ stream_config,
128
+ config,
129
+ emit_connector_builder_messages=self._emit_connector_builder_messages,
98
130
  )
99
131
  for stream_config in self._initialize_cache_for_parent_streams(deepcopy(stream_configs))
100
132
  ]
@@ -102,7 +134,9 @@ class ManifestDeclarativeSource(DeclarativeSource):
102
134
  return source_streams
103
135
 
104
136
  @staticmethod
105
- def _initialize_cache_for_parent_streams(stream_configs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
137
+ def _initialize_cache_for_parent_streams(
138
+ stream_configs: List[Dict[str, Any]],
139
+ ) -> List[Dict[str, Any]]:
106
140
  parent_streams = set()
107
141
 
108
142
  def update_with_cache_parent_configs(parent_configs: list[dict[str, Any]]) -> None:
@@ -113,12 +147,16 @@ class ManifestDeclarativeSource(DeclarativeSource):
113
147
  for stream_config in stream_configs:
114
148
  if stream_config.get("incremental_sync", {}).get("parent_stream"):
115
149
  parent_streams.add(stream_config["incremental_sync"]["parent_stream"]["name"])
116
- stream_config["incremental_sync"]["parent_stream"]["retriever"]["requester"]["use_cache"] = True
150
+ stream_config["incremental_sync"]["parent_stream"]["retriever"]["requester"][
151
+ "use_cache"
152
+ ] = True
117
153
 
118
154
  elif stream_config.get("retriever", {}).get("partition_router", {}):
119
155
  partition_router = stream_config["retriever"]["partition_router"]
120
156
 
121
- if isinstance(partition_router, dict) and partition_router.get("parent_stream_configs"):
157
+ if isinstance(partition_router, dict) and partition_router.get(
158
+ "parent_stream_configs"
159
+ ):
122
160
  update_with_cache_parent_configs(partition_router["parent_stream_configs"])
123
161
  elif isinstance(partition_router, list):
124
162
  for router in partition_router:
@@ -139,7 +177,9 @@ class ManifestDeclarativeSource(DeclarativeSource):
139
177
  in the project root.
140
178
  """
141
179
  self._configure_logger_level(logger)
142
- self._emit_manifest_debug_message(extra_args={"source_name": self.name, "parsed_config": json.dumps(self._source_config)})
180
+ self._emit_manifest_debug_message(
181
+ extra_args={"source_name": self.name, "parsed_config": json.dumps(self._source_config)}
182
+ )
143
183
 
144
184
  spec = self._source_config.get("spec")
145
185
  if spec:
@@ -176,22 +216,62 @@ class ManifestDeclarativeSource(DeclarativeSource):
176
216
  Validates the connector manifest against the declarative component schema
177
217
  """
178
218
  try:
179
- raw_component_schema = pkgutil.get_data("airbyte_cdk", "sources/declarative/declarative_component_schema.yaml")
219
+ raw_component_schema = pkgutil.get_data(
220
+ "airbyte_cdk", "sources/declarative/declarative_component_schema.yaml"
221
+ )
180
222
  if raw_component_schema is not None:
181
- declarative_component_schema = yaml.load(raw_component_schema, Loader=yaml.SafeLoader)
223
+ declarative_component_schema = yaml.load(
224
+ raw_component_schema, Loader=yaml.SafeLoader
225
+ )
182
226
  else:
183
- raise RuntimeError("Failed to read manifest component json schema required for validation")
227
+ raise RuntimeError(
228
+ "Failed to read manifest component json schema required for validation"
229
+ )
184
230
  except FileNotFoundError as e:
185
- raise FileNotFoundError(f"Failed to read manifest component json schema required for validation: {e}")
231
+ raise FileNotFoundError(
232
+ f"Failed to read manifest component json schema required for validation: {e}"
233
+ )
186
234
 
187
235
  streams = self._source_config.get("streams")
188
236
  if not streams:
189
- raise ValidationError(f"A valid manifest should have at least one stream defined. Got {streams}")
237
+ raise ValidationError(
238
+ f"A valid manifest should have at least one stream defined. Got {streams}"
239
+ )
190
240
 
191
241
  try:
192
242
  validate(self._source_config, declarative_component_schema)
193
243
  except ValidationError as e:
194
- raise ValidationError("Validation against json schema defined in declarative_component_schema.yaml schema failed") from e
244
+ raise ValidationError(
245
+ "Validation against json schema defined in declarative_component_schema.yaml schema failed"
246
+ ) from e
247
+
248
+ cdk_version = metadata.version("airbyte_cdk")
249
+ cdk_major, cdk_minor, cdk_patch = self._get_version_parts(cdk_version, "airbyte-cdk")
250
+ manifest_version = self._source_config.get("version")
251
+ if manifest_version is None:
252
+ raise RuntimeError(
253
+ "Manifest version is not defined in the manifest. This is unexpected since it should be a required field. Please contact support."
254
+ )
255
+ manifest_major, manifest_minor, manifest_patch = self._get_version_parts(
256
+ manifest_version, "manifest"
257
+ )
258
+
259
+ if cdk_version.startswith("0.0.0"):
260
+ # Skipping version compatibility check on unreleased dev branch
261
+ pass
262
+ elif cdk_major < manifest_major or (
263
+ cdk_major == manifest_major and cdk_minor < manifest_minor
264
+ ):
265
+ raise ValidationError(
266
+ f"The manifest version {manifest_version} is greater than the airbyte-cdk package version ({cdk_version}). Your "
267
+ f"manifest may contain features that are not in the current CDK version."
268
+ )
269
+ elif manifest_major == 0 and manifest_minor < 29:
270
+ raise ValidationError(
271
+ f"The low-code framework was promoted to Beta in airbyte-cdk version 0.29.0 and contains many breaking changes to the "
272
+ f"language. The manifest version {manifest_version} is incompatible with the airbyte-cdk package version "
273
+ f"{cdk_version} which contains these breaking changes."
274
+ )
195
275
 
196
276
  @staticmethod
197
277
  def _get_version_parts(version: str, version_type: str) -> Tuple[int, int, int]:
@@ -200,7 +280,9 @@ class ManifestDeclarativeSource(DeclarativeSource):
200
280
  """
201
281
  version_parts = re.split(r"\.", version)
202
282
  if len(version_parts) != 3 or not all([part.isdigit() for part in version_parts]):
203
- raise ValidationError(f"The {version_type} version {version} specified is not a valid version format (ex. 1.2.3)")
283
+ raise ValidationError(
284
+ f"The {version_type} version {version} specified is not a valid version format (ex. 1.2.3)"
285
+ )
204
286
  return tuple(int(part) for part in version_parts) # type: ignore # We already verified there were 3 parts and they are all digits
205
287
 
206
288
  def _stream_configs(self, manifest: Mapping[str, Any]) -> List[Dict[str, Any]]:
@@ -43,7 +43,9 @@ class LegacyToPerPartitionStateMigration(StateMigration):
43
43
  self._partition_key_field = InterpolatedString.create(
44
44
  self._get_partition_field(self._partition_router), parameters=self._parameters
45
45
  ).eval(self._config)
46
- self._cursor_field = InterpolatedString.create(self._cursor.cursor_field, parameters=self._parameters).eval(self._config)
46
+ self._cursor_field = InterpolatedString.create(
47
+ self._cursor.cursor_field, parameters=self._parameters
48
+ ).eval(self._config)
47
49
 
48
50
  def _get_partition_field(self, partition_router: SubstreamPartitionRouter) -> str:
49
51
  parent_stream_config = partition_router.parent_stream_configs[0]
@@ -85,5 +87,8 @@ class LegacyToPerPartitionStateMigration(StateMigration):
85
87
  return True
86
88
 
87
89
  def migrate(self, stream_state: Mapping[str, Any]) -> Mapping[str, Any]:
88
- states = [{"partition": {self._partition_key_field: key}, "cursor": value} for key, value in stream_state.items()]
90
+ states = [
91
+ {"partition": {self._partition_key_field: key}, "cursor": value}
92
+ for key, value in stream_state.items()
93
+ ]
89
94
  return {"states": states}