airbyte-cdk 6.7.0rc2__py3-none-any.whl → 6.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. airbyte_cdk/cli/__init__.py +1 -0
  2. airbyte_cdk/cli/source_declarative_manifest/__init__.py +6 -0
  3. airbyte_cdk/cli/source_declarative_manifest/_run.py +224 -0
  4. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  5. airbyte_cdk/config_observation.py +2 -1
  6. airbyte_cdk/connector.py +1 -0
  7. airbyte_cdk/connector_builder/connector_builder_handler.py +1 -1
  8. airbyte_cdk/connector_builder/main.py +2 -1
  9. airbyte_cdk/destinations/destination.py +2 -1
  10. airbyte_cdk/destinations/vector_db_based/config.py +2 -1
  11. airbyte_cdk/destinations/vector_db_based/document_processor.py +4 -3
  12. airbyte_cdk/destinations/vector_db_based/embedder.py +5 -4
  13. airbyte_cdk/entrypoint.py +3 -2
  14. airbyte_cdk/logger.py +2 -1
  15. airbyte_cdk/models/__init__.py +2 -0
  16. airbyte_cdk/models/airbyte_protocol.py +2 -1
  17. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +3 -3
  18. airbyte_cdk/sources/concurrent_source/concurrent_source.py +1 -1
  19. airbyte_cdk/sources/config.py +2 -1
  20. airbyte_cdk/sources/declarative/auth/jwt.py +1 -0
  21. airbyte_cdk/sources/declarative/auth/oauth.py +1 -0
  22. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +1 -0
  23. airbyte_cdk/sources/declarative/auth/token.py +2 -1
  24. airbyte_cdk/sources/declarative/auth/token_provider.py +3 -2
  25. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +24 -54
  26. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +180 -13
  27. airbyte_cdk/sources/declarative/decoders/json_decoder.py +3 -2
  28. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +1 -0
  29. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +1 -0
  30. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +1 -0
  31. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +1 -0
  32. airbyte_cdk/sources/declarative/extractors/http_selector.py +1 -0
  33. airbyte_cdk/sources/declarative/extractors/record_filter.py +6 -48
  34. airbyte_cdk/sources/declarative/extractors/record_selector.py +32 -4
  35. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +2 -1
  36. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +2 -1
  37. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +5 -2
  38. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +5 -2
  39. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +1 -3
  40. airbyte_cdk/sources/declarative/interpolation/jinja.py +5 -4
  41. airbyte_cdk/sources/declarative/manifest_declarative_source.py +4 -3
  42. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -1
  43. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +122 -0
  44. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +26 -17
  45. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -0
  46. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +1 -0
  47. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +1 -0
  48. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +1 -0
  49. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +1 -0
  50. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +1 -0
  51. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -0
  52. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +1 -0
  53. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +1 -0
  54. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +2 -1
  55. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -0
  56. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -0
  57. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +1 -0
  58. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +1 -0
  59. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +1 -0
  60. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +1 -0
  61. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +1 -0
  62. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +1 -0
  63. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +9 -3
  64. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -1
  65. airbyte_cdk/sources/declarative/requesters/requester.py +1 -0
  66. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +2 -1
  67. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +12 -4
  68. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +7 -4
  69. airbyte_cdk/sources/declarative/transformations/add_fields.py +1 -0
  70. airbyte_cdk/sources/declarative/transformations/remove_fields.py +1 -0
  71. airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -0
  72. airbyte_cdk/sources/embedded/tools.py +1 -0
  73. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
  74. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  75. airbyte_cdk/sources/file_based/config/csv_format.py +2 -1
  76. airbyte_cdk/sources/file_based/config/excel_format.py +2 -1
  77. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -1
  78. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  79. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  80. airbyte_cdk/sources/file_based/config/unstructured_format.py +2 -1
  81. airbyte_cdk/sources/file_based/file_based_source.py +2 -1
  82. airbyte_cdk/sources/file_based/file_based_stream_reader.py +2 -1
  83. airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -0
  84. airbyte_cdk/sources/file_based/file_types/csv_parser.py +2 -1
  85. airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -5
  86. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +2 -1
  87. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -1
  88. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +9 -8
  89. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -1
  90. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +5 -4
  91. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +1 -1
  92. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -1
  93. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +1 -1
  94. airbyte_cdk/sources/http_logger.py +1 -0
  95. airbyte_cdk/sources/streams/call_rate.py +1 -2
  96. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +2 -1
  97. airbyte_cdk/sources/streams/concurrent/adapters.py +8 -4
  98. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +2 -1
  99. airbyte_cdk/sources/streams/concurrent/cursor.py +30 -6
  100. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -1
  101. airbyte_cdk/sources/streams/concurrent/partitions/types.py +1 -1
  102. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +1 -1
  103. airbyte_cdk/sources/streams/core.py +2 -1
  104. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +2 -1
  105. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +1 -0
  106. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +1 -0
  107. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +2 -1
  108. airbyte_cdk/sources/streams/http/http.py +3 -2
  109. airbyte_cdk/sources/streams/http/http_client.py +49 -2
  110. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +2 -1
  111. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +1 -0
  112. airbyte_cdk/sources/types.py +14 -1
  113. airbyte_cdk/sources/utils/schema_helpers.py +3 -2
  114. airbyte_cdk/sql/secrets.py +2 -1
  115. airbyte_cdk/sql/shared/sql_processor.py +8 -6
  116. airbyte_cdk/test/entrypoint_wrapper.py +4 -3
  117. airbyte_cdk/test/mock_http/mocker.py +1 -0
  118. airbyte_cdk/utils/schema_inferrer.py +2 -1
  119. airbyte_cdk/utils/slice_hasher.py +1 -1
  120. airbyte_cdk/utils/traced_exception.py +2 -1
  121. {airbyte_cdk-6.7.0rc2.dist-info → airbyte_cdk-6.7.1.dist-info}/METADATA +11 -3
  122. {airbyte_cdk-6.7.0rc2.dist-info → airbyte_cdk-6.7.1.dist-info}/RECORD +125 -121
  123. airbyte_cdk-6.7.1.dist-info/entry_points.txt +3 -0
  124. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -35
  125. {airbyte_cdk-6.7.0rc2.dist-info → airbyte_cdk-6.7.1.dist-info}/LICENSE.txt +0 -0
  126. {airbyte_cdk-6.7.0rc2.dist-info → airbyte_cdk-6.7.1.dist-info}/WHEEL +0 -0
@@ -3,7 +3,7 @@
3
3
  #
4
4
 
5
5
  import logging
6
- from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple, Union, Callable
6
+ from typing import Any, Callable, Generic, Iterator, List, Mapping, Optional, Tuple, Union
7
7
 
8
8
  from airbyte_cdk.models import (
9
9
  AirbyteCatalog,
@@ -27,14 +27,16 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
27
27
  )
28
28
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
29
29
  DatetimeBasedCursor as DatetimeBasedCursorModel,
30
+ )
31
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
30
32
  DeclarativeStream as DeclarativeStreamModel,
31
33
  )
32
34
  from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
33
- ModelToComponentFactory,
34
35
  ComponentDefinition,
36
+ ModelToComponentFactory,
35
37
  )
36
38
  from airbyte_cdk.sources.declarative.requesters import HttpRequester
37
- from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever, Retriever
39
+ from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
38
40
  from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
39
41
  DeclarativePartitionFactory,
40
42
  StreamSlicerPartitionGenerator,
@@ -42,15 +44,14 @@ from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_genera
42
44
  from airbyte_cdk.sources.declarative.transformations.add_fields import AddFields
43
45
  from airbyte_cdk.sources.declarative.types import ConnectionDefinition
44
46
  from airbyte_cdk.sources.source import TState
45
- from airbyte_cdk.sources.types import Config, StreamState
46
47
  from airbyte_cdk.sources.streams import Stream
47
48
  from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
48
49
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
49
50
  AlwaysAvailableAvailabilityStrategy,
50
51
  )
51
- from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
52
52
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
53
53
  from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
54
+ from airbyte_cdk.sources.types import Config, StreamState
54
55
 
55
56
 
56
57
  class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
@@ -194,44 +195,31 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
194
195
  declarative_stream.name
195
196
  ].get("incremental_sync")
196
197
 
197
- is_without_partition_router_nor_cursor = not bool(
198
+ if (
198
199
  datetime_based_cursor_component_definition
199
- ) and not (
200
- name_to_stream_mapping[declarative_stream.name]
201
- .get("retriever", {})
202
- .get("partition_router")
203
- )
204
- is_datetime_incremental_without_partition_routing = (
205
- self._is_datetime_incremental_without_partition_routing(
206
- datetime_based_cursor_component_definition, declarative_stream
200
+ and datetime_based_cursor_component_definition.get("type", "")
201
+ == DatetimeBasedCursorModel.__name__
202
+ and self._stream_supports_concurrent_partition_processing(
203
+ declarative_stream=declarative_stream
207
204
  )
208
- )
209
- if (
210
- is_without_partition_router_nor_cursor
211
- or is_datetime_incremental_without_partition_routing
205
+ and hasattr(declarative_stream.retriever, "stream_slicer")
206
+ and isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
212
207
  ):
213
208
  stream_state = state_manager.get_stream_state(
214
209
  stream_name=declarative_stream.name, namespace=declarative_stream.namespace
215
210
  )
216
211
 
217
- if is_datetime_incremental_without_partition_routing:
218
- cursor: Cursor = (
219
- self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
220
- state_manager=state_manager,
221
- model_type=DatetimeBasedCursorModel,
222
- component_definition=datetime_based_cursor_component_definition,
223
- stream_name=declarative_stream.name,
224
- stream_namespace=declarative_stream.namespace,
225
- config=config or {},
226
- stream_state=stream_state,
227
- )
228
- )
229
- else:
230
- cursor = FinalStateCursor(
231
- declarative_stream.name,
232
- declarative_stream.namespace,
233
- self.message_repository,
212
+ cursor, connector_state_converter = (
213
+ self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
214
+ state_manager=state_manager,
215
+ model_type=DatetimeBasedCursorModel,
216
+ component_definition=datetime_based_cursor_component_definition,
217
+ stream_name=declarative_stream.name,
218
+ stream_namespace=declarative_stream.namespace,
219
+ config=config or {},
220
+ stream_state=stream_state,
234
221
  )
222
+ )
235
223
 
236
224
  partition_generator = StreamSlicerPartitionGenerator(
237
225
  DeclarativePartitionFactory(
@@ -254,9 +242,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
254
242
  json_schema=declarative_stream.get_json_schema(),
255
243
  availability_strategy=AlwaysAvailableAvailabilityStrategy(),
256
244
  primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
257
- cursor_field=cursor.cursor_field.cursor_field_key
258
- if hasattr(cursor, "cursor_field")
259
- else None,
245
+ cursor_field=cursor.cursor_field.cursor_field_key,
260
246
  logger=self.logger,
261
247
  cursor=cursor,
262
248
  )
@@ -268,22 +254,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
268
254
 
269
255
  return concurrent_streams, synchronous_streams
270
256
 
271
- def _is_datetime_incremental_without_partition_routing(
272
- self,
273
- datetime_based_cursor_component_definition: Mapping[str, Any],
274
- declarative_stream: DeclarativeStream,
275
- ) -> bool:
276
- return (
277
- bool(datetime_based_cursor_component_definition)
278
- and datetime_based_cursor_component_definition.get("type", "")
279
- == DatetimeBasedCursorModel.__name__
280
- and self._stream_supports_concurrent_partition_processing(
281
- declarative_stream=declarative_stream
282
- )
283
- and hasattr(declarative_stream.retriever, "stream_slicer")
284
- and isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
285
- )
286
-
287
257
  def _stream_supports_concurrent_partition_processing(
288
258
  self, declarative_stream: DeclarativeStream
289
259
  ) -> bool:
@@ -1972,6 +1972,173 @@ definitions:
1972
1972
  - app_id:
1973
1973
  type: string
1974
1974
  path_in_connector_config: ["info", "app_id"]
1975
+ oauth_connector_input_specification:
1976
+ title: DeclarativeOAuth Connector Specification
1977
+ description: |-
1978
+ The DeclarativeOAuth specific blob.
1979
+ Pertains to the fields defined by the connector relating to the OAuth flow.
1980
+
1981
+ Interpolation capabilities:
1982
+ - The variables placeholders are declared as `{my_var}`.
1983
+ - Nested resolution variables like `{{my_nested_var}}` are allowed as well.
1984
+
1985
+ - The allowed interpolation context is:
1986
+ + base64Encoder - encode to `base64`, {base64Encoder:{my_var_a}:{my_var_b}}
1987
+ + base64Decoder - decode from `base64` encoded string, {base64Decoder:{my_string_variable_or_string_value}}
1988
+ + urlEncoder - encode the input string to URL-like format, {urlEncoder:https://test.host.com/endpoint}
1989
+ + urlDecoder - decode the input url-encoded string into text format, {urlDecoder:https%3A%2F%2Fairbyte.io}
1990
+ + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {codeChallengeS256:{state_value}}
1991
+
1992
+ Examples:
1993
+ - The TikTok Marketing DeclarativeOAuth spec:
1994
+ {
1995
+ "oauth_connector_input_specification": {
1996
+ "type": "object",
1997
+ "additionalProperties": false,
1998
+ "properties": {
1999
+ "consent_url": "https://ads.tiktok.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",
2000
+ "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",
2001
+ "access_token_params": {
2002
+ "{auth_code_key}": "{{auth_code_key}}",
2003
+ "{client_id_key}": "{{client_id_key}}",
2004
+ "{client_secret_key}": "{{client_secret_key}}"
2005
+ },
2006
+ "access_token_headers": {
2007
+ "Content-Type": "application/json",
2008
+ "Accept": "application/json"
2009
+ },
2010
+ "extract_output": ["data.access_token"],
2011
+ "client_id_key": "app_id",
2012
+ "client_secret_key": "secret",
2013
+ "auth_code_key": "auth_code"
2014
+ }
2015
+ }
2016
+ }
2017
+ type: object
2018
+ additionalProperties: true
2019
+ required:
2020
+ - consent_url
2021
+ - access_token_url
2022
+ - extract_output
2023
+ properties:
2024
+ consent_url:
2025
+ title: DeclarativeOAuth Consent URL
2026
+ type: string
2027
+ description: |-
2028
+ The DeclarativeOAuth Specific URL string template to initiate the authentication.
2029
+ The placeholders are replaced during the processing to provide necessary values.
2030
+ examples:
2031
+ - consent_url: https://domain.host.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}
2032
+ - consent_url: https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}
2033
+ scope:
2034
+ title: (Optional) DeclarativeOAuth Scope
2035
+ type: string
2036
+ description: |-
2037
+ The DeclarativeOAuth Specific string of the scopes that need to be granted for the authenticated user.
2038
+ examples:
2039
+ - scope: user:read user:read_orders workspaces:read
2040
+ access_token_url:
2041
+ title: DeclarativeOAuth Access Token URL
2042
+ type: string
2043
+ description: |-
2044
+ The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.
2045
+ The placeholders are replaced during the processing to provide necessary values.
2046
+ examples:
2047
+ - access_token_url: https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}
2048
+ access_token_headers:
2049
+ title: (Optional) DeclarativeOAuth Access Token Headers
2050
+ type: object
2051
+ additionalProperties: true
2052
+ description: |-
2053
+ The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
2054
+ examples:
2055
+ - access_token_headers: {
2056
+ "Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"
2057
+ }
2058
+ access_token_params:
2059
+ title: (Optional) DeclarativeOAuth Access Token Query Params (Json Encoded)
2060
+ type: object
2061
+ additionalProperties: true
2062
+ description: |-
2063
+ The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
2064
+ When this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.
2065
+ examples:
2066
+ - access_token_params: {
2067
+ "{auth_code_key}": "{{auth_code_key}}",
2068
+ "{client_id_key}": "{{client_id_key}}",
2069
+ "{client_secret_key}": "{{client_secret_key}}"
2070
+ }
2071
+ extract_output:
2072
+ title: DeclarativeOAuth Extract Output
2073
+ type: array
2074
+ items:
2075
+ type: string
2076
+ description: |-
2077
+ The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.
2078
+ examples:
2079
+ - extract_output: ["access_token", "refresh_token", "other_field"]
2080
+ state:
2081
+ title: (Optional) DeclarativeOAuth Configurable State Query Param
2082
+ type: object
2083
+ additionalProperties: true
2084
+ required:
2085
+ - min
2086
+ - max
2087
+ description: |-
2088
+ The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,
2089
+ including length and complexity.
2090
+ properties:
2091
+ min:
2092
+ type: integer
2093
+ max:
2094
+ type: integer
2095
+ examples:
2096
+ - state: {
2097
+ "min": 7,
2098
+ "max": 128,
2099
+ }
2100
+ client_id_key:
2101
+ title: (Optional) DeclarativeOAuth Client ID Key Override
2102
+ type: string
2103
+ description: |-
2104
+ The DeclarativeOAuth Specific optional override to provide the custom `client_id` key name, if required by data-provider.
2105
+ examples:
2106
+ - client_id_key: "my_custom_client_id_key_name"
2107
+ client_secret_key:
2108
+ title: (Optional) DeclarativeOAuth Client Secret Key Override
2109
+ type: string
2110
+ description: |-
2111
+ The DeclarativeOAuth Specific optional override to provide the custom `client_secret` key name, if required by data-provider.
2112
+ examples:
2113
+ - client_secret_key: "my_custom_client_secret_key_name"
2114
+ scope_key:
2115
+ title: (Optional) DeclarativeOAuth Scope Key Override
2116
+ type: string
2117
+ description: |-
2118
+ The DeclarativeOAuth Specific optional override to provide the custom `scope` key name, if required by data-provider.
2119
+ examples:
2120
+ - scope_key: "my_custom_scope_key_key_name"
2121
+ state_key:
2122
+ title: (Optional) DeclarativeOAuth State Key Override
2123
+ type: string
2124
+ description: |-
2125
+ The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.
2126
+ examples:
2127
+ - state_key: "my_custom_state_key_key_name"
2128
+ auth_code_key:
2129
+ title: (Optional) DeclarativeOAuth Auth Code Key Override
2130
+ type: string
2131
+ description: |-
2132
+ The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.
2133
+ examples:
2134
+ - auth_code_key: "my_custom_auth_code_key_name"
2135
+ redirect_uri_key:
2136
+ title: (Optional) DeclarativeOAuth Redirect URI Key Override
2137
+ type: string
2138
+ description: |-
2139
+ The DeclarativeOAuth Specific optional override to provide the custom `redirect_uri` key name to something like `callback_uri`, if required by data-provider.
2140
+ examples:
2141
+ - redirect_uri_key: "my_custom_redirect_uri_key_name"
1975
2142
  complete_oauth_output_specification:
1976
2143
  title: "OAuth output specification"
1977
2144
  description: |-
@@ -2790,21 +2957,21 @@ interpolation:
2790
2957
  - created_at: "2020-01-01 00:00:00.000+00:00"
2791
2958
  - updated_at: "2020-01-02 00:00:00.000+00:00"
2792
2959
  macros:
2793
- - title: Now (UTC)
2960
+ - title: now_utc
2794
2961
  description: Returns the current date and time in the UTC timezone.
2795
2962
  arguments: {}
2796
2963
  return_type: Datetime
2797
2964
  examples:
2798
2965
  - "'{{ now_utc() }}' -> '2021-09-01 00:00:00+00:00'"
2799
2966
  - "'{{ now_utc().strftime('%Y-%m-%d') }}' -> '2021-09-01'"
2800
- - title: Today (UTC)
2967
+ - title: today_utc
2801
2968
  description: Returns the current date in UTC timezone. The output is a date object.
2802
2969
  arguments: {}
2803
2970
  return_type: Date
2804
2971
  examples:
2805
2972
  - "'{{ today_utc() }}' -> '2021-09-01'"
2806
2973
  - "'{{ today_utc().strftime('%Y/%m/%d')}}' -> '2021/09/01'"
2807
- - title: Timestamp
2974
+ - title: timestamp
2808
2975
  description: Converts a number or a string representing a datetime (formatted as ISO8601) to a timestamp. If the input is a number, it is converted to an int. If no timezone is specified, the string is interpreted as UTC.
2809
2976
  arguments:
2810
2977
  datetime: A string formatted as ISO8601 or an integer representing a unix timestamp
@@ -2815,7 +2982,7 @@ interpolation:
2815
2982
  - "'{{ timestamp('2022-02-28T00:00:00Z') }}' -> 1646006400"
2816
2983
  - "'{{ timestamp('2022-02-28 00:00:00Z') }}' -> 1646006400"
2817
2984
  - "'{{ timestamp('2022-02-28T00:00:00-08:00') }}' -> 1646035200"
2818
- - title: Max
2985
+ - title: max
2819
2986
  description: Returns the largest object of an iterable, or of two or more arguments.
2820
2987
  arguments:
2821
2988
  args: iterable or a sequence of two or more arguments
@@ -2823,7 +2990,7 @@ interpolation:
2823
2990
  examples:
2824
2991
  - "'{{ max(2, 3) }}' -> 3"
2825
2992
  - "'{{ max([2, 3]) }}' -> 3"
2826
- - title: Day Delta
2993
+ - title: day_delta
2827
2994
  description: Returns the datetime of now() + num_days.
2828
2995
  arguments:
2829
2996
  num_days: The number of days to add to now
@@ -2833,8 +3000,8 @@ interpolation:
2833
3000
  - "'{{ day_delta(1) }}' -> '2021-09-02T00:00:00.000000+0000'"
2834
3001
  - "'{{ day_delta(-1) }}' -> '2021-08-31T00:00:00.000000+0000'"
2835
3002
  - "'{{ day_delta(25, format='%Y-%m-%d') }}' -> '2021-09-02'"
2836
- - title: Duration
2837
- description: Converts an ISO8601 duratioin to datetime.timedelta.
3003
+ - title: duration
3004
+ description: Converts an ISO8601 duration to datetime timedelta.
2838
3005
  arguments:
2839
3006
  duration_string: "A string representing an ISO8601 duration. See https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm for more details."
2840
3007
  return_type: datetime.timedelta
@@ -2842,7 +3009,7 @@ interpolation:
2842
3009
  - "'{{ duration('P1D') }}' -> '1 day, 0:00:00'"
2843
3010
  - "'{{ duration('P6DT23H') }}' -> '6 days, 23:00:00'"
2844
3011
  - "'{{ (now_utc() - duration('P1D')).strftime('%Y-%m-%dT%H:%M:%SZ') }}' -> '2021-08-31T00:00:00Z'"
2845
- - title: Format Datetime
3012
+ - title: format_datetime
2846
3013
  description: Converts a datetime or a datetime-string to the specified format.
2847
3014
  arguments:
2848
3015
  datetime: The datetime object or a string to convert. If datetime is a string, it must be formatted as ISO8601.
@@ -2854,7 +3021,7 @@ interpolation:
2854
3021
  - "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ') }}"
2855
3022
  - "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ', '%a, %d %b %Y %H:%M:%S %z') }}"
2856
3023
  filters:
2857
- - title: Hash
3024
+ - title: hash
2858
3025
  description: Convert the specified value to a hashed string.
2859
3026
  arguments:
2860
3027
  hash_type: Valid hash type for converts ('md5' as default value).
@@ -2864,26 +3031,26 @@ interpolation:
2864
3031
  - "{{ 'Test client_secret' | hash() }} -> '3032d57a12f76b61a820e47b9a5a0cbb'"
2865
3032
  - "{{ 'Test client_secret' | hash('md5') }} -> '3032d57a12f76b61a820e47b9a5a0cbb'"
2866
3033
  - "{{ 'Test client_secret' | hash('md5', salt='salt') }} -> '5011a0168579c2d94cbbe1c6ad14327c'"
2867
- - title: Base64 encoder
3034
+ - title: base64encode
2868
3035
  description: Convert the specified value to a string in the base64 format.
2869
3036
  arguments: {}
2870
3037
  return_type: str
2871
3038
  examples:
2872
3039
  - "{{ 'Test client_secret' | base64encode }} -> 'VGVzdCBjbGllbnRfc2VjcmV0'"
2873
- - title: Base64 decoder
3040
+ - title: base64decode
2874
3041
  description: Decodes the specified base64 format value into a common string.
2875
3042
  arguments: {}
2876
3043
  return_type: str
2877
3044
  examples:
2878
3045
  - "{{ 'ZmFrZSByZWZyZXNoX3Rva2VuIHZhbHVl' | base64decode }} -> 'fake refresh_token value'"
2879
- - title: String
3046
+ - title: string
2880
3047
  description: Converts the specified value to a string.
2881
3048
  arguments: {}
2882
3049
  return_type: str
2883
3050
  examples:
2884
3051
  - '{{ 1 | string }} -> "1"'
2885
3052
  - '{{ ["hello", "world"] | string }} -> "["hello", "world"]"'
2886
- - title: Regex Search
3053
+ - title: regex_search
2887
3054
  description: Match the input string against a regular expression and return the first match.
2888
3055
  arguments:
2889
3056
  regex: The regular expression to search for. It must include a capture group.
@@ -5,11 +5,12 @@ import codecs
5
5
  import logging
6
6
  from dataclasses import InitVar, dataclass
7
7
  from gzip import decompress
8
- from typing import Any, Generator, Mapping, MutableMapping, List, Optional
8
+ from typing import Any, Generator, List, Mapping, MutableMapping, Optional
9
9
 
10
+ import orjson
10
11
  import requests
12
+
11
13
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
12
- import orjson
13
14
 
14
15
  logger = logging.getLogger("airbyte")
15
16
 
@@ -4,6 +4,7 @@ import logging
4
4
  from typing import Any, Generator, Mapping
5
5
 
6
6
  import requests
7
+
7
8
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
8
9
 
9
10
  logger = logging.getLogger("airbyte")
@@ -7,6 +7,7 @@ from dataclasses import dataclass
7
7
  from typing import Any, Generator, MutableMapping
8
8
 
9
9
  import requests
10
+
10
11
  from airbyte_cdk.sources.declarative.decoders import Decoder
11
12
 
12
13
  logger = logging.getLogger("airbyte")
@@ -9,6 +9,7 @@ from xml.parsers.expat import ExpatError
9
9
 
10
10
  import requests
11
11
  import xmltodict
12
+
12
13
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
13
14
 
14
15
  logger = logging.getLogger("airbyte")
@@ -7,6 +7,7 @@ from typing import Any, Iterable, List, Mapping, MutableMapping, Union
7
7
 
8
8
  import dpath
9
9
  import requests
10
+
10
11
  from airbyte_cdk.sources.declarative.decoders import Decoder, JsonDecoder
11
12
  from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
12
13
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
@@ -6,6 +6,7 @@ from abc import abstractmethod
6
6
  from typing import Any, Iterable, Mapping, Optional
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
10
11
 
11
12
 
@@ -1,7 +1,6 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
- import datetime
5
4
  from dataclasses import InitVar, dataclass
6
5
  from typing import Any, Iterable, Mapping, Optional, Union
7
6
 
@@ -11,7 +10,7 @@ from airbyte_cdk.sources.declarative.incremental import (
11
10
  PerPartitionWithGlobalCursor,
12
11
  )
13
12
  from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
14
- from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
13
+ from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
15
14
 
16
15
 
17
16
  @dataclass
@@ -68,20 +67,6 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
68
67
  self._date_time_based_cursor = date_time_based_cursor
69
68
  self._substream_cursor = substream_cursor
70
69
 
71
- @property
72
- def _cursor_field(self) -> str:
73
- return self._date_time_based_cursor.cursor_field.eval(self._date_time_based_cursor.config) # type: ignore # eval returns a string in this context
74
-
75
- @property
76
- def _start_date_from_config(self) -> datetime.datetime:
77
- return self._date_time_based_cursor._start_datetime.get_datetime(
78
- self._date_time_based_cursor.config
79
- )
80
-
81
- @property
82
- def _end_datetime(self) -> datetime.datetime:
83
- return self._date_time_based_cursor.select_best_end_datetime()
84
-
85
70
  def filter_records(
86
71
  self,
87
72
  records: Iterable[Mapping[str, Any]],
@@ -89,16 +74,14 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
89
74
  stream_slice: Optional[StreamSlice] = None,
90
75
  next_page_token: Optional[Mapping[str, Any]] = None,
91
76
  ) -> Iterable[Mapping[str, Any]]:
92
- state_value = self._get_state_value(
93
- stream_state, stream_slice or StreamSlice(partition={}, cursor_slice={})
94
- )
95
- filter_date: datetime.datetime = self._get_filter_date(state_value)
96
77
  records = (
97
78
  record
98
79
  for record in records
99
- if self._end_datetime
100
- >= self._date_time_based_cursor.parse_date(record[self._cursor_field])
101
- >= filter_date
80
+ if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
81
+ # Record is created on the fly to align with the cursor interface; stream name is ignored as we don't need it here
82
+ # Record stream name is empty because it is not used during the filtering
83
+ Record(data=record, associated_slice=stream_slice, stream_name="")
84
+ )
102
85
  )
103
86
  if self.condition:
104
87
  records = super().filter_records(
@@ -108,28 +91,3 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
108
91
  next_page_token=next_page_token,
109
92
  )
110
93
  yield from records
111
-
112
- def _get_state_value(
113
- self, stream_state: StreamState, stream_slice: StreamSlice
114
- ) -> Optional[str]:
115
- """
116
- Return cursor_value or None in case it was not found.
117
- Cursor_value may be empty if:
118
- 1. It is an initial sync => no stream_state exist at all.
119
- 2. In Parent-child stream, and we already make initial sync, so stream_state is present.
120
- During the second read, we receive one extra record from parent and therefore no stream_state for this record will be found.
121
-
122
- :param StreamState stream_state: State
123
- :param StreamSlice stream_slice: Current Stream slice
124
- :return Optional[str]: cursor_value in case it was found, otherwise None.
125
- """
126
- state = (self._substream_cursor or self._date_time_based_cursor).select_state(stream_slice)
127
-
128
- return state.get(self._cursor_field) if state else None
129
-
130
- def _get_filter_date(self, state_value: Optional[str]) -> datetime.datetime:
131
- start_date_parsed = self._start_date_from_config
132
- if state_value:
133
- return max(start_date_parsed, self._date_time_based_cursor.parse_date(state_value))
134
- else:
135
- return start_date_parsed
@@ -3,12 +3,14 @@
3
3
  #
4
4
 
5
5
  from dataclasses import InitVar, dataclass, field
6
- from typing import Any, Iterable, List, Mapping, Optional
6
+ from typing import Any, Iterable, List, Mapping, Optional, Union
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
10
11
  from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
11
12
  from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
13
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
12
14
  from airbyte_cdk.sources.declarative.models import SchemaNormalization
13
15
  from airbyte_cdk.sources.declarative.transformations import RecordTransformation
14
16
  from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
@@ -37,11 +39,34 @@ class RecordSelector(HttpSelector):
37
39
  config: Config
38
40
  parameters: InitVar[Mapping[str, Any]]
39
41
  schema_normalization: TypeTransformer
42
+ name: str
43
+ _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="")
40
44
  record_filter: Optional[RecordFilter] = None
41
45
  transformations: List[RecordTransformation] = field(default_factory=lambda: [])
42
46
 
43
47
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
44
48
  self._parameters = parameters
49
+ self._name = (
50
+ InterpolatedString(self._name, parameters=parameters)
51
+ if isinstance(self._name, str)
52
+ else self._name
53
+ )
54
+
55
+ @property # type: ignore
56
+ def name(self) -> str:
57
+ """
58
+ :return: Stream name
59
+ """
60
+ return (
61
+ str(self._name.eval(self.config))
62
+ if isinstance(self._name, InterpolatedString)
63
+ else self._name
64
+ )
65
+
66
+ @name.setter
67
+ def name(self, value: str) -> None:
68
+ if not isinstance(value, property):
69
+ self._name = value
45
70
 
46
71
  def select_records(
47
72
  self,
@@ -85,7 +110,7 @@ class RecordSelector(HttpSelector):
85
110
  transformed_data = self._transform(filtered_data, stream_state, stream_slice)
86
111
  normalized_data = self._normalize_by_schema(transformed_data, schema=records_schema)
87
112
  for data in normalized_data:
88
- yield Record(data, stream_slice)
113
+ yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)
89
114
 
90
115
  def _normalize_by_schema(
91
116
  self, records: Iterable[Mapping[str, Any]], schema: Optional[Mapping[str, Any]]
@@ -125,6 +150,9 @@ class RecordSelector(HttpSelector):
125
150
  for record in records:
126
151
  for transformation in self.transformations:
127
152
  transformation.transform(
128
- record, config=self.config, stream_state=stream_state, stream_slice=stream_slice
129
- ) # type: ignore # record has type Mapping[str, Any], but Dict[str, Any] expected
153
+ record, # type: ignore # record has type Mapping[str, Any], but Dict[str, Any] expected
154
+ config=self.config,
155
+ stream_state=stream_state,
156
+ stream_slice=stream_slice,
157
+ )
130
158
  yield record
@@ -10,9 +10,10 @@ from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
10
10
 
11
11
  import pandas as pd
12
12
  import requests
13
- from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
14
13
  from numpy import nan
15
14
 
15
+ from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
16
+
16
17
  EMPTY_STR: str = ""
17
18
  DEFAULT_ENCODING: str = "utf-8"
18
19
  DOWNLOAD_CHUNK_SIZE: int = 1024 * 10