airbyte-cdk 6.7.0rc2__py3-none-any.whl → 6.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +6 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +224 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +2 -1
- airbyte_cdk/connector.py +1 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +1 -1
- airbyte_cdk/connector_builder/main.py +2 -1
- airbyte_cdk/destinations/destination.py +2 -1
- airbyte_cdk/destinations/vector_db_based/config.py +2 -1
- airbyte_cdk/destinations/vector_db_based/document_processor.py +4 -3
- airbyte_cdk/destinations/vector_db_based/embedder.py +5 -4
- airbyte_cdk/entrypoint.py +3 -2
- airbyte_cdk/logger.py +2 -1
- airbyte_cdk/models/__init__.py +2 -0
- airbyte_cdk/models/airbyte_protocol.py +2 -1
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +3 -3
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +1 -1
- airbyte_cdk/sources/config.py +2 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +1 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +1 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +1 -0
- airbyte_cdk/sources/declarative/auth/token.py +2 -1
- airbyte_cdk/sources/declarative/auth/token_provider.py +3 -2
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +24 -54
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +180 -13
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +3 -2
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +1 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +1 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +1 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +1 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +1 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +6 -48
- airbyte_cdk/sources/declarative/extractors/record_selector.py +32 -4
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +2 -1
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +2 -1
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +5 -2
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +5 -2
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +1 -3
- airbyte_cdk/sources/declarative/interpolation/jinja.py +5 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +4 -3
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +122 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +26 -17
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +1 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +2 -1
- airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +9 -3
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -1
- airbyte_cdk/sources/declarative/requesters/requester.py +1 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +2 -1
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +12 -4
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +7 -4
- airbyte_cdk/sources/declarative/transformations/add_fields.py +1 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +1 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -0
- airbyte_cdk/sources/embedded/tools.py +1 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
- airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
- airbyte_cdk/sources/file_based/config/csv_format.py +2 -1
- airbyte_cdk/sources/file_based/config/excel_format.py +2 -1
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -1
- airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
- airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
- airbyte_cdk/sources/file_based/config/unstructured_format.py +2 -1
- airbyte_cdk/sources/file_based/file_based_source.py +2 -1
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +2 -1
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +2 -1
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -5
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +2 -1
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -1
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +9 -8
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -1
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +5 -4
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +1 -1
- airbyte_cdk/sources/http_logger.py +1 -0
- airbyte_cdk/sources/streams/call_rate.py +1 -2
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +2 -1
- airbyte_cdk/sources/streams/concurrent/adapters.py +8 -4
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +2 -1
- airbyte_cdk/sources/streams/concurrent/cursor.py +30 -6
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -1
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +1 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +1 -1
- airbyte_cdk/sources/streams/core.py +2 -1
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +2 -1
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +1 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +1 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +2 -1
- airbyte_cdk/sources/streams/http/http.py +3 -2
- airbyte_cdk/sources/streams/http/http_client.py +49 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +1 -0
- airbyte_cdk/sources/types.py +14 -1
- airbyte_cdk/sources/utils/schema_helpers.py +3 -2
- airbyte_cdk/sql/secrets.py +2 -1
- airbyte_cdk/sql/shared/sql_processor.py +8 -6
- airbyte_cdk/test/entrypoint_wrapper.py +4 -3
- airbyte_cdk/test/mock_http/mocker.py +1 -0
- airbyte_cdk/utils/schema_inferrer.py +2 -1
- airbyte_cdk/utils/slice_hasher.py +1 -1
- airbyte_cdk/utils/traced_exception.py +2 -1
- {airbyte_cdk-6.7.0rc2.dist-info → airbyte_cdk-6.7.1.dist-info}/METADATA +11 -3
- {airbyte_cdk-6.7.0rc2.dist-info → airbyte_cdk-6.7.1.dist-info}/RECORD +125 -121
- airbyte_cdk-6.7.1.dist-info/entry_points.txt +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -35
- {airbyte_cdk-6.7.0rc2.dist-info → airbyte_cdk-6.7.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.7.0rc2.dist-info → airbyte_cdk-6.7.1.dist-info}/WHEEL +0 -0
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple, Union
|
6
|
+
from typing import Any, Callable, Generic, Iterator, List, Mapping, Optional, Tuple, Union
|
7
7
|
|
8
8
|
from airbyte_cdk.models import (
|
9
9
|
AirbyteCatalog,
|
@@ -27,14 +27,16 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
27
27
|
)
|
28
28
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
29
29
|
DatetimeBasedCursor as DatetimeBasedCursorModel,
|
30
|
+
)
|
31
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
30
32
|
DeclarativeStream as DeclarativeStreamModel,
|
31
33
|
)
|
32
34
|
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
33
|
-
ModelToComponentFactory,
|
34
35
|
ComponentDefinition,
|
36
|
+
ModelToComponentFactory,
|
35
37
|
)
|
36
38
|
from airbyte_cdk.sources.declarative.requesters import HttpRequester
|
37
|
-
from airbyte_cdk.sources.declarative.retrievers import
|
39
|
+
from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
|
38
40
|
from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
|
39
41
|
DeclarativePartitionFactory,
|
40
42
|
StreamSlicerPartitionGenerator,
|
@@ -42,15 +44,14 @@ from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_genera
|
|
42
44
|
from airbyte_cdk.sources.declarative.transformations.add_fields import AddFields
|
43
45
|
from airbyte_cdk.sources.declarative.types import ConnectionDefinition
|
44
46
|
from airbyte_cdk.sources.source import TState
|
45
|
-
from airbyte_cdk.sources.types import Config, StreamState
|
46
47
|
from airbyte_cdk.sources.streams import Stream
|
47
48
|
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
|
48
49
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
|
49
50
|
AlwaysAvailableAvailabilityStrategy,
|
50
51
|
)
|
51
|
-
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
|
52
52
|
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
|
53
53
|
from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
|
54
|
+
from airbyte_cdk.sources.types import Config, StreamState
|
54
55
|
|
55
56
|
|
56
57
|
class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
@@ -194,44 +195,31 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
194
195
|
declarative_stream.name
|
195
196
|
].get("incremental_sync")
|
196
197
|
|
197
|
-
|
198
|
+
if (
|
198
199
|
datetime_based_cursor_component_definition
|
199
|
-
|
200
|
-
|
201
|
-
.
|
202
|
-
|
203
|
-
)
|
204
|
-
is_datetime_incremental_without_partition_routing = (
|
205
|
-
self._is_datetime_incremental_without_partition_routing(
|
206
|
-
datetime_based_cursor_component_definition, declarative_stream
|
200
|
+
and datetime_based_cursor_component_definition.get("type", "")
|
201
|
+
== DatetimeBasedCursorModel.__name__
|
202
|
+
and self._stream_supports_concurrent_partition_processing(
|
203
|
+
declarative_stream=declarative_stream
|
207
204
|
)
|
208
|
-
|
209
|
-
|
210
|
-
is_without_partition_router_nor_cursor
|
211
|
-
or is_datetime_incremental_without_partition_routing
|
205
|
+
and hasattr(declarative_stream.retriever, "stream_slicer")
|
206
|
+
and isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
|
212
207
|
):
|
213
208
|
stream_state = state_manager.get_stream_state(
|
214
209
|
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
215
210
|
)
|
216
211
|
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
stream_state=stream_state,
|
227
|
-
)
|
228
|
-
)
|
229
|
-
else:
|
230
|
-
cursor = FinalStateCursor(
|
231
|
-
declarative_stream.name,
|
232
|
-
declarative_stream.namespace,
|
233
|
-
self.message_repository,
|
212
|
+
cursor, connector_state_converter = (
|
213
|
+
self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
|
214
|
+
state_manager=state_manager,
|
215
|
+
model_type=DatetimeBasedCursorModel,
|
216
|
+
component_definition=datetime_based_cursor_component_definition,
|
217
|
+
stream_name=declarative_stream.name,
|
218
|
+
stream_namespace=declarative_stream.namespace,
|
219
|
+
config=config or {},
|
220
|
+
stream_state=stream_state,
|
234
221
|
)
|
222
|
+
)
|
235
223
|
|
236
224
|
partition_generator = StreamSlicerPartitionGenerator(
|
237
225
|
DeclarativePartitionFactory(
|
@@ -254,9 +242,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
254
242
|
json_schema=declarative_stream.get_json_schema(),
|
255
243
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
256
244
|
primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
|
257
|
-
cursor_field=cursor.cursor_field.cursor_field_key
|
258
|
-
if hasattr(cursor, "cursor_field")
|
259
|
-
else None,
|
245
|
+
cursor_field=cursor.cursor_field.cursor_field_key,
|
260
246
|
logger=self.logger,
|
261
247
|
cursor=cursor,
|
262
248
|
)
|
@@ -268,22 +254,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
268
254
|
|
269
255
|
return concurrent_streams, synchronous_streams
|
270
256
|
|
271
|
-
def _is_datetime_incremental_without_partition_routing(
|
272
|
-
self,
|
273
|
-
datetime_based_cursor_component_definition: Mapping[str, Any],
|
274
|
-
declarative_stream: DeclarativeStream,
|
275
|
-
) -> bool:
|
276
|
-
return (
|
277
|
-
bool(datetime_based_cursor_component_definition)
|
278
|
-
and datetime_based_cursor_component_definition.get("type", "")
|
279
|
-
== DatetimeBasedCursorModel.__name__
|
280
|
-
and self._stream_supports_concurrent_partition_processing(
|
281
|
-
declarative_stream=declarative_stream
|
282
|
-
)
|
283
|
-
and hasattr(declarative_stream.retriever, "stream_slicer")
|
284
|
-
and isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
|
285
|
-
)
|
286
|
-
|
287
257
|
def _stream_supports_concurrent_partition_processing(
|
288
258
|
self, declarative_stream: DeclarativeStream
|
289
259
|
) -> bool:
|
@@ -1972,6 +1972,173 @@ definitions:
|
|
1972
1972
|
- app_id:
|
1973
1973
|
type: string
|
1974
1974
|
path_in_connector_config: ["info", "app_id"]
|
1975
|
+
oauth_connector_input_specification:
|
1976
|
+
title: DeclarativeOAuth Connector Specification
|
1977
|
+
description: |-
|
1978
|
+
The DeclarativeOAuth specific blob.
|
1979
|
+
Pertains to the fields defined by the connector relating to the OAuth flow.
|
1980
|
+
|
1981
|
+
Interpolation capabilities:
|
1982
|
+
- The variable placeholders are declared as `{my_var}`.
|
1983
|
+
- Nested resolution variables like `{{my_nested_var}}` are allowed as well.
|
1984
|
+
|
1985
|
+
- The allowed interpolation context is:
|
1986
|
+
+ base64Encoder - encode to `base64`, {base64Encoder:{my_var_a}:{my_var_b}}
|
1987
|
+
+ base64Decoder - decode from `base64` encoded string, {base64Decoder:{my_string_variable_or_string_value}}
|
1988
|
+
+ urlEncoder - encode the input string to URL-like format, {urlEncoder:https://test.host.com/endpoint}
|
1989
|
+
+ urlDecoder - decode the input url-encoded string into text format, {urlDecoder:https%3A%2F%2Fairbyte.io}
|
1990
|
+
+ codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {codeChallengeS256:{state_value}}
|
1991
|
+
|
1992
|
+
Examples:
|
1993
|
+
- The TikTok Marketing DeclarativeOAuth spec:
|
1994
|
+
{
|
1995
|
+
"oauth_connector_input_specification": {
|
1996
|
+
"type": "object",
|
1997
|
+
"additionalProperties": false,
|
1998
|
+
"properties": {
|
1999
|
+
"consent_url": "https://ads.tiktok.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",
|
2000
|
+
"access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",
|
2001
|
+
"access_token_params": {
|
2002
|
+
"{auth_code_key}": "{{auth_code_key}}",
|
2003
|
+
"{client_id_key}": "{{client_id_key}}",
|
2004
|
+
"{client_secret_key}": "{{client_secret_key}}"
|
2005
|
+
},
|
2006
|
+
"access_token_headers": {
|
2007
|
+
"Content-Type": "application/json",
|
2008
|
+
"Accept": "application/json"
|
2009
|
+
},
|
2010
|
+
"extract_output": ["data.access_token"],
|
2011
|
+
"client_id_key": "app_id",
|
2012
|
+
"client_secret_key": "secret",
|
2013
|
+
"auth_code_key": "auth_code"
|
2014
|
+
}
|
2015
|
+
}
|
2016
|
+
}
|
2017
|
+
type: object
|
2018
|
+
additionalProperties: true
|
2019
|
+
required:
|
2020
|
+
- consent_url
|
2021
|
+
- access_token_url
|
2022
|
+
- extract_output
|
2023
|
+
properties:
|
2024
|
+
consent_url:
|
2025
|
+
title: DeclarativeOAuth Consent URL
|
2026
|
+
type: string
|
2027
|
+
description: |-
|
2028
|
+
The DeclarativeOAuth Specific string URL string template to initiate the authentication.
|
2029
|
+
The placeholders are replaced during the processing to provide necessary values.
|
2030
|
+
examples:
|
2031
|
+
- consent_url: https://domain.host.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}
|
2032
|
+
- consent_url: https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}
|
2033
|
+
scope:
|
2034
|
+
title: (Optional) DeclarativeOAuth Scope
|
2035
|
+
type: string
|
2036
|
+
description: |-
|
2037
|
+
The DeclarativeOAuth Specific string of the scopes that need to be granted for the authenticated user.
|
2038
|
+
examples:
|
2039
|
+
- scope: user:read user:read_orders workspaces:read
|
2040
|
+
access_token_url:
|
2041
|
+
title: DeclarativeOAuth Access Token URL
|
2042
|
+
type: string
|
2043
|
+
description: |-
|
2044
|
+
The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.
|
2045
|
+
The placeholders are replaced during the processing to provide necessary values.
|
2046
|
+
examples:
|
2047
|
+
- access_token_url: https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}
|
2048
|
+
access_token_headers:
|
2049
|
+
title: (Optional) DeclarativeOAuth Access Token Headers
|
2050
|
+
type: object
|
2051
|
+
additionalProperties: true
|
2052
|
+
description: |-
|
2053
|
+
The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
|
2054
|
+
examples:
|
2055
|
+
- access_token_headers: {
|
2056
|
+
"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"
|
2057
|
+
}
|
2058
|
+
access_token_params:
|
2059
|
+
title: (Optional) DeclarativeOAuth Access Token Query Params (Json Encoded)
|
2060
|
+
type: object
|
2061
|
+
additionalProperties: true
|
2062
|
+
description: |-
|
2063
|
+
The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
|
2064
|
+
When this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.
|
2065
|
+
examples:
|
2066
|
+
- access_token_params: {
|
2067
|
+
"{auth_code_key}": "{{auth_code_key}}",
|
2068
|
+
"{client_id_key}": "{{client_id_key}}",
|
2069
|
+
"{client_secret_key}": "{{client_secret_key}}"
|
2070
|
+
}
|
2071
|
+
extract_output:
|
2072
|
+
title: DeclarativeOAuth Extract Output
|
2073
|
+
type: array
|
2074
|
+
items:
|
2075
|
+
type: string
|
2076
|
+
description: |-
|
2077
|
+
The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.
|
2078
|
+
examples:
|
2079
|
+
- extract_output: ["access_token", "refresh_token", "other_field"]
|
2080
|
+
state:
|
2081
|
+
title: (Optional) DeclarativeOAuth Configurable State Query Param
|
2082
|
+
type: object
|
2083
|
+
additionalProperties: true
|
2084
|
+
required:
|
2085
|
+
- min
|
2086
|
+
- max
|
2087
|
+
description: |-
|
2088
|
+
The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,
|
2089
|
+
including length and complexity.
|
2090
|
+
properties:
|
2091
|
+
min:
|
2092
|
+
type: integer
|
2093
|
+
max:
|
2094
|
+
type: integer
|
2095
|
+
examples:
|
2096
|
+
- state: {
|
2097
|
+
"min": 7,
|
2098
|
+
"max": 128,
|
2099
|
+
}
|
2100
|
+
client_id_key:
|
2101
|
+
title: (Optional) DeclarativeOAuth Client ID Key Override
|
2102
|
+
type: string
|
2103
|
+
description: |-
|
2104
|
+
The DeclarativeOAuth Specific optional override to provide the custom `client_id` key name, if required by data-provider.
|
2105
|
+
examples:
|
2106
|
+
- client_id_key: "my_custom_client_id_key_name"
|
2107
|
+
client_secret_key:
|
2108
|
+
title: (Optional) DeclarativeOAuth Client Secret Key Override
|
2109
|
+
type: string
|
2110
|
+
description: |-
|
2111
|
+
The DeclarativeOAuth Specific optional override to provide the custom `client_secret` key name, if required by data-provider.
|
2112
|
+
examples:
|
2113
|
+
- client_secret_key: "my_custom_client_secret_key_name"
|
2114
|
+
scope_key:
|
2115
|
+
title: (Optional) DeclarativeOAuth Scope Key Override
|
2116
|
+
type: string
|
2117
|
+
description: |-
|
2118
|
+
The DeclarativeOAuth Specific optional override to provide the custom `scope` key name, if required by data-provider.
|
2119
|
+
examples:
|
2120
|
+
- scope_key: "my_custom_scope_key_key_name"
|
2121
|
+
state_key:
|
2122
|
+
title: (Optional) DeclarativeOAuth State Key Override
|
2123
|
+
type: string
|
2124
|
+
description: |-
|
2125
|
+
The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.
|
2126
|
+
examples:
|
2127
|
+
- state_key: "my_custom_state_key_key_name"
|
2128
|
+
auth_code_key:
|
2129
|
+
title: (Optional) DeclarativeOAuth Auth Code Key Override
|
2130
|
+
type: string
|
2131
|
+
description: |-
|
2132
|
+
The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.
|
2133
|
+
examples:
|
2134
|
+
- auth_code_key: "my_custom_auth_code_key_name"
|
2135
|
+
redirect_uri_key:
|
2136
|
+
title: (Optional) DeclarativeOAuth Redirect URI Key Override
|
2137
|
+
type: string
|
2138
|
+
description: |-
|
2139
|
+
The DeclarativeOAuth Specific optional override to provide the custom `redirect_uri` key name to something like `callback_uri`, if required by data-provider.
|
2140
|
+
examples:
|
2141
|
+
- redirect_uri_key: "my_custom_redirect_uri_key_name"
|
1975
2142
|
complete_oauth_output_specification:
|
1976
2143
|
title: "OAuth output specification"
|
1977
2144
|
description: |-
|
@@ -2790,21 +2957,21 @@ interpolation:
|
|
2790
2957
|
- created_at: "2020-01-01 00:00:00.000+00:00"
|
2791
2958
|
- updated_at: "2020-01-02 00:00:00.000+00:00"
|
2792
2959
|
macros:
|
2793
|
-
- title:
|
2960
|
+
- title: now_utc
|
2794
2961
|
description: Returns the current date and time in the UTC timezone.
|
2795
2962
|
arguments: {}
|
2796
2963
|
return_type: Datetime
|
2797
2964
|
examples:
|
2798
2965
|
- "'{{ now_utc() }}' -> '2021-09-01 00:00:00+00:00'"
|
2799
2966
|
- "'{{ now_utc().strftime('%Y-%m-%d') }}' -> '2021-09-01'"
|
2800
|
-
- title:
|
2967
|
+
- title: today_utc
|
2801
2968
|
description: Returns the current date in UTC timezone. The output is a date object.
|
2802
2969
|
arguments: {}
|
2803
2970
|
return_type: Date
|
2804
2971
|
examples:
|
2805
2972
|
- "'{{ today_utc() }}' -> '2021-09-01'"
|
2806
2973
|
- "'{{ today_utc().strftime('%Y/%m/%d')}}' -> '2021/09/01'"
|
2807
|
-
- title:
|
2974
|
+
- title: timestamp
|
2808
2975
|
description: Converts a number or a string representing a datetime (formatted as ISO8601) to a timestamp. If the input is a number, it is converted to an int. If no timezone is specified, the string is interpreted as UTC.
|
2809
2976
|
arguments:
|
2810
2977
|
datetime: A string formatted as ISO8601 or an integer representing a unix timestamp
|
@@ -2815,7 +2982,7 @@ interpolation:
|
|
2815
2982
|
- "'{{ timestamp('2022-02-28T00:00:00Z') }}' -> 1646006400"
|
2816
2983
|
- "'{{ timestamp('2022-02-28 00:00:00Z') }}' -> 1646006400"
|
2817
2984
|
- "'{{ timestamp('2022-02-28T00:00:00-08:00') }}' -> 1646035200"
|
2818
|
-
- title:
|
2985
|
+
- title: max
|
2819
2986
|
description: Returns the largest object of an iterable, or of two or more arguments.
|
2820
2987
|
arguments:
|
2821
2988
|
args: iterable or a sequence of two or more arguments
|
@@ -2823,7 +2990,7 @@ interpolation:
|
|
2823
2990
|
examples:
|
2824
2991
|
- "'{{ max(2, 3) }}' -> 3"
|
2825
2992
|
- "'{{ max([2, 3]) }}' -> 3"
|
2826
|
-
- title:
|
2993
|
+
- title: day_delta
|
2827
2994
|
description: Returns the datetime of now() + num_days.
|
2828
2995
|
arguments:
|
2829
2996
|
num_days: The number of days to add to now
|
@@ -2833,8 +3000,8 @@ interpolation:
|
|
2833
3000
|
- "'{{ day_delta(1) }}' -> '2021-09-02T00:00:00.000000+0000'"
|
2834
3001
|
- "'{{ day_delta(-1) }}' -> '2021-08-31T00:00:00.000000+0000'"
|
2835
3002
|
- "'{{ day_delta(25, format='%Y-%m-%d') }}' -> '2021-09-26'"
|
2836
|
-
- title:
|
2837
|
-
description: Converts an ISO8601
|
3003
|
+
- title: duration
|
3004
|
+
description: Converts an ISO8601 duration to datetime timedelta.
|
2838
3005
|
arguments:
|
2839
3006
|
duration_string: "A string representing an ISO8601 duration. See https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm for more details."
|
2840
3007
|
return_type: datetime.timedelta
|
@@ -2842,7 +3009,7 @@ interpolation:
|
|
2842
3009
|
- "'{{ duration('P1D') }}' -> '1 day, 0:00:00'"
|
2843
3010
|
- "'{{ duration('P6DT23H') }}' -> '6 days, 23:00:00'"
|
2844
3011
|
- "'{{ (now_utc() - duration('P1D')).strftime('%Y-%m-%dT%H:%M:%SZ') }}' -> '2021-08-31T00:00:00Z'"
|
2845
|
-
- title:
|
3012
|
+
- title: format_datetime
|
2846
3013
|
description: Converts a datetime or a datetime-string to the specified format.
|
2847
3014
|
arguments:
|
2848
3015
|
datetime: The datetime object or a string to convert. If datetime is a string, it must be formatted as ISO8601.
|
@@ -2854,7 +3021,7 @@ interpolation:
|
|
2854
3021
|
- "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ') }}"
|
2855
3022
|
- "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ', '%a, %d %b %Y %H:%M:%S %z') }}"
|
2856
3023
|
filters:
|
2857
|
-
- title:
|
3024
|
+
- title: hash
|
2858
3025
|
description: Convert the specified value to a hashed string.
|
2859
3026
|
arguments:
|
2860
3027
|
hash_type: Valid hash type for converts ('md5' as default value).
|
@@ -2864,26 +3031,26 @@ interpolation:
|
|
2864
3031
|
- "{{ 'Test client_secret' | hash() }} -> '3032d57a12f76b61a820e47b9a5a0cbb'"
|
2865
3032
|
- "{{ 'Test client_secret' | hash('md5') }} -> '3032d57a12f76b61a820e47b9a5a0cbb'"
|
2866
3033
|
- "{{ 'Test client_secret' | hash('md5', salt='salt') }} -> '5011a0168579c2d94cbbe1c6ad14327c'"
|
2867
|
-
- title:
|
3034
|
+
- title: base64encode
|
2868
3035
|
description: Convert the specified value to a string in the base64 format.
|
2869
3036
|
arguments: {}
|
2870
3037
|
return_type: str
|
2871
3038
|
examples:
|
2872
3039
|
- "{{ 'Test client_secret' | base64encode }} -> 'VGVzdCBjbGllbnRfc2VjcmV0'"
|
2873
|
-
- title:
|
3040
|
+
- title: base64decode
|
2874
3041
|
description: Decodes the specified base64 format value into a common string.
|
2875
3042
|
arguments: {}
|
2876
3043
|
return_type: str
|
2877
3044
|
examples:
|
2878
3045
|
- "{{ 'ZmFrZSByZWZyZXNoX3Rva2VuIHZhbHVl' | base64decode }} -> 'fake refresh_token value'"
|
2879
|
-
- title:
|
3046
|
+
- title: string
|
2880
3047
|
description: Converts the specified value to a string.
|
2881
3048
|
arguments: {}
|
2882
3049
|
return_type: str
|
2883
3050
|
examples:
|
2884
3051
|
- '{{ 1 | string }} -> "1"'
|
2885
3052
|
- '{{ ["hello", "world"] | string }} -> "["hello", "world"]"'
|
2886
|
-
- title:
|
3053
|
+
- title: regex_search
|
2887
3054
|
description: Match the input string against a regular expression and return the first match.
|
2888
3055
|
arguments:
|
2889
3056
|
regex: The regular expression to search for. It must include a capture group.
|
@@ -5,11 +5,12 @@ import codecs
|
|
5
5
|
import logging
|
6
6
|
from dataclasses import InitVar, dataclass
|
7
7
|
from gzip import decompress
|
8
|
-
from typing import Any, Generator, Mapping, MutableMapping,
|
8
|
+
from typing import Any, Generator, List, Mapping, MutableMapping, Optional
|
9
9
|
|
10
|
+
import orjson
|
10
11
|
import requests
|
12
|
+
|
11
13
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
12
|
-
import orjson
|
13
14
|
|
14
15
|
logger = logging.getLogger("airbyte")
|
15
16
|
|
@@ -7,6 +7,7 @@ from typing import Any, Iterable, List, Mapping, MutableMapping, Union
|
|
7
7
|
|
8
8
|
import dpath
|
9
9
|
import requests
|
10
|
+
|
10
11
|
from airbyte_cdk.sources.declarative.decoders import Decoder, JsonDecoder
|
11
12
|
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
|
12
13
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
@@ -1,7 +1,6 @@
|
|
1
1
|
#
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
|
-
import datetime
|
5
4
|
from dataclasses import InitVar, dataclass
|
6
5
|
from typing import Any, Iterable, Mapping, Optional, Union
|
7
6
|
|
@@ -11,7 +10,7 @@ from airbyte_cdk.sources.declarative.incremental import (
|
|
11
10
|
PerPartitionWithGlobalCursor,
|
12
11
|
)
|
13
12
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
|
14
|
-
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
13
|
+
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
15
14
|
|
16
15
|
|
17
16
|
@dataclass
|
@@ -68,20 +67,6 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
68
67
|
self._date_time_based_cursor = date_time_based_cursor
|
69
68
|
self._substream_cursor = substream_cursor
|
70
69
|
|
71
|
-
@property
|
72
|
-
def _cursor_field(self) -> str:
|
73
|
-
return self._date_time_based_cursor.cursor_field.eval(self._date_time_based_cursor.config) # type: ignore # eval returns a string in this context
|
74
|
-
|
75
|
-
@property
|
76
|
-
def _start_date_from_config(self) -> datetime.datetime:
|
77
|
-
return self._date_time_based_cursor._start_datetime.get_datetime(
|
78
|
-
self._date_time_based_cursor.config
|
79
|
-
)
|
80
|
-
|
81
|
-
@property
|
82
|
-
def _end_datetime(self) -> datetime.datetime:
|
83
|
-
return self._date_time_based_cursor.select_best_end_datetime()
|
84
|
-
|
85
70
|
def filter_records(
|
86
71
|
self,
|
87
72
|
records: Iterable[Mapping[str, Any]],
|
@@ -89,16 +74,14 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
89
74
|
stream_slice: Optional[StreamSlice] = None,
|
90
75
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
91
76
|
) -> Iterable[Mapping[str, Any]]:
|
92
|
-
state_value = self._get_state_value(
|
93
|
-
stream_state, stream_slice or StreamSlice(partition={}, cursor_slice={})
|
94
|
-
)
|
95
|
-
filter_date: datetime.datetime = self._get_filter_date(state_value)
|
96
77
|
records = (
|
97
78
|
record
|
98
79
|
for record in records
|
99
|
-
if self.
|
100
|
-
|
101
|
-
|
80
|
+
if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
|
81
|
+
# Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
|
82
|
+
# Record stream name is empty because it is not used during the filtering
|
83
|
+
Record(data=record, associated_slice=stream_slice, stream_name="")
|
84
|
+
)
|
102
85
|
)
|
103
86
|
if self.condition:
|
104
87
|
records = super().filter_records(
|
@@ -108,28 +91,3 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
108
91
|
next_page_token=next_page_token,
|
109
92
|
)
|
110
93
|
yield from records
|
111
|
-
|
112
|
-
def _get_state_value(
|
113
|
-
self, stream_state: StreamState, stream_slice: StreamSlice
|
114
|
-
) -> Optional[str]:
|
115
|
-
"""
|
116
|
-
Return cursor_value or None in case it was not found.
|
117
|
-
Cursor_value may be empty if:
|
118
|
-
1. It is an initial sync => no stream_state exist at all.
|
119
|
-
2. In Parent-child stream, and we already make initial sync, so stream_state is present.
|
120
|
-
During the second read, we receive one extra record from parent and therefore no stream_state for this record will be found.
|
121
|
-
|
122
|
-
:param StreamState stream_state: State
|
123
|
-
:param StreamSlice stream_slice: Current Stream slice
|
124
|
-
:return Optional[str]: cursor_value in case it was found, otherwise None.
|
125
|
-
"""
|
126
|
-
state = (self._substream_cursor or self._date_time_based_cursor).select_state(stream_slice)
|
127
|
-
|
128
|
-
return state.get(self._cursor_field) if state else None
|
129
|
-
|
130
|
-
def _get_filter_date(self, state_value: Optional[str]) -> datetime.datetime:
|
131
|
-
start_date_parsed = self._start_date_from_config
|
132
|
-
if state_value:
|
133
|
-
return max(start_date_parsed, self._date_time_based_cursor.parse_date(state_value))
|
134
|
-
else:
|
135
|
-
return start_date_parsed
|
@@ -3,12 +3,14 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from dataclasses import InitVar, dataclass, field
|
6
|
-
from typing import Any, Iterable, List, Mapping, Optional
|
6
|
+
from typing import Any, Iterable, List, Mapping, Optional, Union
|
7
7
|
|
8
8
|
import requests
|
9
|
+
|
9
10
|
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
|
10
11
|
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
|
11
12
|
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
|
13
|
+
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
12
14
|
from airbyte_cdk.sources.declarative.models import SchemaNormalization
|
13
15
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
14
16
|
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
@@ -37,11 +39,34 @@ class RecordSelector(HttpSelector):
|
|
37
39
|
config: Config
|
38
40
|
parameters: InitVar[Mapping[str, Any]]
|
39
41
|
schema_normalization: TypeTransformer
|
42
|
+
name: str
|
43
|
+
_name: Union[InterpolatedString, str] = field(init=False, repr=False, default="")
|
40
44
|
record_filter: Optional[RecordFilter] = None
|
41
45
|
transformations: List[RecordTransformation] = field(default_factory=lambda: [])
|
42
46
|
|
43
47
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
44
48
|
self._parameters = parameters
|
49
|
+
self._name = (
|
50
|
+
InterpolatedString(self._name, parameters=parameters)
|
51
|
+
if isinstance(self._name, str)
|
52
|
+
else self._name
|
53
|
+
)
|
54
|
+
|
55
|
+
@property # type: ignore
|
56
|
+
def name(self) -> str:
|
57
|
+
"""
|
58
|
+
:return: Stream name
|
59
|
+
"""
|
60
|
+
return (
|
61
|
+
str(self._name.eval(self.config))
|
62
|
+
if isinstance(self._name, InterpolatedString)
|
63
|
+
else self._name
|
64
|
+
)
|
65
|
+
|
66
|
+
@name.setter
|
67
|
+
def name(self, value: str) -> None:
|
68
|
+
if not isinstance(value, property):
|
69
|
+
self._name = value
|
45
70
|
|
46
71
|
def select_records(
|
47
72
|
self,
|
@@ -85,7 +110,7 @@ class RecordSelector(HttpSelector):
|
|
85
110
|
transformed_data = self._transform(filtered_data, stream_state, stream_slice)
|
86
111
|
normalized_data = self._normalize_by_schema(transformed_data, schema=records_schema)
|
87
112
|
for data in normalized_data:
|
88
|
-
yield Record(data, stream_slice)
|
113
|
+
yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)
|
89
114
|
|
90
115
|
def _normalize_by_schema(
|
91
116
|
self, records: Iterable[Mapping[str, Any]], schema: Optional[Mapping[str, Any]]
|
@@ -125,6 +150,9 @@ class RecordSelector(HttpSelector):
|
|
125
150
|
for record in records:
|
126
151
|
for transformation in self.transformations:
|
127
152
|
transformation.transform(
|
128
|
-
record,
|
129
|
-
|
153
|
+
record, # type: ignore # record has type Mapping[str, Any], but Dict[str, Any] expected
|
154
|
+
config=self.config,
|
155
|
+
stream_state=stream_state,
|
156
|
+
stream_slice=stream_slice,
|
157
|
+
)
|
130
158
|
yield record
|
@@ -10,9 +10,10 @@ from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
|
|
10
10
|
|
11
11
|
import pandas as pd
|
12
12
|
import requests
|
13
|
-
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
|
14
13
|
from numpy import nan
|
15
14
|
|
15
|
+
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
|
16
|
+
|
16
17
|
EMPTY_STR: str = ""
|
17
18
|
DEFAULT_ENCODING: str = "utf-8"
|
18
19
|
DOWNLOAD_CHUNK_SIZE: int = 1024 * 10
|