airbyte-cdk 6.20.1__py3-none-any.whl → 6.20.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/auth/oauth.py +0 -34
- airbyte_cdk/sources/declarative/checks/__init__.py +2 -18
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +80 -16
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +21 -97
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +0 -43
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +331 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +15 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +1 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +26 -97
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +106 -116
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +4 -33
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +3 -13
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +0 -11
- airbyte_cdk/sources/file_based/exceptions.py +0 -34
- airbyte_cdk/sources/file_based/file_based_source.py +5 -28
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +4 -18
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +2 -25
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +2 -30
- airbyte_cdk/sources/streams/concurrent/cursor.py +30 -21
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +4 -33
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +4 -42
- airbyte_cdk/sources/types.py +0 -3
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/RECORD +31 -32
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +0 -51
- airbyte_cdk/sources/declarative/requesters/README.md +0 -56
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/entry_points.txt +0 -0
@@ -39,7 +39,6 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
39
39
|
token_expiry_date_format str: format of the datetime; provide it if expires_in is returned in datetime instead of seconds
|
40
40
|
token_expiry_is_time_of_expiration bool: set True it if expires_in is returned as time of expiration instead of the number seconds until expiration
|
41
41
|
refresh_request_body (Optional[Mapping[str, Any]]): The request body to send in the refresh request
|
42
|
-
refresh_request_headers (Optional[Mapping[str, Any]]): The request headers to send in the refresh request
|
43
42
|
grant_type: The grant_type to request for access_token. If set to refresh_token, the refresh_token parameter has to be provided
|
44
43
|
message_repository (MessageRepository): the message repository used to emit logs on HTTP requests
|
45
44
|
"""
|
@@ -57,13 +56,8 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
57
56
|
token_expiry_is_time_of_expiration: bool = False
|
58
57
|
access_token_name: Union[InterpolatedString, str] = "access_token"
|
59
58
|
access_token_value: Optional[Union[InterpolatedString, str]] = None
|
60
|
-
client_id_name: Union[InterpolatedString, str] = "client_id"
|
61
|
-
client_secret_name: Union[InterpolatedString, str] = "client_secret"
|
62
59
|
expires_in_name: Union[InterpolatedString, str] = "expires_in"
|
63
|
-
refresh_token_name: Union[InterpolatedString, str] = "refresh_token"
|
64
60
|
refresh_request_body: Optional[Mapping[str, Any]] = None
|
65
|
-
refresh_request_headers: Optional[Mapping[str, Any]] = None
|
66
|
-
grant_type_name: Union[InterpolatedString, str] = "grant_type"
|
67
61
|
grant_type: Union[InterpolatedString, str] = "refresh_token"
|
68
62
|
message_repository: MessageRepository = NoopMessageRepository()
|
69
63
|
|
@@ -75,15 +69,8 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
75
69
|
)
|
76
70
|
else:
|
77
71
|
self._token_refresh_endpoint = None
|
78
|
-
self._client_id_name = InterpolatedString.create(self.client_id_name, parameters=parameters)
|
79
72
|
self._client_id = InterpolatedString.create(self.client_id, parameters=parameters)
|
80
|
-
self._client_secret_name = InterpolatedString.create(
|
81
|
-
self.client_secret_name, parameters=parameters
|
82
|
-
)
|
83
73
|
self._client_secret = InterpolatedString.create(self.client_secret, parameters=parameters)
|
84
|
-
self._refresh_token_name = InterpolatedString.create(
|
85
|
-
self.refresh_token_name, parameters=parameters
|
86
|
-
)
|
87
74
|
if self.refresh_token is not None:
|
88
75
|
self._refresh_token: Optional[InterpolatedString] = InterpolatedString.create(
|
89
76
|
self.refresh_token, parameters=parameters
|
@@ -96,16 +83,10 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
96
83
|
self.expires_in_name = InterpolatedString.create(
|
97
84
|
self.expires_in_name, parameters=parameters
|
98
85
|
)
|
99
|
-
self.grant_type_name = InterpolatedString.create(
|
100
|
-
self.grant_type_name, parameters=parameters
|
101
|
-
)
|
102
86
|
self.grant_type = InterpolatedString.create(self.grant_type, parameters=parameters)
|
103
87
|
self._refresh_request_body = InterpolatedMapping(
|
104
88
|
self.refresh_request_body or {}, parameters=parameters
|
105
89
|
)
|
106
|
-
self._refresh_request_headers = InterpolatedMapping(
|
107
|
-
self.refresh_request_headers or {}, parameters=parameters
|
108
|
-
)
|
109
90
|
self._token_expiry_date: pendulum.DateTime = (
|
110
91
|
pendulum.parse(
|
111
92
|
InterpolatedString.create(self.token_expiry_date, parameters=parameters).eval(
|
@@ -141,27 +122,18 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
141
122
|
return refresh_token_endpoint
|
142
123
|
return None
|
143
124
|
|
144
|
-
def get_client_id_name(self) -> str:
|
145
|
-
return self._client_id_name.eval(self.config) # type: ignore # eval returns a string in this context
|
146
|
-
|
147
125
|
def get_client_id(self) -> str:
|
148
126
|
client_id: str = self._client_id.eval(self.config)
|
149
127
|
if not client_id:
|
150
128
|
raise ValueError("OAuthAuthenticator was unable to evaluate client_id parameter")
|
151
129
|
return client_id
|
152
130
|
|
153
|
-
def get_client_secret_name(self) -> str:
|
154
|
-
return self._client_secret_name.eval(self.config) # type: ignore # eval returns a string in this context
|
155
|
-
|
156
131
|
def get_client_secret(self) -> str:
|
157
132
|
client_secret: str = self._client_secret.eval(self.config)
|
158
133
|
if not client_secret:
|
159
134
|
raise ValueError("OAuthAuthenticator was unable to evaluate client_secret parameter")
|
160
135
|
return client_secret
|
161
136
|
|
162
|
-
def get_refresh_token_name(self) -> str:
|
163
|
-
return self._refresh_token_name.eval(self.config) # type: ignore # eval returns a string in this context
|
164
|
-
|
165
137
|
def get_refresh_token(self) -> Optional[str]:
|
166
138
|
return None if self._refresh_token is None else str(self._refresh_token.eval(self.config))
|
167
139
|
|
@@ -174,18 +146,12 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
174
146
|
def get_expires_in_name(self) -> str:
|
175
147
|
return self.expires_in_name.eval(self.config) # type: ignore # eval returns a string in this context
|
176
148
|
|
177
|
-
def get_grant_type_name(self) -> str:
|
178
|
-
return self.grant_type_name.eval(self.config) # type: ignore # eval returns a string in this context
|
179
|
-
|
180
149
|
def get_grant_type(self) -> str:
|
181
150
|
return self.grant_type.eval(self.config) # type: ignore # eval returns a string in this context
|
182
151
|
|
183
152
|
def get_refresh_request_body(self) -> Mapping[str, Any]:
|
184
153
|
return self._refresh_request_body.eval(self.config)
|
185
154
|
|
186
|
-
def get_refresh_request_headers(self) -> Mapping[str, Any]:
|
187
|
-
return self._refresh_request_headers.eval(self.config)
|
188
|
-
|
189
155
|
def get_token_expiry_date(self) -> pendulum.DateTime:
|
190
156
|
return self._token_expiry_date # type: ignore # _token_expiry_date is a pendulum.DateTime. It is never None despite what mypy thinks
|
191
157
|
|
@@ -1,24 +1,8 @@
|
|
1
1
|
#
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from typing import Mapping
|
6
|
-
|
7
|
-
from pydantic.v1 import BaseModel
|
8
|
-
|
9
|
-
from airbyte_cdk.sources.declarative.checks.check_dynamic_stream import CheckDynamicStream
|
10
5
|
from airbyte_cdk.sources.declarative.checks.check_stream import CheckStream
|
11
6
|
from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
|
12
|
-
from airbyte_cdk.sources.declarative.models import (
|
13
|
-
CheckDynamicStream as CheckDynamicStreamModel,
|
14
|
-
)
|
15
|
-
from airbyte_cdk.sources.declarative.models import (
|
16
|
-
CheckStream as CheckStreamModel,
|
17
|
-
)
|
18
|
-
|
19
|
-
COMPONENTS_CHECKER_TYPE_MAPPING: Mapping[str, type[BaseModel]] = {
|
20
|
-
"CheckStream": CheckStreamModel,
|
21
|
-
"CheckDynamicStream": CheckDynamicStreamModel,
|
22
|
-
}
|
23
7
|
|
24
|
-
__all__ = ["CheckStream", "
|
8
|
+
__all__ = ["CheckStream", "ConnectionChecker"]
|
@@ -20,6 +20,9 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
|
20
20
|
ClientSideIncrementalRecordFilterDecorator,
|
21
21
|
)
|
22
22
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
23
|
+
from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
|
24
|
+
PerPartitionWithGlobalCursor,
|
25
|
+
)
|
23
26
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
24
27
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
25
28
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
@@ -32,7 +35,7 @@ from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
|
32
35
|
ModelToComponentFactory,
|
33
36
|
)
|
34
37
|
from airbyte_cdk.sources.declarative.requesters import HttpRequester
|
35
|
-
from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
|
38
|
+
from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
|
36
39
|
from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
|
37
40
|
DeclarativePartitionFactory,
|
38
41
|
StreamSlicerPartitionGenerator,
|
@@ -230,21 +233,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
230
233
|
stream_state=stream_state,
|
231
234
|
)
|
232
235
|
|
233
|
-
retriever = declarative_stream
|
234
|
-
|
235
|
-
# This is an optimization so that we don't invoke any cursor or state management flows within the
|
236
|
-
# low-code framework because state management is handled through the ConcurrentCursor.
|
237
|
-
if declarative_stream and isinstance(retriever, SimpleRetriever):
|
238
|
-
# Also a temporary hack. In the legacy Stream implementation, as part of the read,
|
239
|
-
# set_initial_state() is called to instantiate incoming state on the cursor. Although we no
|
240
|
-
# longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
|
241
|
-
# like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
|
242
|
-
# still rely on a DatetimeBasedCursor that is properly initialized with state.
|
243
|
-
if retriever.cursor:
|
244
|
-
retriever.cursor.set_initial_state(stream_state=stream_state)
|
245
|
-
# We zero it out here, but since this is a cursor reference, the state is still properly
|
246
|
-
# instantiated for the other components that reference it
|
247
|
-
retriever.cursor = None
|
236
|
+
retriever = self._get_retriever(declarative_stream, stream_state)
|
248
237
|
|
249
238
|
partition_generator = StreamSlicerPartitionGenerator(
|
250
239
|
DeclarativePartitionFactory(
|
@@ -304,6 +293,60 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
304
293
|
cursor=final_state_cursor,
|
305
294
|
)
|
306
295
|
)
|
296
|
+
elif (
|
297
|
+
incremental_sync_component_definition
|
298
|
+
and incremental_sync_component_definition.get("type", "")
|
299
|
+
== DatetimeBasedCursorModel.__name__
|
300
|
+
and self._stream_supports_concurrent_partition_processing(
|
301
|
+
declarative_stream=declarative_stream
|
302
|
+
)
|
303
|
+
and hasattr(declarative_stream.retriever, "stream_slicer")
|
304
|
+
and isinstance(
|
305
|
+
declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
|
306
|
+
)
|
307
|
+
):
|
308
|
+
stream_state = state_manager.get_stream_state(
|
309
|
+
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
310
|
+
)
|
311
|
+
partition_router = declarative_stream.retriever.stream_slicer._partition_router
|
312
|
+
|
313
|
+
perpartition_cursor = (
|
314
|
+
self._constructor.create_concurrent_cursor_from_perpartition_cursor(
|
315
|
+
state_manager=state_manager,
|
316
|
+
model_type=DatetimeBasedCursorModel,
|
317
|
+
component_definition=incremental_sync_component_definition,
|
318
|
+
stream_name=declarative_stream.name,
|
319
|
+
stream_namespace=declarative_stream.namespace,
|
320
|
+
config=config or {},
|
321
|
+
stream_state=stream_state,
|
322
|
+
partition_router=partition_router,
|
323
|
+
)
|
324
|
+
)
|
325
|
+
|
326
|
+
retriever = self._get_retriever(declarative_stream, stream_state)
|
327
|
+
|
328
|
+
partition_generator = StreamSlicerPartitionGenerator(
|
329
|
+
DeclarativePartitionFactory(
|
330
|
+
declarative_stream.name,
|
331
|
+
declarative_stream.get_json_schema(),
|
332
|
+
retriever,
|
333
|
+
self.message_repository,
|
334
|
+
),
|
335
|
+
perpartition_cursor,
|
336
|
+
)
|
337
|
+
|
338
|
+
concurrent_streams.append(
|
339
|
+
DefaultStream(
|
340
|
+
partition_generator=partition_generator,
|
341
|
+
name=declarative_stream.name,
|
342
|
+
json_schema=declarative_stream.get_json_schema(),
|
343
|
+
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
344
|
+
primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
|
345
|
+
cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
|
346
|
+
logger=self.logger,
|
347
|
+
cursor=perpartition_cursor,
|
348
|
+
)
|
349
|
+
)
|
307
350
|
else:
|
308
351
|
synchronous_streams.append(declarative_stream)
|
309
352
|
else:
|
@@ -394,6 +437,27 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
394
437
|
return False
|
395
438
|
return True
|
396
439
|
|
440
|
+
def _get_retriever(
|
441
|
+
self, declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
|
442
|
+
) -> Retriever:
|
443
|
+
retriever = declarative_stream.retriever
|
444
|
+
|
445
|
+
# This is an optimization so that we don't invoke any cursor or state management flows within the
|
446
|
+
# low-code framework because state management is handled through the ConcurrentCursor.
|
447
|
+
if declarative_stream and isinstance(retriever, SimpleRetriever):
|
448
|
+
# Also a temporary hack. In the legacy Stream implementation, as part of the read,
|
449
|
+
# set_initial_state() is called to instantiate incoming state on the cursor. Although we no
|
450
|
+
# longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
|
451
|
+
# like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
|
452
|
+
# still rely on a DatetimeBasedCursor that is properly initialized with state.
|
453
|
+
if retriever.cursor:
|
454
|
+
retriever.cursor.set_initial_state(stream_state=stream_state)
|
455
|
+
# We zero it out here, but since this is a cursor reference, the state is still properly
|
456
|
+
# instantiated for the other components that reference it
|
457
|
+
retriever.cursor = None
|
458
|
+
|
459
|
+
return retriever
|
460
|
+
|
397
461
|
@staticmethod
|
398
462
|
def _select_streams(
|
399
463
|
streams: List[AbstractStream], configured_catalog: ConfiguredAirbyteCatalog
|
@@ -18,9 +18,7 @@ properties:
|
|
18
18
|
type: string
|
19
19
|
enum: [DeclarativeSource]
|
20
20
|
check:
|
21
|
-
|
22
|
-
- "$ref": "#/definitions/CheckStream"
|
23
|
-
- "$ref": "#/definitions/CheckDynamicStream"
|
21
|
+
"$ref": "#/definitions/CheckStream"
|
24
22
|
streams:
|
25
23
|
type: array
|
26
24
|
items:
|
@@ -305,21 +303,6 @@ definitions:
|
|
305
303
|
examples:
|
306
304
|
- ["users"]
|
307
305
|
- ["users", "contacts"]
|
308
|
-
CheckDynamicStream:
|
309
|
-
title: Dynamic Streams to Check
|
310
|
-
description: (This component is experimental. Use at your own risk.) Defines the dynamic streams to try reading when running a check operation.
|
311
|
-
type: object
|
312
|
-
required:
|
313
|
-
- type
|
314
|
-
- stream_count
|
315
|
-
properties:
|
316
|
-
type:
|
317
|
-
type: string
|
318
|
-
enum: [CheckDynamicStream]
|
319
|
-
stream_count:
|
320
|
-
title: Stream Count
|
321
|
-
description: Numbers of the streams to try reading from when running a check operation.
|
322
|
-
type: integer
|
323
306
|
CompositeErrorHandler:
|
324
307
|
title: Composite Error Handler
|
325
308
|
description: Error handler that sequentially iterates over a list of error handlers.
|
@@ -695,7 +678,7 @@ definitions:
|
|
695
678
|
properties:
|
696
679
|
type:
|
697
680
|
type: string
|
698
|
-
enum: [CustomSchemaNormalization]
|
681
|
+
enum: [ CustomSchemaNormalization ]
|
699
682
|
class_name:
|
700
683
|
title: Class Name
|
701
684
|
description: Fully-qualified name of the class that will be implementing the custom normalization. The format is `source_<name>.<package>.<class_name>`.
|
@@ -1064,13 +1047,6 @@ definitions:
|
|
1064
1047
|
type:
|
1065
1048
|
type: string
|
1066
1049
|
enum: [OAuthAuthenticator]
|
1067
|
-
client_id_name:
|
1068
|
-
title: Client ID Property Name
|
1069
|
-
description: The name of the property to use to refresh the `access_token`.
|
1070
|
-
type: string
|
1071
|
-
default: "client_id"
|
1072
|
-
examples:
|
1073
|
-
- custom_app_id
|
1074
1050
|
client_id:
|
1075
1051
|
title: Client ID
|
1076
1052
|
description: The OAuth client ID. Fill it in the user inputs.
|
@@ -1078,13 +1054,6 @@ definitions:
|
|
1078
1054
|
examples:
|
1079
1055
|
- "{{ config['client_id }}"
|
1080
1056
|
- "{{ config['credentials']['client_id }}"
|
1081
|
-
client_secret_name:
|
1082
|
-
title: Client Secret Property Name
|
1083
|
-
description: The name of the property to use to refresh the `access_token`.
|
1084
|
-
type: string
|
1085
|
-
default: "client_secret"
|
1086
|
-
examples:
|
1087
|
-
- custom_app_secret
|
1088
1057
|
client_secret:
|
1089
1058
|
title: Client Secret
|
1090
1059
|
description: The OAuth client secret. Fill it in the user inputs.
|
@@ -1092,13 +1061,6 @@ definitions:
|
|
1092
1061
|
examples:
|
1093
1062
|
- "{{ config['client_secret }}"
|
1094
1063
|
- "{{ config['credentials']['client_secret }}"
|
1095
|
-
refresh_token_name:
|
1096
|
-
title: Refresh Token Property Name
|
1097
|
-
description: The name of the property to use to refresh the `access_token`.
|
1098
|
-
type: string
|
1099
|
-
default: "refresh_token"
|
1100
|
-
examples:
|
1101
|
-
- custom_app_refresh_value
|
1102
1064
|
refresh_token:
|
1103
1065
|
title: Refresh Token
|
1104
1066
|
description: Credential artifact used to get a new access token.
|
@@ -1132,13 +1094,6 @@ definitions:
|
|
1132
1094
|
default: "expires_in"
|
1133
1095
|
examples:
|
1134
1096
|
- expires_in
|
1135
|
-
grant_type_name:
|
1136
|
-
title: Grant Type Property Name
|
1137
|
-
description: The name of the property to use to refresh the `access_token`.
|
1138
|
-
type: string
|
1139
|
-
default: "grant_type"
|
1140
|
-
examples:
|
1141
|
-
- custom_grant_type
|
1142
1097
|
grant_type:
|
1143
1098
|
title: Grant Type
|
1144
1099
|
description: Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.
|
@@ -1156,14 +1111,6 @@ definitions:
|
|
1156
1111
|
- applicationId: "{{ config['application_id'] }}"
|
1157
1112
|
applicationSecret: "{{ config['application_secret'] }}"
|
1158
1113
|
token: "{{ config['token'] }}"
|
1159
|
-
refresh_request_headers:
|
1160
|
-
title: Refresh Request Headers
|
1161
|
-
description: Headers of the request sent to get a new access token.
|
1162
|
-
type: object
|
1163
|
-
additionalProperties: true
|
1164
|
-
examples:
|
1165
|
-
- Authorization: "<AUTH_TOKEN>"
|
1166
|
-
Content-Type: "application/x-www-form-urlencoded"
|
1167
1114
|
scopes:
|
1168
1115
|
title: Scopes
|
1169
1116
|
description: List of scopes that should be granted to the access token.
|
@@ -1788,10 +1735,6 @@ definitions:
|
|
1788
1735
|
- type: array
|
1789
1736
|
items:
|
1790
1737
|
type: string
|
1791
|
-
condition:
|
1792
|
-
type: string
|
1793
|
-
interpolation_context:
|
1794
|
-
- raw_schema
|
1795
1738
|
SchemaTypeIdentifier:
|
1796
1739
|
title: Schema Type Identifier
|
1797
1740
|
description: (This component is experimental. Use at your own risk.) Identifies schema details for dynamic schema extraction and processing.
|
@@ -2261,15 +2204,15 @@ definitions:
|
|
2261
2204
|
Pertains to the fields defined by the connector relating to the OAuth flow.
|
2262
2205
|
|
2263
2206
|
Interpolation capabilities:
|
2264
|
-
- The variables placeholders are declared as `{
|
2265
|
-
- The nested resolution variables like `{{
|
2207
|
+
- The variables placeholders are declared as `{my_var}`.
|
2208
|
+
- The nested resolution variables like `{{my_nested_var}}` is allowed as well.
|
2266
2209
|
|
2267
2210
|
- The allowed interpolation context is:
|
2268
|
-
+ base64Encoder - encode to `base64`, {{
|
2269
|
-
+ base64Decorer - decode from `base64` encoded string, {{
|
2270
|
-
+ urlEncoder - encode the input string to URL-like format, {
|
2271
|
-
+ urlDecorer - decode the input url-encoded string into text format, {
|
2272
|
-
+ codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {{
|
2211
|
+
+ base64Encoder - encode to `base64`, {base64Encoder:{my_var_a}:{my_var_b}}
|
2212
|
+
+ base64Decorer - decode from `base64` encoded string, {base64Decoder:{my_string_variable_or_string_value}}
|
2213
|
+
+ urlEncoder - encode the input string to URL-like format, {urlEncoder:https://test.host.com/endpoint}
|
2214
|
+
+ urlDecorer - decode the input url-encoded string into text format, {urlDecoder:https%3A%2F%2Fairbyte.io}
|
2215
|
+
+ codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {codeChallengeS256:{state_value}}
|
2273
2216
|
|
2274
2217
|
Examples:
|
2275
2218
|
- The TikTok Marketing DeclarativeOAuth spec:
|
@@ -2278,12 +2221,12 @@ definitions:
|
|
2278
2221
|
"type": "object",
|
2279
2222
|
"additionalProperties": false,
|
2280
2223
|
"properties": {
|
2281
|
-
"consent_url": "https://ads.tiktok.com/marketing_api/auth?{
|
2224
|
+
"consent_url": "https://ads.tiktok.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",
|
2282
2225
|
"access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",
|
2283
2226
|
"access_token_params": {
|
2284
|
-
"{
|
2285
|
-
"{
|
2286
|
-
"{
|
2227
|
+
"{auth_code_key}": "{{auth_code_key}}",
|
2228
|
+
"{client_id_key}": "{{client_id_key}}",
|
2229
|
+
"{client_secret_key}": "{{client_secret_key}}"
|
2287
2230
|
},
|
2288
2231
|
"access_token_headers": {
|
2289
2232
|
"Content-Type": "application/json",
|
@@ -2301,6 +2244,7 @@ definitions:
|
|
2301
2244
|
required:
|
2302
2245
|
- consent_url
|
2303
2246
|
- access_token_url
|
2247
|
+
- extract_output
|
2304
2248
|
properties:
|
2305
2249
|
consent_url:
|
2306
2250
|
title: Consent URL
|
@@ -2309,8 +2253,8 @@ definitions:
|
|
2309
2253
|
The DeclarativeOAuth Specific string URL string template to initiate the authentication.
|
2310
2254
|
The placeholders are replaced during the processing to provide neccessary values.
|
2311
2255
|
examples:
|
2312
|
-
- https://domain.host.com/marketing_api/auth?{
|
2313
|
-
- https://endpoint.host.com/oauth2/authorize?{
|
2256
|
+
- https://domain.host.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}
|
2257
|
+
- https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}
|
2314
2258
|
scope:
|
2315
2259
|
title: Scopes
|
2316
2260
|
type: string
|
@@ -2325,7 +2269,7 @@ definitions:
|
|
2325
2269
|
The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.
|
2326
2270
|
The placeholders are replaced during the processing to provide neccessary values.
|
2327
2271
|
examples:
|
2328
|
-
- https://auth.host.com/oauth2/token?{
|
2272
|
+
- https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}
|
2329
2273
|
access_token_headers:
|
2330
2274
|
title: Access Token Headers
|
2331
2275
|
type: object
|
@@ -2334,7 +2278,7 @@ definitions:
|
|
2334
2278
|
The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
|
2335
2279
|
examples:
|
2336
2280
|
- {
|
2337
|
-
"Authorization": "Basic {{
|
2281
|
+
"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}",
|
2338
2282
|
}
|
2339
2283
|
access_token_params:
|
2340
2284
|
title: Access Token Query Params (Json Encoded)
|
@@ -2345,9 +2289,9 @@ definitions:
|
|
2345
2289
|
When this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.
|
2346
2290
|
examples:
|
2347
2291
|
- {
|
2348
|
-
"{
|
2349
|
-
"{
|
2350
|
-
"{
|
2292
|
+
"{auth_code_key}": "{{auth_code_key}}",
|
2293
|
+
"{client_id_key}": "{{client_id_key}}",
|
2294
|
+
"{client_secret_key}": "{{client_secret_key}}",
|
2351
2295
|
}
|
2352
2296
|
extract_output:
|
2353
2297
|
title: Extract Output
|
@@ -2915,7 +2859,6 @@ definitions:
|
|
2915
2859
|
parser:
|
2916
2860
|
anyOf:
|
2917
2861
|
- "$ref": "#/definitions/GzipParser"
|
2918
|
-
- "$ref": "#/definitions/JsonParser"
|
2919
2862
|
- "$ref": "#/definitions/JsonLineParser"
|
2920
2863
|
- "$ref": "#/definitions/CsvParser"
|
2921
2864
|
# PARSERS
|
@@ -2932,20 +2875,6 @@ definitions:
|
|
2932
2875
|
anyOf:
|
2933
2876
|
- "$ref": "#/definitions/JsonLineParser"
|
2934
2877
|
- "$ref": "#/definitions/CsvParser"
|
2935
|
-
- "$ref": "#/definitions/JsonParser"
|
2936
|
-
JsonParser:
|
2937
|
-
title: JsonParser
|
2938
|
-
description: Parser used for parsing str, bytes, or bytearray data and returning data in a dictionary format.
|
2939
|
-
type: object
|
2940
|
-
required:
|
2941
|
-
- type
|
2942
|
-
properties:
|
2943
|
-
type:
|
2944
|
-
type: string
|
2945
|
-
enum: [JsonParser]
|
2946
|
-
encoding:
|
2947
|
-
type: string
|
2948
|
-
default: utf-8
|
2949
2878
|
JsonLineParser:
|
2950
2879
|
type: object
|
2951
2880
|
required:
|
@@ -3048,11 +2977,6 @@ definitions:
|
|
3048
2977
|
anyOf:
|
3049
2978
|
- "$ref": "#/definitions/CustomRequester"
|
3050
2979
|
- "$ref": "#/definitions/HttpRequester"
|
3051
|
-
url_requester:
|
3052
|
-
description: Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.
|
3053
|
-
anyOf:
|
3054
|
-
- "$ref": "#/definitions/CustomRequester"
|
3055
|
-
- "$ref": "#/definitions/HttpRequester"
|
3056
2980
|
download_requester:
|
3057
2981
|
description: Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.
|
3058
2982
|
anyOf:
|
@@ -7,12 +7,9 @@ from dataclasses import dataclass
|
|
7
7
|
from io import BufferedIOBase, TextIOWrapper
|
8
8
|
from typing import Any, Generator, MutableMapping, Optional
|
9
9
|
|
10
|
-
import orjson
|
11
10
|
import requests
|
12
11
|
|
13
|
-
from airbyte_cdk.models import FailureType
|
14
12
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
15
|
-
from airbyte_cdk.utils import AirbyteTracedException
|
16
13
|
|
17
14
|
logger = logging.getLogger("airbyte")
|
18
15
|
|
@@ -45,46 +42,6 @@ class GzipParser(Parser):
|
|
45
42
|
yield from self.inner_parser.parse(gzipobj)
|
46
43
|
|
47
44
|
|
48
|
-
@dataclass
|
49
|
-
class JsonParser(Parser):
|
50
|
-
encoding: str = "utf-8"
|
51
|
-
|
52
|
-
def parse(self, data: BufferedIOBase) -> Generator[MutableMapping[str, Any], None, None]:
|
53
|
-
"""
|
54
|
-
Attempts to deserialize data using orjson library. As an extra layer of safety we fallback on the json library to deserialize the data.
|
55
|
-
"""
|
56
|
-
raw_data = data.read()
|
57
|
-
body_json = self._parse_orjson(raw_data) or self._parse_json(raw_data)
|
58
|
-
|
59
|
-
if body_json is None:
|
60
|
-
raise AirbyteTracedException(
|
61
|
-
message="Response JSON data failed to be parsed. See logs for more information.",
|
62
|
-
internal_message=f"Response JSON data failed to be parsed.",
|
63
|
-
failure_type=FailureType.system_error,
|
64
|
-
)
|
65
|
-
|
66
|
-
if isinstance(body_json, list):
|
67
|
-
yield from body_json
|
68
|
-
else:
|
69
|
-
yield from [body_json]
|
70
|
-
|
71
|
-
def _parse_orjson(self, raw_data: bytes) -> Optional[Any]:
|
72
|
-
try:
|
73
|
-
return orjson.loads(raw_data.decode(self.encoding))
|
74
|
-
except Exception as exc:
|
75
|
-
logger.debug(
|
76
|
-
f"Failed to parse JSON data using orjson library. Falling back to json library. {exc}"
|
77
|
-
)
|
78
|
-
return None
|
79
|
-
|
80
|
-
def _parse_json(self, raw_data: bytes) -> Optional[Any]:
|
81
|
-
try:
|
82
|
-
return json.loads(raw_data.decode(self.encoding))
|
83
|
-
except Exception as exc:
|
84
|
-
logger.error(f"Failed to parse JSON data using json library. {exc}")
|
85
|
-
return None
|
86
|
-
|
87
|
-
|
88
45
|
@dataclass
|
89
46
|
class JsonLineParser(Parser):
|
90
47
|
encoding: Optional[str] = "utf-8"
|
@@ -59,13 +59,11 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
59
59
|
|
60
60
|
def __init__(
|
61
61
|
self,
|
62
|
-
|
63
|
-
substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
|
62
|
+
cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
|
64
63
|
**kwargs: Any,
|
65
64
|
):
|
66
65
|
super().__init__(**kwargs)
|
67
|
-
self.
|
68
|
-
self._substream_cursor = substream_cursor
|
66
|
+
self._cursor = cursor
|
69
67
|
|
70
68
|
def filter_records(
|
71
69
|
self,
|
@@ -77,7 +75,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
77
75
|
records = (
|
78
76
|
record
|
79
77
|
for record in records
|
80
|
-
if
|
78
|
+
if self._cursor.should_be_synced(
|
81
79
|
# Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
|
82
80
|
# Record stream name is empty cause it is not used durig the filtering
|
83
81
|
Record(data=record, associated_slice=stream_slice, stream_name="")
|
@@ -2,6 +2,10 @@
|
|
2
2
|
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
|
6
|
+
ConcurrentCursorFactory,
|
7
|
+
ConcurrentPerPartitionCursor,
|
8
|
+
)
|
5
9
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
6
10
|
from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
|
7
11
|
from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
|
@@ -21,6 +25,8 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i
|
|
21
25
|
|
22
26
|
__all__ = [
|
23
27
|
"CursorFactory",
|
28
|
+
"ConcurrentCursorFactory",
|
29
|
+
"ConcurrentPerPartitionCursor",
|
24
30
|
"DatetimeBasedCursor",
|
25
31
|
"DeclarativeCursor",
|
26
32
|
"GlobalSubstreamCursor",
|