airbyte-cdk 6.21.1.dev0__py3-none-any.whl → 6.26.0.dev4103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +6 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +1 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +68 -11
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +81 -16
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +58 -2
- airbyte_cdk/sources/declarative/decoders/__init__.py +9 -1
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +59 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +334 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +15 -4
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +50 -14
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +220 -22
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +6 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +15 -0
- airbyte_cdk/sources/file_based/config/identities_based_stream_config.py +8 -0
- airbyte_cdk/sources/file_based/config/permissions.py +34 -0
- airbyte_cdk/sources/file_based/file_based_source.py +65 -1
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +33 -0
- airbyte_cdk/sources/file_based/schema_helpers.py +25 -0
- airbyte_cdk/sources/file_based/stream/__init__.py +2 -1
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +29 -0
- airbyte_cdk/sources/file_based/stream/identities_stream.py +99 -0
- airbyte_cdk/sources/http_logger.py +1 -1
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +20 -20
- airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
- {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/METADATA +3 -3
- {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/RECORD +39 -31
- {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/entry_points.txt +0 -0
@@ -171,6 +171,12 @@ def create_declarative_source(
|
|
171
171
|
"Invalid config: `__injected_declarative_manifest` should be provided at the root "
|
172
172
|
f"of the config but config only has keys: {list(config.keys() if config else [])}"
|
173
173
|
)
|
174
|
+
if not isinstance(config["__injected_declarative_manifest"], dict):
|
175
|
+
raise ValueError(
|
176
|
+
"Invalid config: `__injected_declarative_manifest` should be a dictionary, "
|
177
|
+
f"but got type: {type(config['__injected_declarative_manifest'])}"
|
178
|
+
)
|
179
|
+
|
174
180
|
return ConcurrentDeclarativeSource(
|
175
181
|
config=config,
|
176
182
|
catalog=catalog,
|
@@ -52,6 +52,7 @@ def get_limits(config: Mapping[str, Any]) -> TestReadLimits:
|
|
52
52
|
def create_source(config: Mapping[str, Any], limits: TestReadLimits) -> ManifestDeclarativeSource:
|
53
53
|
manifest = config["__injected_declarative_manifest"]
|
54
54
|
return ManifestDeclarativeSource(
|
55
|
+
config=config,
|
55
56
|
emit_connector_builder_messages=True,
|
56
57
|
source_config=manifest,
|
57
58
|
component_factory=ModelToComponentFactory(
|
@@ -3,11 +3,12 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from dataclasses import InitVar, dataclass, field
|
6
|
-
from typing import Any, List, Mapping, Optional, Union
|
6
|
+
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
7
7
|
|
8
8
|
import pendulum
|
9
9
|
|
10
10
|
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
|
11
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
|
11
12
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
|
12
13
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
13
14
|
from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
|
@@ -44,10 +45,10 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
44
45
|
message_repository (MessageRepository): the message repository used to emit logs on HTTP requests
|
45
46
|
"""
|
46
47
|
|
47
|
-
client_id: Union[InterpolatedString, str]
|
48
|
-
client_secret: Union[InterpolatedString, str]
|
49
48
|
config: Mapping[str, Any]
|
50
49
|
parameters: InitVar[Mapping[str, Any]]
|
50
|
+
client_id: Optional[Union[InterpolatedString, str]] = None
|
51
|
+
client_secret: Optional[Union[InterpolatedString, str]] = None
|
51
52
|
token_refresh_endpoint: Optional[Union[InterpolatedString, str]] = None
|
52
53
|
refresh_token: Optional[Union[InterpolatedString, str]] = None
|
53
54
|
scopes: Optional[List[str]] = None
|
@@ -66,6 +67,8 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
66
67
|
grant_type_name: Union[InterpolatedString, str] = "grant_type"
|
67
68
|
grant_type: Union[InterpolatedString, str] = "refresh_token"
|
68
69
|
message_repository: MessageRepository = NoopMessageRepository()
|
70
|
+
profile_assertion: Optional[DeclarativeAuthenticator] = None
|
71
|
+
use_profile_assertion: Optional[Union[InterpolatedBoolean, str, bool]] = False
|
69
72
|
|
70
73
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
71
74
|
super().__init__()
|
@@ -76,11 +79,19 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
76
79
|
else:
|
77
80
|
self._token_refresh_endpoint = None
|
78
81
|
self._client_id_name = InterpolatedString.create(self.client_id_name, parameters=parameters)
|
79
|
-
self._client_id =
|
82
|
+
self._client_id = (
|
83
|
+
InterpolatedString.create(self.client_id, parameters=parameters)
|
84
|
+
if self.client_id
|
85
|
+
else self.client_id
|
86
|
+
)
|
80
87
|
self._client_secret_name = InterpolatedString.create(
|
81
88
|
self.client_secret_name, parameters=parameters
|
82
89
|
)
|
83
|
-
self._client_secret =
|
90
|
+
self._client_secret = (
|
91
|
+
InterpolatedString.create(self.client_secret, parameters=parameters)
|
92
|
+
if self.client_secret
|
93
|
+
else self.client_secret
|
94
|
+
)
|
84
95
|
self._refresh_token_name = InterpolatedString.create(
|
85
96
|
self.refresh_token_name, parameters=parameters
|
86
97
|
)
|
@@ -99,7 +110,12 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
99
110
|
self.grant_type_name = InterpolatedString.create(
|
100
111
|
self.grant_type_name, parameters=parameters
|
101
112
|
)
|
102
|
-
self.grant_type = InterpolatedString.create(
|
113
|
+
self.grant_type = InterpolatedString.create(
|
114
|
+
"urn:ietf:params:oauth:grant-type:jwt-bearer"
|
115
|
+
if self.use_profile_assertion
|
116
|
+
else self.grant_type,
|
117
|
+
parameters=parameters,
|
118
|
+
)
|
103
119
|
self._refresh_request_body = InterpolatedMapping(
|
104
120
|
self.refresh_request_body or {}, parameters=parameters
|
105
121
|
)
|
@@ -115,6 +131,13 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
115
131
|
if self.token_expiry_date
|
116
132
|
else pendulum.now().subtract(days=1) # type: ignore # substract does not have type hints
|
117
133
|
)
|
134
|
+
self.use_profile_assertion = (
|
135
|
+
InterpolatedBoolean(self.use_profile_assertion, parameters=parameters)
|
136
|
+
if isinstance(self.use_profile_assertion, str)
|
137
|
+
else self.use_profile_assertion
|
138
|
+
)
|
139
|
+
self.assertion_name = "assertion"
|
140
|
+
|
118
141
|
if self.access_token_value is not None:
|
119
142
|
self._access_token_value = InterpolatedString.create(
|
120
143
|
self.access_token_value, parameters=parameters
|
@@ -126,9 +149,20 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
126
149
|
self._access_token_value if self.access_token_value else None
|
127
150
|
)
|
128
151
|
|
152
|
+
if not self.use_profile_assertion and any(
|
153
|
+
client_creds is None for client_creds in [self.client_id, self.client_secret]
|
154
|
+
):
|
155
|
+
raise ValueError(
|
156
|
+
"OAuthAuthenticator configuration error: Both 'client_id' and 'client_secret' are required for the "
|
157
|
+
"basic OAuth flow."
|
158
|
+
)
|
159
|
+
if self.profile_assertion is None and self.use_profile_assertion:
|
160
|
+
raise ValueError(
|
161
|
+
"OAuthAuthenticator configuration error: 'profile_assertion' is required when using the profile assertion flow."
|
162
|
+
)
|
129
163
|
if self.get_grant_type() == "refresh_token" and self._refresh_token is None:
|
130
164
|
raise ValueError(
|
131
|
-
"OAuthAuthenticator
|
165
|
+
"OAuthAuthenticator configuration error: A 'refresh_token' is required when the 'grant_type' is set to 'refresh_token'."
|
132
166
|
)
|
133
167
|
|
134
168
|
def get_token_refresh_endpoint(self) -> Optional[str]:
|
@@ -145,19 +179,21 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
145
179
|
return self._client_id_name.eval(self.config) # type: ignore # eval returns a string in this context
|
146
180
|
|
147
181
|
def get_client_id(self) -> str:
|
148
|
-
client_id
|
182
|
+
client_id = self._client_id.eval(self.config) if self._client_id else self._client_id
|
149
183
|
if not client_id:
|
150
184
|
raise ValueError("OAuthAuthenticator was unable to evaluate client_id parameter")
|
151
|
-
return client_id
|
185
|
+
return client_id # type: ignore # value will be returned as a string, or an error will be raised
|
152
186
|
|
153
187
|
def get_client_secret_name(self) -> str:
|
154
188
|
return self._client_secret_name.eval(self.config) # type: ignore # eval returns a string in this context
|
155
189
|
|
156
190
|
def get_client_secret(self) -> str:
|
157
|
-
client_secret
|
191
|
+
client_secret = (
|
192
|
+
self._client_secret.eval(self.config) if self._client_secret else self._client_secret
|
193
|
+
)
|
158
194
|
if not client_secret:
|
159
195
|
raise ValueError("OAuthAuthenticator was unable to evaluate client_secret parameter")
|
160
|
-
return client_secret
|
196
|
+
return client_secret # type: ignore # value will be returned as a string, or an error will be raised
|
161
197
|
|
162
198
|
def get_refresh_token_name(self) -> str:
|
163
199
|
return self._refresh_token_name.eval(self.config) # type: ignore # eval returns a string in this context
|
@@ -192,6 +228,27 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
192
228
|
def set_token_expiry_date(self, value: Union[str, int]) -> None:
|
193
229
|
self._token_expiry_date = self._parse_token_expiration_date(value)
|
194
230
|
|
231
|
+
def get_assertion_name(self) -> str:
|
232
|
+
return self.assertion_name
|
233
|
+
|
234
|
+
def get_assertion(self) -> str:
|
235
|
+
if self.profile_assertion is None:
|
236
|
+
raise ValueError("profile_assertion is not set")
|
237
|
+
return self.profile_assertion.token
|
238
|
+
|
239
|
+
def build_refresh_request_body(self) -> Mapping[str, Any]:
|
240
|
+
"""
|
241
|
+
Returns the request body to set on the refresh request
|
242
|
+
|
243
|
+
Override to define additional parameters
|
244
|
+
"""
|
245
|
+
if self.use_profile_assertion:
|
246
|
+
return {
|
247
|
+
self.get_grant_type_name(): self.get_grant_type(),
|
248
|
+
self.get_assertion_name(): self.get_assertion(),
|
249
|
+
}
|
250
|
+
return super().build_refresh_request_body()
|
251
|
+
|
195
252
|
@property
|
196
253
|
def access_token(self) -> str:
|
197
254
|
if self._access_token is None:
|
@@ -20,6 +20,9 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
|
20
20
|
ClientSideIncrementalRecordFilterDecorator,
|
21
21
|
)
|
22
22
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
23
|
+
from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
|
24
|
+
PerPartitionWithGlobalCursor,
|
25
|
+
)
|
23
26
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
24
27
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
25
28
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
@@ -32,7 +35,7 @@ from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
|
32
35
|
ModelToComponentFactory,
|
33
36
|
)
|
34
37
|
from airbyte_cdk.sources.declarative.requesters import HttpRequester
|
35
|
-
from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
|
38
|
+
from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
|
36
39
|
from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
|
37
40
|
DeclarativePartitionFactory,
|
38
41
|
StreamSlicerPartitionGenerator,
|
@@ -77,6 +80,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
77
80
|
|
78
81
|
super().__init__(
|
79
82
|
source_config=source_config,
|
83
|
+
config=config,
|
80
84
|
debug=debug,
|
81
85
|
emit_connector_builder_messages=emit_connector_builder_messages,
|
82
86
|
component_factory=component_factory,
|
@@ -230,21 +234,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
230
234
|
stream_state=stream_state,
|
231
235
|
)
|
232
236
|
|
233
|
-
retriever = declarative_stream
|
234
|
-
|
235
|
-
# This is an optimization so that we don't invoke any cursor or state management flows within the
|
236
|
-
# low-code framework because state management is handled through the ConcurrentCursor.
|
237
|
-
if declarative_stream and isinstance(retriever, SimpleRetriever):
|
238
|
-
# Also a temporary hack. In the legacy Stream implementation, as part of the read,
|
239
|
-
# set_initial_state() is called to instantiate incoming state on the cursor. Although we no
|
240
|
-
# longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
|
241
|
-
# like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
|
242
|
-
# still rely on a DatetimeBasedCursor that is properly initialized with state.
|
243
|
-
if retriever.cursor:
|
244
|
-
retriever.cursor.set_initial_state(stream_state=stream_state)
|
245
|
-
# We zero it out here, but since this is a cursor reference, the state is still properly
|
246
|
-
# instantiated for the other components that reference it
|
247
|
-
retriever.cursor = None
|
237
|
+
retriever = self._get_retriever(declarative_stream, stream_state)
|
248
238
|
|
249
239
|
partition_generator = StreamSlicerPartitionGenerator(
|
250
240
|
DeclarativePartitionFactory(
|
@@ -304,6 +294,60 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
304
294
|
cursor=final_state_cursor,
|
305
295
|
)
|
306
296
|
)
|
297
|
+
elif (
|
298
|
+
incremental_sync_component_definition
|
299
|
+
and incremental_sync_component_definition.get("type", "")
|
300
|
+
== DatetimeBasedCursorModel.__name__
|
301
|
+
and self._stream_supports_concurrent_partition_processing(
|
302
|
+
declarative_stream=declarative_stream
|
303
|
+
)
|
304
|
+
and hasattr(declarative_stream.retriever, "stream_slicer")
|
305
|
+
and isinstance(
|
306
|
+
declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
|
307
|
+
)
|
308
|
+
):
|
309
|
+
stream_state = state_manager.get_stream_state(
|
310
|
+
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
311
|
+
)
|
312
|
+
partition_router = declarative_stream.retriever.stream_slicer._partition_router
|
313
|
+
|
314
|
+
perpartition_cursor = (
|
315
|
+
self._constructor.create_concurrent_cursor_from_perpartition_cursor(
|
316
|
+
state_manager=state_manager,
|
317
|
+
model_type=DatetimeBasedCursorModel,
|
318
|
+
component_definition=incremental_sync_component_definition,
|
319
|
+
stream_name=declarative_stream.name,
|
320
|
+
stream_namespace=declarative_stream.namespace,
|
321
|
+
config=config or {},
|
322
|
+
stream_state=stream_state,
|
323
|
+
partition_router=partition_router,
|
324
|
+
)
|
325
|
+
)
|
326
|
+
|
327
|
+
retriever = self._get_retriever(declarative_stream, stream_state)
|
328
|
+
|
329
|
+
partition_generator = StreamSlicerPartitionGenerator(
|
330
|
+
DeclarativePartitionFactory(
|
331
|
+
declarative_stream.name,
|
332
|
+
declarative_stream.get_json_schema(),
|
333
|
+
retriever,
|
334
|
+
self.message_repository,
|
335
|
+
),
|
336
|
+
perpartition_cursor,
|
337
|
+
)
|
338
|
+
|
339
|
+
concurrent_streams.append(
|
340
|
+
DefaultStream(
|
341
|
+
partition_generator=partition_generator,
|
342
|
+
name=declarative_stream.name,
|
343
|
+
json_schema=declarative_stream.get_json_schema(),
|
344
|
+
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
345
|
+
primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
|
346
|
+
cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
|
347
|
+
logger=self.logger,
|
348
|
+
cursor=perpartition_cursor,
|
349
|
+
)
|
350
|
+
)
|
307
351
|
else:
|
308
352
|
synchronous_streams.append(declarative_stream)
|
309
353
|
else:
|
@@ -394,6 +438,27 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
394
438
|
return False
|
395
439
|
return True
|
396
440
|
|
441
|
+
def _get_retriever(
|
442
|
+
self, declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
|
443
|
+
) -> Retriever:
|
444
|
+
retriever = declarative_stream.retriever
|
445
|
+
|
446
|
+
# This is an optimization so that we don't invoke any cursor or state management flows within the
|
447
|
+
# low-code framework because state management is handled through the ConcurrentCursor.
|
448
|
+
if declarative_stream and isinstance(retriever, SimpleRetriever):
|
449
|
+
# Also a temporary hack. In the legacy Stream implementation, as part of the read,
|
450
|
+
# set_initial_state() is called to instantiate incoming state on the cursor. Although we no
|
451
|
+
# longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
|
452
|
+
# like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
|
453
|
+
# still rely on a DatetimeBasedCursor that is properly initialized with state.
|
454
|
+
if retriever.cursor:
|
455
|
+
retriever.cursor.set_initial_state(stream_state=stream_state)
|
456
|
+
# We zero it out here, but since this is a cursor reference, the state is still properly
|
457
|
+
# instantiated for the other components that reference it
|
458
|
+
retriever.cursor = None
|
459
|
+
|
460
|
+
return retriever
|
461
|
+
|
397
462
|
@staticmethod
|
398
463
|
def _select_streams(
|
399
464
|
streams: List[AbstractStream], configured_catalog: ConfiguredAirbyteCatalog
|
@@ -784,6 +784,29 @@ definitions:
|
|
784
784
|
type:
|
785
785
|
type: string
|
786
786
|
enum: [DatetimeBasedCursor]
|
787
|
+
clamping:
|
788
|
+
title: Date Range Clamping
|
789
|
+
description: This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)
|
790
|
+
type: object
|
791
|
+
required:
|
792
|
+
- target
|
793
|
+
properties:
|
794
|
+
target:
|
795
|
+
title: Target
|
796
|
+
description: The period of time that datetime windows will be clamped by
|
797
|
+
# This should ideally be an enum. However, we don't use an enum because we want to allow for connectors
|
798
|
+
# to support interpolation on the connector config to get the target which is an arbitrary string
|
799
|
+
type: string
|
800
|
+
interpolation_context:
|
801
|
+
- config
|
802
|
+
examples:
|
803
|
+
- "DAY"
|
804
|
+
- "WEEK"
|
805
|
+
- "MONTH"
|
806
|
+
- "{{ config['target'] }}"
|
807
|
+
target_details:
|
808
|
+
type: object
|
809
|
+
additionalProperties: true
|
787
810
|
cursor_field:
|
788
811
|
title: Cursor Field
|
789
812
|
description: The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.
|
@@ -1058,8 +1081,6 @@ definitions:
|
|
1058
1081
|
type: object
|
1059
1082
|
required:
|
1060
1083
|
- type
|
1061
|
-
- client_id
|
1062
|
-
- client_secret
|
1063
1084
|
properties:
|
1064
1085
|
type:
|
1065
1086
|
type: string
|
@@ -1254,6 +1275,15 @@ definitions:
|
|
1254
1275
|
default: []
|
1255
1276
|
examples:
|
1256
1277
|
- ["invalid_grant", "invalid_permissions"]
|
1278
|
+
profile_assertion:
|
1279
|
+
title: Profile Assertion
|
1280
|
+
description: The authenticator being used to authenticate the client authenticator.
|
1281
|
+
"$ref": "#/definitions/JwtAuthenticator"
|
1282
|
+
use_profile_assertion:
|
1283
|
+
title: Use Profile Assertion
|
1284
|
+
description: Enable using profile assertion as a flow for OAuth authorization.
|
1285
|
+
type: boolean
|
1286
|
+
default: false
|
1257
1287
|
$parameters:
|
1258
1288
|
type: object
|
1259
1289
|
additionalProperties: true
|
@@ -1514,6 +1544,7 @@ definitions:
|
|
1514
1544
|
anyOf:
|
1515
1545
|
- "$ref": "#/definitions/JsonDecoder"
|
1516
1546
|
- "$ref": "#/definitions/XmlDecoder"
|
1547
|
+
- "$ref": "#/definitions/CompositeRawDecoder"
|
1517
1548
|
$parameters:
|
1518
1549
|
type: object
|
1519
1550
|
additionalProperties: true
|
@@ -2100,6 +2131,26 @@ definitions:
|
|
2100
2131
|
$parameters:
|
2101
2132
|
type: object
|
2102
2133
|
additionalProperties: true
|
2134
|
+
ZipfileDecoder:
|
2135
|
+
title: Zipfile Decoder
|
2136
|
+
description: Decoder for response data that is returned as zipfile(s).
|
2137
|
+
type: object
|
2138
|
+
additionalProperties: true
|
2139
|
+
required:
|
2140
|
+
- type
|
2141
|
+
- parser
|
2142
|
+
properties:
|
2143
|
+
type:
|
2144
|
+
type: string
|
2145
|
+
enum: [ZipfileDecoder]
|
2146
|
+
parser:
|
2147
|
+
title: Parser
|
2148
|
+
description: Parser to parse the decompressed data from the zipfile(s).
|
2149
|
+
anyOf:
|
2150
|
+
- "$ref": "#/definitions/GzipParser"
|
2151
|
+
- "$ref": "#/definitions/JsonParser"
|
2152
|
+
- "$ref": "#/definitions/JsonLineParser"
|
2153
|
+
- "$ref": "#/definitions/CsvParser"
|
2103
2154
|
ListPartitionRouter:
|
2104
2155
|
title: List Partition Router
|
2105
2156
|
description: A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. During a sync, each value is iterated over and can be used as input to outbound API requests.
|
@@ -2928,6 +2979,7 @@ definitions:
|
|
2928
2979
|
- "$ref": "#/definitions/XmlDecoder"
|
2929
2980
|
- "$ref": "#/definitions/GzipJsonDecoder"
|
2930
2981
|
- "$ref": "#/definitions/CompositeRawDecoder"
|
2982
|
+
- "$ref": "#/definitions/ZipfileDecoder"
|
2931
2983
|
$parameters:
|
2932
2984
|
type: object
|
2933
2985
|
additionalProperties: true
|
@@ -3126,6 +3178,8 @@ definitions:
|
|
3126
3178
|
- "$ref": "#/definitions/IterableDecoder"
|
3127
3179
|
- "$ref": "#/definitions/XmlDecoder"
|
3128
3180
|
- "$ref": "#/definitions/GzipJsonDecoder"
|
3181
|
+
- "$ref": "#/definitions/CompositeRawDecoder"
|
3182
|
+
- "$ref": "#/definitions/ZipfileDecoder"
|
3129
3183
|
download_decoder:
|
3130
3184
|
title: Download Decoder
|
3131
3185
|
description: Component decoding the download response so records can be extracted.
|
@@ -3136,6 +3190,8 @@ definitions:
|
|
3136
3190
|
- "$ref": "#/definitions/IterableDecoder"
|
3137
3191
|
- "$ref": "#/definitions/XmlDecoder"
|
3138
3192
|
- "$ref": "#/definitions/GzipJsonDecoder"
|
3193
|
+
- "$ref": "#/definitions/CompositeRawDecoder"
|
3194
|
+
- "$ref": "#/definitions/ZipfileDecoder"
|
3139
3195
|
$parameters:
|
3140
3196
|
type: object
|
3141
3197
|
additionalProperties: true
|
@@ -2,7 +2,12 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import
|
5
|
+
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
|
6
|
+
CompositeRawDecoder,
|
7
|
+
GzipParser,
|
8
|
+
JsonParser,
|
9
|
+
Parser,
|
10
|
+
)
|
6
11
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
7
12
|
from airbyte_cdk.sources.declarative.decoders.json_decoder import (
|
8
13
|
GzipJsonDecoder,
|
@@ -15,15 +20,18 @@ from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator impor
|
|
15
20
|
PaginationDecoderDecorator,
|
16
21
|
)
|
17
22
|
from airbyte_cdk.sources.declarative.decoders.xml_decoder import XmlDecoder
|
23
|
+
from airbyte_cdk.sources.declarative.decoders.zipfile_decoder import ZipfileDecoder
|
18
24
|
|
19
25
|
__all__ = [
|
20
26
|
"Decoder",
|
21
27
|
"CompositeRawDecoder",
|
22
28
|
"JsonDecoder",
|
29
|
+
"JsonParser",
|
23
30
|
"JsonlDecoder",
|
24
31
|
"IterableDecoder",
|
25
32
|
"GzipJsonDecoder",
|
26
33
|
"NoopDecoder",
|
27
34
|
"PaginationDecoderDecorator",
|
28
35
|
"XmlDecoder",
|
36
|
+
"ZipfileDecoder",
|
29
37
|
]
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import logging
|
6
|
+
import zipfile
|
7
|
+
from dataclasses import dataclass
|
8
|
+
from io import BytesIO
|
9
|
+
from typing import Any, Generator, MutableMapping
|
10
|
+
|
11
|
+
import orjson
|
12
|
+
import requests
|
13
|
+
|
14
|
+
from airbyte_cdk.models import FailureType
|
15
|
+
from airbyte_cdk.sources.declarative.decoders import Decoder
|
16
|
+
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
|
17
|
+
Parser,
|
18
|
+
)
|
19
|
+
from airbyte_cdk.utils import AirbyteTracedException
|
20
|
+
|
21
|
+
logger = logging.getLogger("airbyte")
|
22
|
+
|
23
|
+
|
24
|
+
@dataclass
|
25
|
+
class ZipfileDecoder(Decoder):
|
26
|
+
parser: Parser
|
27
|
+
|
28
|
+
def is_stream_response(self) -> bool:
|
29
|
+
return False
|
30
|
+
|
31
|
+
def decode(
|
32
|
+
self, response: requests.Response
|
33
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
34
|
+
try:
|
35
|
+
with zipfile.ZipFile(BytesIO(response.content)) as zip_file:
|
36
|
+
for file_name in zip_file.namelist():
|
37
|
+
unzipped_content = zip_file.read(file_name)
|
38
|
+
buffered_content = BytesIO(unzipped_content)
|
39
|
+
try:
|
40
|
+
yield from self.parser.parse(buffered_content)
|
41
|
+
except Exception as e:
|
42
|
+
logger.error(
|
43
|
+
f"Failed to parse file: {file_name} from zip file: {response.request.url} with exception {e}."
|
44
|
+
)
|
45
|
+
raise AirbyteTracedException(
|
46
|
+
message=f"Failed to parse file: {file_name} from zip file.",
|
47
|
+
internal_message=f"Failed to parse file: {file_name} from zip file: {response.request.url}.",
|
48
|
+
failure_type=FailureType.system_error,
|
49
|
+
) from e
|
50
|
+
except zipfile.BadZipFile as e:
|
51
|
+
logger.error(
|
52
|
+
f"Received an invalid zip file in response to URL: {response.request.url}. "
|
53
|
+
f"The size of the response body is: {len(response.content)}"
|
54
|
+
)
|
55
|
+
raise AirbyteTracedException(
|
56
|
+
message="Received an invalid zip file in response.",
|
57
|
+
internal_message=f"Received an invalid zip file in response to URL: {response.request.url}.",
|
58
|
+
failure_type=FailureType.system_error,
|
59
|
+
) from e
|
@@ -59,13 +59,11 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
59
59
|
|
60
60
|
def __init__(
|
61
61
|
self,
|
62
|
-
|
63
|
-
substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
|
62
|
+
cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
|
64
63
|
**kwargs: Any,
|
65
64
|
):
|
66
65
|
super().__init__(**kwargs)
|
67
|
-
self.
|
68
|
-
self._substream_cursor = substream_cursor
|
66
|
+
self._cursor = cursor
|
69
67
|
|
70
68
|
def filter_records(
|
71
69
|
self,
|
@@ -77,7 +75,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
77
75
|
records = (
|
78
76
|
record
|
79
77
|
for record in records
|
80
|
-
if
|
78
|
+
if self._cursor.should_be_synced(
|
81
79
|
# Record is created on the fly to align with the cursor interface; stream name is ignored as we don't need it here
|
82
80
|
# Record stream name is empty because it is not used during the filtering
|
83
81
|
Record(data=record, associated_slice=stream_slice, stream_name="")
|
@@ -2,6 +2,10 @@
|
|
2
2
|
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
|
6
|
+
ConcurrentCursorFactory,
|
7
|
+
ConcurrentPerPartitionCursor,
|
8
|
+
)
|
5
9
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
6
10
|
from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
|
7
11
|
from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
|
@@ -21,6 +25,8 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i
|
|
21
25
|
|
22
26
|
__all__ = [
|
23
27
|
"CursorFactory",
|
28
|
+
"ConcurrentCursorFactory",
|
29
|
+
"ConcurrentPerPartitionCursor",
|
24
30
|
"DatetimeBasedCursor",
|
25
31
|
"DeclarativeCursor",
|
26
32
|
"GlobalSubstreamCursor",
|