airbyte-cdk 6.12.4.dev0__py3-none-any.whl → 6.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +93 -34
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +0 -1
- airbyte_cdk/models/__init__.py +10 -11
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +1 -1
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -5
- airbyte_cdk/sources/declarative/auth/oauth.py +27 -12
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +0 -57
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +78 -1
- airbyte_cdk/sources/declarative/decoders/__init__.py +21 -3
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +10 -2
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +10 -6
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -14
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +49 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +96 -80
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +23 -5
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +4 -1
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +12 -3
- airbyte_cdk/sources/declarative/resolvers/__init__.py +31 -8
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +20 -14
- airbyte_cdk/sources/declarative/retrievers/__init__.py +5 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +9 -32
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -2
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +20 -3
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/file_types/__init__.py +12 -3
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/streams/__init__.py +1 -1
- airbyte_cdk/sources/streams/checkpoint/__init__.py +2 -3
- airbyte_cdk/sources/streams/concurrent/cursor.py +0 -1
- airbyte_cdk/sources/streams/http/__init__.py +2 -2
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +2 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +8 -3
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/mocker.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +1 -1
- airbyte_cdk/utils/__init__.py +1 -1
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dist-info}/METADATA +3 -3
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dist-info}/RECORD +53 -52
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dist-info}/WHEEL +1 -1
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -344
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/__init__.py
CHANGED
@@ -48,27 +48,46 @@ API Reference
|
|
48
48
|
# Once those issues are resolved, the below can be sorted with isort.
|
49
49
|
import dunamai as _dunamai
|
50
50
|
|
51
|
-
from .
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
from .config_observation import create_connector_config_control_message, emit_configuration_as_airbyte_control_message
|
51
|
+
from .config_observation import (
|
52
|
+
create_connector_config_control_message,
|
53
|
+
emit_configuration_as_airbyte_control_message,
|
54
|
+
)
|
56
55
|
from .connector import BaseConnector, Connector
|
57
|
-
|
58
|
-
from .entrypoint import
|
59
|
-
|
56
|
+
from .destinations import Destination
|
57
|
+
from .entrypoint import AirbyteEntrypoint, launch
|
60
58
|
from .logger import AirbyteLogFormatter, init_logger
|
61
|
-
from .
|
59
|
+
from .models import (
|
60
|
+
AdvancedAuth,
|
61
|
+
AirbyteConnectionStatus,
|
62
|
+
AirbyteLogMessage,
|
63
|
+
AirbyteMessage,
|
64
|
+
AirbyteRecordMessage,
|
65
|
+
AirbyteStream,
|
66
|
+
ConfiguredAirbyteCatalog,
|
67
|
+
ConfiguredAirbyteStream,
|
68
|
+
ConnectorSpecification,
|
69
|
+
DestinationSyncMode,
|
70
|
+
FailureType,
|
71
|
+
Level,
|
72
|
+
OAuthConfigSpecification,
|
73
|
+
OrchestratorType,
|
74
|
+
Status,
|
75
|
+
SyncMode,
|
76
|
+
Type,
|
77
|
+
)
|
78
|
+
from .sources import AbstractSource, Source
|
62
79
|
from .sources.concurrent_source.concurrent_source import ConcurrentSource
|
63
80
|
from .sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
|
64
81
|
from .sources.config import BaseConfig
|
65
|
-
from .sources.types import Config, Record, StreamSlice
|
66
82
|
from .sources.connector_state_manager import ConnectorStateManager
|
67
83
|
from .sources.declarative.auth import DeclarativeOauth2Authenticator
|
68
|
-
from .sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
|
69
|
-
from .sources.declarative.auth.declarative_authenticator import NoAuth
|
84
|
+
from .sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator, NoAuth
|
70
85
|
from .sources.declarative.auth.oauth import DeclarativeSingleUseRefreshTokenOauth2Authenticator
|
71
|
-
from .sources.declarative.auth.token import
|
86
|
+
from .sources.declarative.auth.token import (
|
87
|
+
ApiKeyAuthenticator,
|
88
|
+
BasicHttpAuthenticator,
|
89
|
+
BearerAuthenticator,
|
90
|
+
)
|
72
91
|
from .sources.declarative.datetime.min_max_datetime import MinMaxDatetime
|
73
92
|
from .sources.declarative.declarative_stream import DeclarativeStream
|
74
93
|
from .sources.declarative.decoders import Decoder, JsonDecoder
|
@@ -77,48 +96,89 @@ from .sources.declarative.extractors import DpathExtractor, RecordSelector
|
|
77
96
|
from .sources.declarative.extractors.record_extractor import RecordExtractor
|
78
97
|
from .sources.declarative.extractors.record_filter import RecordFilter
|
79
98
|
from .sources.declarative.incremental import DatetimeBasedCursor
|
80
|
-
from .sources.declarative.interpolation import
|
99
|
+
from .sources.declarative.interpolation import InterpolatedBoolean, InterpolatedString
|
81
100
|
from .sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
82
|
-
from .sources.declarative.migrations.legacy_to_per_partition_state_migration import
|
83
|
-
|
84
|
-
|
101
|
+
from .sources.declarative.migrations.legacy_to_per_partition_state_migration import (
|
102
|
+
LegacyToPerPartitionStateMigration,
|
103
|
+
)
|
104
|
+
from .sources.declarative.partition_routers import (
|
105
|
+
CartesianProductStreamSlicer,
|
106
|
+
SinglePartitionRouter,
|
107
|
+
SubstreamPartitionRouter,
|
108
|
+
)
|
85
109
|
from .sources.declarative.partition_routers.substream_partition_router import ParentStreamConfig
|
86
|
-
from .sources.declarative.requesters import
|
87
|
-
|
110
|
+
from .sources.declarative.requesters import HttpRequester, Requester
|
88
111
|
from .sources.declarative.requesters.error_handlers import BackoffStrategy
|
89
112
|
from .sources.declarative.requesters.paginators import DefaultPaginator, PaginationStrategy
|
90
|
-
from .sources.declarative.requesters.paginators.strategies import
|
91
|
-
|
113
|
+
from .sources.declarative.requesters.paginators.strategies import (
|
114
|
+
CursorPaginationStrategy,
|
115
|
+
OffsetIncrement,
|
116
|
+
PageIncrement,
|
117
|
+
StopConditionPaginationStrategyDecorator,
|
118
|
+
)
|
92
119
|
from .sources.declarative.requesters.request_option import RequestOption, RequestOptionType
|
93
|
-
|
94
|
-
|
95
|
-
|
120
|
+
from .sources.declarative.requesters.request_options.default_request_options_provider import (
|
121
|
+
DefaultRequestOptionsProvider,
|
122
|
+
)
|
123
|
+
from .sources.declarative.requesters.request_options.interpolated_request_input_provider import (
|
124
|
+
InterpolatedRequestInputProvider,
|
125
|
+
)
|
96
126
|
from .sources.declarative.requesters.requester import HttpMethod
|
97
127
|
from .sources.declarative.retrievers import SimpleRetriever
|
98
128
|
from .sources.declarative.schema import JsonFileSchemaLoader
|
99
|
-
from .sources.declarative.transformations.add_fields import
|
129
|
+
from .sources.declarative.transformations.add_fields import AddedFieldDefinition, AddFields
|
100
130
|
from .sources.declarative.transformations.transformation import RecordTransformation
|
101
131
|
from .sources.declarative.types import FieldPointer
|
102
132
|
from .sources.declarative.yaml_declarative_source import YamlDeclarativeSource
|
103
133
|
from .sources.message import InMemoryMessageRepository, MessageRepository
|
104
134
|
from .sources.source import TState
|
105
135
|
from .sources.streams.availability_strategy import AvailabilityStrategy
|
106
|
-
from .sources.streams.call_rate import
|
136
|
+
from .sources.streams.call_rate import (
|
137
|
+
AbstractAPIBudget,
|
138
|
+
CachedLimiterSession,
|
139
|
+
HttpAPIBudget,
|
140
|
+
HttpRequestMatcher,
|
141
|
+
LimiterSession,
|
142
|
+
MovingWindowCallRatePolicy,
|
143
|
+
Rate,
|
144
|
+
)
|
107
145
|
from .sources.streams.checkpoint import Cursor as LegacyCursor
|
108
146
|
from .sources.streams.checkpoint import ResumableFullRefreshCursor
|
109
147
|
from .sources.streams.concurrent.adapters import StreamFacade
|
110
|
-
from .sources.streams.concurrent.cursor import
|
111
|
-
|
112
|
-
|
113
|
-
|
148
|
+
from .sources.streams.concurrent.cursor import (
|
149
|
+
ConcurrentCursor,
|
150
|
+
Cursor,
|
151
|
+
CursorField,
|
152
|
+
FinalStateCursor,
|
153
|
+
)
|
154
|
+
from .sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
|
155
|
+
EpochValueConcurrentStreamStateConverter,
|
156
|
+
IsoMillisConcurrentStreamStateConverter,
|
157
|
+
)
|
158
|
+
from .sources.streams.core import IncrementalMixin, Stream, package_name_from_class
|
114
159
|
from .sources.streams.http import HttpStream, HttpSubStream
|
115
160
|
from .sources.streams.http.availability_strategy import HttpAvailabilityStrategy
|
116
|
-
from .sources.streams.http.exceptions import
|
161
|
+
from .sources.streams.http.exceptions import (
|
162
|
+
BaseBackoffException,
|
163
|
+
DefaultBackoffException,
|
164
|
+
UserDefinedBackoffException,
|
165
|
+
)
|
117
166
|
from .sources.streams.http.rate_limiting import default_backoff_handler
|
118
|
-
from .sources.streams.http.requests_native_auth import
|
167
|
+
from .sources.streams.http.requests_native_auth import (
|
168
|
+
Oauth2Authenticator,
|
169
|
+
SingleUseRefreshTokenOauth2Authenticator,
|
170
|
+
TokenAuthenticator,
|
171
|
+
)
|
119
172
|
from .sources.streams.http.requests_native_auth.abstract_token import AbstractHeaderAuthenticator
|
173
|
+
from .sources.types import Config, Record, StreamSlice
|
120
174
|
from .sources.utils import casing
|
121
|
-
from .sources.utils.schema_helpers import
|
175
|
+
from .sources.utils.schema_helpers import (
|
176
|
+
InternalConfig,
|
177
|
+
ResourceSchemaLoader,
|
178
|
+
check_config_against_spec_or_exit,
|
179
|
+
expand_refs,
|
180
|
+
split_config,
|
181
|
+
)
|
122
182
|
from .sources.utils.transform import TransformConfig, TypeTransformer
|
123
183
|
from .utils import AirbyteTracedException, is_cloud_environment
|
124
184
|
from .utils.constants import ENV_REQUEST_CACHE_PATH
|
@@ -127,7 +187,6 @@ from .utils.oneof_option_config import OneOfOptionConfig
|
|
127
187
|
from .utils.spec_schema_transformations import resolve_refs
|
128
188
|
from .utils.stream_status_utils import as_airbyte_message
|
129
189
|
|
130
|
-
|
131
190
|
__all__ = [
|
132
191
|
# Availability strategy
|
133
192
|
"AvailabilityStrategy",
|
airbyte_cdk/models/__init__.py
CHANGED
@@ -7,7 +7,6 @@
|
|
7
7
|
# of airbyte-cdk rather than a standalone package.
|
8
8
|
from .airbyte_protocol import (
|
9
9
|
AdvancedAuth,
|
10
|
-
AirbyteStateStats,
|
11
10
|
AirbyteAnalyticsTraceMessage,
|
12
11
|
AirbyteCatalog,
|
13
12
|
AirbyteConnectionStatus,
|
@@ -22,13 +21,14 @@ from .airbyte_protocol import (
|
|
22
21
|
AirbyteRecordMessage,
|
23
22
|
AirbyteStateBlob,
|
24
23
|
AirbyteStateMessage,
|
24
|
+
AirbyteStateStats,
|
25
25
|
AirbyteStateType,
|
26
26
|
AirbyteStream,
|
27
27
|
AirbyteStreamState,
|
28
28
|
AirbyteStreamStatus,
|
29
|
-
AirbyteStreamStatusTraceMessage,
|
30
29
|
AirbyteStreamStatusReason,
|
31
30
|
AirbyteStreamStatusReasonType,
|
31
|
+
AirbyteStreamStatusTraceMessage,
|
32
32
|
AirbyteTraceMessage,
|
33
33
|
AuthFlowType,
|
34
34
|
ConfiguredAirbyteCatalog,
|
@@ -48,6 +48,14 @@ from .airbyte_protocol import (
|
|
48
48
|
TraceType,
|
49
49
|
Type,
|
50
50
|
)
|
51
|
+
from .airbyte_protocol_serializers import (
|
52
|
+
AirbyteMessageSerializer,
|
53
|
+
AirbyteStateMessageSerializer,
|
54
|
+
AirbyteStreamStateSerializer,
|
55
|
+
ConfiguredAirbyteCatalogSerializer,
|
56
|
+
ConfiguredAirbyteStreamSerializer,
|
57
|
+
ConnectorSpecificationSerializer,
|
58
|
+
)
|
51
59
|
from .well_known_types import (
|
52
60
|
BinaryData,
|
53
61
|
Boolean,
|
@@ -61,12 +69,3 @@ from .well_known_types import (
|
|
61
69
|
TimeWithoutTimezone,
|
62
70
|
TimeWithTimezone,
|
63
71
|
)
|
64
|
-
|
65
|
-
from .airbyte_protocol_serializers import (
|
66
|
-
AirbyteStreamStateSerializer,
|
67
|
-
AirbyteStateMessageSerializer,
|
68
|
-
AirbyteMessageSerializer,
|
69
|
-
ConfiguredAirbyteCatalogSerializer,
|
70
|
-
ConfiguredAirbyteStreamSerializer,
|
71
|
-
ConnectorSpecificationSerializer,
|
72
|
-
)
|
@@ -20,7 +20,6 @@ from typing import (
|
|
20
20
|
TypeVar,
|
21
21
|
)
|
22
22
|
|
23
|
-
from airbyte_cdk import StreamSlice
|
24
23
|
from airbyte_cdk.logger import lazy_log
|
25
24
|
from airbyte_cdk.models import FailureType
|
26
25
|
from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
|
@@ -31,6 +30,7 @@ from airbyte_cdk.sources.declarative.async_job.job_tracker import (
|
|
31
30
|
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
|
32
31
|
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
|
33
32
|
from airbyte_cdk.sources.message import MessageRepository
|
33
|
+
from airbyte_cdk.sources.types import StreamSlice
|
34
34
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
35
35
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
36
36
|
|
@@ -2,10 +2,7 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.auth.oauth import DeclarativeOauth2Authenticator
|
6
5
|
from airbyte_cdk.sources.declarative.auth.jwt import JwtAuthenticator
|
6
|
+
from airbyte_cdk.sources.declarative.auth.oauth import DeclarativeOauth2Authenticator
|
7
7
|
|
8
|
-
__all__ = [
|
9
|
-
"DeclarativeOauth2Authenticator",
|
10
|
-
"JwtAuthenticator"
|
11
|
-
]
|
8
|
+
__all__ = ["DeclarativeOauth2Authenticator", "JwtAuthenticator"]
|
@@ -43,11 +43,11 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
43
43
|
message_repository (MessageRepository): the message repository used to emit logs on HTTP requests
|
44
44
|
"""
|
45
45
|
|
46
|
-
token_refresh_endpoint: Union[InterpolatedString, str]
|
47
46
|
client_id: Union[InterpolatedString, str]
|
48
47
|
client_secret: Union[InterpolatedString, str]
|
49
48
|
config: Mapping[str, Any]
|
50
49
|
parameters: InitVar[Mapping[str, Any]]
|
50
|
+
token_refresh_endpoint: Optional[Union[InterpolatedString, str]] = None
|
51
51
|
refresh_token: Optional[Union[InterpolatedString, str]] = None
|
52
52
|
scopes: Optional[List[str]] = None
|
53
53
|
token_expiry_date: Optional[Union[InterpolatedString, str]] = None
|
@@ -55,6 +55,7 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
55
55
|
token_expiry_date_format: Optional[str] = None
|
56
56
|
token_expiry_is_time_of_expiration: bool = False
|
57
57
|
access_token_name: Union[InterpolatedString, str] = "access_token"
|
58
|
+
access_token_value: Optional[Union[InterpolatedString, str]] = None
|
58
59
|
expires_in_name: Union[InterpolatedString, str] = "expires_in"
|
59
60
|
refresh_request_body: Optional[Mapping[str, Any]] = None
|
60
61
|
grant_type: Union[InterpolatedString, str] = "refresh_token"
|
@@ -62,9 +63,12 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
62
63
|
|
63
64
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
64
65
|
super().__init__()
|
65
|
-
self.
|
66
|
-
self.
|
67
|
-
|
66
|
+
if self.token_refresh_endpoint is not None:
|
67
|
+
self._token_refresh_endpoint: Optional[InterpolatedString] = InterpolatedString.create(
|
68
|
+
self.token_refresh_endpoint, parameters=parameters
|
69
|
+
)
|
70
|
+
else:
|
71
|
+
self._token_refresh_endpoint = None
|
68
72
|
self._client_id = InterpolatedString.create(self.client_id, parameters=parameters)
|
69
73
|
self._client_secret = InterpolatedString.create(self.client_secret, parameters=parameters)
|
70
74
|
if self.refresh_token is not None:
|
@@ -92,20 +96,31 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
92
96
|
if self.token_expiry_date
|
93
97
|
else pendulum.now().subtract(days=1) # type: ignore # substract does not have type hints
|
94
98
|
)
|
95
|
-
self.
|
99
|
+
if self.access_token_value is not None:
|
100
|
+
self._access_token_value = InterpolatedString.create(
|
101
|
+
self.access_token_value, parameters=parameters
|
102
|
+
).eval(self.config)
|
103
|
+
else:
|
104
|
+
self._access_token_value = None
|
105
|
+
|
106
|
+
self._access_token: Optional[str] = (
|
107
|
+
self._access_token_value if self.access_token_value else None
|
108
|
+
)
|
96
109
|
|
97
110
|
if self.get_grant_type() == "refresh_token" and self._refresh_token is None:
|
98
111
|
raise ValueError(
|
99
112
|
"OAuthAuthenticator needs a refresh_token parameter if grant_type is set to `refresh_token`"
|
100
113
|
)
|
101
114
|
|
102
|
-
def get_token_refresh_endpoint(self) -> str:
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
115
|
+
def get_token_refresh_endpoint(self) -> Optional[str]:
|
116
|
+
if self._token_refresh_endpoint is not None:
|
117
|
+
refresh_token_endpoint: str = self._token_refresh_endpoint.eval(self.config)
|
118
|
+
if not refresh_token_endpoint:
|
119
|
+
raise ValueError(
|
120
|
+
"OAuthAuthenticator was unable to evaluate token_refresh_endpoint parameter"
|
121
|
+
)
|
122
|
+
return refresh_token_endpoint
|
123
|
+
return None
|
109
124
|
|
110
125
|
def get_client_id(self) -> str:
|
111
126
|
client_id: str = self._client_id.eval(self.config)
|
@@ -20,9 +20,6 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
|
20
20
|
ClientSideIncrementalRecordFilterDecorator,
|
21
21
|
)
|
22
22
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
23
|
-
from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
|
24
|
-
PerPartitionWithGlobalCursor,
|
25
|
-
)
|
26
23
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
27
24
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
28
25
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
@@ -303,60 +300,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
303
300
|
cursor=final_state_cursor,
|
304
301
|
)
|
305
302
|
)
|
306
|
-
elif (
|
307
|
-
incremental_sync_component_definition
|
308
|
-
and incremental_sync_component_definition.get("type", "")
|
309
|
-
== DatetimeBasedCursorModel.__name__
|
310
|
-
and self._stream_supports_concurrent_partition_processing(
|
311
|
-
declarative_stream=declarative_stream
|
312
|
-
)
|
313
|
-
and hasattr(declarative_stream.retriever, "stream_slicer")
|
314
|
-
and isinstance(
|
315
|
-
declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
|
316
|
-
)
|
317
|
-
):
|
318
|
-
stream_state = state_manager.get_stream_state(
|
319
|
-
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
320
|
-
)
|
321
|
-
partition_router = declarative_stream.retriever.stream_slicer._partition_router
|
322
|
-
|
323
|
-
cursor = self._constructor.create_concurrent_cursor_from_perpartition_cursor(
|
324
|
-
state_manager=state_manager,
|
325
|
-
model_type=DatetimeBasedCursorModel,
|
326
|
-
component_definition=incremental_sync_component_definition,
|
327
|
-
stream_name=declarative_stream.name,
|
328
|
-
stream_namespace=declarative_stream.namespace,
|
329
|
-
config=config or {},
|
330
|
-
stream_state=stream_state,
|
331
|
-
partition_router=partition_router,
|
332
|
-
)
|
333
|
-
|
334
|
-
partition_generator = StreamSlicerPartitionGenerator(
|
335
|
-
DeclarativePartitionFactory(
|
336
|
-
declarative_stream.name,
|
337
|
-
declarative_stream.get_json_schema(),
|
338
|
-
self._retriever_factory(
|
339
|
-
name_to_stream_mapping[declarative_stream.name],
|
340
|
-
config,
|
341
|
-
stream_state,
|
342
|
-
),
|
343
|
-
self.message_repository,
|
344
|
-
),
|
345
|
-
cursor,
|
346
|
-
)
|
347
|
-
|
348
|
-
concurrent_streams.append(
|
349
|
-
DefaultStream(
|
350
|
-
partition_generator=partition_generator,
|
351
|
-
name=declarative_stream.name,
|
352
|
-
json_schema=declarative_stream.get_json_schema(),
|
353
|
-
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
354
|
-
primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
|
355
|
-
cursor_field=cursor.cursor_field.cursor_field_key,
|
356
|
-
logger=self.logger,
|
357
|
-
cursor=cursor,
|
358
|
-
)
|
359
|
-
)
|
360
303
|
else:
|
361
304
|
synchronous_streams.append(declarative_stream)
|
362
305
|
else:
|
@@ -1021,7 +1021,6 @@ definitions:
|
|
1021
1021
|
- type
|
1022
1022
|
- client_id
|
1023
1023
|
- client_secret
|
1024
|
-
- token_refresh_endpoint
|
1025
1024
|
properties:
|
1026
1025
|
type:
|
1027
1026
|
type: string
|
@@ -1060,6 +1059,12 @@ definitions:
|
|
1060
1059
|
default: "access_token"
|
1061
1060
|
examples:
|
1062
1061
|
- access_token
|
1062
|
+
access_token_value:
|
1063
|
+
title: Access Token Value
|
1064
|
+
description: The value of the access_token to bypass the token refreshing using `refresh_token`.
|
1065
|
+
type: string
|
1066
|
+
examples:
|
1067
|
+
- secret_access_token_value
|
1063
1068
|
expires_in_name:
|
1064
1069
|
title: Token Expiry Property Name
|
1065
1070
|
description: The name of the property which contains the expiry date in the response from the token refresh endpoint.
|
@@ -1235,6 +1240,7 @@ definitions:
|
|
1235
1240
|
- "$ref": "#/definitions/RemoveFields"
|
1236
1241
|
- "$ref": "#/definitions/KeysToLower"
|
1237
1242
|
- "$ref": "#/definitions/KeysToSnakeCase"
|
1243
|
+
- "$ref": "#/definitions/FlattenFields"
|
1238
1244
|
state_migrations:
|
1239
1245
|
title: State Migrations
|
1240
1246
|
description: Array of state migrations to be applied on the input state
|
@@ -1767,6 +1773,18 @@ definitions:
|
|
1767
1773
|
- "$ref": "#/definitions/AsyncRetriever"
|
1768
1774
|
- "$ref": "#/definitions/CustomRetriever"
|
1769
1775
|
- "$ref": "#/definitions/SimpleRetriever"
|
1776
|
+
schema_transformations:
|
1777
|
+
title: Schema Transformations
|
1778
|
+
description: A list of transformations to be applied to the schema.
|
1779
|
+
type: array
|
1780
|
+
items:
|
1781
|
+
anyOf:
|
1782
|
+
- "$ref": "#/definitions/AddFields"
|
1783
|
+
- "$ref": "#/definitions/CustomTransformation"
|
1784
|
+
- "$ref": "#/definitions/RemoveFields"
|
1785
|
+
- "$ref": "#/definitions/KeysToLower"
|
1786
|
+
- "$ref": "#/definitions/KeysToSnakeCase"
|
1787
|
+
- "$ref": "#/definitions/FlattenFields"
|
1770
1788
|
schema_type_identifier:
|
1771
1789
|
"$ref": "#/definitions/SchemaTypeIdentifier"
|
1772
1790
|
$parameters:
|
@@ -2748,9 +2766,64 @@ definitions:
|
|
2748
2766
|
- "$ref": "#/definitions/IterableDecoder"
|
2749
2767
|
- "$ref": "#/definitions/XmlDecoder"
|
2750
2768
|
- "$ref": "#/definitions/GzipJsonDecoder"
|
2769
|
+
- "$ref": "#/definitions/CompositeRawDecoder"
|
2751
2770
|
$parameters:
|
2752
2771
|
type: object
|
2753
2772
|
additionalProperties: true
|
2773
|
+
CompositeRawDecoder:
|
2774
|
+
description: "(This is experimental, use at your own risk)"
|
2775
|
+
type: object
|
2776
|
+
required:
|
2777
|
+
- type
|
2778
|
+
- parser
|
2779
|
+
properties:
|
2780
|
+
type:
|
2781
|
+
type: string
|
2782
|
+
enum: [CompositeRawDecoder]
|
2783
|
+
parser:
|
2784
|
+
anyOf:
|
2785
|
+
- "$ref": "#/definitions/GzipParser"
|
2786
|
+
- "$ref": "#/definitions/JsonLineParser"
|
2787
|
+
- "$ref": "#/definitions/CsvParser"
|
2788
|
+
# PARSERS
|
2789
|
+
GzipParser:
|
2790
|
+
type: object
|
2791
|
+
required:
|
2792
|
+
- type
|
2793
|
+
- inner_parser
|
2794
|
+
properties:
|
2795
|
+
type:
|
2796
|
+
type: string
|
2797
|
+
enum: [GzipParser]
|
2798
|
+
inner_parser:
|
2799
|
+
anyOf:
|
2800
|
+
- "$ref": "#/definitions/JsonLineParser"
|
2801
|
+
- "$ref": "#/definitions/CsvParser"
|
2802
|
+
JsonLineParser:
|
2803
|
+
type: object
|
2804
|
+
required:
|
2805
|
+
- type
|
2806
|
+
properties:
|
2807
|
+
type:
|
2808
|
+
type: string
|
2809
|
+
enum: [JsonLineParser]
|
2810
|
+
encoding:
|
2811
|
+
type: string
|
2812
|
+
default: utf-8
|
2813
|
+
CsvParser:
|
2814
|
+
type: object
|
2815
|
+
required:
|
2816
|
+
- type
|
2817
|
+
properties:
|
2818
|
+
type:
|
2819
|
+
type: string
|
2820
|
+
enum: [CsvParser]
|
2821
|
+
encoding:
|
2822
|
+
type: string
|
2823
|
+
default: utf-8
|
2824
|
+
delimiter:
|
2825
|
+
type: string
|
2826
|
+
default: ","
|
2754
2827
|
AsyncJobStatusMap:
|
2755
2828
|
description: Matches the api job status to Async Job Status.
|
2756
2829
|
type: object
|
@@ -3036,6 +3109,7 @@ definitions:
|
|
3036
3109
|
interpolation_context:
|
3037
3110
|
- config
|
3038
3111
|
- components_values
|
3112
|
+
- stream_slice
|
3039
3113
|
- stream_template_config
|
3040
3114
|
examples:
|
3041
3115
|
- ["data"]
|
@@ -3052,10 +3126,13 @@ definitions:
|
|
3052
3126
|
- config
|
3053
3127
|
- stream_template_config
|
3054
3128
|
- components_values
|
3129
|
+
- stream_slice
|
3055
3130
|
examples:
|
3056
3131
|
- "{{ components_values['updates'] }}"
|
3057
3132
|
- "{{ components_values['MetaData']['LastUpdatedTime'] }}"
|
3058
3133
|
- "{{ config['segment_id'] }}"
|
3134
|
+
- "{{ stream_slice['parent_id'] }}"
|
3135
|
+
- "{{ stream_slice['extra_fields']['name'] }}"
|
3059
3136
|
value_type:
|
3060
3137
|
title: Value Type
|
3061
3138
|
description: The expected data type of the value. If omitted, the type will be inferred from the value provided.
|
@@ -2,10 +2,28 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import CompositeRawDecoder
|
5
6
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
6
|
-
from airbyte_cdk.sources.declarative.decoders.json_decoder import
|
7
|
+
from airbyte_cdk.sources.declarative.decoders.json_decoder import (
|
8
|
+
GzipJsonDecoder,
|
9
|
+
IterableDecoder,
|
10
|
+
JsonDecoder,
|
11
|
+
JsonlDecoder,
|
12
|
+
)
|
7
13
|
from airbyte_cdk.sources.declarative.decoders.noop_decoder import NoopDecoder
|
8
|
-
from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import
|
14
|
+
from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import (
|
15
|
+
PaginationDecoderDecorator,
|
16
|
+
)
|
9
17
|
from airbyte_cdk.sources.declarative.decoders.xml_decoder import XmlDecoder
|
10
18
|
|
11
|
-
__all__ = [
|
19
|
+
__all__ = [
|
20
|
+
"Decoder",
|
21
|
+
"CompositeRawDecoder",
|
22
|
+
"JsonDecoder",
|
23
|
+
"JsonlDecoder",
|
24
|
+
"IterableDecoder",
|
25
|
+
"GzipJsonDecoder",
|
26
|
+
"NoopDecoder",
|
27
|
+
"PaginationDecoderDecorator",
|
28
|
+
"XmlDecoder",
|
29
|
+
]
|
@@ -0,0 +1,97 @@
|
|
1
|
+
import csv
|
2
|
+
import gzip
|
3
|
+
import json
|
4
|
+
import logging
|
5
|
+
from abc import ABC, abstractmethod
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from io import BufferedIOBase, TextIOWrapper
|
8
|
+
from typing import Any, Generator, MutableMapping, Optional
|
9
|
+
|
10
|
+
import requests
|
11
|
+
|
12
|
+
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
13
|
+
|
14
|
+
logger = logging.getLogger("airbyte")
|
15
|
+
|
16
|
+
|
17
|
+
@dataclass
|
18
|
+
class Parser(ABC):
|
19
|
+
@abstractmethod
|
20
|
+
def parse(
|
21
|
+
self,
|
22
|
+
data: BufferedIOBase,
|
23
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
24
|
+
"""
|
25
|
+
Parse data and yield dictionaries.
|
26
|
+
"""
|
27
|
+
pass
|
28
|
+
|
29
|
+
|
30
|
+
@dataclass
|
31
|
+
class GzipParser(Parser):
|
32
|
+
inner_parser: Parser
|
33
|
+
|
34
|
+
def parse(
|
35
|
+
self,
|
36
|
+
data: BufferedIOBase,
|
37
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
38
|
+
"""
|
39
|
+
Decompress gzipped bytes and pass decompressed data to the inner parser.
|
40
|
+
"""
|
41
|
+
with gzip.GzipFile(fileobj=data, mode="rb") as gzipobj:
|
42
|
+
yield from self.inner_parser.parse(gzipobj)
|
43
|
+
|
44
|
+
|
45
|
+
@dataclass
|
46
|
+
class JsonLineParser(Parser):
|
47
|
+
encoding: Optional[str] = "utf-8"
|
48
|
+
|
49
|
+
def parse(
|
50
|
+
self,
|
51
|
+
data: BufferedIOBase,
|
52
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
53
|
+
for line in data:
|
54
|
+
try:
|
55
|
+
yield json.loads(line.decode(encoding=self.encoding or "utf-8"))
|
56
|
+
except json.JSONDecodeError as e:
|
57
|
+
logger.warning(f"Cannot decode/parse line {line!r} as JSON, error: {e}")
|
58
|
+
|
59
|
+
|
60
|
+
@dataclass
|
61
|
+
class CsvParser(Parser):
|
62
|
+
# TODO: migrate implementation to re-use file-base classes
|
63
|
+
encoding: Optional[str] = "utf-8"
|
64
|
+
delimiter: Optional[str] = ","
|
65
|
+
|
66
|
+
def parse(
|
67
|
+
self,
|
68
|
+
data: BufferedIOBase,
|
69
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
70
|
+
"""
|
71
|
+
Parse CSV data from decompressed bytes.
|
72
|
+
"""
|
73
|
+
text_data = TextIOWrapper(data, encoding=self.encoding) # type: ignore
|
74
|
+
reader = csv.DictReader(text_data, delimiter=self.delimiter or ",")
|
75
|
+
yield from reader
|
76
|
+
|
77
|
+
|
78
|
+
@dataclass
|
79
|
+
class CompositeRawDecoder(Decoder):
|
80
|
+
"""
|
81
|
+
Decoder strategy to transform a requests.Response into a Generator[MutableMapping[str, Any], None, None]
|
82
|
+
passed response.raw to parser(s).
|
83
|
+
Note: response.raw is not decoded/decompressed by default.
|
84
|
+
parsers should be instantiated recursively.
|
85
|
+
Example:
|
86
|
+
composite_raw_decoder = CompositeRawDecoder(parser=GzipParser(inner_parser=JsonLineParser(encoding="iso-8859-1")))
|
87
|
+
"""
|
88
|
+
|
89
|
+
parser: Parser
|
90
|
+
|
91
|
+
def is_stream_response(self) -> bool:
|
92
|
+
return True
|
93
|
+
|
94
|
+
def decode(
|
95
|
+
self, response: requests.Response
|
96
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
97
|
+
yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
|