airbyte-cdk 6.12.4__py3-none-any.whl → 6.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +93 -34
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +0 -1
- airbyte_cdk/models/__init__.py +10 -11
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +1 -1
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -5
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +55 -0
- airbyte_cdk/sources/declarative/decoders/__init__.py +21 -3
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +10 -2
- airbyte_cdk/sources/declarative/incremental/__init__.py +10 -3
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +24 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +43 -0
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +16 -6
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +4 -1
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +12 -3
- airbyte_cdk/sources/declarative/resolvers/__init__.py +31 -8
- airbyte_cdk/sources/declarative/retrievers/__init__.py +5 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +1 -1
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -2
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/file_types/__init__.py +12 -3
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/streams/__init__.py +1 -1
- airbyte_cdk/sources/streams/checkpoint/__init__.py +2 -3
- airbyte_cdk/sources/streams/http/__init__.py +2 -2
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +2 -2
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/mocker.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +1 -1
- airbyte_cdk/utils/__init__.py +1 -1
- {airbyte_cdk-6.12.4.dist-info → airbyte_cdk-6.13.0.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.12.4.dist-info → airbyte_cdk-6.13.0.dist-info}/RECORD +43 -42
- {airbyte_cdk-6.12.4.dist-info → airbyte_cdk-6.13.0.dist-info}/WHEEL +1 -1
- {airbyte_cdk-6.12.4.dist-info → airbyte_cdk-6.13.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.12.4.dist-info → airbyte_cdk-6.13.0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/__init__.py
CHANGED
@@ -48,27 +48,46 @@ API Reference
|
|
48
48
|
# Once those issues are resolved, the below can be sorted with isort.
|
49
49
|
import dunamai as _dunamai
|
50
50
|
|
51
|
-
from .
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
from .config_observation import create_connector_config_control_message, emit_configuration_as_airbyte_control_message
|
51
|
+
from .config_observation import (
|
52
|
+
create_connector_config_control_message,
|
53
|
+
emit_configuration_as_airbyte_control_message,
|
54
|
+
)
|
56
55
|
from .connector import BaseConnector, Connector
|
57
|
-
|
58
|
-
from .entrypoint import
|
59
|
-
|
56
|
+
from .destinations import Destination
|
57
|
+
from .entrypoint import AirbyteEntrypoint, launch
|
60
58
|
from .logger import AirbyteLogFormatter, init_logger
|
61
|
-
from .
|
59
|
+
from .models import (
|
60
|
+
AdvancedAuth,
|
61
|
+
AirbyteConnectionStatus,
|
62
|
+
AirbyteLogMessage,
|
63
|
+
AirbyteMessage,
|
64
|
+
AirbyteRecordMessage,
|
65
|
+
AirbyteStream,
|
66
|
+
ConfiguredAirbyteCatalog,
|
67
|
+
ConfiguredAirbyteStream,
|
68
|
+
ConnectorSpecification,
|
69
|
+
DestinationSyncMode,
|
70
|
+
FailureType,
|
71
|
+
Level,
|
72
|
+
OAuthConfigSpecification,
|
73
|
+
OrchestratorType,
|
74
|
+
Status,
|
75
|
+
SyncMode,
|
76
|
+
Type,
|
77
|
+
)
|
78
|
+
from .sources import AbstractSource, Source
|
62
79
|
from .sources.concurrent_source.concurrent_source import ConcurrentSource
|
63
80
|
from .sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
|
64
81
|
from .sources.config import BaseConfig
|
65
|
-
from .sources.types import Config, Record, StreamSlice
|
66
82
|
from .sources.connector_state_manager import ConnectorStateManager
|
67
83
|
from .sources.declarative.auth import DeclarativeOauth2Authenticator
|
68
|
-
from .sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
|
69
|
-
from .sources.declarative.auth.declarative_authenticator import NoAuth
|
84
|
+
from .sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator, NoAuth
|
70
85
|
from .sources.declarative.auth.oauth import DeclarativeSingleUseRefreshTokenOauth2Authenticator
|
71
|
-
from .sources.declarative.auth.token import
|
86
|
+
from .sources.declarative.auth.token import (
|
87
|
+
ApiKeyAuthenticator,
|
88
|
+
BasicHttpAuthenticator,
|
89
|
+
BearerAuthenticator,
|
90
|
+
)
|
72
91
|
from .sources.declarative.datetime.min_max_datetime import MinMaxDatetime
|
73
92
|
from .sources.declarative.declarative_stream import DeclarativeStream
|
74
93
|
from .sources.declarative.decoders import Decoder, JsonDecoder
|
@@ -77,48 +96,89 @@ from .sources.declarative.extractors import DpathExtractor, RecordSelector
|
|
77
96
|
from .sources.declarative.extractors.record_extractor import RecordExtractor
|
78
97
|
from .sources.declarative.extractors.record_filter import RecordFilter
|
79
98
|
from .sources.declarative.incremental import DatetimeBasedCursor
|
80
|
-
from .sources.declarative.interpolation import
|
99
|
+
from .sources.declarative.interpolation import InterpolatedBoolean, InterpolatedString
|
81
100
|
from .sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
82
|
-
from .sources.declarative.migrations.legacy_to_per_partition_state_migration import
|
83
|
-
|
84
|
-
|
101
|
+
from .sources.declarative.migrations.legacy_to_per_partition_state_migration import (
|
102
|
+
LegacyToPerPartitionStateMigration,
|
103
|
+
)
|
104
|
+
from .sources.declarative.partition_routers import (
|
105
|
+
CartesianProductStreamSlicer,
|
106
|
+
SinglePartitionRouter,
|
107
|
+
SubstreamPartitionRouter,
|
108
|
+
)
|
85
109
|
from .sources.declarative.partition_routers.substream_partition_router import ParentStreamConfig
|
86
|
-
from .sources.declarative.requesters import
|
87
|
-
|
110
|
+
from .sources.declarative.requesters import HttpRequester, Requester
|
88
111
|
from .sources.declarative.requesters.error_handlers import BackoffStrategy
|
89
112
|
from .sources.declarative.requesters.paginators import DefaultPaginator, PaginationStrategy
|
90
|
-
from .sources.declarative.requesters.paginators.strategies import
|
91
|
-
|
113
|
+
from .sources.declarative.requesters.paginators.strategies import (
|
114
|
+
CursorPaginationStrategy,
|
115
|
+
OffsetIncrement,
|
116
|
+
PageIncrement,
|
117
|
+
StopConditionPaginationStrategyDecorator,
|
118
|
+
)
|
92
119
|
from .sources.declarative.requesters.request_option import RequestOption, RequestOptionType
|
93
|
-
|
94
|
-
|
95
|
-
|
120
|
+
from .sources.declarative.requesters.request_options.default_request_options_provider import (
|
121
|
+
DefaultRequestOptionsProvider,
|
122
|
+
)
|
123
|
+
from .sources.declarative.requesters.request_options.interpolated_request_input_provider import (
|
124
|
+
InterpolatedRequestInputProvider,
|
125
|
+
)
|
96
126
|
from .sources.declarative.requesters.requester import HttpMethod
|
97
127
|
from .sources.declarative.retrievers import SimpleRetriever
|
98
128
|
from .sources.declarative.schema import JsonFileSchemaLoader
|
99
|
-
from .sources.declarative.transformations.add_fields import
|
129
|
+
from .sources.declarative.transformations.add_fields import AddedFieldDefinition, AddFields
|
100
130
|
from .sources.declarative.transformations.transformation import RecordTransformation
|
101
131
|
from .sources.declarative.types import FieldPointer
|
102
132
|
from .sources.declarative.yaml_declarative_source import YamlDeclarativeSource
|
103
133
|
from .sources.message import InMemoryMessageRepository, MessageRepository
|
104
134
|
from .sources.source import TState
|
105
135
|
from .sources.streams.availability_strategy import AvailabilityStrategy
|
106
|
-
from .sources.streams.call_rate import
|
136
|
+
from .sources.streams.call_rate import (
|
137
|
+
AbstractAPIBudget,
|
138
|
+
CachedLimiterSession,
|
139
|
+
HttpAPIBudget,
|
140
|
+
HttpRequestMatcher,
|
141
|
+
LimiterSession,
|
142
|
+
MovingWindowCallRatePolicy,
|
143
|
+
Rate,
|
144
|
+
)
|
107
145
|
from .sources.streams.checkpoint import Cursor as LegacyCursor
|
108
146
|
from .sources.streams.checkpoint import ResumableFullRefreshCursor
|
109
147
|
from .sources.streams.concurrent.adapters import StreamFacade
|
110
|
-
from .sources.streams.concurrent.cursor import
|
111
|
-
|
112
|
-
|
113
|
-
|
148
|
+
from .sources.streams.concurrent.cursor import (
|
149
|
+
ConcurrentCursor,
|
150
|
+
Cursor,
|
151
|
+
CursorField,
|
152
|
+
FinalStateCursor,
|
153
|
+
)
|
154
|
+
from .sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
|
155
|
+
EpochValueConcurrentStreamStateConverter,
|
156
|
+
IsoMillisConcurrentStreamStateConverter,
|
157
|
+
)
|
158
|
+
from .sources.streams.core import IncrementalMixin, Stream, package_name_from_class
|
114
159
|
from .sources.streams.http import HttpStream, HttpSubStream
|
115
160
|
from .sources.streams.http.availability_strategy import HttpAvailabilityStrategy
|
116
|
-
from .sources.streams.http.exceptions import
|
161
|
+
from .sources.streams.http.exceptions import (
|
162
|
+
BaseBackoffException,
|
163
|
+
DefaultBackoffException,
|
164
|
+
UserDefinedBackoffException,
|
165
|
+
)
|
117
166
|
from .sources.streams.http.rate_limiting import default_backoff_handler
|
118
|
-
from .sources.streams.http.requests_native_auth import
|
167
|
+
from .sources.streams.http.requests_native_auth import (
|
168
|
+
Oauth2Authenticator,
|
169
|
+
SingleUseRefreshTokenOauth2Authenticator,
|
170
|
+
TokenAuthenticator,
|
171
|
+
)
|
119
172
|
from .sources.streams.http.requests_native_auth.abstract_token import AbstractHeaderAuthenticator
|
173
|
+
from .sources.types import Config, Record, StreamSlice
|
120
174
|
from .sources.utils import casing
|
121
|
-
from .sources.utils.schema_helpers import
|
175
|
+
from .sources.utils.schema_helpers import (
|
176
|
+
InternalConfig,
|
177
|
+
ResourceSchemaLoader,
|
178
|
+
check_config_against_spec_or_exit,
|
179
|
+
expand_refs,
|
180
|
+
split_config,
|
181
|
+
)
|
122
182
|
from .sources.utils.transform import TransformConfig, TypeTransformer
|
123
183
|
from .utils import AirbyteTracedException, is_cloud_environment
|
124
184
|
from .utils.constants import ENV_REQUEST_CACHE_PATH
|
@@ -127,7 +187,6 @@ from .utils.oneof_option_config import OneOfOptionConfig
|
|
127
187
|
from .utils.spec_schema_transformations import resolve_refs
|
128
188
|
from .utils.stream_status_utils import as_airbyte_message
|
129
189
|
|
130
|
-
|
131
190
|
__all__ = [
|
132
191
|
# Availability strategy
|
133
192
|
"AvailabilityStrategy",
|
airbyte_cdk/models/__init__.py
CHANGED
@@ -7,7 +7,6 @@
|
|
7
7
|
# of airbyte-cdk rather than a standalone package.
|
8
8
|
from .airbyte_protocol import (
|
9
9
|
AdvancedAuth,
|
10
|
-
AirbyteStateStats,
|
11
10
|
AirbyteAnalyticsTraceMessage,
|
12
11
|
AirbyteCatalog,
|
13
12
|
AirbyteConnectionStatus,
|
@@ -22,13 +21,14 @@ from .airbyte_protocol import (
|
|
22
21
|
AirbyteRecordMessage,
|
23
22
|
AirbyteStateBlob,
|
24
23
|
AirbyteStateMessage,
|
24
|
+
AirbyteStateStats,
|
25
25
|
AirbyteStateType,
|
26
26
|
AirbyteStream,
|
27
27
|
AirbyteStreamState,
|
28
28
|
AirbyteStreamStatus,
|
29
|
-
AirbyteStreamStatusTraceMessage,
|
30
29
|
AirbyteStreamStatusReason,
|
31
30
|
AirbyteStreamStatusReasonType,
|
31
|
+
AirbyteStreamStatusTraceMessage,
|
32
32
|
AirbyteTraceMessage,
|
33
33
|
AuthFlowType,
|
34
34
|
ConfiguredAirbyteCatalog,
|
@@ -48,6 +48,14 @@ from .airbyte_protocol import (
|
|
48
48
|
TraceType,
|
49
49
|
Type,
|
50
50
|
)
|
51
|
+
from .airbyte_protocol_serializers import (
|
52
|
+
AirbyteMessageSerializer,
|
53
|
+
AirbyteStateMessageSerializer,
|
54
|
+
AirbyteStreamStateSerializer,
|
55
|
+
ConfiguredAirbyteCatalogSerializer,
|
56
|
+
ConfiguredAirbyteStreamSerializer,
|
57
|
+
ConnectorSpecificationSerializer,
|
58
|
+
)
|
51
59
|
from .well_known_types import (
|
52
60
|
BinaryData,
|
53
61
|
Boolean,
|
@@ -61,12 +69,3 @@ from .well_known_types import (
|
|
61
69
|
TimeWithoutTimezone,
|
62
70
|
TimeWithTimezone,
|
63
71
|
)
|
64
|
-
|
65
|
-
from .airbyte_protocol_serializers import (
|
66
|
-
AirbyteStreamStateSerializer,
|
67
|
-
AirbyteStateMessageSerializer,
|
68
|
-
AirbyteMessageSerializer,
|
69
|
-
ConfiguredAirbyteCatalogSerializer,
|
70
|
-
ConfiguredAirbyteStreamSerializer,
|
71
|
-
ConnectorSpecificationSerializer,
|
72
|
-
)
|
@@ -20,7 +20,6 @@ from typing import (
|
|
20
20
|
TypeVar,
|
21
21
|
)
|
22
22
|
|
23
|
-
from airbyte_cdk import StreamSlice
|
24
23
|
from airbyte_cdk.logger import lazy_log
|
25
24
|
from airbyte_cdk.models import FailureType
|
26
25
|
from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
|
@@ -31,6 +30,7 @@ from airbyte_cdk.sources.declarative.async_job.job_tracker import (
|
|
31
30
|
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
|
32
31
|
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
|
33
32
|
from airbyte_cdk.sources.message import MessageRepository
|
33
|
+
from airbyte_cdk.sources.types import StreamSlice
|
34
34
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
35
35
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
36
36
|
|
@@ -2,10 +2,7 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.auth.oauth import DeclarativeOauth2Authenticator
|
6
5
|
from airbyte_cdk.sources.declarative.auth.jwt import JwtAuthenticator
|
6
|
+
from airbyte_cdk.sources.declarative.auth.oauth import DeclarativeOauth2Authenticator
|
7
7
|
|
8
|
-
__all__ = [
|
9
|
-
"DeclarativeOauth2Authenticator",
|
10
|
-
"JwtAuthenticator"
|
11
|
-
]
|
8
|
+
__all__ = ["DeclarativeOauth2Authenticator", "JwtAuthenticator"]
|
@@ -2766,9 +2766,64 @@ definitions:
|
|
2766
2766
|
- "$ref": "#/definitions/IterableDecoder"
|
2767
2767
|
- "$ref": "#/definitions/XmlDecoder"
|
2768
2768
|
- "$ref": "#/definitions/GzipJsonDecoder"
|
2769
|
+
- "$ref": "#/definitions/CompositeRawDecoder"
|
2769
2770
|
$parameters:
|
2770
2771
|
type: object
|
2771
2772
|
additionalProperties: true
|
2773
|
+
CompositeRawDecoder:
|
2774
|
+
description: "(This is experimental, use at your own risk)"
|
2775
|
+
type: object
|
2776
|
+
required:
|
2777
|
+
- type
|
2778
|
+
- parser
|
2779
|
+
properties:
|
2780
|
+
type:
|
2781
|
+
type: string
|
2782
|
+
enum: [CompositeRawDecoder]
|
2783
|
+
parser:
|
2784
|
+
anyOf:
|
2785
|
+
- "$ref": "#/definitions/GzipParser"
|
2786
|
+
- "$ref": "#/definitions/JsonLineParser"
|
2787
|
+
- "$ref": "#/definitions/CsvParser"
|
2788
|
+
# PARSERS
|
2789
|
+
GzipParser:
|
2790
|
+
type: object
|
2791
|
+
required:
|
2792
|
+
- type
|
2793
|
+
- inner_parser
|
2794
|
+
properties:
|
2795
|
+
type:
|
2796
|
+
type: string
|
2797
|
+
enum: [GzipParser]
|
2798
|
+
inner_parser:
|
2799
|
+
anyOf:
|
2800
|
+
- "$ref": "#/definitions/JsonLineParser"
|
2801
|
+
- "$ref": "#/definitions/CsvParser"
|
2802
|
+
JsonLineParser:
|
2803
|
+
type: object
|
2804
|
+
required:
|
2805
|
+
- type
|
2806
|
+
properties:
|
2807
|
+
type:
|
2808
|
+
type: string
|
2809
|
+
enum: [JsonLineParser]
|
2810
|
+
encoding:
|
2811
|
+
type: string
|
2812
|
+
default: utf-8
|
2813
|
+
CsvParser:
|
2814
|
+
type: object
|
2815
|
+
required:
|
2816
|
+
- type
|
2817
|
+
properties:
|
2818
|
+
type:
|
2819
|
+
type: string
|
2820
|
+
enum: [CsvParser]
|
2821
|
+
encoding:
|
2822
|
+
type: string
|
2823
|
+
default: utf-8
|
2824
|
+
delimiter:
|
2825
|
+
type: string
|
2826
|
+
default: ","
|
2772
2827
|
AsyncJobStatusMap:
|
2773
2828
|
description: Matches the api job status to Async Job Status.
|
2774
2829
|
type: object
|
@@ -2,10 +2,28 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import CompositeRawDecoder
|
5
6
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
6
|
-
from airbyte_cdk.sources.declarative.decoders.json_decoder import
|
7
|
+
from airbyte_cdk.sources.declarative.decoders.json_decoder import (
|
8
|
+
GzipJsonDecoder,
|
9
|
+
IterableDecoder,
|
10
|
+
JsonDecoder,
|
11
|
+
JsonlDecoder,
|
12
|
+
)
|
7
13
|
from airbyte_cdk.sources.declarative.decoders.noop_decoder import NoopDecoder
|
8
|
-
from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import
|
14
|
+
from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import (
|
15
|
+
PaginationDecoderDecorator,
|
16
|
+
)
|
9
17
|
from airbyte_cdk.sources.declarative.decoders.xml_decoder import XmlDecoder
|
10
18
|
|
11
|
-
__all__ = [
|
19
|
+
__all__ = [
|
20
|
+
"Decoder",
|
21
|
+
"CompositeRawDecoder",
|
22
|
+
"JsonDecoder",
|
23
|
+
"JsonlDecoder",
|
24
|
+
"IterableDecoder",
|
25
|
+
"GzipJsonDecoder",
|
26
|
+
"NoopDecoder",
|
27
|
+
"PaginationDecoderDecorator",
|
28
|
+
"XmlDecoder",
|
29
|
+
]
|
@@ -0,0 +1,97 @@
|
|
1
|
+
import csv
|
2
|
+
import gzip
|
3
|
+
import json
|
4
|
+
import logging
|
5
|
+
from abc import ABC, abstractmethod
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from io import BufferedIOBase, TextIOWrapper
|
8
|
+
from typing import Any, Generator, MutableMapping, Optional
|
9
|
+
|
10
|
+
import requests
|
11
|
+
|
12
|
+
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
13
|
+
|
14
|
+
logger = logging.getLogger("airbyte")
|
15
|
+
|
16
|
+
|
17
|
+
@dataclass
|
18
|
+
class Parser(ABC):
|
19
|
+
@abstractmethod
|
20
|
+
def parse(
|
21
|
+
self,
|
22
|
+
data: BufferedIOBase,
|
23
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
24
|
+
"""
|
25
|
+
Parse data and yield dictionaries.
|
26
|
+
"""
|
27
|
+
pass
|
28
|
+
|
29
|
+
|
30
|
+
@dataclass
|
31
|
+
class GzipParser(Parser):
|
32
|
+
inner_parser: Parser
|
33
|
+
|
34
|
+
def parse(
|
35
|
+
self,
|
36
|
+
data: BufferedIOBase,
|
37
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
38
|
+
"""
|
39
|
+
Decompress gzipped bytes and pass decompressed data to the inner parser.
|
40
|
+
"""
|
41
|
+
with gzip.GzipFile(fileobj=data, mode="rb") as gzipobj:
|
42
|
+
yield from self.inner_parser.parse(gzipobj)
|
43
|
+
|
44
|
+
|
45
|
+
@dataclass
|
46
|
+
class JsonLineParser(Parser):
|
47
|
+
encoding: Optional[str] = "utf-8"
|
48
|
+
|
49
|
+
def parse(
|
50
|
+
self,
|
51
|
+
data: BufferedIOBase,
|
52
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
53
|
+
for line in data:
|
54
|
+
try:
|
55
|
+
yield json.loads(line.decode(encoding=self.encoding or "utf-8"))
|
56
|
+
except json.JSONDecodeError as e:
|
57
|
+
logger.warning(f"Cannot decode/parse line {line!r} as JSON, error: {e}")
|
58
|
+
|
59
|
+
|
60
|
+
@dataclass
|
61
|
+
class CsvParser(Parser):
|
62
|
+
# TODO: migrate implementation to re-use file-base classes
|
63
|
+
encoding: Optional[str] = "utf-8"
|
64
|
+
delimiter: Optional[str] = ","
|
65
|
+
|
66
|
+
def parse(
|
67
|
+
self,
|
68
|
+
data: BufferedIOBase,
|
69
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
70
|
+
"""
|
71
|
+
Parse CSV data from decompressed bytes.
|
72
|
+
"""
|
73
|
+
text_data = TextIOWrapper(data, encoding=self.encoding) # type: ignore
|
74
|
+
reader = csv.DictReader(text_data, delimiter=self.delimiter or ",")
|
75
|
+
yield from reader
|
76
|
+
|
77
|
+
|
78
|
+
@dataclass
|
79
|
+
class CompositeRawDecoder(Decoder):
|
80
|
+
"""
|
81
|
+
Decoder strategy to transform a requests.Response into a Generator[MutableMapping[str, Any], None, None]
|
82
|
+
passed response.raw to parser(s).
|
83
|
+
Note: response.raw is not decoded/decompressed by default.
|
84
|
+
parsers should be instantiated recursively.
|
85
|
+
Example:
|
86
|
+
composite_raw_decoder = CompositeRawDecoder(parser=GzipParser(inner_parser=JsonLineParser(encoding="iso-8859-1")))
|
87
|
+
"""
|
88
|
+
|
89
|
+
parser: Parser
|
90
|
+
|
91
|
+
def is_stream_response(self) -> bool:
|
92
|
+
return True
|
93
|
+
|
94
|
+
def decode(
|
95
|
+
self, response: requests.Response
|
96
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
97
|
+
yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
|
@@ -6,6 +6,14 @@ from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtr
|
|
6
6
|
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
|
7
7
|
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
|
8
8
|
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
|
9
|
-
from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import
|
9
|
+
from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import (
|
10
|
+
ResponseToFileExtractor,
|
11
|
+
)
|
10
12
|
|
11
|
-
__all__ = [
|
13
|
+
__all__ = [
|
14
|
+
"HttpSelector",
|
15
|
+
"DpathExtractor",
|
16
|
+
"RecordFilter",
|
17
|
+
"RecordSelector",
|
18
|
+
"ResponseToFileExtractor",
|
19
|
+
]
|
@@ -4,9 +4,16 @@
|
|
4
4
|
|
5
5
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
6
6
|
from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
|
7
|
-
from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import
|
8
|
-
|
9
|
-
|
7
|
+
from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
|
8
|
+
GlobalSubstreamCursor,
|
9
|
+
)
|
10
|
+
from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import (
|
11
|
+
CursorFactory,
|
12
|
+
PerPartitionCursor,
|
13
|
+
)
|
14
|
+
from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
|
15
|
+
PerPartitionWithGlobalCursor,
|
16
|
+
)
|
10
17
|
from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor import (
|
11
18
|
ChildPartitionResumableFullRefreshCursor,
|
12
19
|
ResumableFullRefreshCursor,
|
@@ -1125,6 +1125,17 @@ class LegacySessionTokenAuthenticator(BaseModel):
|
|
1125
1125
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1126
1126
|
|
1127
1127
|
|
1128
|
+
class JsonLineParser(BaseModel):
|
1129
|
+
type: Literal["JsonLineParser"]
|
1130
|
+
encoding: Optional[str] = "utf-8"
|
1131
|
+
|
1132
|
+
|
1133
|
+
class CsvParser(BaseModel):
|
1134
|
+
type: Literal["CsvParser"]
|
1135
|
+
encoding: Optional[str] = "utf-8"
|
1136
|
+
delimiter: Optional[str] = ","
|
1137
|
+
|
1138
|
+
|
1128
1139
|
class AsyncJobStatusMap(BaseModel):
|
1129
1140
|
type: Optional[Literal["AsyncJobStatusMap"]] = None
|
1130
1141
|
running: List[str]
|
@@ -1208,6 +1219,8 @@ class ComponentMappingDefinition(BaseModel):
|
|
1208
1219
|
"{{ components_values['updates'] }}",
|
1209
1220
|
"{{ components_values['MetaData']['LastUpdatedTime'] }}",
|
1210
1221
|
"{{ config['segment_id'] }}",
|
1222
|
+
"{{ stream_slice['parent_id'] }}",
|
1223
|
+
"{{ stream_slice['extra_fields']['name'] }}",
|
1211
1224
|
],
|
1212
1225
|
title="Value",
|
1213
1226
|
)
|
@@ -1504,6 +1517,11 @@ class RecordSelector(BaseModel):
|
|
1504
1517
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1505
1518
|
|
1506
1519
|
|
1520
|
+
class GzipParser(BaseModel):
|
1521
|
+
type: Literal["GzipParser"]
|
1522
|
+
inner_parser: Union[JsonLineParser, CsvParser]
|
1523
|
+
|
1524
|
+
|
1507
1525
|
class Spec(BaseModel):
|
1508
1526
|
type: Literal["Spec"]
|
1509
1527
|
connection_specification: Dict[str, Any] = Field(
|
@@ -1534,6 +1552,11 @@ class CompositeErrorHandler(BaseModel):
|
|
1534
1552
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1535
1553
|
|
1536
1554
|
|
1555
|
+
class CompositeRawDecoder(BaseModel):
|
1556
|
+
type: Literal["CompositeRawDecoder"]
|
1557
|
+
parser: Union[GzipParser, JsonLineParser, CsvParser]
|
1558
|
+
|
1559
|
+
|
1537
1560
|
class DeclarativeSource1(BaseModel):
|
1538
1561
|
class Config:
|
1539
1562
|
extra = Extra.forbid
|
@@ -1936,6 +1959,7 @@ class SimpleRetriever(BaseModel):
|
|
1936
1959
|
IterableDecoder,
|
1937
1960
|
XmlDecoder,
|
1938
1961
|
GzipJsonDecoder,
|
1962
|
+
CompositeRawDecoder,
|
1939
1963
|
]
|
1940
1964
|
] = Field(
|
1941
1965
|
None,
|
@@ -67,6 +67,12 @@ from airbyte_cdk.sources.declarative.decoders import (
|
|
67
67
|
PaginationDecoderDecorator,
|
68
68
|
XmlDecoder,
|
69
69
|
)
|
70
|
+
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
|
71
|
+
CompositeRawDecoder,
|
72
|
+
CsvParser,
|
73
|
+
GzipParser,
|
74
|
+
JsonLineParser,
|
75
|
+
)
|
70
76
|
from airbyte_cdk.sources.declarative.extractors import (
|
71
77
|
DpathExtractor,
|
72
78
|
RecordFilter,
|
@@ -125,6 +131,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
125
131
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
126
132
|
CompositeErrorHandler as CompositeErrorHandlerModel,
|
127
133
|
)
|
134
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
135
|
+
CompositeRawDecoder as CompositeRawDecoderModel,
|
136
|
+
)
|
128
137
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
129
138
|
ConcurrencyLevel as ConcurrencyLevelModel,
|
130
139
|
)
|
@@ -134,6 +143,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
134
143
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
135
144
|
ConstantBackoffStrategy as ConstantBackoffStrategyModel,
|
136
145
|
)
|
146
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
147
|
+
CsvParser as CsvParserModel,
|
148
|
+
)
|
137
149
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
138
150
|
CursorPagination as CursorPaginationModel,
|
139
151
|
)
|
@@ -203,6 +215,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
203
215
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
204
216
|
GzipJsonDecoder as GzipJsonDecoderModel,
|
205
217
|
)
|
218
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
219
|
+
GzipParser as GzipParserModel,
|
220
|
+
)
|
206
221
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
207
222
|
HttpComponentsResolver as HttpComponentsResolverModel,
|
208
223
|
)
|
@@ -227,6 +242,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
227
242
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
228
243
|
JsonlDecoder as JsonlDecoderModel,
|
229
244
|
)
|
245
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
246
|
+
JsonLineParser as JsonLineParserModel,
|
247
|
+
)
|
230
248
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
231
249
|
JwtAuthenticator as JwtAuthenticatorModel,
|
232
250
|
)
|
@@ -455,6 +473,7 @@ class ModelToComponentFactory:
|
|
455
473
|
BearerAuthenticatorModel: self.create_bearer_authenticator,
|
456
474
|
CheckStreamModel: self.create_check_stream,
|
457
475
|
CompositeErrorHandlerModel: self.create_composite_error_handler,
|
476
|
+
CompositeRawDecoderModel: self.create_composite_raw_decoder,
|
458
477
|
ConcurrencyLevelModel: self.create_concurrency_level,
|
459
478
|
ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
|
460
479
|
CursorPaginationModel: self.create_cursor_pagination,
|
@@ -485,7 +504,9 @@ class ModelToComponentFactory:
|
|
485
504
|
InlineSchemaLoaderModel: self.create_inline_schema_loader,
|
486
505
|
JsonDecoderModel: self.create_json_decoder,
|
487
506
|
JsonlDecoderModel: self.create_jsonl_decoder,
|
507
|
+
JsonLineParserModel: self.create_json_line_parser,
|
488
508
|
GzipJsonDecoderModel: self.create_gzipjson_decoder,
|
509
|
+
GzipParserModel: self.create_gzip_parser,
|
489
510
|
KeysToLowerModel: self.create_keys_to_lower_transformation,
|
490
511
|
KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
|
491
512
|
FlattenFieldsModel: self.create_flatten_fields,
|
@@ -1701,6 +1722,12 @@ class ModelToComponentFactory:
|
|
1701
1722
|
) -> JsonlDecoder:
|
1702
1723
|
return JsonlDecoder(parameters={})
|
1703
1724
|
|
1725
|
+
@staticmethod
|
1726
|
+
def create_json_line_parser(
|
1727
|
+
model: JsonLineParserModel, config: Config, **kwargs: Any
|
1728
|
+
) -> JsonLineParser:
|
1729
|
+
return JsonLineParser(encoding=model.encoding)
|
1730
|
+
|
1704
1731
|
@staticmethod
|
1705
1732
|
def create_iterable_decoder(
|
1706
1733
|
model: IterableDecoderModel, config: Config, **kwargs: Any
|
@@ -1717,6 +1744,22 @@ class ModelToComponentFactory:
|
|
1717
1744
|
) -> GzipJsonDecoder:
|
1718
1745
|
return GzipJsonDecoder(parameters={}, encoding=model.encoding)
|
1719
1746
|
|
1747
|
+
def create_gzip_parser(
|
1748
|
+
self, model: GzipParserModel, config: Config, **kwargs: Any
|
1749
|
+
) -> GzipParser:
|
1750
|
+
inner_parser = self._create_component_from_model(model=model.inner_parser, config=config)
|
1751
|
+
return GzipParser(inner_parser=inner_parser)
|
1752
|
+
|
1753
|
+
@staticmethod
|
1754
|
+
def create_csv_parser(model: CsvParserModel, config: Config, **kwargs: Any) -> CsvParser:
|
1755
|
+
return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
|
1756
|
+
|
1757
|
+
def create_composite_raw_decoder(
|
1758
|
+
self, model: CompositeRawDecoderModel, config: Config, **kwargs: Any
|
1759
|
+
) -> CompositeRawDecoder:
|
1760
|
+
parser = self._create_component_from_model(model=model.parser, config=config)
|
1761
|
+
return CompositeRawDecoder(parser=parser)
|
1762
|
+
|
1720
1763
|
@staticmethod
|
1721
1764
|
def create_json_file_schema_loader(
|
1722
1765
|
model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
|