airbyte-cdk 6.34.0.dev2__py3-none-any.whl → 6.34.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +12 -16
- airbyte_cdk/connector_builder/message_grouper.py +448 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
- airbyte_cdk/sources/declarative/auth/oauth.py +1 -6
- airbyte_cdk/sources/declarative/auth/token.py +8 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +19 -30
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +85 -203
- airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
- airbyte_cdk/sources/declarative/decoders/__init__.py +4 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -7
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +58 -12
- airbyte_cdk/sources/declarative/extractors/record_selector.py +3 -12
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +38 -122
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -12
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -9
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -150
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +84 -234
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +18 -26
- airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -8
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -16
- airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -4
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
- airbyte_cdk/sources/file_based/file_based_source.py +37 -70
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +12 -107
- airbyte_cdk/sources/file_based/stream/__init__.py +1 -10
- airbyte_cdk/sources/streams/call_rate.py +47 -185
- airbyte_cdk/sources/streams/http/http.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +56 -217
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +73 -144
- airbyte_cdk/test/mock_http/mocker.py +1 -9
- airbyte_cdk/test/mock_http/response.py +3 -6
- airbyte_cdk/utils/datetime_helpers.py +66 -48
- airbyte_cdk/utils/mapping_helpers.py +26 -126
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/RECORD +45 -54
- airbyte_cdk/connector_builder/test_reader/__init__.py +0 -7
- airbyte_cdk/connector_builder/test_reader/helpers.py +0 -591
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +0 -160
- airbyte_cdk/connector_builder/test_reader/reader.py +0 -441
- airbyte_cdk/connector_builder/test_reader/types.py +0 -75
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +0 -81
- airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -47
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +0 -85
- airbyte_cdk/sources/specs/transfer_modes.py +0 -26
- airbyte_cdk/sources/streams/permissions/identities_stream.py +0 -75
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/entry_points.txt +0 -0
@@ -4,7 +4,7 @@
|
|
4
4
|
import copy
|
5
5
|
import logging
|
6
6
|
from dataclasses import InitVar, dataclass
|
7
|
-
from typing import TYPE_CHECKING, Any, Iterable, List, Mapping,
|
7
|
+
from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, Optional, Union
|
8
8
|
|
9
9
|
import dpath
|
10
10
|
|
@@ -118,7 +118,7 @@ class SubstreamPartitionRouter(PartitionRouter):
|
|
118
118
|
def _get_request_option(
|
119
119
|
self, option_type: RequestOptionType, stream_slice: Optional[StreamSlice]
|
120
120
|
) -> Mapping[str, Any]:
|
121
|
-
params
|
121
|
+
params = {}
|
122
122
|
if stream_slice:
|
123
123
|
for parent_config in self.parent_stream_configs:
|
124
124
|
if (
|
@@ -128,7 +128,13 @@ class SubstreamPartitionRouter(PartitionRouter):
|
|
128
128
|
key = parent_config.partition_field.eval(self.config) # type: ignore # partition_field is always casted to an interpolated string
|
129
129
|
value = stream_slice.get(key)
|
130
130
|
if value:
|
131
|
-
|
131
|
+
params.update(
|
132
|
+
{
|
133
|
+
parent_config.request_option.field_name.eval( # type: ignore [union-attr]
|
134
|
+
config=self.config
|
135
|
+
): value
|
136
|
+
}
|
137
|
+
)
|
132
138
|
return params
|
133
139
|
|
134
140
|
def stream_slices(self) -> Iterable[StreamSlice]:
|
@@ -299,33 +305,23 @@ class SubstreamPartitionRouter(PartitionRouter):
|
|
299
305
|
|
300
306
|
def _migrate_child_state_to_parent_state(self, stream_state: StreamState) -> StreamState:
|
301
307
|
"""
|
302
|
-
Migrate the child
|
303
|
-
|
304
|
-
This method converts the child stream state—or, if present, the global state—into a format that is
|
305
|
-
compatible with parent streams that use incremental synchronization. The migration occurs only for
|
306
|
-
parent streams with incremental dependencies. It filters out per-partition states and retains only the
|
307
|
-
global state in the form {cursor_field: cursor_value}.
|
308
|
+
Migrate the child stream state to the parent stream's state format.
|
308
309
|
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
{"state": {"updated_at": "2023-05-27T00:00:00Z"}, ...}
|
314
|
-
In this case, the migration uses the first value from the "state" dictionary.
|
315
|
-
- Any per-partition state formats or other non-simple structures are ignored during migration.
|
310
|
+
This method converts the global or child state into a format compatible with parent
|
311
|
+
streams. The migration occurs only for parent streams with incremental dependencies.
|
312
|
+
The method filters out per-partition states and retains only the global state in the
|
313
|
+
format `{cursor_field: cursor_value}`.
|
316
314
|
|
317
315
|
Args:
|
318
316
|
stream_state (StreamState): The state to migrate. Expected formats include:
|
319
317
|
- {"updated_at": "2023-05-27T00:00:00Z"}
|
320
|
-
- {"
|
321
|
-
(In this format, only the first global state value is used, and per-partition states are ignored.)
|
318
|
+
- {"states": [...] } (ignored during migration)
|
322
319
|
|
323
320
|
Returns:
|
324
321
|
StreamState: A migrated state for parent streams in the format:
|
325
322
|
{
|
326
323
|
"parent_stream_name": {"parent_stream_cursor": "2023-05-27T00:00:00Z"}
|
327
324
|
}
|
328
|
-
where each parent stream with an incremental dependency is assigned its corresponding cursor value.
|
329
325
|
|
330
326
|
Example:
|
331
327
|
Input: {"updated_at": "2023-05-27T00:00:00Z"}
|
@@ -336,15 +332,11 @@ class SubstreamPartitionRouter(PartitionRouter):
|
|
336
332
|
substream_state_values = list(stream_state.values())
|
337
333
|
substream_state = substream_state_values[0] if substream_state_values else {}
|
338
334
|
|
339
|
-
# Ignore per-partition states or invalid formats
|
335
|
+
# Ignore per-partition states or invalid formats
|
340
336
|
if isinstance(substream_state, (list, dict)) or len(substream_state_values) != 1:
|
341
|
-
|
342
|
-
if "state" in stream_state and isinstance(stream_state["state"], dict):
|
343
|
-
substream_state = list(stream_state["state"].values())[0]
|
344
|
-
else:
|
345
|
-
return {}
|
337
|
+
return {}
|
346
338
|
|
347
|
-
#
|
339
|
+
# Copy child state to parent streams with incremental dependencies
|
348
340
|
parent_state = {}
|
349
341
|
if substream_state:
|
350
342
|
for parent_config in self.parent_stream_configs:
|
@@ -22,7 +22,6 @@ from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_req
|
|
22
22
|
)
|
23
23
|
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester
|
24
24
|
from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
|
25
|
-
from airbyte_cdk.sources.streams.call_rate import APIBudget
|
26
25
|
from airbyte_cdk.sources.streams.http import HttpClient
|
27
26
|
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
|
28
27
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
@@ -56,7 +55,6 @@ class HttpRequester(Requester):
|
|
56
55
|
http_method: Union[str, HttpMethod] = HttpMethod.GET
|
57
56
|
request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None
|
58
57
|
error_handler: Optional[ErrorHandler] = None
|
59
|
-
api_budget: Optional[APIBudget] = None
|
60
58
|
disable_retries: bool = False
|
61
59
|
message_repository: MessageRepository = NoopMessageRepository()
|
62
60
|
use_cache: bool = False
|
@@ -93,7 +91,6 @@ class HttpRequester(Requester):
|
|
93
91
|
name=self.name,
|
94
92
|
logger=self.logger,
|
95
93
|
error_handler=self.error_handler,
|
96
|
-
api_budget=self.api_budget,
|
97
94
|
authenticator=self._authenticator,
|
98
95
|
use_cache=self.use_cache,
|
99
96
|
backoff_strategy=backoff_strategies,
|
@@ -202,9 +199,6 @@ class HttpRequester(Requester):
|
|
202
199
|
Raise a ValueError if there's a key collision
|
203
200
|
Returned merged mapping otherwise
|
204
201
|
"""
|
205
|
-
|
206
|
-
is_body_json = requester_method.__name__ == "get_request_body_json"
|
207
|
-
|
208
202
|
return combine_mappings(
|
209
203
|
[
|
210
204
|
requester_method(
|
@@ -214,8 +208,7 @@ class HttpRequester(Requester):
|
|
214
208
|
),
|
215
209
|
auth_options_method(),
|
216
210
|
extra_options,
|
217
|
-
]
|
218
|
-
allow_same_value_merge=is_body_json,
|
211
|
+
]
|
219
212
|
)
|
220
213
|
|
221
214
|
def _request_headers(
|
@@ -23,9 +23,6 @@ from airbyte_cdk.sources.declarative.requesters.request_option import (
|
|
23
23
|
)
|
24
24
|
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
|
25
25
|
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
26
|
-
from airbyte_cdk.utils.mapping_helpers import (
|
27
|
-
_validate_component_request_option_paths,
|
28
|
-
)
|
29
26
|
|
30
27
|
|
31
28
|
@dataclass
|
@@ -116,13 +113,6 @@ class DefaultPaginator(Paginator):
|
|
116
113
|
if isinstance(self.url_base, str):
|
117
114
|
self.url_base = InterpolatedString(string=self.url_base, parameters=parameters)
|
118
115
|
|
119
|
-
if self.page_token_option and not isinstance(self.page_token_option, RequestPath):
|
120
|
-
_validate_component_request_option_paths(
|
121
|
-
self.config,
|
122
|
-
self.page_size_option,
|
123
|
-
self.page_token_option,
|
124
|
-
)
|
125
|
-
|
126
116
|
def get_initial_token(self) -> Optional[Any]:
|
127
117
|
"""
|
128
118
|
Return the page token that should be used for the first request of a stream
|
@@ -197,7 +187,7 @@ class DefaultPaginator(Paginator):
|
|
197
187
|
def _get_request_options(
|
198
188
|
self, option_type: RequestOptionType, next_page_token: Optional[Mapping[str, Any]]
|
199
189
|
) -> MutableMapping[str, Any]:
|
200
|
-
options
|
190
|
+
options = {}
|
201
191
|
|
202
192
|
token = next_page_token.get("next_page_token") if next_page_token else None
|
203
193
|
if (
|
@@ -206,16 +196,15 @@ class DefaultPaginator(Paginator):
|
|
206
196
|
and isinstance(self.page_token_option, RequestOption)
|
207
197
|
and self.page_token_option.inject_into == option_type
|
208
198
|
):
|
209
|
-
self.page_token_option.
|
210
|
-
|
199
|
+
options[self.page_token_option.field_name.eval(config=self.config)] = token # type: ignore # field_name is always cast to an interpolated string
|
211
200
|
if (
|
212
201
|
self.page_size_option
|
213
202
|
and self.pagination_strategy.get_page_size()
|
214
203
|
and self.page_size_option.inject_into == option_type
|
215
204
|
):
|
216
|
-
|
217
|
-
|
218
|
-
|
205
|
+
options[self.page_size_option.field_name.eval(config=self.config)] = ( # type: ignore [union-attr]
|
206
|
+
self.pagination_strategy.get_page_size()
|
207
|
+
) # type: ignore # field_name is always cast to an interpolated string
|
219
208
|
return options
|
220
209
|
|
221
210
|
|
@@ -4,10 +4,9 @@
|
|
4
4
|
|
5
5
|
from dataclasses import InitVar, dataclass
|
6
6
|
from enum import Enum
|
7
|
-
from typing import Any,
|
7
|
+
from typing import Any, Mapping, Union
|
8
8
|
|
9
9
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
10
|
-
from airbyte_cdk.sources.types import Config
|
11
10
|
|
12
11
|
|
13
12
|
class RequestOptionType(Enum):
|
@@ -27,91 +26,13 @@ class RequestOption:
|
|
27
26
|
Describes an option to set on a request
|
28
27
|
|
29
28
|
Attributes:
|
30
|
-
field_name (str): Describes the name of the parameter to inject
|
31
|
-
field_path (list(str)): Describes the path to a nested field as a list of field names.
|
32
|
-
Only valid for body_json injection type, and mutually exclusive with field_name.
|
29
|
+
field_name (str): Describes the name of the parameter to inject
|
33
30
|
inject_into (RequestOptionType): Describes where in the HTTP request to inject the parameter
|
34
31
|
"""
|
35
32
|
|
33
|
+
field_name: Union[InterpolatedString, str]
|
36
34
|
inject_into: RequestOptionType
|
37
35
|
parameters: InitVar[Mapping[str, Any]]
|
38
|
-
field_name: Optional[Union[InterpolatedString, str]] = None
|
39
|
-
field_path: Optional[List[Union[InterpolatedString, str]]] = None
|
40
36
|
|
41
37
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
42
|
-
|
43
|
-
if self.field_name is None and self.field_path is None:
|
44
|
-
raise ValueError("RequestOption requires either a field_name or field_path")
|
45
|
-
|
46
|
-
if self.field_name is not None and self.field_path is not None:
|
47
|
-
raise ValueError(
|
48
|
-
"Only one of field_name or field_path can be provided to RequestOption"
|
49
|
-
)
|
50
|
-
|
51
|
-
# Nested field injection is only supported for body JSON injection
|
52
|
-
if self.field_path is not None and self.inject_into != RequestOptionType.body_json:
|
53
|
-
raise ValueError(
|
54
|
-
"Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types."
|
55
|
-
)
|
56
|
-
|
57
|
-
# Convert field_name and field_path into InterpolatedString objects if they are strings
|
58
|
-
if self.field_name is not None:
|
59
|
-
self.field_name = InterpolatedString.create(self.field_name, parameters=parameters)
|
60
|
-
elif self.field_path is not None:
|
61
|
-
self.field_path = [
|
62
|
-
InterpolatedString.create(segment, parameters=parameters)
|
63
|
-
for segment in self.field_path
|
64
|
-
]
|
65
|
-
|
66
|
-
@property
|
67
|
-
def _is_field_path(self) -> bool:
|
68
|
-
"""Returns whether this option is a field path (ie, a nested field)"""
|
69
|
-
return self.field_path is not None
|
70
|
-
|
71
|
-
def inject_into_request(
|
72
|
-
self,
|
73
|
-
target: MutableMapping[str, Any],
|
74
|
-
value: Any,
|
75
|
-
config: Config,
|
76
|
-
) -> None:
|
77
|
-
"""
|
78
|
-
Inject a request option value into a target request structure using either field_name or field_path.
|
79
|
-
For non-body-json injection, only top-level field names are supported.
|
80
|
-
For body-json injection, both field names and nested field paths are supported.
|
81
|
-
|
82
|
-
Args:
|
83
|
-
target: The request structure to inject the value into
|
84
|
-
value: The value to inject
|
85
|
-
config: The config object to use for interpolation
|
86
|
-
"""
|
87
|
-
if self._is_field_path:
|
88
|
-
if self.inject_into != RequestOptionType.body_json:
|
89
|
-
raise ValueError(
|
90
|
-
"Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types."
|
91
|
-
)
|
92
|
-
|
93
|
-
assert self.field_path is not None # for type checker
|
94
|
-
current = target
|
95
|
-
# Convert path segments into strings, evaluating any interpolated segments
|
96
|
-
# Example: ["data", "{{ config[user_type] }}", "id"] -> ["data", "admin", "id"]
|
97
|
-
*path_parts, final_key = [
|
98
|
-
str(
|
99
|
-
segment.eval(config=config)
|
100
|
-
if isinstance(segment, InterpolatedString)
|
101
|
-
else segment
|
102
|
-
)
|
103
|
-
for segment in self.field_path
|
104
|
-
]
|
105
|
-
|
106
|
-
# Build a nested dictionary structure and set the final value at the deepest level
|
107
|
-
for part in path_parts:
|
108
|
-
current = current.setdefault(part, {})
|
109
|
-
current[final_key] = value
|
110
|
-
else:
|
111
|
-
# For non-nested fields, evaluate the field name if it's an interpolated string
|
112
|
-
key = (
|
113
|
-
self.field_name.eval(config=config)
|
114
|
-
if isinstance(self.field_name, InterpolatedString)
|
115
|
-
else self.field_name
|
116
|
-
)
|
117
|
-
target[str(key)] = value
|
38
|
+
self.field_name = InterpolatedString.create(self.field_name, parameters=parameters)
|
@@ -80,13 +80,12 @@ class DatetimeBasedRequestOptionsProvider(RequestOptionsProvider):
|
|
80
80
|
options: MutableMapping[str, Any] = {}
|
81
81
|
if not stream_slice:
|
82
82
|
return options
|
83
|
-
|
84
83
|
if self.start_time_option and self.start_time_option.inject_into == option_type:
|
85
|
-
|
86
|
-
|
87
|
-
|
84
|
+
options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore # field_name is always casted to an interpolated string
|
85
|
+
self._partition_field_start.eval(self.config)
|
86
|
+
)
|
88
87
|
if self.end_time_option and self.end_time_option.inject_into == option_type:
|
89
|
-
|
90
|
-
|
91
|
-
|
88
|
+
options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore [union-attr]
|
89
|
+
self._partition_field_end.eval(self.config)
|
90
|
+
)
|
92
91
|
return options
|
@@ -6,7 +6,7 @@ from typing import Any, Iterable, Mapping, Optional
|
|
6
6
|
|
7
7
|
from typing_extensions import deprecated
|
8
8
|
|
9
|
-
from airbyte_cdk.
|
9
|
+
from airbyte_cdk.models import FailureType
|
10
10
|
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncPartition
|
11
11
|
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
|
12
12
|
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
|
@@ -16,6 +16,7 @@ from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
|
16
16
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
17
17
|
from airbyte_cdk.sources.streams.core import StreamData
|
18
18
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
19
|
+
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
19
20
|
|
20
21
|
|
21
22
|
@deprecated(
|
@@ -56,9 +57,9 @@ class AsyncRetriever(Retriever):
|
|
56
57
|
|
57
58
|
return self.state
|
58
59
|
|
59
|
-
def
|
60
|
+
def _validate_and_get_stream_slice_partition(
|
60
61
|
self, stream_slice: Optional[StreamSlice] = None
|
61
|
-
) ->
|
62
|
+
) -> AsyncPartition:
|
62
63
|
"""
|
63
64
|
Validates the stream_slice argument and returns the partition from it.
|
64
65
|
|
@@ -72,7 +73,12 @@ class AsyncRetriever(Retriever):
|
|
72
73
|
AirbyteTracedException: If the stream_slice is not an instance of StreamSlice or if the partition is not present in the stream_slice.
|
73
74
|
|
74
75
|
"""
|
75
|
-
|
76
|
+
if not isinstance(stream_slice, StreamSlice) or "partition" not in stream_slice.partition:
|
77
|
+
raise AirbyteTracedException(
|
78
|
+
message="Invalid arguments to AsyncRetriever.read_records: stream_slice is not optional. Please contact Airbyte Support",
|
79
|
+
failure_type=FailureType.system_error,
|
80
|
+
)
|
81
|
+
return stream_slice["partition"] # type: ignore # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices
|
76
82
|
|
77
83
|
def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
|
78
84
|
return self.stream_slicer.stream_slices()
|
@@ -83,8 +89,8 @@ class AsyncRetriever(Retriever):
|
|
83
89
|
stream_slice: Optional[StreamSlice] = None,
|
84
90
|
) -> Iterable[StreamData]:
|
85
91
|
stream_state: StreamState = self._get_stream_state()
|
86
|
-
|
87
|
-
records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(
|
92
|
+
partition: AsyncPartition = self._validate_and_get_stream_slice_partition(stream_slice)
|
93
|
+
records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(partition)
|
88
94
|
|
89
95
|
yield from self.record_selector.filter_and_transform(
|
90
96
|
all_data=records,
|
@@ -128,9 +128,6 @@ class SimpleRetriever(Retriever):
|
|
128
128
|
Returned merged mapping otherwise
|
129
129
|
"""
|
130
130
|
# FIXME we should eventually remove the usage of stream_state as part of the interpolation
|
131
|
-
|
132
|
-
is_body_json = paginator_method.__name__ == "get_request_body_json"
|
133
|
-
|
134
131
|
mappings = [
|
135
132
|
paginator_method(
|
136
133
|
stream_state=stream_state,
|
@@ -146,7 +143,7 @@ class SimpleRetriever(Retriever):
|
|
146
143
|
next_page_token=next_page_token,
|
147
144
|
)
|
148
145
|
)
|
149
|
-
return combine_mappings(mappings
|
146
|
+
return combine_mappings(mappings)
|
150
147
|
|
151
148
|
def _request_headers(
|
152
149
|
self,
|
@@ -11,7 +11,6 @@ from pydantic.v1 import AnyUrl, BaseModel, Field
|
|
11
11
|
|
12
12
|
from airbyte_cdk import OneOfOptionConfig
|
13
13
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
14
|
-
from airbyte_cdk.sources.specs.transfer_modes import DeliverPermissions
|
15
14
|
from airbyte_cdk.sources.utils import schema_helpers
|
16
15
|
|
17
16
|
|
@@ -66,7 +65,7 @@ class AbstractFileBasedSpec(BaseModel):
|
|
66
65
|
order=10,
|
67
66
|
)
|
68
67
|
|
69
|
-
delivery_method: Union[DeliverRecords, DeliverRawFiles
|
68
|
+
delivery_method: Union[DeliverRecords, DeliverRawFiles] = Field(
|
70
69
|
title="Delivery Method",
|
71
70
|
discriminator="delivery_type",
|
72
71
|
type="object",
|
@@ -33,12 +33,6 @@ from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
|
|
33
33
|
FileBasedStreamConfig,
|
34
34
|
ValidationPolicy,
|
35
35
|
)
|
36
|
-
from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
|
37
|
-
include_identities_stream,
|
38
|
-
preserve_directory_structure,
|
39
|
-
use_file_transfer,
|
40
|
-
use_permissions_transfer,
|
41
|
-
)
|
42
36
|
from airbyte_cdk.sources.file_based.discovery_policy import (
|
43
37
|
AbstractDiscoveryPolicy,
|
44
38
|
DefaultDiscoveryPolicy,
|
@@ -55,12 +49,7 @@ from airbyte_cdk.sources.file_based.schema_validation_policies import (
|
|
55
49
|
DEFAULT_SCHEMA_VALIDATION_POLICIES,
|
56
50
|
AbstractSchemaValidationPolicy,
|
57
51
|
)
|
58
|
-
from airbyte_cdk.sources.file_based.stream import
|
59
|
-
AbstractFileBasedStream,
|
60
|
-
DefaultFileBasedStream,
|
61
|
-
FileIdentitiesStream,
|
62
|
-
PermissionsFileBasedStream,
|
63
|
-
)
|
52
|
+
from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream, DefaultFileBasedStream
|
64
53
|
from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamFacade
|
65
54
|
from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
|
66
55
|
AbstractConcurrentFileBasedCursor,
|
@@ -77,7 +66,6 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
|
77
66
|
DEFAULT_CONCURRENCY = 100
|
78
67
|
MAX_CONCURRENCY = 100
|
79
68
|
INITIAL_N_PARTITIONS = MAX_CONCURRENCY // 2
|
80
|
-
IDENTITIES_STREAM = "identities"
|
81
69
|
|
82
70
|
|
83
71
|
class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
@@ -169,20 +157,13 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
169
157
|
errors = []
|
170
158
|
tracebacks = []
|
171
159
|
for stream in streams:
|
172
|
-
if isinstance(stream, FileIdentitiesStream):
|
173
|
-
identity = next(iter(stream.load_identity_groups()))
|
174
|
-
if not identity:
|
175
|
-
errors.append(
|
176
|
-
"Unable to get identities for current configuration, please check your credentials"
|
177
|
-
)
|
178
|
-
continue
|
179
160
|
if not isinstance(stream, AbstractFileBasedStream):
|
180
161
|
raise ValueError(f"Stream {stream} is not a file-based stream.")
|
181
162
|
try:
|
182
163
|
parsed_config = self._get_parsed_config(config)
|
183
164
|
availability_method = (
|
184
165
|
stream.availability_strategy.check_availability
|
185
|
-
if
|
166
|
+
if self._use_file_transfer(parsed_config)
|
186
167
|
else stream.availability_strategy.check_availability_and_parsability
|
187
168
|
)
|
188
169
|
(
|
@@ -258,7 +239,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
258
239
|
message_repository=self.message_repository,
|
259
240
|
)
|
260
241
|
stream = FileBasedStreamFacade.create_from_stream(
|
261
|
-
stream=self.
|
242
|
+
stream=self._make_default_stream(
|
262
243
|
stream_config=stream_config,
|
263
244
|
cursor=cursor,
|
264
245
|
parsed_config=parsed_config,
|
@@ -289,7 +270,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
289
270
|
CursorField(DefaultFileBasedStream.ab_last_mod_col),
|
290
271
|
)
|
291
272
|
stream = FileBasedStreamFacade.create_from_stream(
|
292
|
-
stream=self.
|
273
|
+
stream=self._make_default_stream(
|
293
274
|
stream_config=stream_config,
|
294
275
|
cursor=cursor,
|
295
276
|
parsed_config=parsed_config,
|
@@ -301,17 +282,13 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
301
282
|
)
|
302
283
|
else:
|
303
284
|
cursor = self.cursor_cls(stream_config)
|
304
|
-
stream = self.
|
285
|
+
stream = self._make_default_stream(
|
305
286
|
stream_config=stream_config,
|
306
287
|
cursor=cursor,
|
307
288
|
parsed_config=parsed_config,
|
308
289
|
)
|
309
290
|
|
310
291
|
streams.append(stream)
|
311
|
-
|
312
|
-
if include_identities_stream(parsed_config):
|
313
|
-
identities_stream = self._make_identities_stream()
|
314
|
-
streams.append(identities_stream)
|
315
292
|
return streams
|
316
293
|
|
317
294
|
except ValidationError as exc:
|
@@ -333,48 +310,8 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
333
310
|
validation_policy=self._validate_and_get_validation_policy(stream_config),
|
334
311
|
errors_collector=self.errors_collector,
|
335
312
|
cursor=cursor,
|
336
|
-
use_file_transfer=
|
337
|
-
preserve_directory_structure=
|
338
|
-
)
|
339
|
-
|
340
|
-
def _make_permissions_stream(
|
341
|
-
self, stream_config: FileBasedStreamConfig, cursor: Optional[AbstractFileBasedCursor]
|
342
|
-
) -> AbstractFileBasedStream:
|
343
|
-
return PermissionsFileBasedStream(
|
344
|
-
config=stream_config,
|
345
|
-
catalog_schema=self.stream_schemas.get(stream_config.name),
|
346
|
-
stream_reader=self.stream_reader,
|
347
|
-
availability_strategy=self.availability_strategy,
|
348
|
-
discovery_policy=self.discovery_policy,
|
349
|
-
parsers=self.parsers,
|
350
|
-
validation_policy=self._validate_and_get_validation_policy(stream_config),
|
351
|
-
errors_collector=self.errors_collector,
|
352
|
-
cursor=cursor,
|
353
|
-
)
|
354
|
-
|
355
|
-
def _make_file_based_stream(
|
356
|
-
self,
|
357
|
-
stream_config: FileBasedStreamConfig,
|
358
|
-
cursor: Optional[AbstractFileBasedCursor],
|
359
|
-
parsed_config: AbstractFileBasedSpec,
|
360
|
-
) -> AbstractFileBasedStream:
|
361
|
-
"""
|
362
|
-
Creates different streams depending on the type of the transfer mode selected
|
363
|
-
"""
|
364
|
-
if use_permissions_transfer(parsed_config):
|
365
|
-
return self._make_permissions_stream(stream_config, cursor)
|
366
|
-
# we should have a stream for File transfer mode to decouple from DefaultFileBasedStream
|
367
|
-
else:
|
368
|
-
return self._make_default_stream(stream_config, cursor, parsed_config)
|
369
|
-
|
370
|
-
def _make_identities_stream(
|
371
|
-
self,
|
372
|
-
) -> Stream:
|
373
|
-
return FileIdentitiesStream(
|
374
|
-
catalog_schema=self.stream_schemas.get(FileIdentitiesStream.IDENTITIES_STREAM_NAME),
|
375
|
-
stream_reader=self.stream_reader,
|
376
|
-
discovery_policy=self.discovery_policy,
|
377
|
-
errors_collector=self.errors_collector,
|
313
|
+
use_file_transfer=self._use_file_transfer(parsed_config),
|
314
|
+
preserve_directory_structure=self._preserve_directory_structure(parsed_config),
|
378
315
|
)
|
379
316
|
|
380
317
|
def _get_stream_from_catalog(
|
@@ -441,3 +378,33 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
441
378
|
"`input_schema` and `schemaless` options cannot both be set",
|
442
379
|
model=FileBasedStreamConfig,
|
443
380
|
)
|
381
|
+
|
382
|
+
@staticmethod
|
383
|
+
def _use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
|
384
|
+
use_file_transfer = (
|
385
|
+
hasattr(parsed_config.delivery_method, "delivery_type")
|
386
|
+
and parsed_config.delivery_method.delivery_type == "use_file_transfer"
|
387
|
+
)
|
388
|
+
return use_file_transfer
|
389
|
+
|
390
|
+
@staticmethod
|
391
|
+
def _preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
|
392
|
+
"""
|
393
|
+
Determines whether to preserve directory structure during file transfer.
|
394
|
+
|
395
|
+
When enabled, files maintain their subdirectory paths in the destination.
|
396
|
+
When disabled, files are flattened to the root of the destination.
|
397
|
+
|
398
|
+
Args:
|
399
|
+
parsed_config: The parsed configuration containing delivery method settings
|
400
|
+
|
401
|
+
Returns:
|
402
|
+
True if directory structure should be preserved (default), False otherwise
|
403
|
+
"""
|
404
|
+
if (
|
405
|
+
FileBasedSource._use_file_transfer(parsed_config)
|
406
|
+
and hasattr(parsed_config.delivery_method, "preserve_directory_structure")
|
407
|
+
and parsed_config.delivery_method.preserve_directory_structure is not None
|
408
|
+
):
|
409
|
+
return parsed_config.delivery_method.preserve_directory_structure
|
410
|
+
return True
|