airbyte-cdk 6.12.4.dev0__py3-none-any.whl → 6.13.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +93 -34
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +0 -1
- airbyte_cdk/models/__init__.py +10 -11
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +1 -1
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -5
- airbyte_cdk/sources/declarative/auth/oauth.py +27 -12
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +25 -65
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +78 -1
- airbyte_cdk/sources/declarative/decoders/__init__.py +21 -3
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +10 -2
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +10 -6
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -14
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +49 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +96 -80
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +23 -5
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +52 -35
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +10 -7
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +9 -4
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +11 -6
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +16 -5
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +14 -13
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +7 -8
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +10 -7
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +12 -3
- airbyte_cdk/sources/declarative/resolvers/__init__.py +31 -8
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +20 -14
- airbyte_cdk/sources/declarative/retrievers/__init__.py +5 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +9 -32
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +72 -65
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -2
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +20 -3
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/file_types/__init__.py +12 -3
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/streams/__init__.py +1 -1
- airbyte_cdk/sources/streams/checkpoint/__init__.py +2 -3
- airbyte_cdk/sources/streams/concurrent/cursor.py +0 -1
- airbyte_cdk/sources/streams/http/__init__.py +2 -2
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +2 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +8 -3
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/mocker.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +1 -1
- airbyte_cdk/utils/__init__.py +1 -1
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/RECORD +59 -58
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -344
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/entry_points.txt +0 -0
@@ -2,17 +2,40 @@
|
|
2
2
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.resolvers.components_resolver import ComponentsResolver, ComponentMappingDefinition, ResolvedComponentMappingDefinition
|
6
|
-
from airbyte_cdk.sources.declarative.resolvers.http_components_resolver import HttpComponentsResolver
|
7
|
-
from airbyte_cdk.sources.declarative.resolvers.config_components_resolver import ConfigComponentsResolver, StreamConfig
|
8
|
-
from airbyte_cdk.sources.declarative.models import HttpComponentsResolver as HttpComponentsResolverModel
|
9
|
-
from airbyte_cdk.sources.declarative.models import ConfigComponentsResolver as ConfigComponentsResolverModel
|
10
|
-
from pydantic.v1 import BaseModel
|
11
5
|
from typing import Mapping
|
12
6
|
|
7
|
+
from pydantic.v1 import BaseModel
|
8
|
+
|
9
|
+
from airbyte_cdk.sources.declarative.models import (
|
10
|
+
ConfigComponentsResolver as ConfigComponentsResolverModel,
|
11
|
+
)
|
12
|
+
from airbyte_cdk.sources.declarative.models import (
|
13
|
+
HttpComponentsResolver as HttpComponentsResolverModel,
|
14
|
+
)
|
15
|
+
from airbyte_cdk.sources.declarative.resolvers.components_resolver import (
|
16
|
+
ComponentMappingDefinition,
|
17
|
+
ComponentsResolver,
|
18
|
+
ResolvedComponentMappingDefinition,
|
19
|
+
)
|
20
|
+
from airbyte_cdk.sources.declarative.resolvers.config_components_resolver import (
|
21
|
+
ConfigComponentsResolver,
|
22
|
+
StreamConfig,
|
23
|
+
)
|
24
|
+
from airbyte_cdk.sources.declarative.resolvers.http_components_resolver import (
|
25
|
+
HttpComponentsResolver,
|
26
|
+
)
|
27
|
+
|
13
28
|
COMPONENTS_RESOLVER_TYPE_MAPPING: Mapping[str, type[BaseModel]] = {
|
14
29
|
"HttpComponentsResolver": HttpComponentsResolverModel,
|
15
|
-
"ConfigComponentsResolver": ConfigComponentsResolverModel
|
30
|
+
"ConfigComponentsResolver": ConfigComponentsResolverModel,
|
16
31
|
}
|
17
32
|
|
18
|
-
__all__ = [
|
33
|
+
__all__ = [
|
34
|
+
"ComponentsResolver",
|
35
|
+
"HttpComponentsResolver",
|
36
|
+
"ComponentMappingDefinition",
|
37
|
+
"ResolvedComponentMappingDefinition",
|
38
|
+
"StreamConfig",
|
39
|
+
"ConfigComponentsResolver",
|
40
|
+
"COMPONENTS_RESOLVER_TYPE_MAPPING",
|
41
|
+
]
|
@@ -88,19 +88,25 @@ class HttpComponentsResolver(ComponentsResolver):
|
|
88
88
|
"""
|
89
89
|
kwargs = {"stream_template_config": stream_template_config}
|
90
90
|
|
91
|
-
for
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
91
|
+
for stream_slice in self.retriever.stream_slices():
|
92
|
+
for components_values in self.retriever.read_records(
|
93
|
+
records_schema={}, stream_slice=stream_slice
|
94
|
+
):
|
95
|
+
updated_config = deepcopy(stream_template_config)
|
96
|
+
kwargs["components_values"] = components_values # type: ignore[assignment] # component_values will always be of type Mapping[str, Any]
|
97
|
+
kwargs["stream_slice"] = stream_slice # type: ignore[assignment] # stream_slice will always be of type Mapping[str, Any]
|
98
|
+
|
99
|
+
for resolved_component in self._resolved_components:
|
100
|
+
valid_types = (
|
101
|
+
(resolved_component.value_type,) if resolved_component.value_type else None
|
102
|
+
)
|
103
|
+
value = resolved_component.value.eval(
|
104
|
+
self.config, valid_types=valid_types, **kwargs
|
105
|
+
)
|
102
106
|
|
103
|
-
|
104
|
-
|
107
|
+
path = [
|
108
|
+
path.eval(self.config, **kwargs) for path in resolved_component.field_path
|
109
|
+
]
|
110
|
+
dpath.set(updated_config, path, value)
|
105
111
|
|
106
|
-
|
112
|
+
yield updated_config
|
@@ -2,8 +2,11 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
6
|
-
from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever, SimpleRetrieverTestReadDecorator
|
7
5
|
from airbyte_cdk.sources.declarative.retrievers.async_retriever import AsyncRetriever
|
6
|
+
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
7
|
+
from airbyte_cdk.sources.declarative.retrievers.simple_retriever import (
|
8
|
+
SimpleRetriever,
|
9
|
+
SimpleRetrieverTestReadDecorator,
|
10
|
+
)
|
8
11
|
|
9
12
|
__all__ = ["Retriever", "SimpleRetriever", "SimpleRetrieverTestReadDecorator", "AsyncRetriever"]
|
@@ -1,8 +1,8 @@
|
|
1
1
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
2
|
|
3
3
|
|
4
|
-
from dataclasses import InitVar, dataclass
|
5
|
-
from typing import Any,
|
4
|
+
from dataclasses import InitVar, dataclass
|
5
|
+
from typing import Any, Iterable, Mapping, Optional
|
6
6
|
|
7
7
|
from typing_extensions import deprecated
|
8
8
|
|
@@ -12,9 +12,10 @@ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
|
|
12
12
|
AsyncPartition,
|
13
13
|
)
|
14
14
|
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
|
15
|
-
from airbyte_cdk.sources.declarative.partition_routers import
|
16
|
-
|
17
|
-
|
15
|
+
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
|
16
|
+
AsyncJobPartitionRouter,
|
17
|
+
)
|
18
|
+
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
18
19
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
19
20
|
from airbyte_cdk.sources.streams.core import StreamData
|
20
21
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
@@ -29,15 +30,10 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
|
29
30
|
class AsyncRetriever(Retriever):
|
30
31
|
config: Config
|
31
32
|
parameters: InitVar[Mapping[str, Any]]
|
32
|
-
job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
|
33
33
|
record_selector: RecordSelector
|
34
|
-
stream_slicer:
|
35
|
-
default_factory=lambda: SinglePartitionRouter(parameters={})
|
36
|
-
)
|
34
|
+
stream_slicer: AsyncJobPartitionRouter
|
37
35
|
|
38
36
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
39
|
-
self._job_orchestrator_factory = self.job_orchestrator_factory
|
40
|
-
self.__job_orchestrator: Optional[AsyncJobOrchestrator] = None
|
41
37
|
self._parameters = parameters
|
42
38
|
|
43
39
|
@property
|
@@ -54,17 +50,6 @@ class AsyncRetriever(Retriever):
|
|
54
50
|
"""
|
55
51
|
pass
|
56
52
|
|
57
|
-
@property
|
58
|
-
def _job_orchestrator(self) -> AsyncJobOrchestrator:
|
59
|
-
if not self.__job_orchestrator:
|
60
|
-
raise AirbyteTracedException(
|
61
|
-
message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
|
62
|
-
internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
|
63
|
-
failure_type=FailureType.system_error,
|
64
|
-
)
|
65
|
-
|
66
|
-
return self.__job_orchestrator
|
67
|
-
|
68
53
|
def _get_stream_state(self) -> StreamState:
|
69
54
|
"""
|
70
55
|
Gets the current state of the stream.
|
@@ -99,15 +84,7 @@ class AsyncRetriever(Retriever):
|
|
99
84
|
return stream_slice["partition"] # type: ignore # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices
|
100
85
|
|
101
86
|
def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
|
102
|
-
|
103
|
-
self.__job_orchestrator = self._job_orchestrator_factory(slices)
|
104
|
-
|
105
|
-
for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
|
106
|
-
yield StreamSlice(
|
107
|
-
partition=dict(completed_partition.stream_slice.partition)
|
108
|
-
| {"partition": completed_partition},
|
109
|
-
cursor_slice=completed_partition.stream_slice.cursor_slice,
|
110
|
-
)
|
87
|
+
return self.stream_slicer.stream_slices()
|
111
88
|
|
112
89
|
def read_records(
|
113
90
|
self,
|
@@ -116,7 +93,7 @@ class AsyncRetriever(Retriever):
|
|
116
93
|
) -> Iterable[StreamData]:
|
117
94
|
stream_state: StreamState = self._get_stream_state()
|
118
95
|
partition: AsyncPartition = self._validate_and_get_stream_slice_partition(stream_slice)
|
119
|
-
records: Iterable[Mapping[str, Any]] = self.
|
96
|
+
records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(partition)
|
120
97
|
|
121
98
|
yield from self.record_selector.filter_and_transform(
|
122
99
|
all_data=records,
|
@@ -6,18 +6,7 @@ import json
|
|
6
6
|
from dataclasses import InitVar, dataclass, field
|
7
7
|
from functools import partial
|
8
8
|
from itertools import islice
|
9
|
-
from typing import
|
10
|
-
Any,
|
11
|
-
Callable,
|
12
|
-
Iterable,
|
13
|
-
List,
|
14
|
-
Mapping,
|
15
|
-
MutableMapping,
|
16
|
-
Optional,
|
17
|
-
Set,
|
18
|
-
Tuple,
|
19
|
-
Union,
|
20
|
-
)
|
9
|
+
from typing import Any, Callable, Iterable, List, Mapping, Optional, Set, Tuple, Union
|
21
10
|
|
22
11
|
import requests
|
23
12
|
|
@@ -90,9 +79,6 @@ class SimpleRetriever(Retriever):
|
|
90
79
|
|
91
80
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
92
81
|
self._paginator = self.paginator or NoPagination(parameters=parameters)
|
93
|
-
self._last_response: Optional[requests.Response] = None
|
94
|
-
self._last_page_size: int = 0
|
95
|
-
self._last_record: Optional[Record] = None
|
96
82
|
self._parameters = parameters
|
97
83
|
self._name = (
|
98
84
|
InterpolatedString(self._name, parameters=parameters)
|
@@ -100,10 +86,6 @@ class SimpleRetriever(Retriever):
|
|
100
86
|
else self._name
|
101
87
|
)
|
102
88
|
|
103
|
-
# This mapping is used during a resumable full refresh syncs to indicate whether a partition has started syncing
|
104
|
-
# records. Partitions serve as the key and map to True if they already began processing records
|
105
|
-
self._partition_started: MutableMapping[Any, bool] = dict()
|
106
|
-
|
107
89
|
@property # type: ignore
|
108
90
|
def name(self) -> str:
|
109
91
|
"""
|
@@ -178,7 +160,7 @@ class SimpleRetriever(Retriever):
|
|
178
160
|
stream_slice,
|
179
161
|
next_page_token,
|
180
162
|
self._paginator.get_request_headers,
|
181
|
-
self.
|
163
|
+
self.stream_slicer.get_request_headers,
|
182
164
|
)
|
183
165
|
if isinstance(headers, str):
|
184
166
|
raise ValueError("Request headers cannot be a string")
|
@@ -251,17 +233,13 @@ class SimpleRetriever(Retriever):
|
|
251
233
|
raise ValueError("Request body json cannot be a string")
|
252
234
|
return body_json
|
253
235
|
|
254
|
-
def _paginator_path(
|
255
|
-
self,
|
256
|
-
) -> Optional[str]:
|
236
|
+
def _paginator_path(self, next_page_token: Optional[Mapping[str, Any]] = None) -> Optional[str]:
|
257
237
|
"""
|
258
238
|
If the paginator points to a path, follow it, else return nothing so the requester is used.
|
259
|
-
:param stream_state:
|
260
|
-
:param stream_slice:
|
261
239
|
:param next_page_token:
|
262
240
|
:return:
|
263
241
|
"""
|
264
|
-
return self._paginator.path()
|
242
|
+
return self._paginator.path(next_page_token=next_page_token)
|
265
243
|
|
266
244
|
def _parse_response(
|
267
245
|
self,
|
@@ -272,22 +250,15 @@ class SimpleRetriever(Retriever):
|
|
272
250
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
273
251
|
) -> Iterable[Record]:
|
274
252
|
if not response:
|
275
|
-
self._last_response = None
|
276
253
|
yield from []
|
277
254
|
else:
|
278
|
-
self.
|
279
|
-
record_generator = self.record_selector.select_records(
|
255
|
+
yield from self.record_selector.select_records(
|
280
256
|
response=response,
|
281
257
|
stream_state=stream_state,
|
282
258
|
records_schema=records_schema,
|
283
259
|
stream_slice=stream_slice,
|
284
260
|
next_page_token=next_page_token,
|
285
261
|
)
|
286
|
-
self._last_page_size = 0
|
287
|
-
for record in record_generator:
|
288
|
-
self._last_page_size += 1
|
289
|
-
self._last_record = record
|
290
|
-
yield record
|
291
262
|
|
292
263
|
@property # type: ignore
|
293
264
|
def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
|
@@ -299,7 +270,13 @@ class SimpleRetriever(Retriever):
|
|
299
270
|
if not isinstance(value, property):
|
300
271
|
self._primary_key = value
|
301
272
|
|
302
|
-
def _next_page_token(
|
273
|
+
def _next_page_token(
|
274
|
+
self,
|
275
|
+
response: requests.Response,
|
276
|
+
last_page_size: int,
|
277
|
+
last_record: Optional[Record],
|
278
|
+
last_page_token_value: Optional[Any],
|
279
|
+
) -> Optional[Mapping[str, Any]]:
|
303
280
|
"""
|
304
281
|
Specifies a pagination strategy.
|
305
282
|
|
@@ -307,7 +284,12 @@ class SimpleRetriever(Retriever):
|
|
307
284
|
|
308
285
|
:return: The token for the next page from the input response object. Returning None means there are no more pages to read in this response.
|
309
286
|
"""
|
310
|
-
return self._paginator.next_page_token(
|
287
|
+
return self._paginator.next_page_token(
|
288
|
+
response=response,
|
289
|
+
last_page_size=last_page_size,
|
290
|
+
last_record=last_record,
|
291
|
+
last_page_token_value=last_page_token_value,
|
292
|
+
)
|
311
293
|
|
312
294
|
def _fetch_next_page(
|
313
295
|
self,
|
@@ -316,7 +298,7 @@ class SimpleRetriever(Retriever):
|
|
316
298
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
317
299
|
) -> Optional[requests.Response]:
|
318
300
|
return self.requester.send_request(
|
319
|
-
path=self._paginator_path(),
|
301
|
+
path=self._paginator_path(next_page_token=next_page_token),
|
320
302
|
stream_state=stream_state,
|
321
303
|
stream_slice=stream_slice,
|
322
304
|
next_page_token=next_page_token,
|
@@ -345,20 +327,37 @@ class SimpleRetriever(Retriever):
|
|
345
327
|
# This logic is similar to _read_pages in the HttpStream class. When making changes here, consider making changes there as well.
|
346
328
|
def _read_pages(
|
347
329
|
self,
|
348
|
-
records_generator_fn: Callable[[Optional[requests.Response]], Iterable[
|
330
|
+
records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
|
349
331
|
stream_state: Mapping[str, Any],
|
350
332
|
stream_slice: StreamSlice,
|
351
|
-
) -> Iterable[
|
333
|
+
) -> Iterable[Record]:
|
352
334
|
pagination_complete = False
|
353
|
-
|
335
|
+
initial_token = self._paginator.get_initial_token()
|
336
|
+
next_page_token: Optional[Mapping[str, Any]] = (
|
337
|
+
{"next_page_token": initial_token} if initial_token else None
|
338
|
+
)
|
354
339
|
while not pagination_complete:
|
355
340
|
response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
|
356
|
-
|
341
|
+
|
342
|
+
last_page_size = 0
|
343
|
+
last_record: Optional[Record] = None
|
344
|
+
for record in records_generator_fn(response):
|
345
|
+
last_page_size += 1
|
346
|
+
last_record = record
|
347
|
+
yield record
|
357
348
|
|
358
349
|
if not response:
|
359
350
|
pagination_complete = True
|
360
351
|
else:
|
361
|
-
|
352
|
+
last_page_token_value = (
|
353
|
+
next_page_token.get("next_page_token") if next_page_token else None
|
354
|
+
)
|
355
|
+
next_page_token = self._next_page_token(
|
356
|
+
response=response,
|
357
|
+
last_page_size=last_page_size,
|
358
|
+
last_record=last_record,
|
359
|
+
last_page_token_value=last_page_token_value,
|
360
|
+
)
|
362
361
|
if not next_page_token:
|
363
362
|
pagination_complete = True
|
364
363
|
|
@@ -367,19 +366,38 @@ class SimpleRetriever(Retriever):
|
|
367
366
|
|
368
367
|
def _read_single_page(
|
369
368
|
self,
|
370
|
-
records_generator_fn: Callable[[Optional[requests.Response]], Iterable[
|
369
|
+
records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
|
371
370
|
stream_state: Mapping[str, Any],
|
372
371
|
stream_slice: StreamSlice,
|
373
372
|
) -> Iterable[StreamData]:
|
374
|
-
|
375
|
-
|
373
|
+
initial_token = stream_state.get("next_page_token")
|
374
|
+
if initial_token is None:
|
375
|
+
initial_token = self._paginator.get_initial_token()
|
376
|
+
next_page_token: Optional[Mapping[str, Any]] = (
|
377
|
+
{"next_page_token": initial_token} if initial_token else None
|
378
|
+
)
|
379
|
+
|
380
|
+
response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
|
381
|
+
|
382
|
+
last_page_size = 0
|
383
|
+
last_record: Optional[Record] = None
|
384
|
+
for record in records_generator_fn(response):
|
385
|
+
last_page_size += 1
|
386
|
+
last_record = record
|
387
|
+
yield record
|
376
388
|
|
377
389
|
if not response:
|
378
|
-
next_page_token
|
390
|
+
next_page_token = {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
|
379
391
|
else:
|
380
|
-
|
381
|
-
|
382
|
-
|
392
|
+
last_page_token_value = (
|
393
|
+
next_page_token.get("next_page_token") if next_page_token else None
|
394
|
+
)
|
395
|
+
next_page_token = self._next_page_token(
|
396
|
+
response=response,
|
397
|
+
last_page_size=last_page_size,
|
398
|
+
last_record=last_record,
|
399
|
+
last_page_token_value=last_page_token_value,
|
400
|
+
) or {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
|
383
401
|
|
384
402
|
if self.cursor:
|
385
403
|
self.cursor.close_slice(
|
@@ -414,25 +432,14 @@ class SimpleRetriever(Retriever):
|
|
414
432
|
if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
|
415
433
|
stream_state = self.state
|
416
434
|
|
417
|
-
# Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
|
418
|
-
# The platform deletes stream state for full refresh streams before starting a
|
419
|
-
# this value existing for the initial attempt
|
435
|
+
# Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
|
436
|
+
# fetch more records. The platform deletes stream state for full refresh streams before starting a
|
437
|
+
# new job, so we don't need to worry about this value existing for the initial attempt
|
420
438
|
if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
|
421
439
|
return
|
422
|
-
cursor_value = stream_state.get("next_page_token")
|
423
|
-
|
424
|
-
# The first attempt to read a page for the current partition should reset the paginator to the current
|
425
|
-
# cursor state which is initially assigned to the incoming state from the platform
|
426
|
-
partition_key = self._to_partition_key(_slice.partition)
|
427
|
-
if partition_key not in self._partition_started:
|
428
|
-
self._partition_started[partition_key] = True
|
429
|
-
self._paginator.reset(reset_value=cursor_value)
|
430
440
|
|
431
441
|
yield from self._read_single_page(record_generator, stream_state, _slice)
|
432
442
|
else:
|
433
|
-
# Fixing paginator types has a long tail of dependencies
|
434
|
-
self._paginator.reset()
|
435
|
-
|
436
443
|
for stream_data in self._read_pages(record_generator, self.state, _slice):
|
437
444
|
current_record = self._extract_record(stream_data, _slice)
|
438
445
|
if self.cursor and current_record:
|
@@ -518,7 +525,7 @@ class SimpleRetriever(Retriever):
|
|
518
525
|
stream_state: Mapping[str, Any],
|
519
526
|
records_schema: Mapping[str, Any],
|
520
527
|
stream_slice: Optional[StreamSlice],
|
521
|
-
) -> Iterable[
|
528
|
+
) -> Iterable[Record]:
|
522
529
|
yield from self._parse_response(
|
523
530
|
response,
|
524
531
|
stream_slice=stream_slice,
|
@@ -562,7 +569,7 @@ class SimpleRetrieverTestReadDecorator(SimpleRetriever):
|
|
562
569
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
563
570
|
) -> Optional[requests.Response]:
|
564
571
|
return self.requester.send_request(
|
565
|
-
path=self._paginator_path(),
|
572
|
+
path=self._paginator_path(next_page_token=next_page_token),
|
566
573
|
stream_state=stream_state,
|
567
574
|
stream_slice=stream_slice,
|
568
575
|
next_page_token=next_page_token,
|
@@ -3,9 +3,21 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from airbyte_cdk.sources.declarative.schema.default_schema_loader import DefaultSchemaLoader
|
6
|
+
from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import (
|
7
|
+
DynamicSchemaLoader,
|
8
|
+
SchemaTypeIdentifier,
|
9
|
+
TypesMap,
|
10
|
+
)
|
6
11
|
from airbyte_cdk.sources.declarative.schema.inline_schema_loader import InlineSchemaLoader
|
7
12
|
from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
|
8
13
|
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
9
|
-
from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import DynamicSchemaLoader, TypesMap, SchemaTypeIdentifier
|
10
14
|
|
11
|
-
__all__ = [
|
15
|
+
__all__ = [
|
16
|
+
"JsonFileSchemaLoader",
|
17
|
+
"DefaultSchemaLoader",
|
18
|
+
"SchemaLoader",
|
19
|
+
"InlineSchemaLoader",
|
20
|
+
"DynamicSchemaLoader",
|
21
|
+
"TypesMap",
|
22
|
+
"SchemaTypeIdentifier",
|
23
|
+
]
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
|
6
6
|
from copy import deepcopy
|
7
|
-
from dataclasses import InitVar, dataclass
|
7
|
+
from dataclasses import InitVar, dataclass, field
|
8
8
|
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
9
9
|
|
10
10
|
import dpath
|
@@ -13,8 +13,9 @@ from typing_extensions import deprecated
|
|
13
13
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
14
14
|
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
15
15
|
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
16
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
16
17
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
17
|
-
from airbyte_cdk.sources.types import Config
|
18
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
18
19
|
|
19
20
|
AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
|
20
21
|
"string": {"type": ["null", "string"]},
|
@@ -103,6 +104,7 @@ class DynamicSchemaLoader(SchemaLoader):
|
|
103
104
|
config: Config
|
104
105
|
parameters: InitVar[Mapping[str, Any]]
|
105
106
|
schema_type_identifier: SchemaTypeIdentifier
|
107
|
+
schema_transformations: List[RecordTransformation] = field(default_factory=lambda: [])
|
106
108
|
|
107
109
|
def get_json_schema(self) -> Mapping[str, Any]:
|
108
110
|
"""
|
@@ -128,12 +130,27 @@ class DynamicSchemaLoader(SchemaLoader):
|
|
128
130
|
)
|
129
131
|
properties[key] = value
|
130
132
|
|
133
|
+
transformed_properties = self._transform(properties, {})
|
134
|
+
|
131
135
|
return {
|
132
136
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
133
137
|
"type": "object",
|
134
|
-
"properties":
|
138
|
+
"properties": transformed_properties,
|
135
139
|
}
|
136
140
|
|
141
|
+
def _transform(
|
142
|
+
self,
|
143
|
+
properties: Mapping[str, Any],
|
144
|
+
stream_state: StreamState,
|
145
|
+
stream_slice: Optional[StreamSlice] = None,
|
146
|
+
) -> Mapping[str, Any]:
|
147
|
+
for transformation in self.schema_transformations:
|
148
|
+
transformation.transform(
|
149
|
+
properties, # type: ignore # properties has type Mapping[str, Any], but Dict[str, Any] expected
|
150
|
+
config=self.config,
|
151
|
+
)
|
152
|
+
return properties
|
153
|
+
|
137
154
|
def _get_key(
|
138
155
|
self,
|
139
156
|
raw_schema: MutableMapping[str, Any],
|
@@ -1,4 +1,11 @@
|
|
1
|
-
from .abstract_file_based_availability_strategy import
|
1
|
+
from .abstract_file_based_availability_strategy import (
|
2
|
+
AbstractFileBasedAvailabilityStrategy,
|
3
|
+
AbstractFileBasedAvailabilityStrategyWrapper,
|
4
|
+
)
|
2
5
|
from .default_file_based_availability_strategy import DefaultFileBasedAvailabilityStrategy
|
3
6
|
|
4
|
-
__all__ = [
|
7
|
+
__all__ = [
|
8
|
+
"AbstractFileBasedAvailabilityStrategy",
|
9
|
+
"AbstractFileBasedAvailabilityStrategyWrapper",
|
10
|
+
"DefaultFileBasedAvailabilityStrategy",
|
11
|
+
]
|
@@ -1,4 +1,8 @@
|
|
1
|
-
from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import
|
2
|
-
|
1
|
+
from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import (
|
2
|
+
AbstractDiscoveryPolicy,
|
3
|
+
)
|
4
|
+
from airbyte_cdk.sources.file_based.discovery_policy.default_discovery_policy import (
|
5
|
+
DefaultDiscoveryPolicy,
|
6
|
+
)
|
3
7
|
|
4
8
|
__all__ = ["AbstractDiscoveryPolicy", "DefaultDiscoveryPolicy"]
|
@@ -1,8 +1,8 @@
|
|
1
1
|
from typing import Any, Mapping, Type
|
2
2
|
|
3
3
|
from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
|
4
|
-
from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat
|
5
4
|
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
5
|
+
from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat
|
6
6
|
from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
|
7
7
|
from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
|
8
8
|
from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat
|
@@ -10,11 +10,11 @@ from airbyte_cdk.sources.file_based.config.unstructured_format import Unstructur
|
|
10
10
|
from .avro_parser import AvroParser
|
11
11
|
from .csv_parser import CsvParser
|
12
12
|
from .excel_parser import ExcelParser
|
13
|
+
from .file_transfer import FileTransfer
|
13
14
|
from .file_type_parser import FileTypeParser
|
14
15
|
from .jsonl_parser import JsonlParser
|
15
16
|
from .parquet_parser import ParquetParser
|
16
17
|
from .unstructured_parser import UnstructuredParser
|
17
|
-
from .file_transfer import FileTransfer
|
18
18
|
|
19
19
|
default_parsers: Mapping[Type[Any], FileTypeParser] = {
|
20
20
|
AvroFormat: AvroParser(),
|
@@ -25,4 +25,13 @@ default_parsers: Mapping[Type[Any], FileTypeParser] = {
|
|
25
25
|
UnstructuredFormat: UnstructuredParser(),
|
26
26
|
}
|
27
27
|
|
28
|
-
__all__ = [
|
28
|
+
__all__ = [
|
29
|
+
"AvroParser",
|
30
|
+
"CsvParser",
|
31
|
+
"ExcelParser",
|
32
|
+
"JsonlParser",
|
33
|
+
"ParquetParser",
|
34
|
+
"UnstructuredParser",
|
35
|
+
"FileTransfer",
|
36
|
+
"default_parsers",
|
37
|
+
]
|
@@ -1,4 +1,6 @@
|
|
1
|
-
from airbyte_cdk.sources.file_based.schema_validation_policies.abstract_schema_validation_policy import
|
1
|
+
from airbyte_cdk.sources.file_based.schema_validation_policies.abstract_schema_validation_policy import (
|
2
|
+
AbstractSchemaValidationPolicy,
|
3
|
+
)
|
2
4
|
from airbyte_cdk.sources.file_based.schema_validation_policies.default_schema_validation_policies import (
|
3
5
|
DEFAULT_SCHEMA_VALIDATION_POLICIES,
|
4
6
|
EmitRecordPolicy,
|
@@ -2,4 +2,8 @@ from .abstract_concurrent_file_based_cursor import AbstractConcurrentFileBasedCu
|
|
2
2
|
from .file_based_concurrent_cursor import FileBasedConcurrentCursor
|
3
3
|
from .file_based_final_state_cursor import FileBasedFinalStateCursor
|
4
4
|
|
5
|
-
__all__ = [
|
5
|
+
__all__ = [
|
6
|
+
"AbstractConcurrentFileBasedCursor",
|
7
|
+
"FileBasedConcurrentCursor",
|
8
|
+
"FileBasedFinalStateCursor",
|
9
|
+
]
|
@@ -10,4 +10,10 @@ from .repository import (
|
|
10
10
|
NoopMessageRepository,
|
11
11
|
)
|
12
12
|
|
13
|
-
__all__ = [
|
13
|
+
__all__ = [
|
14
|
+
"InMemoryMessageRepository",
|
15
|
+
"LogAppenderMessageRepositoryDecorator",
|
16
|
+
"LogMessage",
|
17
|
+
"MessageRepository",
|
18
|
+
"NoopMessageRepository",
|
19
|
+
]
|
@@ -3,6 +3,6 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
# Initialize Streams Package
|
6
|
-
from .core import NO_CURSOR_STATE_KEY,
|
6
|
+
from .core import NO_CURSOR_STATE_KEY, CheckpointMixin, IncrementalMixin, Stream
|
7
7
|
|
8
8
|
__all__ = ["NO_CURSOR_STATE_KEY", "IncrementalMixin", "CheckpointMixin", "Stream"]
|
@@ -8,12 +8,11 @@ from .checkpoint_reader import (
|
|
8
8
|
FullRefreshCheckpointReader,
|
9
9
|
IncrementalCheckpointReader,
|
10
10
|
LegacyCursorBasedCheckpointReader,
|
11
|
-
ResumableFullRefreshCheckpointReader
|
11
|
+
ResumableFullRefreshCheckpointReader,
|
12
12
|
)
|
13
13
|
from .cursor import Cursor
|
14
14
|
from .resumable_full_refresh_cursor import ResumableFullRefreshCursor
|
15
15
|
|
16
|
-
|
17
16
|
__all__ = [
|
18
17
|
"CheckpointMode",
|
19
18
|
"CheckpointReader",
|
@@ -23,5 +22,5 @@ __all__ = [
|
|
23
22
|
"IncrementalCheckpointReader",
|
24
23
|
"LegacyCursorBasedCheckpointReader",
|
25
24
|
"ResumableFullRefreshCheckpointReader",
|
26
|
-
"ResumableFullRefreshCursor"
|
25
|
+
"ResumableFullRefreshCursor",
|
27
26
|
]
|