airbyte-cdk 6.13.1.dev41012__py3-none-any.whl → 6.14.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +27 -51
- airbyte_cdk/sources/declarative/interpolation/macros.py +21 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +52 -35
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +10 -7
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +9 -4
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +11 -6
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +13 -11
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +14 -13
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +7 -8
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +10 -7
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +71 -64
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +3 -3
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +0 -6
- airbyte_cdk/sources/file_based/exceptions.py +0 -26
- airbyte_cdk/sources/file_based/file_based_source.py +5 -19
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +4 -17
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +2 -40
- {airbyte_cdk-6.13.1.dev41012.dist-info → airbyte_cdk-6.14.0.dev1.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.13.1.dev41012.dist-info → airbyte_cdk-6.14.0.dev1.dist-info}/RECORD +22 -22
- {airbyte_cdk-6.13.1.dev41012.dist-info → airbyte_cdk-6.14.0.dev1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.13.1.dev41012.dist-info → airbyte_cdk-6.14.0.dev1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.13.1.dev41012.dist-info → airbyte_cdk-6.14.0.dev1.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/concurrent_declarative_source.py
CHANGED
@@ -3,7 +3,7 @@
 #
 
 import logging
-from typing import Any,
+from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple
 
 from airbyte_cdk.models import (
     AirbyteCatalog,
@@ -28,15 +28,11 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     DatetimeBasedCursor as DatetimeBasedCursorModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    DeclarativeStream as DeclarativeStreamModel,
-)
 from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
-    ComponentDefinition,
     ModelToComponentFactory,
 )
 from airbyte_cdk.sources.declarative.requesters import HttpRequester
-from airbyte_cdk.sources.declarative.retrievers import
+from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
 from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
     DeclarativePartitionFactory,
     StreamSlicerPartitionGenerator,
@@ -52,7 +48,6 @@ from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
 from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
 from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
 from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
-from airbyte_cdk.sources.types import Config, StreamState
 
 
 class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
@@ -194,10 +189,11 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             # Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
             # these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
             # so we need to treat them as synchronous
-            if (
-
-                and name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
+            if isinstance(declarative_stream, DeclarativeStream) and (
+                name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
                 == "SimpleRetriever"
+                or name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
+                == "AsyncRetriever"
             ):
                 incremental_sync_component_definition = name_to_stream_mapping[
                     declarative_stream.name
@@ -217,6 +213,11 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                     and not incremental_sync_component_definition
                 )
 
+                is_async_job_stream = (
+                    name_to_stream_mapping[declarative_stream.name].get("retriever", {}).get("type")
+                    == "AsyncRetriever"
+                )
+
                 if self._is_datetime_incremental_without_partition_routing(
                     declarative_stream, incremental_sync_component_definition
                 ):
@@ -234,15 +235,25 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         stream_state=stream_state,
                     )
 
+                    retriever = declarative_stream.retriever
+
+                    # This is an optimization so that we don't invoke any cursor or state management flows within the
+                    # low-code framework because state management is handled through the ConcurrentCursor.
+                    if declarative_stream and isinstance(retriever, SimpleRetriever):
+                        # Also a temporary hack. In the legacy Stream implementation, as part of the read,
+                        # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
+                        # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
+                        # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
+                        # still rely on a DatetimeBasedCursor that is properly initialized with state.
+                        if retriever.cursor:
+                            retriever.cursor.set_initial_state(stream_state=stream_state)
+                        retriever.cursor = None
+
                     partition_generator = StreamSlicerPartitionGenerator(
                         DeclarativePartitionFactory(
                             declarative_stream.name,
                             declarative_stream.get_json_schema(),
-
-                            name_to_stream_mapping[declarative_stream.name],
-                            config,
-                            stream_state,
-                        ),
+                            retriever,
                             self.message_repository,
                         ),
                         cursor,
@@ -272,11 +283,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                     DeclarativePartitionFactory(
                         declarative_stream.name,
                         declarative_stream.get_json_schema(),
-
-                        name_to_stream_mapping[declarative_stream.name],
-                        config,
-                        {},
-                    ),
+                        declarative_stream.retriever,
                         self.message_repository,
                     ),
                     declarative_stream.retriever.stream_slicer,
@@ -415,34 +422,3 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 if stream.stream.name not in concurrent_stream_names
             ]
         )
-
-    def _retriever_factory(
-        self, stream_config: ComponentDefinition, source_config: Config, stream_state: StreamState
-    ) -> Callable[[], Retriever]:
-        def _factory_method() -> Retriever:
-            declarative_stream: DeclarativeStream = self._constructor.create_component(
-                DeclarativeStreamModel,
-                stream_config,
-                source_config,
-                emit_connector_builder_messages=self._emit_connector_builder_messages,
-            )
-
-            # This is an optimization so that we don't invoke any cursor or state management flows within the
-            # low-code framework because state management is handled through the ConcurrentCursor.
-            if (
-                declarative_stream
-                and declarative_stream.retriever
-                and isinstance(declarative_stream.retriever, SimpleRetriever)
-            ):
-                # Also a temporary hack. In the legacy Stream implementation, as part of the read, set_initial_state() is
-                # called to instantiate incoming state on the cursor. Although we no longer rely on the legacy low-code cursor
-                # for concurrent checkpointing, low-code components like StopConditionPaginationStrategyDecorator and
-                # ClientSideIncrementalRecordFilterDecorator still rely on a DatetimeBasedCursor that is properly initialized
-                # with state.
-                if declarative_stream.retriever.cursor:
-                    declarative_stream.retriever.cursor.set_initial_state(stream_state=stream_state)
-                declarative_stream.retriever.cursor = None
-
-            return declarative_stream.retriever
-
-        return _factory_method
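The net effect of these hunks: DeclarativePartitionFactory now receives the stream's retriever instance directly instead of a factory callable, and the cursor hand-off happens inline. A condensed restatement of the new call shape (names taken from the hunks above; this is a sketch, not runnable on its own):

    retriever = declarative_stream.retriever
    if retriever.cursor:
        # seed legacy low-code components that still expect an initialized cursor
        retriever.cursor.set_initial_state(stream_state=stream_state)
    retriever.cursor = None  # concurrent checkpointing is handled by ConcurrentCursor

    partition_generator = StreamSlicerPartitionGenerator(
        DeclarativePartitionFactory(
            declarative_stream.name,
            declarative_stream.get_json_schema(),
            retriever,  # the retriever itself, no longer a _retriever_factory closure
            self.message_repository,
        ),
        cursor,
    )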
airbyte_cdk/sources/declarative/interpolation/macros.py
CHANGED
@@ -94,6 +94,26 @@ def max(*args: typing.Any) -> typing.Any:
     return builtins.max(*args)
 
 
+def min(*args: typing.Any) -> typing.Any:
+    """
+    Returns smallest object of an iterable, or two or more arguments.
+
+    min(iterable, *[, default=obj, key=func]) -> value
+    min(arg1, arg2, *args, *[, key=func]) -> value
+
+    Usage:
+    `"{{ min(2,3) }}"`
+
+    With a single iterable argument, return its smallest item. The
+    default keyword-only argument specifies an object to return if
+    the provided iterable is empty.
+    With two or more arguments, return the smallest argument.
+    :param args: args to compare
+    :return: smallest argument
+    """
+    return builtins.min(*args)
+
+
 def day_delta(num_days: int, format: str = "%Y-%m-%dT%H:%M:%S.%f%z") -> str:
     """
     Returns datetime of now() + num_days
@@ -147,6 +167,7 @@ _macros_list = [
     today_utc,
     timestamp,
     max,
+    min,
     day_delta,
     duration,
     format_datetime,
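A quick sketch of the new macro from a connector manifest's point of view; this assumes the CDK's JinjaInterpolation entry point and is illustrative rather than taken from the diff:

    from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation

    interpolation = JinjaInterpolation()
    # min() is now available inside interpolated strings, mirroring the existing max()
    result = interpolation.eval("{{ min(2, 3) }}", config={})
    assert result == 2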
airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py
CHANGED
@@ -112,27 +112,39 @@ class DefaultPaginator(Paginator):
         )
         if isinstance(self.url_base, str):
             self.url_base = InterpolatedString(string=self.url_base, parameters=parameters)
-
+
+    def get_initial_token(self) -> Optional[Any]:
+        """
+        Return the page token that should be used for the first request of a stream
+
+        WARNING: get_initial_token() should not be used by streams that use RFR that perform checkpointing
+        of state using page numbers. Because paginators are stateless
+        """
+        return self.pagination_strategy.initial_token
 
     def next_page_token(
-        self,
+        self,
+        response: requests.Response,
+        last_page_size: int,
+        last_record: Optional[Record],
+        last_page_token_value: Optional[Any] = None,
     ) -> Optional[Mapping[str, Any]]:
-
-            response,
+        next_page_token = self.pagination_strategy.next_page_token(
+            response=response,
+            last_page_size=last_page_size,
+            last_record=last_record,
+            last_page_token_value=last_page_token_value,
         )
-        if
-            return {"next_page_token":
+        if next_page_token:
+            return {"next_page_token": next_page_token}
         else:
             return None
 
-    def path(self) -> Optional[str]:
-        if
-
-        and self.page_token_option
-        and isinstance(self.page_token_option, RequestPath)
-        ):
+    def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
+        token = next_page_token.get("next_page_token") if next_page_token else None
+        if token and self.page_token_option and isinstance(self.page_token_option, RequestPath):
             # Replace url base to only return the path
-            return str(
+            return str(token).replace(self.url_base.eval(self.config), "")  # type: ignore # url_base is casted to a InterpolatedString in __post_init__
         else:
             return None
 
@@ -143,7 +155,7 @@ class DefaultPaginator(Paginator):
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> MutableMapping[str, Any]:
-        return self._get_request_options(RequestOptionType.request_parameter)
+        return self._get_request_options(RequestOptionType.request_parameter, next_page_token)
 
     def get_request_headers(
         self,
@@ -152,7 +164,7 @@ class DefaultPaginator(Paginator):
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, str]:
-        return self._get_request_options(RequestOptionType.header)
+        return self._get_request_options(RequestOptionType.header, next_page_token)
 
     def get_request_body_data(
         self,
@@ -161,7 +173,7 @@ class DefaultPaginator(Paginator):
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, Any]:
-        return self._get_request_options(RequestOptionType.body_data)
+        return self._get_request_options(RequestOptionType.body_data, next_page_token)
 
     def get_request_body_json(
         self,
@@ -170,25 +182,21 @@ class DefaultPaginator(Paginator):
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, Any]:
-        return self._get_request_options(RequestOptionType.body_json)
-
-    def reset(self, reset_value: Optional[Any] = None) -> None:
-        if reset_value:
-            self.pagination_strategy.reset(reset_value=reset_value)
-        else:
-            self.pagination_strategy.reset()
-        self._token = self.pagination_strategy.initial_token
+        return self._get_request_options(RequestOptionType.body_json, next_page_token)
 
-    def _get_request_options(
+    def _get_request_options(
+        self, option_type: RequestOptionType, next_page_token: Optional[Mapping[str, Any]]
+    ) -> MutableMapping[str, Any]:
         options = {}
 
+        token = next_page_token.get("next_page_token") if next_page_token else None
         if (
             self.page_token_option
-            and
+            and token is not None
             and isinstance(self.page_token_option, RequestOption)
             and self.page_token_option.inject_into == option_type
         ):
-            options[self.page_token_option.field_name.eval(config=self.config)] =
+            options[self.page_token_option.field_name.eval(config=self.config)] = token  # type: ignore # field_name is always cast to an interpolated string
         if (
             self.page_size_option
             and self.pagination_strategy.get_page_size()
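To make the token injection concrete, a comment-style sketch of the new flow (the field name is a hypothetical config value, not from the diff):

    # With page_token_option injecting into request_parameter under field_name "page":
    next_page_token = {"next_page_token": 3}
    # paginator.get_request_params(next_page_token=next_page_token)
    # -> {"page": 3}, because _get_request_options now reads the token from its
    #    argument instead of from the removed self._token attribute.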
@@ -204,6 +212,9 @@ class PaginatorTestReadDecorator(Paginator):
     """
     In some cases, we want to limit the number of requests that are made to the backend source. This class allows for limiting the number of
    pages that are queried throughout a read command.
+
+    WARNING: This decorator is not currently thread-safe like the rest of the low-code framework because it has
+    an internal state to track the current number of pages counted so that it can exit early during a test read
     """
 
     _PAGE_COUNT_BEFORE_FIRST_NEXT_CALL = 1
@@ -217,17 +228,27 @@ class PaginatorTestReadDecorator(Paginator):
         self._decorated = decorated
         self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
 
+    def get_initial_token(self) -> Optional[Any]:
+        self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
+        return self._decorated.get_initial_token()
+
     def next_page_token(
-        self,
+        self,
+        response: requests.Response,
+        last_page_size: int,
+        last_record: Optional[Record],
+        last_page_token_value: Optional[Any] = None,
     ) -> Optional[Mapping[str, Any]]:
         if self._page_count >= self._maximum_number_of_pages:
             return None
 
         self._page_count += 1
-        return self._decorated.next_page_token(
+        return self._decorated.next_page_token(
+            response, last_page_size, last_record, last_page_token_value
+        )
 
-    def path(self) -> Optional[str]:
-        return self._decorated.path()
+    def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
+        return self._decorated.path(next_page_token)
 
     def get_request_params(
         self,
@@ -272,7 +293,3 @@ class PaginatorTestReadDecorator(Paginator):
         return self._decorated.get_request_body_json(
             stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
         )
-
-    def reset(self, reset_value: Optional[Any] = None) -> None:
-        self._decorated.reset()
-        self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py
CHANGED
@@ -19,7 +19,7 @@ class NoPagination(Paginator):
 
     parameters: InitVar[Mapping[str, Any]]
 
-    def path(self) -> Optional[str]:
+    def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
         return None
 
     def get_request_params(
@@ -58,11 +58,14 @@ class NoPagination(Paginator):
     ) -> Mapping[str, Any]:
         return {}
 
+    def get_initial_token(self) -> Optional[Any]:
+        return None
+
     def next_page_token(
-        self,
-
+        self,
+        response: requests.Response,
+        last_page_size: int,
+        last_record: Optional[Record],
+        last_page_token_value: Optional[Any],
+    ) -> Optional[Mapping[str, Any]]:
         return {}
-
-    def reset(self, reset_value: Optional[Any] = None) -> None:
-        # No state to reset
-        pass
airbyte_cdk/sources/declarative/requesters/paginators/paginator.py
CHANGED
@@ -24,14 +24,18 @@ class Paginator(ABC, RequestOptionsProvider):
     """
 
     @abstractmethod
-    def
+    def get_initial_token(self) -> Optional[Any]:
         """
-
+        Get the page token that should be included in the request to get the first page of records
         """
 
     @abstractmethod
     def next_page_token(
-        self,
+        self,
+        response: requests.Response,
+        last_page_size: int,
+        last_record: Optional[Record],
+        last_page_token_value: Optional[Any],
     ) -> Optional[Mapping[str, Any]]:
         """
         Returns the next_page_token to use to fetch the next page of records.
@@ -39,12 +43,13 @@ class Paginator(ABC, RequestOptionsProvider):
         :param response: the response to process
         :param last_page_size: the number of records read from the response
         :param last_record: the last record extracted from the response
+        :param last_page_token_value: The current value of the page token made on the last request
         :return: A mapping {"next_page_token": <token>} for the next page from the input response object. Returning None means there are no more pages to read in this response.
         """
         pass
 
     @abstractmethod
-    def path(self) -> Optional[str]:
+    def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
         """
         Returns the URL path to hit to fetch the next page of records
 
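Taken together, these hunks make the Paginator interface stateless: reset() is gone, and the caller threads the token through every call. A minimal sketch of how a caller drives the new interface (the helper names are assumptions; the real loop lives in SimpleRetriever._read_pages, shown later in this diff):

    # seed the loop from the paginator rather than from internal paginator state
    initial_token = paginator.get_initial_token()
    next_page_token = {"next_page_token": initial_token} if initial_token else None

    while True:
        response = send_request(next_page_token=next_page_token)  # hypothetical helper
        last_page_size, last_record = count_records(response)     # hypothetical helper
        last_value = next_page_token.get("next_page_token") if next_page_token else None
        next_page_token = paginator.next_page_token(
            response, last_page_size, last_record, last_value
        )
        if not next_page_token:
            break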
airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py
CHANGED
@@ -43,7 +43,6 @@ class CursorPaginationStrategy(PaginationStrategy):
         )
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
-        self._initial_cursor = None
         if isinstance(self.cursor_value, str):
             self._cursor_value = InterpolatedString.create(self.cursor_value, parameters=parameters)
         else:
@@ -57,10 +56,19 @@ class CursorPaginationStrategy(PaginationStrategy):
 
     @property
     def initial_token(self) -> Optional[Any]:
-
+        """
+        CursorPaginationStrategy does not have an initial value because the next cursor is typically included
+        in the response of the first request. For Resumable Full Refresh streams that checkpoint the page
+        cursor, the next cursor should be read from the state or stream slice object.
+        """
+        return None
 
     def next_page_token(
-        self,
+        self,
+        response: requests.Response,
+        last_page_size: int,
+        last_record: Optional[Record],
+        last_page_token_value: Optional[Any] = None,
     ) -> Optional[Any]:
         decoded_response = next(self.decoder.decode(response))
 
@@ -87,8 +95,5 @@ class CursorPaginationStrategy(PaginationStrategy):
         )
         return token if token else None
 
-    def reset(self, reset_value: Optional[Any] = None) -> None:
-        self._initial_cursor = reset_value
-
     def get_page_size(self) -> Optional[int]:
         return self.page_size
airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py
CHANGED
@@ -52,7 +52,6 @@ class OffsetIncrement(PaginationStrategy):
     inject_on_first_request: bool = False
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
-        self._offset = 0
         page_size = str(self.page_size) if isinstance(self.page_size, int) else self.page_size
         if page_size:
             self._page_size: Optional[InterpolatedString] = InterpolatedString(
@@ -64,11 +63,15 @@ class OffsetIncrement(PaginationStrategy):
     @property
     def initial_token(self) -> Optional[Any]:
         if self.inject_on_first_request:
-            return
+            return 0
         return None
 
     def next_page_token(
-        self,
+        self,
+        response: requests.Response,
+        last_page_size: int,
+        last_record: Optional[Record],
+        last_page_token_value: Optional[Any] = None,
     ) -> Optional[Any]:
         decoded_response = next(self.decoder.decode(response))
 
@@ -78,17 +81,16 @@ class OffsetIncrement(PaginationStrategy):
             and last_page_size < self._page_size.eval(self.config, response=decoded_response)
         ) or last_page_size == 0:
             return None
-
-
-
-
-
-        if not isinstance(reset_value, int):
+        elif last_page_token_value is None:
+            # If the OffsetIncrement strategy does not inject on the first request, the incoming last_page_token_value
+            # will be None. For this case, we assume that None was the first page and progress to the next offset
+            return 0 + last_page_size
+        elif not isinstance(last_page_token_value, int):
             raise ValueError(
-                f"
+                f"Last page token value {last_page_token_value} for OffsetIncrement pagination strategy was not an integer"
             )
         else:
-
+            return last_page_token_value + last_page_size
 
     def get_page_size(self) -> Optional[int]:
         if self._page_size:
airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py
CHANGED
@@ -31,7 +31,6 @@ class PageIncrement(PaginationStrategy):
     inject_on_first_request: bool = False
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
-        self._page = self.start_from_page
         if isinstance(self.page_size, int) or (self.page_size is None):
             self._page_size = self.page_size
         else:
@@ -43,28 +42,30 @@ class PageIncrement(PaginationStrategy):
     @property
     def initial_token(self) -> Optional[Any]:
         if self.inject_on_first_request:
-            return self.
+            return self.start_from_page
         return None
 
     def next_page_token(
-        self,
+        self,
+        response: requests.Response,
+        last_page_size: int,
+        last_record: Optional[Record],
+        last_page_token_value: Optional[Any],
    ) -> Optional[Any]:
         # Stop paginating when there are fewer records than the page size or the current page has no records
         if (self._page_size and last_page_size < self._page_size) or last_page_size == 0:
             return None
-
-
-
-
-
-
-            self._page = self.start_from_page
-        elif not isinstance(reset_value, int):
+        elif last_page_token_value is None:
+            # If the PageIncrement strategy does not inject on the first request, the incoming last_page_token_value
+            # may be None. When this is the case, we assume we've already requested the first page specified by
+            # start_from_page and must now get the next page
+            return self.start_from_page + 1
+        elif not isinstance(last_page_token_value, int):
             raise ValueError(
-                f"
+                f"Last page token value {last_page_token_value} for PageIncrement pagination strategy was not an integer"
             )
         else:
-
+            return last_page_token_value + 1
 
     def get_page_size(self) -> Optional[int]:
         return self._page_size
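Both strategies now derive the next token purely from last_page_token_value instead of a mutable _page/_offset field. A minimal re-implementation of the PageIncrement decision logic above, just to show the progression (parameter values are illustrative):

    from typing import Any, Optional

    def next_page(last_page_size: int, last_page_token_value: Optional[Any],
                  page_size: int = 2, start_from_page: int = 1) -> Optional[int]:
        if last_page_size < page_size or last_page_size == 0:
            return None                 # short or empty page: stop paginating
        if last_page_token_value is None:
            return start_from_page + 1  # first page was implicit; request the next one
        return last_page_token_value + 1

    assert next_page(2, None) == 2  # after the first full page
    assert next_page(2, 2) == 3     # after the second full page
    assert next_page(1, 3) is None  # short page ends pagination

OffsetIncrement follows the same shape but advances by last_page_size records rather than by one page.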
airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py
CHANGED
@@ -4,7 +4,7 @@
 
 from abc import abstractmethod
 from dataclasses import dataclass
-from typing import Any, Optional
+from typing import Any, Mapping, Optional
 
 import requests
 
@@ -26,22 +26,21 @@ class PaginationStrategy:
 
     @abstractmethod
     def next_page_token(
-        self,
+        self,
+        response: requests.Response,
+        last_page_size: int,
+        last_record: Optional[Record],
+        last_page_token_value: Optional[Any],
     ) -> Optional[Any]:
         """
         :param response: response to process
         :param last_page_size: the number of records read from the response
         :param last_record: the last record extracted from the response
+        :param last_page_token_value: The current value of the page token made on the last request
         :return: next page token. Returns None if there are no more pages to fetch
         """
         pass
 
-    @abstractmethod
-    def reset(self, reset_value: Optional[Any] = None) -> None:
-        """
-        Reset the pagination's inner state
-        """
-
     @abstractmethod
     def get_page_size(self) -> Optional[int]:
         """
airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py
CHANGED
@@ -44,16 +44,19 @@ class StopConditionPaginationStrategyDecorator(PaginationStrategy):
         self._stop_condition = stop_condition
 
     def next_page_token(
-        self,
+        self,
+        response: requests.Response,
+        last_page_size: int,
+        last_record: Optional[Record],
+        last_page_token_value: Optional[Any] = None,
     ) -> Optional[Any]:
-        # We evaluate in reverse order because the assumption is that most of the APIs using data feed structure
-        # descending order. In terms of performance/memory, we return the records lazily
+        # We evaluate in reverse order because the assumption is that most of the APIs using data feed structure
+        # will return records in descending order. In terms of performance/memory, we return the records lazily
         if last_record and self._stop_condition.is_met(last_record):
             return None
-        return self._delegate.next_page_token(
-
-
-        self._delegate.reset(reset_value)
+        return self._delegate.next_page_token(
+            response, last_page_size, last_record, last_page_token_value
+        )
 
     def get_page_size(self) -> Optional[int]:
         return self._delegate.get_page_size()
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py
CHANGED
@@ -6,18 +6,7 @@ import json
 from dataclasses import InitVar, dataclass, field
 from functools import partial
 from itertools import islice
-from typing import (
-    Any,
-    Callable,
-    Iterable,
-    List,
-    Mapping,
-    MutableMapping,
-    Optional,
-    Set,
-    Tuple,
-    Union,
-)
+from typing import Any, Callable, Iterable, List, Mapping, Optional, Set, Tuple, Union
 
 import requests
 
@@ -90,9 +79,6 @@ class SimpleRetriever(Retriever):
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._paginator = self.paginator or NoPagination(parameters=parameters)
-        self._last_response: Optional[requests.Response] = None
-        self._last_page_size: int = 0
-        self._last_record: Optional[Record] = None
         self._parameters = parameters
         self._name = (
             InterpolatedString(self._name, parameters=parameters)
@@ -100,10 +86,6 @@ class SimpleRetriever(Retriever):
             else self._name
         )
 
-        # This mapping is used during a resumable full refresh syncs to indicate whether a partition has started syncing
-        # records. Partitions serve as the key and map to True if they already began processing records
-        self._partition_started: MutableMapping[Any, bool] = dict()
-
     @property  # type: ignore
     def name(self) -> str:
         """
@@ -251,17 +233,13 @@ class SimpleRetriever(Retriever):
             raise ValueError("Request body json cannot be a string")
         return body_json
 
-    def _paginator_path(
-        self,
-    ) -> Optional[str]:
+    def _paginator_path(self, next_page_token: Optional[Mapping[str, Any]] = None) -> Optional[str]:
         """
         If the paginator points to a path, follow it, else return nothing so the requester is used.
-        :param stream_state:
-        :param stream_slice:
         :param next_page_token:
         :return:
         """
-        return self._paginator.path()
+        return self._paginator.path(next_page_token=next_page_token)
 
     def _parse_response(
         self,
@@ -272,22 +250,15 @@ class SimpleRetriever(Retriever):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Iterable[Record]:
         if not response:
-            self._last_response = None
             yield from []
         else:
-            self.
-            record_generator = self.record_selector.select_records(
+            yield from self.record_selector.select_records(
                 response=response,
                 stream_state=stream_state,
                 records_schema=records_schema,
                 stream_slice=stream_slice,
                 next_page_token=next_page_token,
             )
-            self._last_page_size = 0
-            for record in record_generator:
-                self._last_page_size += 1
-                self._last_record = record
-                yield record
 
     @property  # type: ignore
     def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
@@ -299,7 +270,13 @@ class SimpleRetriever(Retriever):
         if not isinstance(value, property):
             self._primary_key = value
 
-    def _next_page_token(
+    def _next_page_token(
+        self,
+        response: requests.Response,
+        last_page_size: int,
+        last_record: Optional[Record],
+        last_page_token_value: Optional[Any],
+    ) -> Optional[Mapping[str, Any]]:
         """
         Specifies a pagination strategy.
 
@@ -307,7 +284,12 @@ class SimpleRetriever(Retriever):
 
         :return: The token for the next page from the input response object. Returning None means there are no more pages to read in this response.
         """
-        return self._paginator.next_page_token(
+        return self._paginator.next_page_token(
+            response=response,
+            last_page_size=last_page_size,
+            last_record=last_record,
+            last_page_token_value=last_page_token_value,
+        )
 
     def _fetch_next_page(
         self,
@@ -316,7 +298,7 @@ class SimpleRetriever(Retriever):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Optional[requests.Response]:
         return self.requester.send_request(
-            path=self._paginator_path(),
+            path=self._paginator_path(next_page_token=next_page_token),
             stream_state=stream_state,
             stream_slice=stream_slice,
             next_page_token=next_page_token,
@@ -345,20 +327,37 @@ class SimpleRetriever(Retriever):
     # This logic is similar to _read_pages in the HttpStream class. When making changes here, consider making changes there as well.
     def _read_pages(
         self,
-        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[
+        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
         stream_state: Mapping[str, Any],
         stream_slice: StreamSlice,
-    ) -> Iterable[
+    ) -> Iterable[Record]:
         pagination_complete = False
-
+        initial_token = self._paginator.get_initial_token()
+        next_page_token: Optional[Mapping[str, Any]] = (
+            {"next_page_token": initial_token} if initial_token else None
+        )
         while not pagination_complete:
             response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
-
+
+            last_page_size = 0
+            last_record: Optional[Record] = None
+            for record in records_generator_fn(response):
+                last_page_size += 1
+                last_record = record
+                yield record
 
             if not response:
                 pagination_complete = True
             else:
-
+                last_page_token_value = (
+                    next_page_token.get("next_page_token") if next_page_token else None
+                )
+                next_page_token = self._next_page_token(
+                    response=response,
+                    last_page_size=last_page_size,
+                    last_record=last_record,
+                    last_page_token_value=last_page_token_value,
+                )
                 if not next_page_token:
                     pagination_complete = True
 
@@ -367,19 +366,38 @@ class SimpleRetriever(Retriever):
 
     def _read_single_page(
         self,
-        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[
+        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
         stream_state: Mapping[str, Any],
         stream_slice: StreamSlice,
     ) -> Iterable[StreamData]:
-
-
+        initial_token = stream_state.get("next_page_token")
+        if initial_token is None:
+            initial_token = self._paginator.get_initial_token()
+        next_page_token: Optional[Mapping[str, Any]] = (
+            {"next_page_token": initial_token} if initial_token else None
+        )
+
+        response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
+
+        last_page_size = 0
+        last_record: Optional[Record] = None
+        for record in records_generator_fn(response):
+            last_page_size += 1
+            last_record = record
+            yield record
 
         if not response:
-            next_page_token
+            next_page_token = {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
         else:
-
-
-
+            last_page_token_value = (
+                next_page_token.get("next_page_token") if next_page_token else None
+            )
+            next_page_token = self._next_page_token(
+                response=response,
+                last_page_size=last_page_size,
+                last_record=last_record,
+                last_page_token_value=last_page_token_value,
+            ) or {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
 
         if self.cursor:
             self.cursor.close_slice(
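With the paginator now stateless, resumable full refresh checkpointing moves entirely into _read_single_page: the first request is seeded from the stream state's "next_page_token" value, and the closed slice carries either the next token or the FULL_REFRESH_SYNC_COMPLETE_KEY sentinel. An illustrative attempt-by-attempt progression (token values are hypothetical):

    # attempt 1, no prior state:
    #   initial_token = self._paginator.get_initial_token()
    #   slice closes with {"next_page_token": "page-2"}
    # attempt 2, stream_state = {"next_page_token": "page-2"}:
    #   the first request resumes directly at "page-2"
    # final attempt, _next_page_token(...) returns None:
    #   slice closes with {FULL_REFRESH_SYNC_COMPLETE_KEY: True}, so a retried job exits early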
@@ -414,25 +432,14 @@ class SimpleRetriever(Retriever):
         if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
             stream_state = self.state
 
-            # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
-            # The platform deletes stream state for full refresh streams before starting a
-            # this value existing for the initial attempt
+            # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
+            # fetch more records. The platform deletes stream state for full refresh streams before starting a
+            # new job, so we don't need to worry about this value existing for the initial attempt
             if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
                 return
-            cursor_value = stream_state.get("next_page_token")
-
-            # The first attempt to read a page for the current partition should reset the paginator to the current
-            # cursor state which is initially assigned to the incoming state from the platform
-            partition_key = self._to_partition_key(_slice.partition)
-            if partition_key not in self._partition_started:
-                self._partition_started[partition_key] = True
-                self._paginator.reset(reset_value=cursor_value)
 
             yield from self._read_single_page(record_generator, stream_state, _slice)
         else:
-            # Fixing paginator types has a long tail of dependencies
-            self._paginator.reset()
-
             for stream_data in self._read_pages(record_generator, self.state, _slice):
                 current_record = self._extract_record(stream_data, _slice)
                 if self.cursor and current_record:
@@ -518,7 +525,7 @@ class SimpleRetriever(Retriever):
         stream_state: Mapping[str, Any],
         records_schema: Mapping[str, Any],
         stream_slice: Optional[StreamSlice],
-    ) -> Iterable[
+    ) -> Iterable[Record]:
         yield from self._parse_response(
             response,
             stream_slice=stream_slice,
@@ -562,7 +569,7 @@ class SimpleRetrieverTestReadDecorator(SimpleRetriever):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Optional[requests.Response]:
         return self.requester.send_request(
-            path=self._paginator_path(),
+            path=self._paginator_path(next_page_token=next_page_token),
             stream_state=stream_state,
             stream_slice=stream_slice,
             next_page_token=next_page_token,
airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py
CHANGED
@@ -16,7 +16,7 @@ class DeclarativePartitionFactory:
         self,
         stream_name: str,
         json_schema: Mapping[str, Any],
-
+        retriever: Retriever,
         message_repository: MessageRepository,
     ) -> None:
         """
@@ -26,14 +26,14 @@ class DeclarativePartitionFactory:
         """
         self._stream_name = stream_name
         self._json_schema = json_schema
-        self.
+        self._retriever = retriever
         self._message_repository = message_repository
 
     def create(self, stream_slice: StreamSlice) -> Partition:
         return DeclarativePartition(
             self._stream_name,
             self._json_schema,
-            self.
+            self._retriever,
             self._message_repository,
             stream_slice,
         )
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py
CHANGED
@@ -31,12 +31,6 @@ class DeliverRawFiles(BaseModel):
 
     delivery_type: Literal["use_file_transfer"] = Field("use_file_transfer", const=True)
 
-    preserve_subdirectories_directories: bool = Field(
-        title="Preserve Subdirectories in File Paths",
-        description="If enabled replicate source folder structure",
-        default=True,
-    )
-
 
 class AbstractFileBasedSpec(BaseModel):
     """
airbyte_cdk/sources/file_based/exceptions.py
CHANGED
@@ -111,10 +111,6 @@ class ErrorListingFiles(BaseFileBasedSourceError):
     pass
 
 
-class DuplicatedFilesError(BaseFileBasedSourceError):
-    pass
-
-
 class CustomFileBasedException(AirbyteTracedException):
     """
     A specialized exception for file-based connectors.
@@ -127,25 +123,3 @@ class CustomFileBasedException(AirbyteTracedException):
 
 class FileSizeLimitError(CustomFileBasedException):
     pass
-
-
-def format_duplicate_files_error_message(
-    stream_name: str, duplicated_files_names: List[dict[str, List[str]]]
-) -> str:
-    duplicated_files_messages = []
-    for duplicated_file in duplicated_files_names:
-        for duplicated_file_name, file_paths in duplicated_file.items():
-            file_duplicated_message = (
-                f"{len(file_paths)} duplicates found for file name {duplicated_file_name}:\n\n"
-                + "".join(f"\n - {file_paths}")
-            )
-            duplicated_files_messages.append(file_duplicated_message)
-
-    error_message = (
-        f"ERROR: Duplicate filenames found for stream {stream_name}. "
-        "Duplicate file names are not allowed if the Preserve Subdirectories in File Paths option is disabled. "
-        "Please remove or rename the duplicate files before attempting to re-run the sync.\n\n"
-        + "\n".join(duplicated_files_messages)
-    )
-
-    return error_message
airbyte_cdk/sources/file_based/file_based_source.py
CHANGED
@@ -242,7 +242,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                 stream=self._make_default_stream(
                     stream_config=stream_config,
                     cursor=cursor,
-
+                    use_file_transfer=self._use_file_transfer(parsed_config),
                 ),
                 source=self,
                 logger=self.logger,
@@ -273,7 +273,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                 stream=self._make_default_stream(
                     stream_config=stream_config,
                     cursor=cursor,
-
+                    use_file_transfer=self._use_file_transfer(parsed_config),
                 ),
                 source=self,
                 logger=self.logger,
@@ -285,7 +285,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
             stream = self._make_default_stream(
                 stream_config=stream_config,
                 cursor=cursor,
-
+                use_file_transfer=self._use_file_transfer(parsed_config),
             )
 
             streams.append(stream)
@@ -298,7 +298,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
         self,
         stream_config: FileBasedStreamConfig,
         cursor: Optional[AbstractFileBasedCursor],
-
+        use_file_transfer: bool = False,
     ) -> AbstractFileBasedStream:
         return DefaultFileBasedStream(
             config=stream_config,
@@ -310,10 +310,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
             validation_policy=self._validate_and_get_validation_policy(stream_config),
             errors_collector=self.errors_collector,
             cursor=cursor,
-            use_file_transfer=
-            preserve_subdirectories_directories=self._preserve_subdirectories_directories(
-                parsed_config
-            ),
+            use_file_transfer=use_file_transfer,
         )
 
     def _get_stream_from_catalog(
@@ -388,14 +385,3 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
             and parsed_config.delivery_method.delivery_type == "use_file_transfer"
         )
         return use_file_transfer
-
-    @staticmethod
-    def _preserve_subdirectories_directories(parsed_config: AbstractFileBasedSpec) -> bool:
-        # fall back to preserve subdirectories if config is not present or incomplete
-        if (
-            FileBasedSource._use_file_transfer(parsed_config)
-            and hasattr(parsed_config.delivery_method, "preserve_subdirectories_directories")
-            and parsed_config.delivery_method.preserve_subdirectories_directories is not None
-        ):
-            return parsed_config.delivery_method.preserve_subdirectories_directories
-        return True
airbyte_cdk/sources/file_based/file_based_stream_reader.py
CHANGED
@@ -135,16 +135,6 @@ class AbstractFileBasedStreamReader(ABC):
             return use_file_transfer
         return False
 
-    def preserve_subdirectories_directories(self) -> bool:
-        # fall back to preserve subdirectories if config is not present or incomplete
-        if (
-            self.use_file_transfer()
-            and hasattr(self.config.delivery_method, "preserve_subdirectories_directories")
-            and self.config.delivery_method.preserve_subdirectories_directories is not None
-        ):
-            return self.config.delivery_method.preserve_subdirectories_directories
-        return True
-
     @abstractmethod
     def get_file(
         self, file: RemoteFile, local_directory: str, logger: logging.Logger
@@ -169,13 +159,10 @@ class AbstractFileBasedStreamReader(ABC):
         """
         ...
 
-
-
-
-
-            file_relative_path = file.uri.lstrip("/")
-        else:
-            file_relative_path = path.basename(file.uri)
+    @staticmethod
+    def _get_file_transfer_paths(file: RemoteFile, local_directory: str) -> List[str]:
+        # Remove left slashes from source path format to make relative path for writing locally
+        file_relative_path = file.uri.lstrip("/")
         local_file_path = path.join(local_directory, file_relative_path)
 
         # Ensure the local directory exists
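The rewritten helper drops the preserve-subdirectories branch, so the source directory layout is now always mirrored locally. A small runnable sketch of the resulting path arithmetic (paths are illustrative):

    from os import path

    file_uri = "/bucket/nested/dir/report.csv"
    file_relative_path = file_uri.lstrip("/")  # "bucket/nested/dir/report.csv"
    local_file_path = path.join("/tmp/airbyte", file_relative_path)
    # -> "/tmp/airbyte/bucket/nested/dir/report.csv"; subdirectories are preserved,
    # and the old path.basename() fallback (which could collide on duplicate names) is gone.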
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py
CHANGED
@@ -5,24 +5,20 @@
 import asyncio
 import itertools
 import traceback
-from collections import defaultdict
 from copy import deepcopy
 from functools import cache
-from
-from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Set, Tuple, Union
+from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Set, Union
 
 from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, FailureType, Level
 from airbyte_cdk.models import Type as MessageType
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType
 from airbyte_cdk.sources.file_based.exceptions import (
-    DuplicatedFilesError,
     FileBasedSourceError,
     InvalidSchemaError,
     MissingSchemaError,
     RecordParseError,
     SchemaInferenceError,
     StopSyncPerValidationPolicy,
-    format_duplicate_files_error_message,
 )
 from airbyte_cdk.sources.file_based.file_types import FileTransfer
 from airbyte_cdk.sources.file_based.remote_file import RemoteFile
@@ -47,8 +43,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
     """
 
     FILE_TRANSFER_KW = "use_file_transfer"
-    PRESERVE_SUBDIRECTORIES_KW = "preserve_subdirectories_directories"
-    FILES_KEY = "files"
     DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
     ab_last_mod_col = "_ab_source_file_last_modified"
     ab_file_name_col = "_ab_source_file_url"
@@ -56,14 +50,10 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
     source_file_url = "source_file_url"
     airbyte_columns = [ab_last_mod_col, ab_file_name_col]
     use_file_transfer = False
-    preserve_subdirectories_directories = True
 
     def __init__(self, **kwargs: Any):
         if self.FILE_TRANSFER_KW in kwargs:
             self.use_file_transfer = kwargs.pop(self.FILE_TRANSFER_KW, False)
-            self.preserve_subdirectories_directories = kwargs.pop(
-                self.PRESERVE_SUBDIRECTORIES_KW, True
-            )
         super().__init__(**kwargs)
 
     @property
@@ -108,43 +98,15 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
         else:
             return super()._filter_schema_invalid_properties(configured_catalog_json_schema)
 
-    def _duplicated_files_names(
-        self, slices: List[dict[str, List[RemoteFile]]]
-    ) -> List[dict[str, List[str]]]:
-        seen_file_names = set()
-        duplicates_file_names = set()
-        file_paths = defaultdict(list)
-        for file_slice in slices:
-            for file_found in file_slice[self.FILES_KEY]:
-                file_name = path.basename(file_found.uri)
-                if file_name not in seen_file_names:
-                    seen_file_names.add(file_name)
-                else:
-                    duplicates_file_names.add(file_name)
-                file_paths[file_name].append(file_found.uri)
-        return [
-            {duplicated_file: file_paths[duplicated_file]}
-            for duplicated_file in duplicates_file_names
-        ]
-
     def compute_slices(self) -> Iterable[Optional[Mapping[str, Any]]]:
         # Sort files by last_modified, uri and return them grouped by last_modified
         all_files = self.list_files()
         files_to_read = self._cursor.get_files_to_sync(all_files, self.logger)
         sorted_files_to_read = sorted(files_to_read, key=lambda f: (f.last_modified, f.uri))
         slices = [
-            {
+            {"files": list(group[1])}
             for group in itertools.groupby(sorted_files_to_read, lambda f: f.last_modified)
         ]
-        if slices and not self.preserve_subdirectories_directories:
-            duplicated_files_names = self._duplicated_files_names(slices)
-            if duplicated_files_names:
-                raise DuplicatedFilesError(
-                    format_duplicate_files_error_message(
-                        stream_name=self.name, duplicated_files_names=duplicated_files_names
-                    ),
-                    stream=self.name,
-                )
         return slices
 
     def transform_record(
{airbyte_cdk-6.13.1.dev41012.dist-info → airbyte_cdk-6.14.0.dev1.dist-info}/RECORD
RENAMED
@@ -62,7 +62,7 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
 airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
 airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
 airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
-airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
+airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=v61HsAm_TmkhxbvOQS7Qvo4sNou-n9GtUT8thams6i0,22480
 airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
 airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
 airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
@@ -99,7 +99,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py,sha
 airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZnZ_hB7rvBSZxG9s0RSrzsOkDWbBY0_P6qu5lEfc,3212
 airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
 airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=BtsY_jtT4MihFqeQgc05HXj3Ndt-e2ESQgGwbg3Sdxc,6430
-airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=
+airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=Y5AWYxbJTUtJ_Jm7DV9qrZDiymFR9LST7fBt4piT2-U,4585
 airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=wX_dQ401siuwh3zHgSHRnSN1vIojI4Nufg3BwzZAzk0,16239
 airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
@@ -135,15 +135,15 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.p
 airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=o0520AmHMb7SAoeokVNwoOzuZzIAT6ryx9uFYGSOrs0,8664
 airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=RqYPkgJFAWfcZBTc-JBcGHPm4JL1ZQOhs9GKU4MP2eE,14723
 airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
-airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=
-airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py,sha256
-airbyte_cdk/sources/declarative/requesters/paginators/paginator.py,sha256=
+airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=FnSl3qPvv5wD6ieAI2Ic5c4dqBk-3fRe4tCaWzq3YwM,11840
+airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py,sha256=j6j9QRPaTbKQ2N661RFVKthhkWiodEp6ut0tKeEd0Ng,2019
+airbyte_cdk/sources/declarative/requesters/paginators/paginator.py,sha256=OlN-y0PEOMzlUNUh3pzonoTpIJpGwkP4ibFengvpLVU,2230
 airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py,sha256=2gly8fuZpDNwtu1Qg6oE2jBLGqQRdzSLJdnpk_iDV6I,767
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py,sha256=
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py,sha256=
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py,sha256=
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py,sha256=
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py,sha256
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py,sha256=yLzzK5YIRTkXd2Z-BS__AZXuTd6HXjJIxq05K-lQoxI,3898
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py,sha256=WvGt_DTFcAgTR-NHrlrR7B71yG-L6jmfW-Gwm9iYzjY,3624
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py,sha256=Z2i6a-oKMmOTxHxsTVSnyaShkJ3u8xZw1xIJdx2yxss,2731
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py,sha256=UiHQI2lsRDPqM4nMvKMnmsXA3gFg5BFE4lCPEBhuCTs,1317
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py,sha256=LoKXdUbSgHEtSwtA8DFrnX6SpQbRVVwreY8NguTKTcI,2229
 airbyte_cdk/sources/declarative/requesters/request_option.py,sha256=_qmv8CLQQ3fERt6BuMZeRu6tZXscPoeARx1VJdWMQ_M,1055
 airbyte_cdk/sources/declarative/requesters/request_options/__init__.py,sha256=WCwpKqM4wKqy-DHJaCHbKAlFqRVOqMi9K5qonxIfi_Y,809
 airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py,sha256=FLkg0uzC9bc-zFnALWr0FLYpKsz8iK2xQsd4UOyeW08,3706
@@ -161,7 +161,7 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
 airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC5RK_ZZM7ojTLBQ4IkWQTfmfCk,456
 airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=_-d3MvHh-4r46i4wjQikD4ZygKA7TvuDu2i04qqULEg,3731
 airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
-airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=
+airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=jxQ_9xcVD07r9PKhofitAqMkdX1k8ZNyy50qz5NwkFs,24540
 airbyte_cdk/sources/declarative/schema/__init__.py,sha256=HztgVVaZdil5UfgUZcv_Hyy84r89_EKRwyO2hoewNVg,749
 airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
 airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=H6A3NQ6kPPM-cUNPmdvDPc9xNzR1rQNrK95GbgCW334,8822
@@ -171,7 +171,7 @@ airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLn
 airbyte_cdk/sources/declarative/spec/__init__.py,sha256=H0UwoRhgucbKBIzg85AXrifybVmfpwWpPdy22vZKVuo,141
 airbyte_cdk/sources/declarative/spec/spec.py,sha256=ODSNUgkDOhnLQnwLjgSaME6R3kNeywjROvbNrWEnsgU,1876
 airbyte_cdk/sources/declarative/stream_slicers/__init__.py,sha256=sI9vhc95RwJYOnA0VKjcbtKgFcmAbWjhdWBXFbAijOs,176
-airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=
+airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=ldmfzOhkA8yMPQKDOHO-bO8zUYJ0oVAs8BIZ-O57exk,3415
 airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py,sha256=SOkIPBi2Wu7yxIvA15yFzUAB95a3IzA8LPq5DEqHQQc,725
 airbyte_cdk/sources/declarative/transformations/__init__.py,sha256=CPJ8TlMpiUmvG3624VYu_NfTzxwKcfBjM2Q2wJ7fkSA,919
 airbyte_cdk/sources/declarative/transformations/add_fields.py,sha256=r4YdAuAk2bQtNWJMztIIy2CC-NglD9NeK1s1TeO9wkw,5027
@@ -193,7 +193,7 @@ airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=ddKQfUmk
 airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=01Nd4b7ERAbp-OZo_8rrAzFXWPTMwr02SnWiN17nx8Q,2363
 airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=j9T5TimfWFUz7nqsaj-83G3xWmDpsmeSbDnaUNmz0UM,5849
 airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=
+airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=tj-M1L5BTa5yIQ3jHo09CtCTSq_eR-68zgyOPqwsurw,6455
 airbyte_cdk/sources/file_based/config/avro_format.py,sha256=NxTF96ewzn6HuhgodsY7Rpb-ybr1ZEWW5d4Vid64g5A,716
 airbyte_cdk/sources/file_based/config/csv_format.py,sha256=NWekkyT8dTwiVK0mwa_krQD4FJPHSDfILo8kPAg3-Vs,8006
 airbyte_cdk/sources/file_based/config/excel_format.py,sha256=9qAmTsT6SoVzNfNv0oBVkVCmiyqQuVAbfRKajjoa7Js,378
@@ -204,9 +204,9 @@ airbyte_cdk/sources/file_based/config/unstructured_format.py,sha256=tIbB9Pn1HqU6
 airbyte_cdk/sources/file_based/discovery_policy/__init__.py,sha256=gl3ey6mZbyfraB9P3pFhf9UJp2JeTZ1SUFAopy2iBvY,301
 airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha256=dCfXX529Rd5rtopg4VeEgTPJjFtqjtjzPq6LCw18Wt0,605
 airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=-xujTidtrq6HC00WKbjQh1CZdT5LMuzkp5BLjqDmfTY,1007
-airbyte_cdk/sources/file_based/exceptions.py,sha256=
-airbyte_cdk/sources/file_based/file_based_source.py,sha256=
-airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=
+airbyte_cdk/sources/file_based/exceptions.py,sha256=AEELNIRzKPX6eopKd_2jhE7WiNeR0Aw7nQWVOL8fvkc,5760
+airbyte_cdk/sources/file_based/file_based_source.py,sha256=RfpctRNLJ_EHKKEc2E1EZGYRfhG0Z9o6TgsKS4XrSNY,16652
+airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=ohxKlqPuV7TGwjyRy_gaWUol8QN5lBSoCYoaqBtRh1c,6179
 airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=blCLn0-2LC-ZdgcNyDEhqM2RiUvEjEBh-G4-t32ZtuM,1268
 airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=XNx-JC-sgzH9u3nOJ2M59FxBXvtig8LN6BIkeDOavZA,10858
 airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QlCXB-ry3np67Q_VerQEPoWDOTcPTB6Go4ydZxY9ae4,20445
@@ -232,7 +232,7 @@ airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_c
 airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
 airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=om-x3gZFPgWDpi15S9RxZmR36VHnk8sytgN6LlBQhAw,1934
 airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=VGV7xLyBribuBMVrXtO1xqkWJD86bl7yhXtjnwLMohM,7051
-airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=
+airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=rpwU6AOyhFLuXtcFKkcOHFWbRQ4kLCOKzAjcID_M87k,16770
 airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
 airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
 airbyte_cdk/sources/http_logger.py,sha256=TyBmtRA6D9g0XDkKGvdM415b36RXDjgfkwRewDsH8-0,1576
@@ -340,8 +340,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.13.1.dev41012.dist-info/LICENSE.txt,sha256=
-airbyte_cdk-6.13.1.dev41012.dist-info/METADATA,sha256=
-airbyte_cdk-6.13.1.dev41012.dist-info/WHEEL,sha256=
-airbyte_cdk-6.13.1.dev41012.dist-info/entry_points.txt,sha256=
-airbyte_cdk-6.13.1.dev41012.dist-info/RECORD,,
+airbyte_cdk-6.14.0.dev1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.14.0.dev1.dist-info/METADATA,sha256=IE3J33y4yRYF6vsHR7l-BFHI4_K1LVq5S431ivv5Sos,5993
+airbyte_cdk-6.14.0.dev1.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
+airbyte_cdk-6.14.0.dev1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
+airbyte_cdk-6.14.0.dev1.dist-info/RECORD,,
{airbyte_cdk-6.13.1.dev41012.dist-info → airbyte_cdk-6.14.0.dev1.dist-info}/LICENSE.txt
RENAMED
File without changes
{airbyte_cdk-6.13.1.dev41012.dist-info → airbyte_cdk-6.14.0.dev1.dist-info}/WHEEL
RENAMED
File without changes
{airbyte_cdk-6.13.1.dev41012.dist-info → airbyte_cdk-6.14.0.dev1.dist-info}/entry_points.txt
RENAMED
File without changes