airbyte-cdk 6.13.0.dev0__py3-none-any.whl → 6.13.1.dev4101__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +8 -25
  2. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +35 -52
  3. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +7 -10
  4. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +4 -9
  5. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +6 -11
  6. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +5 -16
  7. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +13 -14
  8. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +8 -7
  9. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +7 -10
  10. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +64 -71
  11. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -0
  12. airbyte_cdk/sources/file_based/exceptions.py +26 -0
  13. airbyte_cdk/sources/file_based/file_based_source.py +18 -5
  14. airbyte_cdk/sources/file_based/file_based_stream_reader.py +17 -4
  15. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +40 -2
  16. {airbyte_cdk-6.13.0.dev0.dist-info → airbyte_cdk-6.13.1.dev4101.dist-info}/METADATA +2 -2
  17. {airbyte_cdk-6.13.0.dev0.dist-info → airbyte_cdk-6.13.1.dev4101.dist-info}/RECORD +20 -20
  18. {airbyte_cdk-6.13.0.dev0.dist-info → airbyte_cdk-6.13.1.dev4101.dist-info}/WHEEL +1 -1
  19. {airbyte_cdk-6.13.0.dev0.dist-info → airbyte_cdk-6.13.1.dev4101.dist-info}/LICENSE.txt +0 -0
  20. {airbyte_cdk-6.13.0.dev0.dist-info → airbyte_cdk-6.13.1.dev4101.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/concurrent_declarative_source.py

@@ -194,11 +194,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             # Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
             # these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
             # so we need to treat them as synchronous
-            if isinstance(declarative_stream, DeclarativeStream) and (
-                name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
+            if (
+                isinstance(declarative_stream, DeclarativeStream)
+                and name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
                 == "SimpleRetriever"
-                or name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
-                == "AsyncRetriever"
             ):
                 incremental_sync_component_definition = name_to_stream_mapping[
                     declarative_stream.name
@@ -218,11 +217,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                     and not incremental_sync_component_definition
                 )

-                is_async_job_stream = (
-                    name_to_stream_mapping[declarative_stream.name].get("retriever", {}).get("type")
-                    == "AsyncRetriever"
-                )
-
                 if self._is_datetime_incremental_without_partition_routing(
                     declarative_stream, incremental_sync_component_definition
                 ):
@@ -274,26 +268,15 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 elif (
                     is_substream_without_incremental or is_without_partition_router_or_cursor
                 ) and hasattr(declarative_stream.retriever, "stream_slicer"):
-                    if is_async_job_stream:
-                        # A stream's AsyncRetriever must be shared across all partitions because it uses a
-                        # shared JobRepository to manage the state of jobs requests and when they are ready
-                        async_retriever = declarative_stream.retriever
-
-                        def async_retriever_factory_method() -> Retriever:
-                            return async_retriever
-
-                        retriever_factory = async_retriever_factory_method
-                    else:
-                        retriever_factory = self._retriever_factory(
-                            name_to_stream_mapping[declarative_stream.name],
-                            config,
-                            {},
-                        )
                     partition_generator = StreamSlicerPartitionGenerator(
                         DeclarativePartitionFactory(
                             declarative_stream.name,
                             declarative_stream.get_json_schema(),
-                            retriever_factory,
+                            self._retriever_factory(
+                                name_to_stream_mapping[declarative_stream.name],
+                                config,
+                                {},
+                            ),
                             self.message_repository,
                         ),
                         declarative_stream.retriever.stream_slicer,
airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py

@@ -112,39 +112,27 @@ class DefaultPaginator(Paginator):
         )
         if isinstance(self.url_base, str):
             self.url_base = InterpolatedString(string=self.url_base, parameters=parameters)
-
-    def get_initial_token(self) -> Optional[Any]:
-        """
-        Return the page token that should be used for the first request of a stream
-
-        WARNING: get_initial_token() should not be used by streams that use RFR that perform checkpointing
-        of state using page numbers. Because paginators are stateless
-        """
-        return self.pagination_strategy.initial_token
+        self._token: Optional[Any] = self.pagination_strategy.initial_token

     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any] = None,
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Mapping[str, Any]]:
-        next_page_token = self.pagination_strategy.next_page_token(
-            response=response,
-            last_page_size=last_page_size,
-            last_record=last_record,
-            last_page_token_value=last_page_token_value,
+        self._token = self.pagination_strategy.next_page_token(
+            response, last_page_size, last_record
         )
-        if next_page_token:
-            return {"next_page_token": next_page_token}
+        if self._token:
+            return {"next_page_token": self._token}
         else:
             return None

-    def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
-        token = next_page_token.get("next_page_token") if next_page_token else None
-        if token and self.page_token_option and isinstance(self.page_token_option, RequestPath):
+    def path(self) -> Optional[str]:
+        if (
+            self._token
+            and self.page_token_option
+            and isinstance(self.page_token_option, RequestPath)
+        ):
             # Replace url base to only return the path
-            return str(token).replace(self.url_base.eval(self.config), "")  # type: ignore # url_base is casted to a InterpolatedString in __post_init__
+            return str(self._token).replace(self.url_base.eval(self.config), "")  # type: ignore # url_base is casted to a InterpolatedString in __post_init__
         else:
             return None

@@ -155,7 +143,7 @@ class DefaultPaginator(Paginator):
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> MutableMapping[str, Any]:
-        return self._get_request_options(RequestOptionType.request_parameter, next_page_token)
+        return self._get_request_options(RequestOptionType.request_parameter)

     def get_request_headers(
         self,
@@ -164,7 +152,7 @@ class DefaultPaginator(Paginator):
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, str]:
-        return self._get_request_options(RequestOptionType.header, next_page_token)
+        return self._get_request_options(RequestOptionType.header)

     def get_request_body_data(
         self,
@@ -173,7 +161,7 @@ class DefaultPaginator(Paginator):
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, Any]:
-        return self._get_request_options(RequestOptionType.body_data, next_page_token)
+        return self._get_request_options(RequestOptionType.body_data)

     def get_request_body_json(
         self,
@@ -182,21 +170,25 @@ class DefaultPaginator(Paginator):
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, Any]:
-        return self._get_request_options(RequestOptionType.body_json, next_page_token)
+        return self._get_request_options(RequestOptionType.body_json)

-    def _get_request_options(
-        self, option_type: RequestOptionType, next_page_token: Optional[Mapping[str, Any]]
-    ) -> MutableMapping[str, Any]:
+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        if reset_value:
+            self.pagination_strategy.reset(reset_value=reset_value)
+        else:
+            self.pagination_strategy.reset()
+        self._token = self.pagination_strategy.initial_token
+
+    def _get_request_options(self, option_type: RequestOptionType) -> MutableMapping[str, Any]:
         options = {}

-        token = next_page_token.get("next_page_token") if next_page_token else None
         if (
             self.page_token_option
-            and token is not None
+            and self._token is not None
             and isinstance(self.page_token_option, RequestOption)
             and self.page_token_option.inject_into == option_type
         ):
-            options[self.page_token_option.field_name.eval(config=self.config)] = token  # type: ignore # field_name is always cast to an interpolated string
+            options[self.page_token_option.field_name.eval(config=self.config)] = self._token  # type: ignore # field_name is always cast to an interpolated string
         if (
             self.page_size_option
             and self.pagination_strategy.get_page_size()
@@ -212,9 +204,6 @@ class PaginatorTestReadDecorator(Paginator):
     """
     In some cases, we want to limit the number of requests that are made to the backend source. This class allows for limiting the number of
    pages that are queried throughout a read command.
-
-    WARNING: This decorator is not currently thread-safe like the rest of the low-code framework because it has
-    an internal state to track the current number of pages counted so that it can exit early during a test read
     """

     _PAGE_COUNT_BEFORE_FIRST_NEXT_CALL = 1
@@ -228,27 +217,17 @@ class PaginatorTestReadDecorator(Paginator):
         self._decorated = decorated
         self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL

-    def get_initial_token(self) -> Optional[Any]:
-        self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
-        return self._decorated.get_initial_token()
-
     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any] = None,
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Mapping[str, Any]]:
         if self._page_count >= self._maximum_number_of_pages:
             return None

         self._page_count += 1
-        return self._decorated.next_page_token(
-            response, last_page_size, last_record, last_page_token_value
-        )
+        return self._decorated.next_page_token(response, last_page_size, last_record)

-    def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
-        return self._decorated.path(next_page_token)
+    def path(self) -> Optional[str]:
+        return self._decorated.path()

     def get_request_params(
         self,
@@ -293,3 +272,7 @@ class PaginatorTestReadDecorator(Paginator):
         return self._decorated.get_request_body_json(
             stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
         )
+
+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        self._decorated.reset()
+        self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
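
Taken together, the DefaultPaginator changes in this version replace the stateless API, where callers threaded last_page_token_value through every call, with a stateful one: the paginator owns self._token and is rewound with reset(). A rough driver loop under the new API might look like the sketch below; read_all_pages and fetch are hypothetical stand-ins for the retriever and requester, not names from this diff.

    # Hypothetical caller of the stateful paginator API (a sketch, not CDK code).
    # `fetch` stands in for the requester call and returns (response, records).
    def read_all_pages(paginator, fetch):
        paginator.reset()  # rewind to pagination_strategy.initial_token
        next_page_token = None
        while True:
            response, records = fetch(
                path=paginator.path(),  # path() no longer takes next_page_token
                params=paginator.get_request_params(next_page_token=next_page_token),
            )
            yield from records
            last_record = records[-1] if records else None
            # next_page_token() now updates the paginator's internal _token
            next_page_token = paginator.next_page_token(response, len(records), last_record)
            if not next_page_token:
                break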
airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py

@@ -19,7 +19,7 @@ class NoPagination(Paginator):

     parameters: InitVar[Mapping[str, Any]]

-    def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
+    def path(self) -> Optional[str]:
         return None

     def get_request_params(
@@ -58,14 +58,11 @@ class NoPagination(Paginator):
     ) -> Mapping[str, Any]:
         return {}

-    def get_initial_token(self) -> Optional[Any]:
-        return None
-
     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any],
-    ) -> Optional[Mapping[str, Any]]:
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
+    ) -> Mapping[str, Any]:
         return {}
+
+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        # No state to reset
+        pass
airbyte_cdk/sources/declarative/requesters/paginators/paginator.py

@@ -24,18 +24,14 @@ class Paginator(ABC, RequestOptionsProvider):
     """

     @abstractmethod
-    def get_initial_token(self) -> Optional[Any]:
+    def reset(self, reset_value: Optional[Any] = None) -> None:
         """
-        Get the page token that should be included in the request to get the first page of records
+        Reset the pagination's inner state
         """

     @abstractmethod
     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any],
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Mapping[str, Any]]:
         """
         Returns the next_page_token to use to fetch the next page of records.
@@ -43,13 +39,12 @@ class Paginator(ABC, RequestOptionsProvider):
         :param response: the response to process
         :param last_page_size: the number of records read from the response
         :param last_record: the last record extracted from the response
-        :param last_page_token_value: The current value of the page token made on the last request
         :return: A mapping {"next_page_token": <token>} for the next page from the input response object. Returning None means there are no more pages to read in this response.
         """
         pass

     @abstractmethod
-    def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
+    def path(self) -> Optional[str]:
         """
         Returns the URL path to hit to fetch the next page of records

airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py

@@ -43,6 +43,7 @@ class CursorPaginationStrategy(PaginationStrategy):
     )

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+        self._initial_cursor = None
         if isinstance(self.cursor_value, str):
             self._cursor_value = InterpolatedString.create(self.cursor_value, parameters=parameters)
         else:
@@ -56,19 +57,10 @@ class CursorPaginationStrategy(PaginationStrategy):

     @property
     def initial_token(self) -> Optional[Any]:
-        """
-        CursorPaginationStrategy does not have an initial value because the next cursor is typically included
-        in the response of the first request. For Resumable Full Refresh streams that checkpoint the page
-        cursor, the next cursor should be read from the state or stream slice object.
-        """
-        return None
+        return self._initial_cursor

     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any] = None,
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Any]:
         decoded_response = next(self.decoder.decode(response))

@@ -95,5 +87,8 @@ class CursorPaginationStrategy(PaginationStrategy):
         )
         return token if token else None

+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        self._initial_cursor = reset_value
+
     def get_page_size(self) -> Optional[int]:
         return self.page_size
airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py

@@ -52,6 +52,7 @@ class OffsetIncrement(PaginationStrategy):
     inject_on_first_request: bool = False

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+        self._offset = 0
         page_size = str(self.page_size) if isinstance(self.page_size, int) else self.page_size
         if page_size:
             self._page_size: Optional[InterpolatedString] = InterpolatedString(
@@ -63,15 +64,11 @@ class OffsetIncrement(PaginationStrategy):
     @property
     def initial_token(self) -> Optional[Any]:
         if self.inject_on_first_request:
-            return 0
+            return self._offset
         return None

     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any] = None,
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Any]:
         decoded_response = next(self.decoder.decode(response))

@@ -81,17 +78,9 @@ class OffsetIncrement(PaginationStrategy):
             and last_page_size < self._page_size.eval(self.config, response=decoded_response)
         ) or last_page_size == 0:
             return None
-        elif last_page_token_value is None:
-            # If the OffsetIncrement strategy does not inject on the first request, the incoming last_page_token_value
-            # will be None. For this case, we assume that None was the first page and progress to the next offset
-            return 0 + last_page_size
-        elif not isinstance(last_page_token_value, int):
-            raise ValueError(
-                "The page token for a OffsetIncrement pagination strategy must be an integer"
-            )
         else:
-            next_page_token_value = last_page_token_value + last_page_size
-            return next_page_token_value
+            self._offset += last_page_size
+            return self._offset

     def reset(self, reset_value: Optional[Any] = 0) -> None:
         if not isinstance(reset_value, int):
airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py

@@ -31,6 +31,7 @@ class PageIncrement(PaginationStrategy):
     inject_on_first_request: bool = False

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+        self._page = self.start_from_page
         if isinstance(self.page_size, int) or (self.page_size is None):
             self._page_size = self.page_size
         else:
@@ -42,30 +43,28 @@ class PageIncrement(PaginationStrategy):
     @property
     def initial_token(self) -> Optional[Any]:
         if self.inject_on_first_request:
-            return self.start_from_page
+            return self._page
         return None

     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any],
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Any]:
         # Stop paginating when there are fewer records than the page size or the current page has no records
         if (self._page_size and last_page_size < self._page_size) or last_page_size == 0:
             return None
-        elif last_page_token_value is None:
-            # If the PageIncrement strategy does not inject on the first request, the incoming last_page_token_value
-            # may be None. When this is the case, we assume we've already requested the first page specified by
-            # start_from_page and must now get the next page
-            return self.start_from_page + 1
-        elif not isinstance(last_page_token_value, int):
+        else:
+            self._page += 1
+            return self._page
+
+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        if reset_value is None:
+            self._page = self.start_from_page
+        elif not isinstance(reset_value, int):
             raise ValueError(
-                "The page token for a PageIncrement pagination strategy must be an integer"
+                f"Reset value {reset_value} for PageIncrement pagination strategy was not an integer"
             )
         else:
-            return last_page_token_value + 1
+            self._page = reset_value

     def get_page_size(self) -> Optional[int]:
         return self._page_size
airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py

@@ -4,7 +4,7 @@

 from abc import abstractmethod
 from dataclasses import dataclass
-from typing import Any, Mapping, Optional
+from typing import Any, Optional

 import requests

@@ -26,21 +26,22 @@ class PaginationStrategy:

     @abstractmethod
     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any],
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Any]:
         """
         :param response: response to process
         :param last_page_size: the number of records read from the response
         :param last_record: the last record extracted from the response
-        :param last_page_token_value: The current value of the page token made on the last request
         :return: next page token. Returns None if there are no more pages to fetch
         """
         pass

+    @abstractmethod
+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        """
+        Reset the pagination's inner state
+        """
+
     @abstractmethod
     def get_page_size(self) -> Optional[int]:
         """
airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py

@@ -44,19 +44,16 @@ class StopConditionPaginationStrategyDecorator(PaginationStrategy):
         self._stop_condition = stop_condition

     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any] = None,
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Any]:
-        # We evaluate in reverse order because the assumption is that most of the APIs using data feed structure
-        # will return records in descending order. In terms of performance/memory, we return the records lazily
+        # We evaluate in reverse order because the assumption is that most of the APIs using data feed structure will return records in
+        # descending order. In terms of performance/memory, we return the records lazily
         if last_record and self._stop_condition.is_met(last_record):
             return None
-        return self._delegate.next_page_token(
-            response, last_page_size, last_record, last_page_token_value
-        )
+        return self._delegate.next_page_token(response, last_page_size, last_record)
+
+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        self._delegate.reset(reset_value)

     def get_page_size(self) -> Optional[int]:
         return self._delegate.get_page_size()
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py

@@ -6,7 +6,18 @@ import json
 from dataclasses import InitVar, dataclass, field
 from functools import partial
 from itertools import islice
-from typing import Any, Callable, Iterable, List, Mapping, Optional, Set, Tuple, Union
+from typing import (
+    Any,
+    Callable,
+    Iterable,
+    List,
+    Mapping,
+    MutableMapping,
+    Optional,
+    Set,
+    Tuple,
+    Union,
+)

 import requests

@@ -79,6 +90,9 @@ class SimpleRetriever(Retriever):

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._paginator = self.paginator or NoPagination(parameters=parameters)
+        self._last_response: Optional[requests.Response] = None
+        self._last_page_size: int = 0
+        self._last_record: Optional[Record] = None
         self._parameters = parameters
         self._name = (
             InterpolatedString(self._name, parameters=parameters)
@@ -86,6 +100,10 @@ class SimpleRetriever(Retriever):
             else self._name
         )

+        # This mapping is used during a resumable full refresh syncs to indicate whether a partition has started syncing
+        # records. Partitions serve as the key and map to True if they already began processing records
+        self._partition_started: MutableMapping[Any, bool] = dict()
+
     @property  # type: ignore
     def name(self) -> str:
         """
@@ -233,13 +251,17 @@ class SimpleRetriever(Retriever):
             raise ValueError("Request body json cannot be a string")
         return body_json

-    def _paginator_path(self, next_page_token: Optional[Mapping[str, Any]] = None) -> Optional[str]:
+    def _paginator_path(
+        self,
+    ) -> Optional[str]:
         """
         If the paginator points to a path, follow it, else return nothing so the requester is used.
+        :param stream_state:
+        :param stream_slice:
         :param next_page_token:
         :return:
         """
-        return self._paginator.path(next_page_token=next_page_token)
+        return self._paginator.path()

     def _parse_response(
         self,
@@ -250,15 +272,22 @@ class SimpleRetriever(Retriever):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Iterable[Record]:
         if not response:
+            self._last_response = None
             yield from []
         else:
-            yield from self.record_selector.select_records(
+            self._last_response = response
+            record_generator = self.record_selector.select_records(
                 response=response,
                 stream_state=stream_state,
                 records_schema=records_schema,
                 stream_slice=stream_slice,
                 next_page_token=next_page_token,
             )
+            self._last_page_size = 0
+            for record in record_generator:
+                self._last_page_size += 1
+                self._last_record = record
+                yield record

     @property  # type: ignore
     def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
@@ -270,13 +299,7 @@ class SimpleRetriever(Retriever):
         if not isinstance(value, property):
             self._primary_key = value

-    def _next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any],
-    ) -> Optional[Mapping[str, Any]]:
+    def _next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
         """
         Specifies a pagination strategy.

@@ -284,12 +307,7 @@ class SimpleRetriever(Retriever):

         :return: The token for the next page from the input response object. Returning None means there are no more pages to read in this response.
         """
-        return self._paginator.next_page_token(
-            response=response,
-            last_page_size=last_page_size,
-            last_record=last_record,
-            last_page_token_value=last_page_token_value,
-        )
+        return self._paginator.next_page_token(response, self._last_page_size, self._last_record)

     def _fetch_next_page(
         self,
@@ -298,7 +316,7 @@ class SimpleRetriever(Retriever):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Optional[requests.Response]:
         return self.requester.send_request(
-            path=self._paginator_path(next_page_token=next_page_token),
+            path=self._paginator_path(),
             stream_state=stream_state,
             stream_slice=stream_slice,
             next_page_token=next_page_token,
@@ -327,37 +345,20 @@ class SimpleRetriever(Retriever):
     # This logic is similar to _read_pages in the HttpStream class. When making changes here, consider making changes there as well.
     def _read_pages(
         self,
-        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
+        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[StreamData]],
         stream_state: Mapping[str, Any],
         stream_slice: StreamSlice,
-    ) -> Iterable[Record]:
+    ) -> Iterable[StreamData]:
         pagination_complete = False
-        initial_token = self._paginator.get_initial_token()
-        next_page_token: Optional[Mapping[str, Any]] = (
-            {"next_page_token": initial_token} if initial_token else None
-        )
+        next_page_token = None
         while not pagination_complete:
             response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
-
-            last_page_size = 0
-            last_record: Optional[Record] = None
-            for record in records_generator_fn(response):
-                last_page_size += 1
-                last_record = record
-                yield record
+            yield from records_generator_fn(response)

             if not response:
                 pagination_complete = True
             else:
-                last_page_token_value = (
-                    next_page_token.get("next_page_token") if next_page_token else None
-                )
-                next_page_token = self._next_page_token(
-                    response=response,
-                    last_page_size=last_page_size,
-                    last_record=last_record,
-                    last_page_token_value=last_page_token_value,
-                )
+                next_page_token = self._next_page_token(response)
                 if not next_page_token:
                     pagination_complete = True

@@ -366,38 +367,19 @@ class SimpleRetriever(Retriever):

     def _read_single_page(
         self,
-        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
+        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[StreamData]],
         stream_state: Mapping[str, Any],
         stream_slice: StreamSlice,
     ) -> Iterable[StreamData]:
-        initial_token = stream_state.get("next_page_token")
-        if initial_token is None:
-            initial_token = self._paginator.get_initial_token()
-        next_page_token: Optional[Mapping[str, Any]] = (
-            {"next_page_token": initial_token} if initial_token else None
-        )
-
-        response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
-
-        last_page_size = 0
-        last_record: Optional[Record] = None
-        for record in records_generator_fn(response):
-            last_page_size += 1
-            last_record = record
-            yield record
+        response = self._fetch_next_page(stream_state, stream_slice)
+        yield from records_generator_fn(response)

         if not response:
-            next_page_token = {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
+            next_page_token: Mapping[str, Any] = {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
         else:
-            last_page_token_value = (
-                next_page_token.get("next_page_token") if next_page_token else None
-            )
-            next_page_token = self._next_page_token(
-                response=response,
-                last_page_size=last_page_size,
-                last_record=last_record,
-                last_page_token_value=last_page_token_value,
-            ) or {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
+            next_page_token = self._next_page_token(response) or {
+                FULL_REFRESH_SYNC_COMPLETE_KEY: True
+            }

         if self.cursor:
             self.cursor.close_slice(
@@ -432,14 +414,25 @@ class SimpleRetriever(Retriever):
         if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
             stream_state = self.state

-            # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
-            # fetch more records. The platform deletes stream state for full refresh streams before starting a
-            # new job, so we don't need to worry about this value existing for the initial attempt
+            # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to fetch more records
+            # The platform deletes stream state for full refresh streams before starting a new job, so we don't need to worry about
+            # this value existing for the initial attempt
             if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
                 return
+            cursor_value = stream_state.get("next_page_token")
+
+            # The first attempt to read a page for the current partition should reset the paginator to the current
+            # cursor state which is initially assigned to the incoming state from the platform
+            partition_key = self._to_partition_key(_slice.partition)
+            if partition_key not in self._partition_started:
+                self._partition_started[partition_key] = True
+                self._paginator.reset(reset_value=cursor_value)

             yield from self._read_single_page(record_generator, stream_state, _slice)
         else:
+            # Fixing paginator types has a long tail of dependencies
+            self._paginator.reset()
+
             for stream_data in self._read_pages(record_generator, self.state, _slice):
                 current_record = self._extract_record(stream_data, _slice)
                 if self.cursor and current_record:
@@ -525,7 +518,7 @@ class SimpleRetriever(Retriever):
         stream_state: Mapping[str, Any],
         records_schema: Mapping[str, Any],
         stream_slice: Optional[StreamSlice],
-    ) -> Iterable[Record]:
+    ) -> Iterable[StreamData]:
         yield from self._parse_response(
             response,
             stream_slice=stream_slice,
@@ -569,7 +562,7 @@ class SimpleRetrieverTestReadDecorator(SimpleRetriever):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Optional[requests.Response]:
         return self.requester.send_request(
-            path=self._paginator_path(next_page_token=next_page_token),
+            path=self._paginator_path(),
             stream_state=stream_state,
             stream_slice=stream_slice,
             next_page_token=next_page_token,
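
On the retriever side, resumable full refresh now rewinds the paginator once per partition instead of threading the checkpointed token through each call. A commented sketch of the control flow in read_records, with hypothetical state values (a PageIncrement-style checkpoint of page 4 is assumed):

    # Prior attempt checkpointed page 4 for this stream.
    stream_state = {"next_page_token": 4}
    cursor_value = stream_state.get("next_page_token")  # 4

    partition_key = "partition-a"  # stand-in for self._to_partition_key(_slice.partition)
    partition_started = {}         # mirrors self._partition_started

    if partition_key not in partition_started:
        partition_started[partition_key] = True
        # self._paginator.reset(reset_value=4): the strategy resumes at page 4.
    # Subsequent reads of the same partition skip the reset, preserving the
    # paginator's in-flight _token. A finished prior attempt instead stores
    # {FULL_REFRESH_SYNC_COMPLETE_KEY: True}, which makes read_records return early.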
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py

@@ -31,6 +31,12 @@ class DeliverRawFiles(BaseModel):

     delivery_type: Literal["use_file_transfer"] = Field("use_file_transfer", const=True)

+    preserve_subdirectories_directories: bool = Field(
+        title="Preserve Subdirectories Directories",
+        description="Flag indicating we should preserve subdirectories directories",
+        default=True,
+    )
+

 class AbstractFileBasedSpec(BaseModel):
     """
@@ -65,6 +71,12 @@ class AbstractFileBasedSpec(BaseModel):
         airbyte_hidden=True,
     )

+    preserve_subdirectories_directories: bool = Field(
+        title="Preserve Subdirectories Directories",
+        description="Flag indicating we should preserve subdirectories directories",
+        default=True,
+    )
+
     @classmethod
     @abstractmethod
     def documentation_url(cls) -> AnyUrl:
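
For connectors built on this spec, the new field is rendered twice in the connector's JSON schema: once at the top level of the source configuration and once inside the raw-files delivery method. A hypothetical config fragment follows; every key other than delivery_type and preserve_subdirectories_directories is a placeholder.

    config = {
        "streams": [{"name": "files", "format": {"filetype": "csv"}}],  # placeholder
        "delivery_method": {
            "delivery_type": "use_file_transfer",
            "preserve_subdirectories_directories": False,  # flatten remote paths locally
        },
        "preserve_subdirectories_directories": False,
    }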
airbyte_cdk/sources/file_based/exceptions.py

@@ -111,6 +111,10 @@ class ErrorListingFiles(BaseFileBasedSourceError):
     pass


+class DuplicatedFilesError(BaseFileBasedSourceError):
+    pass
+
+
 class CustomFileBasedException(AirbyteTracedException):
     """
     A specialized exception for file-based connectors.
@@ -123,3 +127,25 @@ class CustomFileBasedException(AirbyteTracedException):

 class FileSizeLimitError(CustomFileBasedException):
     pass
+
+
+def format_duplicate_files_error_message(
+    stream_name: str, duplicated_files_names: List[dict[str, List[str]]]
+) -> str:
+    duplicated_files_messages = []
+    for duplicated_file in duplicated_files_names:
+        for duplicated_file_name, file_paths in duplicated_file.items():
+            file_duplicated_message = (
+                f"{len(file_paths)} duplicates found for file name {duplicated_file_name}:\n\n"
+                + "".join(f"\n - {file_paths}")
+            )
+            duplicated_files_messages.append(file_duplicated_message)
+
+    error_message = (
+        f"ERROR: Duplicate filenames found for stream {stream_name}. "
+        "Duplicate file names are not allowed if the Preserve Subdirectories in File Paths option is disabled. "
+        "Please remove or rename the duplicate files before attempting to re-run the sync.\n\n"
+        + "\n".join(duplicated_files_messages)
+    )
+
+    return error_message
airbyte_cdk/sources/file_based/file_based_source.py

@@ -242,7 +242,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                     stream=self._make_default_stream(
                         stream_config=stream_config,
                         cursor=cursor,
-                        use_file_transfer=self._use_file_transfer(parsed_config),
+                        parsed_config=parsed_config,
                     ),
                     source=self,
                     logger=self.logger,
@@ -273,7 +273,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                     stream=self._make_default_stream(
                         stream_config=stream_config,
                         cursor=cursor,
-                        use_file_transfer=self._use_file_transfer(parsed_config),
+                        parsed_config=parsed_config,
                     ),
                     source=self,
                     logger=self.logger,
@@ -285,7 +285,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                 stream = self._make_default_stream(
                     stream_config=stream_config,
                     cursor=cursor,
-                    use_file_transfer=self._use_file_transfer(parsed_config),
+                    parsed_config=parsed_config,
                 )

                 streams.append(stream)
@@ -298,7 +298,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
         self,
         stream_config: FileBasedStreamConfig,
         cursor: Optional[AbstractFileBasedCursor],
-        use_file_transfer: bool = False,
+        parsed_config: AbstractFileBasedSpec,
     ) -> AbstractFileBasedStream:
         return DefaultFileBasedStream(
             config=stream_config,
@@ -310,7 +310,10 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
             validation_policy=self._validate_and_get_validation_policy(stream_config),
             errors_collector=self.errors_collector,
             cursor=cursor,
-            use_file_transfer=use_file_transfer,
+            use_file_transfer=self._use_file_transfer(parsed_config),
+            preserve_subdirectories_directories=self._preserve_subdirectories_directories(
+                parsed_config
+            ),
         )

     def _get_stream_from_catalog(
@@ -385,3 +388,13 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
             and parsed_config.delivery_method.delivery_type == "use_file_transfer"
         )
         return use_file_transfer
+
+    @staticmethod
+    def _preserve_subdirectories_directories(parsed_config: AbstractFileBasedSpec) -> bool:
+        # fall back to preserve subdirectories if config is not present or incomplete
+        if (
+            hasattr(parsed_config, "preserve_subdirectories_directories")
+            and parsed_config.preserve_subdirectories_directories is not None
+        ):
+            return parsed_config.preserve_subdirectories_directories
+        return True
airbyte_cdk/sources/file_based/file_based_stream_reader.py

@@ -135,6 +135,16 @@ class AbstractFileBasedStreamReader(ABC):
             return use_file_transfer
         return False

+    def preserve_subdirectories_directories(self) -> bool:
+        # fall back to preserve subdirectories if config is not present or incomplete
+        if (
+            self.config
+            and hasattr(self.config, "preserve_subdirectories_directories")
+            and self.config.preserve_subdirectories_directories is not None
+        ):
+            return self.config.preserve_subdirectories_directories
+        return True
+
     @abstractmethod
     def get_file(
         self, file: RemoteFile, local_directory: str, logger: logging.Logger
@@ -159,10 +169,13 @@ class AbstractFileBasedStreamReader(ABC):
         """
         ...

-    @staticmethod
-    def _get_file_transfer_paths(file: RemoteFile, local_directory: str) -> List[str]:
-        # Remove left slashes from source path format to make relative path for writing locally
-        file_relative_path = file.uri.lstrip("/")
+    def _get_file_transfer_paths(self, file: RemoteFile, local_directory: str) -> List[str]:
+        preserve_subdirectories_directories = self.preserve_subdirectories_directories()
+        if preserve_subdirectories_directories:
+            # Remove left slashes from source path format to make relative path for writing locally
+            file_relative_path = file.uri.lstrip("/")
+        else:
+            file_relative_path = path.basename(file.uri)
         local_file_path = path.join(local_directory, file_relative_path)

         # Ensure the local directory exists
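
The observable difference in _get_file_transfer_paths is where a transferred file lands locally. A small illustration of the two branches; the URI and directory are made up, while the path arithmetic comes straight from the diff.

    from os import path

    uri, local_directory = "/landing/2024/records.csv", "/tmp/airbyte"

    # preserve_subdirectories_directories() is True (the default):
    preserved = path.join(local_directory, uri.lstrip("/"))
    # -> "/tmp/airbyte/landing/2024/records.csv"

    # Flag disabled: only the basename survives.
    flattened = path.join(local_directory, path.basename(uri))
    # -> "/tmp/airbyte/records.csv"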
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py

@@ -5,20 +5,24 @@
 import asyncio
 import itertools
 import traceback
+from collections import defaultdict
 from copy import deepcopy
 from functools import cache
-from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Set, Union
+from os import path
+from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Set, Tuple, Union

 from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, FailureType, Level
 from airbyte_cdk.models import Type as MessageType
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType
 from airbyte_cdk.sources.file_based.exceptions import (
+    DuplicatedFilesError,
     FileBasedSourceError,
     InvalidSchemaError,
     MissingSchemaError,
     RecordParseError,
     SchemaInferenceError,
     StopSyncPerValidationPolicy,
+    format_duplicate_files_error_message,
 )
 from airbyte_cdk.sources.file_based.file_types import FileTransfer
 from airbyte_cdk.sources.file_based.remote_file import RemoteFile
@@ -43,6 +47,8 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
     """

     FILE_TRANSFER_KW = "use_file_transfer"
+    PRESERVE_SUBDIRECTORIES_KW = "preserve_subdirectories_directories"
+    FILES_KEY = "files"
     DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
     ab_last_mod_col = "_ab_source_file_last_modified"
     ab_file_name_col = "_ab_source_file_url"
@@ -50,10 +56,14 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
     source_file_url = "source_file_url"
     airbyte_columns = [ab_last_mod_col, ab_file_name_col]
     use_file_transfer = False
+    preserve_subdirectories_directories = True

     def __init__(self, **kwargs: Any):
         if self.FILE_TRANSFER_KW in kwargs:
             self.use_file_transfer = kwargs.pop(self.FILE_TRANSFER_KW, False)
+            self.preserve_subdirectories_directories = kwargs.pop(
+                self.PRESERVE_SUBDIRECTORIES_KW, True
+            )
         super().__init__(**kwargs)

     @property
@@ -98,15 +108,43 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
         else:
             return super()._filter_schema_invalid_properties(configured_catalog_json_schema)

+    def _duplicated_files_names(
+        self, slices: List[dict[str, List[RemoteFile]]]
+    ) -> List[dict[str, List[str]]]:
+        seen_file_names = set()
+        duplicates_file_names = set()
+        file_paths = defaultdict(list)
+        for file_slice in slices:
+            for file_found in file_slice[self.FILES_KEY]:
+                file_name = path.basename(file_found.uri)
+                if file_name not in seen_file_names:
+                    seen_file_names.add(file_name)
+                else:
+                    duplicates_file_names.add(file_name)
+                file_paths[file_name].append(file_found.uri)
+        return [
+            {duplicated_file: file_paths[duplicated_file]}
+            for duplicated_file in duplicates_file_names
+        ]
+
     def compute_slices(self) -> Iterable[Optional[Mapping[str, Any]]]:
         # Sort files by last_modified, uri and return them grouped by last_modified
         all_files = self.list_files()
         files_to_read = self._cursor.get_files_to_sync(all_files, self.logger)
         sorted_files_to_read = sorted(files_to_read, key=lambda f: (f.last_modified, f.uri))
         slices = [
-            {"files": list(group[1])}
+            {self.FILES_KEY: list(group[1])}
             for group in itertools.groupby(sorted_files_to_read, lambda f: f.last_modified)
         ]
+        if slices and not self.preserve_subdirectories_directories:
+            duplicated_files_names = self._duplicated_files_names(slices)
+            if duplicated_files_names:
+                raise DuplicatedFilesError(
+                    format_duplicate_files_error_message(
+                        stream_name=self.name, duplicated_files_names=duplicated_files_names
+                    ),
+                    stream=self.name,
+                )
         return slices

     def transform_record(
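
Flattening makes basename collisions destructive, since two remote files differing only by directory would overwrite each other locally, which is why compute_slices now rejects them up front. A stand-alone reproduction of the detection logic; the URIs are placeholders.

    from collections import defaultdict
    from os import path

    uris = ["/a/data.csv", "/b/data.csv", "/c/other.csv"]  # hypothetical listing

    file_paths = defaultdict(list)
    for uri in uris:
        file_paths[path.basename(uri)].append(uri)

    duplicates = [{name: paths} for name, paths in file_paths.items() if len(paths) > 1]
    # -> [{"data.csv": ["/a/data.csv", "/b/data.csv"]}]
    # With preserve_subdirectories_directories disabled, compute_slices raises
    # DuplicatedFilesError using format_duplicate_files_error_message(...) as its message.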
{airbyte_cdk-6.13.0.dev0.dist-info → airbyte_cdk-6.13.1.dev4101.dist-info}/METADATA

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: airbyte-cdk
-Version: 6.13.0.dev0
+Version: 6.13.1.dev4101
 Summary: A framework for writing Airbyte Connectors.
 Home-page: https://airbyte.com
 License: MIT
{airbyte_cdk-6.13.0.dev0.dist-info → airbyte_cdk-6.13.1.dev4101.dist-info}/RECORD

@@ -62,7 +62,7 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
 airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
 airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
 airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
-airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=G5bqKcYZTPhY9-CGwbmJDOdgThZOnIx75W3p0H1VlmM,24325
+airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=PxP4p2686wsf1gjsumGKnh2o2Jjnrqg8QLGijEIrp-A,23412
 airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
 airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
 airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
@@ -135,15 +135,15 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.p
 airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=o0520AmHMb7SAoeokVNwoOzuZzIAT6ryx9uFYGSOrs0,8664
 airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=RqYPkgJFAWfcZBTc-JBcGHPm4JL1ZQOhs9GKU4MP2eE,14723
 airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
-airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=FnSl3qPvv5wD6ieAI2Ic5c4dqBk-3fRe4tCaWzq3YwM,11840
-airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py,sha256=j6j9QRPaTbKQ2N661RFVKthhkWiodEp6ut0tKeEd0Ng,2019
-airbyte_cdk/sources/declarative/requesters/paginators/paginator.py,sha256=OlN-y0PEOMzlUNUh3pzonoTpIJpGwkP4ibFengvpLVU,2230
+airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=LxTq1hieznRWlYlfODdZbMDUml-g6NyBkdwVI2mCNMM,10910
+airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py,sha256=-P-QOlefFhEe99bsB2y3yTvA8c8kCCbfBaTS6qPvF6I,1927
+airbyte_cdk/sources/declarative/requesters/paginators/paginator.py,sha256=ZgyvH7DOrASQ5K__J5SRAXH3REUW2n3yPHnFW9xq4NU,1972
 airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py,sha256=2gly8fuZpDNwtu1Qg6oE2jBLGqQRdzSLJdnpk_iDV6I,767
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py,sha256=yLzzK5YIRTkXd2Z-BS__AZXuTd6HXjJIxq05K-lQoxI,3898
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py,sha256=8qOyydbRrZqL6XbbHksNetteumZ8AEAUNoOo4L6zZr0,3960
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py,sha256=a202Je_hxst_RwobRGRQT59nWoDI1tr8b-q3sMEcL4w,2701
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py,sha256=UiHQI2lsRDPqM4nMvKMnmsXA3gFg5BFE4lCPEBhuCTs,1317
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py,sha256=LoKXdUbSgHEtSwtA8DFrnX6SpQbRVVwreY8NguTKTcI,2229
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py,sha256=vFzpNv8BdgXrYO5qhi2_Un4x4y-EAQWxinZtEPWz5KI,3654
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py,sha256=pMPi6iQrhtrI9VRPj218QNM_OqD8lX8P3Tr9yloKoE8,3367
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py,sha256=kQGpfr-dOwarxTIf2S4sHVulBzm8zSwQXBM7rOhkafA,2491
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py,sha256=ABpO4t0UUziBZnyml8UT_NhlF6loekhQji57TpKnaiY,1290
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py,sha256=-8NwokW-aKwv8DdeHh1ssODTobBYSOmIhH2-IjSjlNA,2213
 airbyte_cdk/sources/declarative/requesters/request_option.py,sha256=_qmv8CLQQ3fERt6BuMZeRu6tZXscPoeARx1VJdWMQ_M,1055
 airbyte_cdk/sources/declarative/requesters/request_options/__init__.py,sha256=WCwpKqM4wKqy-DHJaCHbKAlFqRVOqMi9K5qonxIfi_Y,809
 airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py,sha256=FLkg0uzC9bc-zFnALWr0FLYpKsz8iK2xQsd4UOyeW08,3706
@@ -161,7 +161,7 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
 airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC5RK_ZZM7ojTLBQ4IkWQTfmfCk,456
 airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=_-d3MvHh-4r46i4wjQikD4ZygKA7TvuDu2i04qqULEg,3731
 airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
-airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=jxQ_9xcVD07r9PKhofitAqMkdX1k8ZNyy50qz5NwkFs,24540
+airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=N4swGw5mfuTXJ2R7AKX18CHzizsr69pXwt5uSHLPi48,24172
 airbyte_cdk/sources/declarative/schema/__init__.py,sha256=HztgVVaZdil5UfgUZcv_Hyy84r89_EKRwyO2hoewNVg,749
 airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
 airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=H6A3NQ6kPPM-cUNPmdvDPc9xNzR1rQNrK95GbgCW334,8822
@@ -193,7 +193,7 @@ airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=ddKQfUmk
 airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=01Nd4b7ERAbp-OZo_8rrAzFXWPTMwr02SnWiN17nx8Q,2363
 airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=j9T5TimfWFUz7nqsaj-83G3xWmDpsmeSbDnaUNmz0UM,5849
 airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=tj-M1L5BTa5yIQ3jHo09CtCTSq_eR-68zgyOPqwsurw,6455
+airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=zjFFoLMb9TL3zJkTmm-YtmqQ0Y4EwIGFljM8s8h7hxU,6899
 airbyte_cdk/sources/file_based/config/avro_format.py,sha256=NxTF96ewzn6HuhgodsY7Rpb-ybr1ZEWW5d4Vid64g5A,716
 airbyte_cdk/sources/file_based/config/csv_format.py,sha256=NWekkyT8dTwiVK0mwa_krQD4FJPHSDfILo8kPAg3-Vs,8006
 airbyte_cdk/sources/file_based/config/excel_format.py,sha256=9qAmTsT6SoVzNfNv0oBVkVCmiyqQuVAbfRKajjoa7Js,378
@@ -204,9 +204,9 @@ airbyte_cdk/sources/file_based/config/unstructured_format.py,sha256=tIbB9Pn1HqU6
 airbyte_cdk/sources/file_based/discovery_policy/__init__.py,sha256=gl3ey6mZbyfraB9P3pFhf9UJp2JeTZ1SUFAopy2iBvY,301
 airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha256=dCfXX529Rd5rtopg4VeEgTPJjFtqjtjzPq6LCw18Wt0,605
 airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=-xujTidtrq6HC00WKbjQh1CZdT5LMuzkp5BLjqDmfTY,1007
-airbyte_cdk/sources/file_based/exceptions.py,sha256=AEELNIRzKPX6eopKd_2jhE7WiNeR0Aw7nQWVOL8fvkc,5760
-airbyte_cdk/sources/file_based/file_based_source.py,sha256=RfpctRNLJ_EHKKEc2E1EZGYRfhG0Z9o6TgsKS4XrSNY,16652
-airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=ohxKlqPuV7TGwjyRy_gaWUol8QN5lBSoCYoaqBtRh1c,6179
+airbyte_cdk/sources/file_based/exceptions.py,sha256=KfOgQgssBKgsv3h5po2IG1DhZcH664Zf_fx96mBlPSg,6761
+airbyte_cdk/sources/file_based/file_based_source.py,sha256=cnAncQfuXMAKK_u8W2GFlPXTaPUs_FAP-H6tXuNcgGg,17189
+airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=MwH4kHELXvNvE_CBxedxgfZtyOS71N1OBY-x03kgLeo,6816
 airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=blCLn0-2LC-ZdgcNyDEhqM2RiUvEjEBh-G4-t32ZtuM,1268
 airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=XNx-JC-sgzH9u3nOJ2M59FxBXvtig8LN6BIkeDOavZA,10858
 airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QlCXB-ry3np67Q_VerQEPoWDOTcPTB6Go4ydZxY9ae4,20445
@@ -232,7 +232,7 @@ airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_c
 airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
 airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=om-x3gZFPgWDpi15S9RxZmR36VHnk8sytgN6LlBQhAw,1934
 airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=VGV7xLyBribuBMVrXtO1xqkWJD86bl7yhXtjnwLMohM,7051
-airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=rpwU6AOyhFLuXtcFKkcOHFWbRQ4kLCOKzAjcID_M87k,16770
+airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=HRjR0rQGc8cYK2PxpLgAvZQ--jvtV8QgS1QIxkemnko,18413
 airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
 airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
 airbyte_cdk/sources/http_logger.py,sha256=TyBmtRA6D9g0XDkKGvdM415b36RXDjgfkwRewDsH8-0,1576
@@ -340,8 +340,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.13.0.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
-airbyte_cdk-6.13.0.dev0.dist-info/METADATA,sha256=DsnhBukVBErZ-7zXBww8Gtdw1nMQ0cGg7QpIzj_2Fvk,5993
-airbyte_cdk-6.13.0.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-airbyte_cdk-6.13.0.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
-airbyte_cdk-6.13.0.dev0.dist-info/RECORD,,
+airbyte_cdk-6.13.1.dev4101.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.13.1.dev4101.dist-info/METADATA,sha256=qzqdg5215pSnyafRjwL5bK2Qw4Wvr7FpOuxWJUFZc6M,5996
+airbyte_cdk-6.13.1.dev4101.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
+airbyte_cdk-6.13.1.dev4101.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
+airbyte_cdk-6.13.1.dev4101.dist-info/RECORD,,
{airbyte_cdk-6.13.0.dev0.dist-info → airbyte_cdk-6.13.1.dev4101.dist-info}/WHEEL

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 1.9.1
+Generator: poetry-core 2.0.0
 Root-Is-Purelib: true
 Tag: py3-none-any