airbyte-cdk 6.13.1.dev4109__py3-none-any.whl → 6.13.1.dev41012__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. airbyte_cdk/entrypoint.py +1 -13
  2. airbyte_cdk/sources/declarative/auth/oauth.py +0 -26
  3. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +51 -24
  4. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +20 -128
  5. airbyte_cdk/sources/declarative/extractors/__init__.py +0 -2
  6. airbyte_cdk/sources/declarative/extractors/record_selector.py +7 -5
  7. airbyte_cdk/sources/declarative/interpolation/macros.py +0 -21
  8. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +11 -97
  9. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +14 -71
  10. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +4 -33
  11. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +35 -52
  12. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +7 -10
  13. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +4 -9
  14. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +6 -11
  15. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +11 -13
  16. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +13 -14
  17. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +7 -6
  18. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +7 -10
  19. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +4 -1
  20. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +64 -71
  21. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -4
  22. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +1 -3
  23. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +3 -8
  24. airbyte_cdk/sources/file_based/exceptions.py +23 -31
  25. airbyte_cdk/sources/file_based/file_based_source.py +8 -17
  26. airbyte_cdk/sources/file_based/file_based_stream_reader.py +6 -7
  27. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +2 -25
  28. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +20 -10
  29. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +4 -20
  30. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +4 -34
  31. airbyte_cdk/sources/types.py +0 -3
  32. {airbyte_cdk-6.13.1.dev4109.dist-info → airbyte_cdk-6.13.1.dev41012.dist-info}/METADATA +2 -2
  33. {airbyte_cdk-6.13.1.dev4109.dist-info → airbyte_cdk-6.13.1.dev41012.dist-info}/RECORD +36 -39
  34. {airbyte_cdk-6.13.1.dev4109.dist-info → airbyte_cdk-6.13.1.dev41012.dist-info}/WHEEL +1 -1
  35. airbyte_cdk/sources/declarative/extractors/type_transformer.py +0 -55
  36. airbyte_cdk/sources/declarative/requesters/README.md +0 -57
  37. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +0 -61
  38. {airbyte_cdk-6.13.1.dev4109.dist-info → airbyte_cdk-6.13.1.dev41012.dist-info}/LICENSE.txt +0 -0
  39. {airbyte_cdk-6.13.1.dev4109.dist-info → airbyte_cdk-6.13.1.dev41012.dist-info}/entry_points.txt +0 -0
@@ -112,39 +112,27 @@ class DefaultPaginator(Paginator):
112
112
  )
113
113
  if isinstance(self.url_base, str):
114
114
  self.url_base = InterpolatedString(string=self.url_base, parameters=parameters)
115
-
116
- def get_initial_token(self) -> Optional[Any]:
117
- """
118
- Return the page token that should be used for the first request of a stream
119
-
120
- WARNING: get_initial_token() should not be used by streams that use RFR that perform checkpointing
121
- of state using page numbers. Because paginators are stateless
122
- """
123
- return self.pagination_strategy.initial_token
115
+ self._token: Optional[Any] = self.pagination_strategy.initial_token
124
116
 
125
117
  def next_page_token(
126
- self,
127
- response: requests.Response,
128
- last_page_size: int,
129
- last_record: Optional[Record],
130
- last_page_token_value: Optional[Any] = None,
118
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
131
119
  ) -> Optional[Mapping[str, Any]]:
132
- next_page_token = self.pagination_strategy.next_page_token(
133
- response=response,
134
- last_page_size=last_page_size,
135
- last_record=last_record,
136
- last_page_token_value=last_page_token_value,
120
+ self._token = self.pagination_strategy.next_page_token(
121
+ response, last_page_size, last_record
137
122
  )
138
- if next_page_token:
139
- return {"next_page_token": next_page_token}
123
+ if self._token:
124
+ return {"next_page_token": self._token}
140
125
  else:
141
126
  return None
142
127
 
143
- def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
144
- token = next_page_token.get("next_page_token") if next_page_token else None
145
- if token and self.page_token_option and isinstance(self.page_token_option, RequestPath):
128
+ def path(self) -> Optional[str]:
129
+ if (
130
+ self._token
131
+ and self.page_token_option
132
+ and isinstance(self.page_token_option, RequestPath)
133
+ ):
146
134
  # Replace url base to only return the path
147
- return str(token).replace(self.url_base.eval(self.config), "") # type: ignore # url_base is casted to a InterpolatedString in __post_init__
135
+ return str(self._token).replace(self.url_base.eval(self.config), "") # type: ignore # url_base is casted to a InterpolatedString in __post_init__
148
136
  else:
149
137
  return None
150
138
 
@@ -155,7 +143,7 @@ class DefaultPaginator(Paginator):
155
143
  stream_slice: Optional[StreamSlice] = None,
156
144
  next_page_token: Optional[Mapping[str, Any]] = None,
157
145
  ) -> MutableMapping[str, Any]:
158
- return self._get_request_options(RequestOptionType.request_parameter, next_page_token)
146
+ return self._get_request_options(RequestOptionType.request_parameter)
159
147
 
160
148
  def get_request_headers(
161
149
  self,
@@ -164,7 +152,7 @@ class DefaultPaginator(Paginator):
164
152
  stream_slice: Optional[StreamSlice] = None,
165
153
  next_page_token: Optional[Mapping[str, Any]] = None,
166
154
  ) -> Mapping[str, str]:
167
- return self._get_request_options(RequestOptionType.header, next_page_token)
155
+ return self._get_request_options(RequestOptionType.header)
168
156
 
169
157
  def get_request_body_data(
170
158
  self,
@@ -173,7 +161,7 @@ class DefaultPaginator(Paginator):
173
161
  stream_slice: Optional[StreamSlice] = None,
174
162
  next_page_token: Optional[Mapping[str, Any]] = None,
175
163
  ) -> Mapping[str, Any]:
176
- return self._get_request_options(RequestOptionType.body_data, next_page_token)
164
+ return self._get_request_options(RequestOptionType.body_data)
177
165
 
178
166
  def get_request_body_json(
179
167
  self,
@@ -182,21 +170,25 @@ class DefaultPaginator(Paginator):
182
170
  stream_slice: Optional[StreamSlice] = None,
183
171
  next_page_token: Optional[Mapping[str, Any]] = None,
184
172
  ) -> Mapping[str, Any]:
185
- return self._get_request_options(RequestOptionType.body_json, next_page_token)
173
+ return self._get_request_options(RequestOptionType.body_json)
186
174
 
187
- def _get_request_options(
188
- self, option_type: RequestOptionType, next_page_token: Optional[Mapping[str, Any]]
189
- ) -> MutableMapping[str, Any]:
175
+ def reset(self, reset_value: Optional[Any] = None) -> None:
176
+ if reset_value:
177
+ self.pagination_strategy.reset(reset_value=reset_value)
178
+ else:
179
+ self.pagination_strategy.reset()
180
+ self._token = self.pagination_strategy.initial_token
181
+
182
+ def _get_request_options(self, option_type: RequestOptionType) -> MutableMapping[str, Any]:
190
183
  options = {}
191
184
 
192
- token = next_page_token.get("next_page_token") if next_page_token else None
193
185
  if (
194
186
  self.page_token_option
195
- and token is not None
187
+ and self._token is not None
196
188
  and isinstance(self.page_token_option, RequestOption)
197
189
  and self.page_token_option.inject_into == option_type
198
190
  ):
199
- options[self.page_token_option.field_name.eval(config=self.config)] = token # type: ignore # field_name is always cast to an interpolated string
191
+ options[self.page_token_option.field_name.eval(config=self.config)] = self._token # type: ignore # field_name is always cast to an interpolated string
200
192
  if (
201
193
  self.page_size_option
202
194
  and self.pagination_strategy.get_page_size()
@@ -212,9 +204,6 @@ class PaginatorTestReadDecorator(Paginator):
212
204
  """
213
205
  In some cases, we want to limit the number of requests that are made to the backend source. This class allows for limiting the number of
214
206
  pages that are queried throughout a read command.
215
-
216
- WARNING: This decorator is not currently thread-safe like the rest of the low-code framework because it has
217
- an internal state to track the current number of pages counted so that it can exit early during a test read
218
207
  """
219
208
 
220
209
  _PAGE_COUNT_BEFORE_FIRST_NEXT_CALL = 1
@@ -228,27 +217,17 @@ class PaginatorTestReadDecorator(Paginator):
228
217
  self._decorated = decorated
229
218
  self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
230
219
 
231
- def get_initial_token(self) -> Optional[Any]:
232
- self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
233
- return self._decorated.get_initial_token()
234
-
235
220
  def next_page_token(
236
- self,
237
- response: requests.Response,
238
- last_page_size: int,
239
- last_record: Optional[Record],
240
- last_page_token_value: Optional[Any] = None,
221
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
241
222
  ) -> Optional[Mapping[str, Any]]:
242
223
  if self._page_count >= self._maximum_number_of_pages:
243
224
  return None
244
225
 
245
226
  self._page_count += 1
246
- return self._decorated.next_page_token(
247
- response, last_page_size, last_record, last_page_token_value
248
- )
227
+ return self._decorated.next_page_token(response, last_page_size, last_record)
249
228
 
250
- def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
251
- return self._decorated.path(next_page_token)
229
+ def path(self) -> Optional[str]:
230
+ return self._decorated.path()
252
231
 
253
232
  def get_request_params(
254
233
  self,
@@ -293,3 +272,7 @@ class PaginatorTestReadDecorator(Paginator):
293
272
  return self._decorated.get_request_body_json(
294
273
  stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
295
274
  )
275
+
276
+ def reset(self, reset_value: Optional[Any] = None) -> None:
277
+ self._decorated.reset()
278
+ self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
@@ -19,7 +19,7 @@ class NoPagination(Paginator):
19
19
 
20
20
  parameters: InitVar[Mapping[str, Any]]
21
21
 
22
- def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
22
+ def path(self) -> Optional[str]:
23
23
  return None
24
24
 
25
25
  def get_request_params(
@@ -58,14 +58,11 @@ class NoPagination(Paginator):
58
58
  ) -> Mapping[str, Any]:
59
59
  return {}
60
60
 
61
- def get_initial_token(self) -> Optional[Any]:
62
- return None
63
-
64
61
  def next_page_token(
65
- self,
66
- response: requests.Response,
67
- last_page_size: int,
68
- last_record: Optional[Record],
69
- last_page_token_value: Optional[Any],
70
- ) -> Optional[Mapping[str, Any]]:
62
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
63
+ ) -> Mapping[str, Any]:
71
64
  return {}
65
+
66
+ def reset(self, reset_value: Optional[Any] = None) -> None:
67
+ # No state to reset
68
+ pass
@@ -24,18 +24,14 @@ class Paginator(ABC, RequestOptionsProvider):
24
24
  """
25
25
 
26
26
  @abstractmethod
27
- def get_initial_token(self) -> Optional[Any]:
27
+ def reset(self, reset_value: Optional[Any] = None) -> None:
28
28
  """
29
- Get the page token that should be included in the request to get the first page of records
29
+ Reset the pagination's inner state
30
30
  """
31
31
 
32
32
  @abstractmethod
33
33
  def next_page_token(
34
- self,
35
- response: requests.Response,
36
- last_page_size: int,
37
- last_record: Optional[Record],
38
- last_page_token_value: Optional[Any],
34
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
39
35
  ) -> Optional[Mapping[str, Any]]:
40
36
  """
41
37
  Returns the next_page_token to use to fetch the next page of records.
@@ -43,13 +39,12 @@ class Paginator(ABC, RequestOptionsProvider):
43
39
  :param response: the response to process
44
40
  :param last_page_size: the number of records read from the response
45
41
  :param last_record: the last record extracted from the response
46
- :param last_page_token_value: The current value of the page token made on the last request
47
42
  :return: A mapping {"next_page_token": <token>} for the next page from the input response object. Returning None means there are no more pages to read in this response.
48
43
  """
49
44
  pass
50
45
 
51
46
  @abstractmethod
52
- def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
47
+ def path(self) -> Optional[str]:
53
48
  """
54
49
  Returns the URL path to hit to fetch the next page of records
55
50
 
@@ -43,6 +43,7 @@ class CursorPaginationStrategy(PaginationStrategy):
43
43
  )
44
44
 
45
45
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
46
+ self._initial_cursor = None
46
47
  if isinstance(self.cursor_value, str):
47
48
  self._cursor_value = InterpolatedString.create(self.cursor_value, parameters=parameters)
48
49
  else:
@@ -56,19 +57,10 @@ class CursorPaginationStrategy(PaginationStrategy):
56
57
 
57
58
  @property
58
59
  def initial_token(self) -> Optional[Any]:
59
- """
60
- CursorPaginationStrategy does not have an initial value because the next cursor is typically included
61
- in the response of the first request. For Resumable Full Refresh streams that checkpoint the page
62
- cursor, the next cursor should be read from the state or stream slice object.
63
- """
64
- return None
60
+ return self._initial_cursor
65
61
 
66
62
  def next_page_token(
67
- self,
68
- response: requests.Response,
69
- last_page_size: int,
70
- last_record: Optional[Record],
71
- last_page_token_value: Optional[Any] = None,
63
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
72
64
  ) -> Optional[Any]:
73
65
  decoded_response = next(self.decoder.decode(response))
74
66
 
@@ -95,5 +87,8 @@ class CursorPaginationStrategy(PaginationStrategy):
95
87
  )
96
88
  return token if token else None
97
89
 
90
+ def reset(self, reset_value: Optional[Any] = None) -> None:
91
+ self._initial_cursor = reset_value
92
+
98
93
  def get_page_size(self) -> Optional[int]:
99
94
  return self.page_size
@@ -52,6 +52,7 @@ class OffsetIncrement(PaginationStrategy):
52
52
  inject_on_first_request: bool = False
53
53
 
54
54
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
55
+ self._offset = 0
55
56
  page_size = str(self.page_size) if isinstance(self.page_size, int) else self.page_size
56
57
  if page_size:
57
58
  self._page_size: Optional[InterpolatedString] = InterpolatedString(
@@ -63,15 +64,11 @@ class OffsetIncrement(PaginationStrategy):
63
64
  @property
64
65
  def initial_token(self) -> Optional[Any]:
65
66
  if self.inject_on_first_request:
66
- return 0
67
+ return self._offset
67
68
  return None
68
69
 
69
70
  def next_page_token(
70
- self,
71
- response: requests.Response,
72
- last_page_size: int,
73
- last_record: Optional[Record],
74
- last_page_token_value: Optional[Any] = None,
71
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
75
72
  ) -> Optional[Any]:
76
73
  decoded_response = next(self.decoder.decode(response))
77
74
 
@@ -81,16 +78,17 @@ class OffsetIncrement(PaginationStrategy):
81
78
  and last_page_size < self._page_size.eval(self.config, response=decoded_response)
82
79
  ) or last_page_size == 0:
83
80
  return None
84
- elif last_page_token_value is None:
85
- # If the OffsetIncrement strategy does not inject on the first request, the incoming last_page_token_value
86
- # will be None. For this case, we assume that None was the first page and progress to the next offset
87
- return 0 + last_page_size
88
- elif not isinstance(last_page_token_value, int):
81
+ else:
82
+ self._offset += last_page_size
83
+ return self._offset
84
+
85
+ def reset(self, reset_value: Optional[Any] = 0) -> None:
86
+ if not isinstance(reset_value, int):
89
87
  raise ValueError(
90
- f"Last page token value {last_page_token_value} for OffsetIncrement pagination strategy was not an integer"
88
+ f"Reset value {reset_value} for OffsetIncrement pagination strategy was not an integer"
91
89
  )
92
90
  else:
93
- return last_page_token_value + last_page_size
91
+ self._offset = reset_value
94
92
 
95
93
  def get_page_size(self) -> Optional[int]:
96
94
  if self._page_size:
@@ -31,6 +31,7 @@ class PageIncrement(PaginationStrategy):
31
31
  inject_on_first_request: bool = False
32
32
 
33
33
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
34
+ self._page = self.start_from_page
34
35
  if isinstance(self.page_size, int) or (self.page_size is None):
35
36
  self._page_size = self.page_size
36
37
  else:
@@ -42,30 +43,28 @@ class PageIncrement(PaginationStrategy):
42
43
  @property
43
44
  def initial_token(self) -> Optional[Any]:
44
45
  if self.inject_on_first_request:
45
- return self.start_from_page
46
+ return self._page
46
47
  return None
47
48
 
48
49
  def next_page_token(
49
- self,
50
- response: requests.Response,
51
- last_page_size: int,
52
- last_record: Optional[Record],
53
- last_page_token_value: Optional[Any],
50
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
54
51
  ) -> Optional[Any]:
55
52
  # Stop paginating when there are fewer records than the page size or the current page has no records
56
53
  if (self._page_size and last_page_size < self._page_size) or last_page_size == 0:
57
54
  return None
58
- elif last_page_token_value is None:
59
- # If the PageIncrement strategy does not inject on the first request, the incoming last_page_token_value
60
- # may be None. When this is the case, we assume we've already requested the first page specified by
61
- # start_from_page and must now get the next page
62
- return self.start_from_page + 1
63
- elif not isinstance(last_page_token_value, int):
55
+ else:
56
+ self._page += 1
57
+ return self._page
58
+
59
+ def reset(self, reset_value: Optional[Any] = None) -> None:
60
+ if reset_value is None:
61
+ self._page = self.start_from_page
62
+ elif not isinstance(reset_value, int):
64
63
  raise ValueError(
65
- f"Last page token value {last_page_token_value} for PageIncrement pagination strategy was not an integer"
64
+ f"Reset value {reset_value} for PageIncrement pagination strategy was not an integer"
66
65
  )
67
66
  else:
68
- return last_page_token_value + 1
67
+ self._page = reset_value
69
68
 
70
69
  def get_page_size(self) -> Optional[int]:
71
70
  return self._page_size
@@ -26,21 +26,22 @@ class PaginationStrategy:
26
26
 
27
27
  @abstractmethod
28
28
  def next_page_token(
29
- self,
30
- response: requests.Response,
31
- last_page_size: int,
32
- last_record: Optional[Record],
33
- last_page_token_value: Optional[Any],
29
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
34
30
  ) -> Optional[Any]:
35
31
  """
36
32
  :param response: response to process
37
33
  :param last_page_size: the number of records read from the response
38
34
  :param last_record: the last record extracted from the response
39
- :param last_page_token_value: The current value of the page token made on the last request
40
35
  :return: next page token. Returns None if there are no more pages to fetch
41
36
  """
42
37
  pass
43
38
 
39
+ @abstractmethod
40
+ def reset(self, reset_value: Optional[Any] = None) -> None:
41
+ """
42
+ Reset the pagination's inner state
43
+ """
44
+
44
45
  @abstractmethod
45
46
  def get_page_size(self) -> Optional[int]:
46
47
  """
@@ -44,19 +44,16 @@ class StopConditionPaginationStrategyDecorator(PaginationStrategy):
44
44
  self._stop_condition = stop_condition
45
45
 
46
46
  def next_page_token(
47
- self,
48
- response: requests.Response,
49
- last_page_size: int,
50
- last_record: Optional[Record],
51
- last_page_token_value: Optional[Any] = None,
47
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
52
48
  ) -> Optional[Any]:
53
- # We evaluate in reverse order because the assumption is that most of the APIs using data feed structure
54
- # will return records in descending order. In terms of performance/memory, we return the records lazily
49
+ # We evaluate in reverse order because the assumption is that most of the APIs using data feed structure will return records in
50
+ # descending order. In terms of performance/memory, we return the records lazily
55
51
  if last_record and self._stop_condition.is_met(last_record):
56
52
  return None
57
- return self._delegate.next_page_token(
58
- response, last_page_size, last_record, last_page_token_value
59
- )
53
+ return self._delegate.next_page_token(response, last_page_size, last_record)
54
+
55
+ def reset(self, reset_value: Optional[Any] = None) -> None:
56
+ self._delegate.reset(reset_value)
60
57
 
61
58
  def get_page_size(self) -> Optional[int]:
62
59
  return self._delegate.get_page_size()
@@ -7,7 +7,10 @@ from typing import Any, Iterable, Mapping, Optional
7
7
  from typing_extensions import deprecated
8
8
 
9
9
  from airbyte_cdk.models import FailureType
10
- from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncPartition
10
+ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
11
+ AsyncJobOrchestrator,
12
+ AsyncPartition,
13
+ )
11
14
  from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
12
15
  from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
13
16
  AsyncJobPartitionRouter,