airbyte-cdk 6.34.0.dev2__py3-none-any.whl → 6.34.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +12 -16
  2. airbyte_cdk/connector_builder/message_grouper.py +448 -0
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
  5. airbyte_cdk/sources/declarative/auth/oauth.py +1 -6
  6. airbyte_cdk/sources/declarative/auth/token.py +8 -3
  7. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +19 -30
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +85 -203
  9. airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
  10. airbyte_cdk/sources/declarative/decoders/__init__.py +4 -0
  11. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -7
  12. airbyte_cdk/sources/declarative/decoders/json_decoder.py +58 -12
  13. airbyte_cdk/sources/declarative/extractors/record_selector.py +3 -12
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +38 -122
  15. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -12
  16. airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -9
  17. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -150
  18. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +84 -234
  19. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  20. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
  21. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +18 -26
  22. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -8
  23. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -16
  24. airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
  25. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
  26. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
  27. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -4
  28. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
  29. airbyte_cdk/sources/file_based/file_based_source.py +37 -70
  30. airbyte_cdk/sources/file_based/file_based_stream_reader.py +12 -107
  31. airbyte_cdk/sources/file_based/stream/__init__.py +1 -10
  32. airbyte_cdk/sources/streams/call_rate.py +47 -185
  33. airbyte_cdk/sources/streams/http/http.py +2 -1
  34. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +56 -217
  35. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +73 -144
  36. airbyte_cdk/test/mock_http/mocker.py +1 -9
  37. airbyte_cdk/test/mock_http/response.py +3 -6
  38. airbyte_cdk/utils/datetime_helpers.py +66 -48
  39. airbyte_cdk/utils/mapping_helpers.py +26 -126
  40. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/METADATA +1 -1
  41. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/RECORD +45 -54
  42. airbyte_cdk/connector_builder/test_reader/__init__.py +0 -7
  43. airbyte_cdk/connector_builder/test_reader/helpers.py +0 -591
  44. airbyte_cdk/connector_builder/test_reader/message_grouper.py +0 -160
  45. airbyte_cdk/connector_builder/test_reader/reader.py +0 -441
  46. airbyte_cdk/connector_builder/test_reader/types.py +0 -75
  47. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +0 -81
  48. airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -47
  49. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +0 -85
  50. airbyte_cdk/sources/specs/transfer_modes.py +0 -26
  51. airbyte_cdk/sources/streams/permissions/identities_stream.py +0 -75
  52. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE.txt +0 -0
  53. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE_SHORT +0 -0
  54. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/WHEEL +0 -0
  55. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/extractors/record_selector.py

@@ -41,7 +41,6 @@ class RecordSelector(HttpSelector):
     _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="")
     record_filter: Optional[RecordFilter] = None
     transformations: List[RecordTransformation] = field(default_factory=lambda: [])
-    transform_before_filtering: bool = False

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._parameters = parameters
@@ -105,17 +104,9 @@ class RecordSelector(HttpSelector):
         Until we decide to move this logic away from the selector, we made this method public so that users like AsyncJobRetriever could
         share the logic of doing transformations on a set of records.
         """
-        if self.transform_before_filtering:
-            transformed_data = self._transform(all_data, stream_state, stream_slice)
-            transformed_filtered_data = self._filter(
-                transformed_data, stream_state, stream_slice, next_page_token
-            )
-        else:
-            filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token)
-            transformed_filtered_data = self._transform(filtered_data, stream_state, stream_slice)
-        normalized_data = self._normalize_by_schema(
-            transformed_filtered_data, schema=records_schema
-        )
+        filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token)
+        transformed_data = self._transform(filtered_data, stream_state, stream_slice)
+        normalized_data = self._normalize_by_schema(transformed_data, schema=records_schema)
         for data in normalized_data:
             yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)

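Note: the transform_before_filtering flag dropped above only swaps the order of two
stages. A minimal runnable sketch, with filter_/transform/normalize as hypothetical
stand-ins for RecordSelector._filter/_transform/_normalize_by_schema:

    from typing import Any, List

    def filter_(records: List[Any]) -> List[Any]:
        return [r for r in records if r is not None]  # stand-in predicate

    def transform(records: List[Any]) -> List[Any]:
        return records  # stand-in no-op transformation

    def normalize(records: List[Any]) -> List[Any]:
        return records  # stand-in schema normalization

    def select(all_data: List[Any], transform_before_filtering: bool) -> List[Any]:
        if transform_before_filtering:
            # dev2 with the flag set: transform first, then filter
            return normalize(filter_(transform(all_data)))
        # default order on both sides, and the only order left in dev0
        return normalize(transform(filter_(all_data)))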
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py

@@ -5,7 +5,6 @@
 import copy
 import logging
 import threading
-import time
 from collections import OrderedDict
 from copy import deepcopy
 from datetime import timedelta
@@ -59,8 +58,7 @@ class ConcurrentPerPartitionCursor(Cursor):
     CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
     """

-    DEFAULT_MAX_PARTITIONS_NUMBER = 25_000
-    SWITCH_TO_GLOBAL_LIMIT = 10_000
+    DEFAULT_MAX_PARTITIONS_NUMBER = 10000
     _NO_STATE: Mapping[str, Any] = {}
     _NO_CURSOR_STATE: Mapping[str, Any] = {}
     _GLOBAL_STATE_KEY = "state"
@@ -95,21 +93,15 @@ class ConcurrentPerPartitionCursor(Cursor):
         # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
         self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
         self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
-
-        # Parent-state tracking: store each partition’s parent state in creation order
-        self._partition_parent_state_map: OrderedDict[str, Mapping[str, Any]] = OrderedDict()
-
         self._finished_partitions: set[str] = set()
         self._lock = threading.Lock()
         self._timer = Timer()
         self._new_global_cursor: Optional[StreamState] = None
         self._lookback_window: int = 0
         self._parent_state: Optional[StreamState] = None
-        self._number_of_partitions: int = 0
+        self._over_limit: int = 0
         self._use_global_cursor: bool = False
         self._partition_serializer = PerPartitionKeySerializer()
-        # Track the last time a state message was emitted
-        self._last_emission_time: float = 0.0

         self._set_initial_state(stream_state)

@@ -149,55 +141,22 @@ class ConcurrentPerPartitionCursor(Cursor):
             raise ValueError("stream_slice cannot be None")

         partition_key = self._to_partition_key(stream_slice.partition)
+        self._cursor_per_partition[partition_key].close_partition(partition=partition)
         with self._lock:
             self._semaphore_per_partition[partition_key].acquire()
-            if not self._use_global_cursor:
-                self._cursor_per_partition[partition_key].close_partition(partition=partition)
-                cursor = self._cursor_per_partition[partition_key]
+            cursor = self._cursor_per_partition[partition_key]
+            if (
+                partition_key in self._finished_partitions
+                and self._semaphore_per_partition[partition_key]._value == 0
+            ):
                 if (
-                    partition_key in self._finished_partitions
-                    and self._semaphore_per_partition[partition_key]._value == 0
+                    self._new_global_cursor is None
+                    or self._new_global_cursor[self.cursor_field.cursor_field_key]
+                    < cursor.state[self.cursor_field.cursor_field_key]
                 ):
-                    self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])
-
-        self._check_and_update_parent_state()
-
-        self._emit_state_message()
-
-    def _check_and_update_parent_state(self) -> None:
-        """
-        Pop the leftmost partition state from _partition_parent_state_map only if
-        *all partitions* up to (and including) that partition key in _semaphore_per_partition
-        are fully finished (i.e. in _finished_partitions and semaphore._value == 0).
-        """
-        last_closed_state = None
-
-        while self._partition_parent_state_map:
-            # Look at the earliest partition key in creation order
-            earliest_key = next(iter(self._partition_parent_state_map))
-
-            # Verify ALL partitions from the left up to earliest_key are finished
-            all_left_finished = True
-            for p_key, sem in self._semaphore_per_partition.items():
-                # If any earlier partition is still not finished, we must stop
-                if p_key not in self._finished_partitions or sem._value != 0:
-                    all_left_finished = False
-                    break
-                # Once we've reached earliest_key in the semaphore order, we can stop checking
-                if p_key == earliest_key:
-                    break
-
-            # If the partitions up to earliest_key are not all finished, break the while-loop
-            if not all_left_finished:
-                break
-
-            # Otherwise, pop the leftmost entry from parent-state map
-            _, closed_parent_state = self._partition_parent_state_map.popitem(last=False)
-            last_closed_state = closed_parent_state
-
-        # Update _parent_state if we actually popped at least one partition
-        if last_closed_state is not None:
-            self._parent_state = last_closed_state
+                    self._new_global_cursor = copy.deepcopy(cursor.state)
+        if not self._use_global_cursor:
+            self._emit_state_message()

     def ensure_at_least_one_state_emitted(self) -> None:
         """
@@ -210,23 +169,9 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._global_cursor = self._new_global_cursor
             self._lookback_window = self._timer.finish()
             self._parent_state = self._partition_router.get_stream_state()
-        self._emit_state_message(throttle=False)
+        self._emit_state_message()

-    def _throttle_state_message(self) -> Optional[float]:
-        """
-        Throttles the state message emission to once every 60 seconds.
-        """
-        current_time = time.time()
-        if current_time - self._last_emission_time <= 60:
-            return None
-        return current_time
-
-    def _emit_state_message(self, throttle: bool = True) -> None:
-        if throttle:
-            current_time = self._throttle_state_message()
-            if current_time is None:
-                return
-            self._last_emission_time = current_time
+    def _emit_state_message(self) -> None:
         self._connector_state_manager.update_state_for_stream(
             self._stream_name,
             self._stream_namespace,
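Note: the dev2-side throttle deleted above reduces to a simple timestamp check. A
sketch of the pattern (the real method then builds the state message through the
connector state manager):

    import time

    class ThrottledEmitter:
        def __init__(self) -> None:
            self._last_emission_time = 0.0  # dev2 tracks the last emission timestamp

        def emit(self, throttle: bool = True) -> None:
            now = time.time()
            if throttle and now - self._last_emission_time <= 60:
                return  # emitted less than 60s ago; throttle=False forces a flush
            self._last_emission_time = now
            # ... emit the AirbyteStateMessage here ...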
@@ -243,19 +188,13 @@ class ConcurrentPerPartitionCursor(Cursor):

         slices = self._partition_router.stream_slices()
         self._timer.start()
-        for partition, last, parent_state in iterate_with_last_flag_and_state(
-            slices, self._partition_router.get_stream_state
-        ):
-            yield from self._generate_slices_from_partition(partition, parent_state)
+        for partition in slices:
+            yield from self._generate_slices_from_partition(partition)

-    def _generate_slices_from_partition(
-        self, partition: StreamSlice, parent_state: Mapping[str, Any]
-    ) -> Iterable[StreamSlice]:
+    def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
         # Ensure the maximum number of partitions is not exceeded
         self._ensure_partition_limit()

-        partition_key = self._to_partition_key(partition.partition)
-
         cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
         if not cursor:
             cursor = self._create_cursor(
@@ -263,27 +202,18 @@ class ConcurrentPerPartitionCursor(Cursor):
                 self._lookback_window if self._global_cursor else 0,
             )
             with self._lock:
-                self._number_of_partitions += 1
-                self._cursor_per_partition[partition_key] = cursor
-                self._semaphore_per_partition[partition_key] = threading.Semaphore(0)
-
-        with self._lock:
-            if (
-                len(self._partition_parent_state_map) == 0
-                or self._partition_parent_state_map[
-                    next(reversed(self._partition_parent_state_map))
-                ]
-                != parent_state
-            ):
-                self._partition_parent_state_map[partition_key] = deepcopy(parent_state)
+                self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
+                self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
+                    threading.Semaphore(0)
+                )

         for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
             cursor.stream_slices(),
             lambda: None,
         ):
-            self._semaphore_per_partition[partition_key].release()
+            self._semaphore_per_partition[self._to_partition_key(partition.partition)].release()
             if is_last_slice:
-                self._finished_partitions.add(partition_key)
+                self._finished_partitions.add(self._to_partition_key(partition.partition))
             yield StreamSlice(
                 partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
             )
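Note: Semaphore(0) works here as a pure slice counter, which is why both versions can
test for a drained partition with "key in finished_partitions and sem._value == 0". A
self-contained sketch of that invariant:

    import threading

    sem = threading.Semaphore(0)
    slices = ["slice-1", "slice-2", "slice-3"]
    for _ in slices:
        sem.release()  # one release per slice handed out (see the loop above)
    for _ in slices:
        sem.acquire()  # one acquire per close_partition call
    assert sem._value == 0  # drained: the condition close_partition checks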
@@ -302,15 +232,9 @@ class ConcurrentPerPartitionCursor(Cursor):
         - Logs a warning each time a partition is removed, indicating whether it was finished
           or removed due to being the oldest.
         """
-        if not self._use_global_cursor and self.limit_reached():
-            logger.info(
-                f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. "
-                f"Switching to global cursor for {self._stream_name}."
-            )
-            self._use_global_cursor = True
-
         with self._lock:
             while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
+                self._over_limit += 1
                 # Try removing finished partitions first
                 for partition_key in list(self._cursor_per_partition.keys()):
                     if (
@@ -321,7 +245,7 @@ class ConcurrentPerPartitionCursor(Cursor):
                             partition_key
                         )  # Remove the oldest partition
                         logger.warning(
-                            f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
+                            f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
                         )
                         break
             else:
@@ -330,7 +254,7 @@ class ConcurrentPerPartitionCursor(Cursor):
                     1
                 ]  # Remove the oldest partition
                 logger.warning(
-                    f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
+                    f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
                 )

     def _set_initial_state(self, stream_state: StreamState) -> None:
@@ -390,10 +314,12 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._lookback_window = int(stream_state.get("lookback_window", 0))

             for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
-                self._number_of_partitions += 1
                 self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
                     self._create_cursor(state["cursor"])
                 )
+                self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
+                    threading.Semaphore(0)
+                )

             # set default state for missing partitions if it is per partition with fallback to global
             if self._GLOBAL_STATE_KEY in stream_state:
@@ -428,26 +354,16 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._new_global_cursor = deepcopy(fixed_global_state)

     def observe(self, record: Record) -> None:
+        if not self._use_global_cursor and self.limit_reached():
+            self._use_global_cursor = True
+
         if not record.associated_slice:
             raise ValueError(
                 "Invalid state as stream slices that are emitted should refer to an existing cursor"
             )
-
-        record_cursor = self._connector_state_converter.output_format(
-            self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
-        )
-        self._update_global_cursor(record_cursor)
-        if not self._use_global_cursor:
-            self._cursor_per_partition[
-                self._to_partition_key(record.associated_slice.partition)
-            ].observe(record)
-
-    def _update_global_cursor(self, value: Any) -> None:
-        if (
-            self._new_global_cursor is None
-            or self._new_global_cursor[self.cursor_field.cursor_field_key] < value
-        ):
-            self._new_global_cursor = {self.cursor_field.cursor_field_key: copy.deepcopy(value)}
+        self._cursor_per_partition[
+            self._to_partition_key(record.associated_slice.partition)
+        ].observe(record)

     def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
         return self._partition_serializer.to_partition_key(partition)
@@ -481,4 +397,4 @@ class ConcurrentPerPartitionCursor(Cursor):
         return cursor

     def limit_reached(self) -> bool:
-        return self._number_of_partitions > self.SWITCH_TO_GLOBAL_LIMIT
+        return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
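Note: the two sides count different things in limit_reached. dev2 counts partitions
ever created and trips at SWITCH_TO_GLOBAL_LIMIT; dev0 counts evicted partitions and
trips only after DEFAULT_MAX_PARTITIONS_NUMBER evictions. Side by side:

    SWITCH_TO_GLOBAL_LIMIT = 10_000          # dev2 only
    DEFAULT_MAX_PARTITIONS_NUMBER = 10_000   # dev0 value (dev2 raises it to 25_000)

    def limit_reached_dev2(number_of_partitions: int) -> bool:
        return number_of_partitions > SWITCH_TO_GLOBAL_LIMIT

    def limit_reached_dev0(over_limit: int) -> bool:
        return over_limit > DEFAULT_MAX_PARTITIONS_NUMBER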
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py

@@ -21,7 +21,6 @@ from airbyte_cdk.sources.declarative.requesters.request_option import (
 )
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
-from airbyte_cdk.utils.mapping_helpers import _validate_component_request_option_paths


 @dataclass
@@ -123,10 +122,6 @@ class DatetimeBasedCursor(DeclarativeCursor):
         if not self.cursor_datetime_formats:
             self.cursor_datetime_formats = [self.datetime_format]

-        _validate_component_request_option_paths(
-            self.config, self.start_time_option, self.end_time_option
-        )
-
     def get_stream_state(self) -> StreamState:
         return {self.cursor_field.eval(self.config): self._cursor} if self._cursor else {}  # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__

@@ -370,15 +365,14 @@ class DatetimeBasedCursor(DeclarativeCursor):
         options: MutableMapping[str, Any] = {}
         if not stream_slice:
             return options
-
         if self.start_time_option and self.start_time_option.inject_into == option_type:
-            start_time_value = stream_slice.get(self._partition_field_start.eval(self.config))
-            self.start_time_option.inject_into_request(options, start_time_value, self.config)
-
+            options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore # field_name is always casted to an interpolated string
+                self._partition_field_start.eval(self.config)
+            )
         if self.end_time_option and self.end_time_option.inject_into == option_type:
-            end_time_value = stream_slice.get(self._partition_field_end.eval(self.config))
-            self.end_time_option.inject_into_request(options, end_time_value, self.config)
-
+            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore [union-attr]
+                self._partition_field_end.eval(self.config)
+            )
         return options

     def should_be_synced(self, record: Record) -> bool:
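Note: the dev2 side delegates to RequestOption.inject_into_request, which also
understands the nested field_path form (see the RequestOption schema change below);
dev0 only writes a single flat key. A sketch of the difference, where the nested walk
is an illustrative reading of the field_path description, not the CDK implementation:

    from typing import Any, List, MutableMapping

    def inject_flat(options: MutableMapping[str, Any], field_name: str, value: Any) -> None:
        options[field_name] = value  # dev0: one top-level key

    def inject_nested(options: MutableMapping[str, Any], field_path: List[str], value: Any) -> None:
        # hypothetical nested injection for JSON bodies (e.g. GraphQL variables)
        current = options
        for key in field_path[:-1]:
            current = current.setdefault(key, {})
        current[field_path[-1]] = value

    body: dict = {}
    inject_nested(body, ["data", "viewer", "id"], 42)
    assert body == {"data": {"viewer": {"id": 42}}}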
airbyte_cdk/sources/declarative/manifest_declarative_source.py

@@ -137,10 +137,6 @@ class ManifestDeclarativeSource(DeclarativeSource):
             self._source_config, config
         )

-        api_budget_model = self._source_config.get("api_budget")
-        if api_budget_model:
-            self._constructor.set_api_budget(api_budget_model, config)
-
         source_streams = [
             self._constructor.create_component(
                 DeclarativeStreamModel,
@@ -369,11 +365,6 @@ class ManifestDeclarativeSource(DeclarativeSource):
             # Ensure that each stream is created with a unique name
             name = dynamic_stream.get("name")

-            if not isinstance(name, str):
-                raise ValueError(
-                    f"Expected stream name {name} to be a string, got {type(name)}."
-                )
-
             if name in seen_dynamic_streams:
                 error_message = f"Dynamic streams list contains a duplicate name: {name}. Please contact Airbyte Support."
                 failure_type = FailureType.system_error
airbyte_cdk/sources/declarative/models/declarative_component_schema.py

@@ -642,48 +642,6 @@ class OAuthAuthenticator(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class Rate(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    limit: int = Field(
-        ...,
-        description="The maximum number of calls allowed within the interval.",
-        title="Limit",
-    )
-    interval: str = Field(
-        ...,
-        description="The time interval for the rate limit.",
-        examples=["PT1H", "P1D"],
-        title="Interval",
-    )
-
-
-class HttpRequestRegexMatcher(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    method: Optional[str] = Field(
-        None, description="The HTTP method to match (e.g., GET, POST).", title="Method"
-    )
-    url_base: Optional[str] = Field(
-        None,
-        description='The base URL (scheme and host, e.g. "https://api.example.com") to match.',
-        title="URL Base",
-    )
-    url_path_pattern: Optional[str] = Field(
-        None,
-        description="A regular expression pattern to match the URL path.",
-        title="URL Path Pattern",
-    )
-    params: Optional[Dict[str, Any]] = Field(
-        None, description="The query parameters to match.", title="Parameters"
-    )
-    headers: Optional[Dict[str, Any]] = Field(
-        None, description="The headers to match.", title="Headers"
-    )
-
-
 class DpathExtractor(BaseModel):
     type: Literal["DpathExtractor"]
     field_path: List[str] = Field(
@@ -929,6 +887,15 @@ class CustomDecoder(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


+class GzipJsonDecoder(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    type: Literal["GzipJsonDecoder"]
+    encoding: Optional[str] = "utf-8"
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
 class MinMaxDatetime(BaseModel):
     type: Literal["MinMaxDatetime"]
     datetime: str = Field(
@@ -1233,17 +1200,11 @@ class InjectInto(Enum):

 class RequestOption(BaseModel):
     type: Literal["RequestOption"]
-    field_name: Optional[str] = Field(
-        None,
-        description="Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.",
+    field_name: str = Field(
+        ...,
+        description="Configures which key should be used in the location that the descriptor is being injected into",
         examples=["segment_id"],
-        title="Field Name",
-    )
-    field_path: Optional[List[str]] = Field(
-        None,
-        description="Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)",
-        examples=[["data", "viewer", "id"]],
-        title="Field Path",
+        title="Request Option",
     )
     inject_into: InjectInto = Field(
         ...,
@@ -1307,8 +1268,18 @@ class LegacySessionTokenAuthenticator(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class CsvDecoder(BaseModel):
-    type: Literal["CsvDecoder"]
+class JsonParser(BaseModel):
+    type: Literal["JsonParser"]
+    encoding: Optional[str] = "utf-8"
+
+
+class JsonLineParser(BaseModel):
+    type: Literal["JsonLineParser"]
+    encoding: Optional[str] = "utf-8"
+
+
+class CsvParser(BaseModel):
+    type: Literal["CsvParser"]
     encoding: Optional[str] = "utf-8"
     delimiter: Optional[str] = ","

@@ -1607,55 +1578,6 @@ class DatetimeBasedCursor(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class FixedWindowCallRatePolicy(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    type: Literal["FixedWindowCallRatePolicy"]
-    period: str = Field(
-        ..., description="The time interval for the rate limit window.", title="Period"
-    )
-    call_limit: int = Field(
-        ...,
-        description="The maximum number of calls allowed within the period.",
-        title="Call Limit",
-    )
-    matchers: List[HttpRequestRegexMatcher] = Field(
-        ...,
-        description="List of matchers that define which requests this policy applies to.",
-        title="Matchers",
-    )
-
-
-class MovingWindowCallRatePolicy(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    type: Literal["MovingWindowCallRatePolicy"]
-    rates: List[Rate] = Field(
-        ...,
-        description="List of rates that define the call limits for different time intervals.",
-        title="Rates",
-    )
-    matchers: List[HttpRequestRegexMatcher] = Field(
-        ...,
-        description="List of matchers that define which requests this policy applies to.",
-        title="Matchers",
-    )
-
-
-class UnlimitedCallRatePolicy(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    type: Literal["UnlimitedCallRatePolicy"]
-    matchers: List[HttpRequestRegexMatcher] = Field(
-        ...,
-        description="List of matchers that define which requests this policy applies to.",
-        title="Matchers",
-    )
-
-
 class DefaultErrorHandler(BaseModel):
     type: Literal["DefaultErrorHandler"]
     backoff_strategies: Optional[
@@ -1752,9 +1674,9 @@ class RecordSelector(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class GzipDecoder(BaseModel):
-    type: Literal["GzipDecoder"]
-    decoder: Union[CsvDecoder, GzipDecoder, JsonDecoder, JsonlDecoder]
+class GzipParser(BaseModel):
+    type: Literal["GzipParser"]
+    inner_parser: Union[JsonLineParser, CsvParser, JsonParser]


 class Spec(BaseModel):
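Note: the dev0 parser models above compose by nesting, e.g. gunzip first and then
parse JSON Lines. A sketch constructing the generated pydantic models directly
(connectors normally declare this in the YAML manifest instead; import path taken
from the generated module shown in this diff):

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        GzipParser,
        JsonLineParser,
    )

    parser = GzipParser(
        type="GzipParser",
        inner_parser=JsonLineParser(type="JsonLineParser", encoding="utf-8"),
    )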
@@ -1787,51 +1709,23 @@ class CompositeErrorHandler(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class HTTPAPIBudget(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    type: Literal["HTTPAPIBudget"]
-    policies: List[
-        Union[
-            FixedWindowCallRatePolicy,
-            MovingWindowCallRatePolicy,
-            UnlimitedCallRatePolicy,
-        ]
-    ] = Field(
-        ...,
-        description="List of call rate policies that define how many calls are allowed.",
-        title="Policies",
-    )
-    ratelimit_reset_header: Optional[str] = Field(
-        "ratelimit-reset",
-        description="The HTTP response header name that indicates when the rate limit resets.",
-        title="Rate Limit Reset Header",
-    )
-    ratelimit_remaining_header: Optional[str] = Field(
-        "ratelimit-remaining",
-        description="The HTTP response header name that indicates the number of remaining allowed calls.",
-        title="Rate Limit Remaining Header",
-    )
-    status_codes_for_ratelimit_hit: Optional[List[int]] = Field(
-        [429],
-        description="List of HTTP status codes that indicate a rate limit has been hit.",
-        title="Status Codes for Rate Limit Hit",
-    )
-
-
 class ZipfileDecoder(BaseModel):
     class Config:
         extra = Extra.allow

     type: Literal["ZipfileDecoder"]
-    decoder: Union[CsvDecoder, GzipDecoder, JsonDecoder, JsonlDecoder] = Field(
+    parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser] = Field(
         ...,
         description="Parser to parse the decompressed data from the zipfile(s).",
         title="Parser",
     )


+class CompositeRawDecoder(BaseModel):
+    type: Literal["CompositeRawDecoder"]
+    parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser]
+
+
 class DeclarativeSource1(BaseModel):
     class Config:
         extra = Extra.forbid
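Note: the removed dev2 call-rate models fit together as shown below; in a manifest
they live under the api_budget key that ManifestDeclarativeSource.set_api_budget
consumed (also removed in this diff). A sketch constructing them directly, with all
field names taken from the dev2-only models above:

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        HTTPAPIBudget,
        HttpRequestRegexMatcher,
        MovingWindowCallRatePolicy,
        Rate,
    )

    api_budget = HTTPAPIBudget(
        type="HTTPAPIBudget",
        policies=[
            MovingWindowCallRatePolicy(
                type="MovingWindowCallRatePolicy",
                rates=[Rate(limit=100, interval="PT1M")],  # at most 100 calls per minute
                matchers=[HttpRequestRegexMatcher(method="GET", url_path_pattern="/v1/.*")],
            )
        ],
    )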
@@ -1848,7 +1742,6 @@ class DeclarativeSource1(BaseModel):
     definitions: Optional[Dict[str, Any]] = None
     spec: Optional[Spec] = None
     concurrency_level: Optional[ConcurrencyLevel] = None
-    api_budget: Optional[HTTPAPIBudget] = None
     metadata: Optional[Dict[str, Any]] = Field(
         None,
         description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
@@ -1875,7 +1768,6 @@ class DeclarativeSource2(BaseModel):
     definitions: Optional[Dict[str, Any]] = None
     spec: Optional[Spec] = None
     concurrency_level: Optional[ConcurrencyLevel] = None
-    api_budget: Optional[HTTPAPIBudget] = None
     metadata: Optional[Dict[str, Any]] = Field(
         None,
         description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
@@ -2035,7 +1927,7 @@ class SessionTokenAuthenticator(BaseModel):
         description="Authentication method to use for requests sent to the API, specifying how to inject the session token.",
         title="Data Request Authentication",
     )
-    decoder: Optional[Union[JsonDecoder, XmlDecoder]] = Field(
+    decoder: Optional[Union[JsonDecoder, XmlDecoder, CompositeRawDecoder]] = Field(
         None, description="Component used to decode the response.", title="Decoder"
     )
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
@@ -2235,12 +2127,12 @@ class SimpleRetriever(BaseModel):
     decoder: Optional[
         Union[
             CustomDecoder,
-            CsvDecoder,
-            GzipDecoder,
             JsonDecoder,
             JsonlDecoder,
             IterableDecoder,
             XmlDecoder,
+            GzipJsonDecoder,
+            CompositeRawDecoder,
             ZipfileDecoder,
         ]
     ] = Field(
@@ -2313,12 +2205,12 @@ class AsyncRetriever(BaseModel):
     decoder: Optional[
         Union[
             CustomDecoder,
-            CsvDecoder,
-            GzipDecoder,
             JsonDecoder,
             JsonlDecoder,
             IterableDecoder,
             XmlDecoder,
+            GzipJsonDecoder,
+            CompositeRawDecoder,
             ZipfileDecoder,
         ]
     ] = Field(
@@ -2329,12 +2221,12 @@ class AsyncRetriever(BaseModel):
     download_decoder: Optional[
         Union[
             CustomDecoder,
-            CsvDecoder,
-            GzipDecoder,
             JsonDecoder,
             JsonlDecoder,
             IterableDecoder,
             XmlDecoder,
+            GzipJsonDecoder,
+            CompositeRawDecoder,
             ZipfileDecoder,
         ]
     ] = Field(
@@ -2379,7 +2271,6 @@ class DynamicDeclarativeStream(BaseModel):


 ComplexFieldType.update_forward_refs()
-GzipDecoder.update_forward_refs()
 CompositeErrorHandler.update_forward_refs()
 DeclarativeSource1.update_forward_refs()
 DeclarativeSource2.update_forward_refs()