airbyte-cdk 6.34.1.dev0__py3-none-any.whl → 6.34.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +16 -12
  2. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  3. airbyte_cdk/connector_builder/test_reader/helpers.py +591 -0
  4. airbyte_cdk/connector_builder/test_reader/message_grouper.py +160 -0
  5. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  6. airbyte_cdk/connector_builder/test_reader/types.py +75 -0
  7. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  8. airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
  9. airbyte_cdk/sources/declarative/auth/oauth.py +6 -1
  10. airbyte_cdk/sources/declarative/auth/token.py +3 -8
  11. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +30 -79
  12. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +203 -100
  13. airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
  14. airbyte_cdk/sources/declarative/decoders/__init__.py +0 -4
  15. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +7 -2
  16. airbyte_cdk/sources/declarative/decoders/json_decoder.py +12 -58
  17. airbyte_cdk/sources/declarative/extractors/record_selector.py +12 -3
  18. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +56 -25
  19. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +12 -6
  20. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +6 -2
  21. airbyte_cdk/sources/declarative/interpolation/jinja.py +13 -0
  22. airbyte_cdk/sources/declarative/manifest_declarative_source.py +9 -0
  23. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +150 -41
  24. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +234 -84
  25. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  26. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
  27. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +26 -18
  28. airbyte_cdk/sources/declarative/requesters/http_requester.py +8 -2
  29. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +16 -5
  30. airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
  31. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
  32. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +1 -4
  33. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +0 -3
  34. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -47
  35. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
  36. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -3
  37. airbyte_cdk/sources/declarative/transformations/add_fields.py +4 -4
  38. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
  39. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  40. airbyte_cdk/sources/file_based/file_based_source.py +70 -37
  41. airbyte_cdk/sources/file_based/file_based_stream_reader.py +107 -12
  42. airbyte_cdk/sources/file_based/stream/__init__.py +10 -1
  43. airbyte_cdk/sources/file_based/stream/identities_stream.py +47 -0
  44. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +85 -0
  45. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  46. airbyte_cdk/sources/streams/call_rate.py +185 -47
  47. airbyte_cdk/sources/streams/http/http.py +1 -2
  48. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +217 -56
  49. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +144 -73
  50. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  51. airbyte_cdk/test/mock_http/mocker.py +9 -1
  52. airbyte_cdk/test/mock_http/response.py +6 -3
  53. airbyte_cdk/utils/datetime_helpers.py +48 -66
  54. airbyte_cdk/utils/mapping_helpers.py +126 -26
  55. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/METADATA +1 -1
  56. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/RECORD +60 -51
  57. airbyte_cdk/connector_builder/message_grouper.py +0 -448
  58. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/LICENSE.txt +0 -0
  59. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/LICENSE_SHORT +0 -0
  60. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/WHEEL +0 -0
  61. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/entry_points.txt +0 -0
@@ -5,6 +5,7 @@
5
5
  import copy
6
6
  import logging
7
7
  import threading
8
+ import time
8
9
  from collections import OrderedDict
9
10
  from copy import deepcopy
10
11
  from datetime import timedelta
@@ -58,7 +59,8 @@ class ConcurrentPerPartitionCursor(Cursor):
58
59
  CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
59
60
  """
60
61
 
61
- DEFAULT_MAX_PARTITIONS_NUMBER = 10000
62
+ DEFAULT_MAX_PARTITIONS_NUMBER = 25_000
63
+ SWITCH_TO_GLOBAL_LIMIT = 10_000
62
64
  _NO_STATE: Mapping[str, Any] = {}
63
65
  _NO_CURSOR_STATE: Mapping[str, Any] = {}
64
66
  _GLOBAL_STATE_KEY = "state"
@@ -99,9 +101,11 @@ class ConcurrentPerPartitionCursor(Cursor):
99
101
  self._new_global_cursor: Optional[StreamState] = None
100
102
  self._lookback_window: int = 0
101
103
  self._parent_state: Optional[StreamState] = None
102
- self._over_limit: int = 0
104
+ self._number_of_partitions: int = 0
103
105
  self._use_global_cursor: bool = False
104
106
  self._partition_serializer = PerPartitionKeySerializer()
107
+ # Track the last time a state message was emitted
108
+ self._last_emission_time: float = 0.0
105
109
 
106
110
  self._set_initial_state(stream_state)
107
111
 
@@ -141,21 +145,16 @@ class ConcurrentPerPartitionCursor(Cursor):
141
145
  raise ValueError("stream_slice cannot be None")
142
146
 
143
147
  partition_key = self._to_partition_key(stream_slice.partition)
144
- self._cursor_per_partition[partition_key].close_partition(partition=partition)
145
148
  with self._lock:
146
149
  self._semaphore_per_partition[partition_key].acquire()
147
- cursor = self._cursor_per_partition[partition_key]
148
- if (
149
- partition_key in self._finished_partitions
150
- and self._semaphore_per_partition[partition_key]._value == 0
151
- ):
150
+ if not self._use_global_cursor:
151
+ self._cursor_per_partition[partition_key].close_partition(partition=partition)
152
+ cursor = self._cursor_per_partition[partition_key]
152
153
  if (
153
- self._new_global_cursor is None
154
- or self._new_global_cursor[self.cursor_field.cursor_field_key]
155
- < cursor.state[self.cursor_field.cursor_field_key]
154
+ partition_key in self._finished_partitions
155
+ and self._semaphore_per_partition[partition_key]._value == 0
156
156
  ):
157
- self._new_global_cursor = copy.deepcopy(cursor.state)
158
- if not self._use_global_cursor:
157
+ self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])
159
158
  self._emit_state_message()
160
159
 
161
160
  def ensure_at_least_one_state_emitted(self) -> None:
@@ -169,9 +168,23 @@ class ConcurrentPerPartitionCursor(Cursor):
169
168
  self._global_cursor = self._new_global_cursor
170
169
  self._lookback_window = self._timer.finish()
171
170
  self._parent_state = self._partition_router.get_stream_state()
172
- self._emit_state_message()
171
+ self._emit_state_message(throttle=False)
173
172
 
174
- def _emit_state_message(self) -> None:
173
+ def _throttle_state_message(self) -> Optional[float]:
174
+ """
175
+ Throttles the state message emission to once every 60 seconds.
176
+ """
177
+ current_time = time.time()
178
+ if current_time - self._last_emission_time <= 60:
179
+ return None
180
+ return current_time
181
+
182
+ def _emit_state_message(self, throttle: bool = True) -> None:
183
+ if throttle:
184
+ current_time = self._throttle_state_message()
185
+ if current_time is None:
186
+ return
187
+ self._last_emission_time = current_time
175
188
  self._connector_state_manager.update_state_for_stream(
176
189
  self._stream_name,
177
190
  self._stream_namespace,
@@ -202,6 +215,7 @@ class ConcurrentPerPartitionCursor(Cursor):
202
215
  self._lookback_window if self._global_cursor else 0,
203
216
  )
204
217
  with self._lock:
218
+ self._number_of_partitions += 1
205
219
  self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
206
220
  self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
207
221
  threading.Semaphore(0)
@@ -232,9 +246,15 @@ class ConcurrentPerPartitionCursor(Cursor):
232
246
  - Logs a warning each time a partition is removed, indicating whether it was finished
233
247
  or removed due to being the oldest.
234
248
  """
249
+ if not self._use_global_cursor and self.limit_reached():
250
+ logger.info(
251
+ f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. "
252
+ f"Switching to global cursor for {self._stream_name}."
253
+ )
254
+ self._use_global_cursor = True
255
+
235
256
  with self._lock:
236
257
  while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
237
- self._over_limit += 1
238
258
  # Try removing finished partitions first
239
259
  for partition_key in list(self._cursor_per_partition.keys()):
240
260
  if (
@@ -245,7 +265,7 @@ class ConcurrentPerPartitionCursor(Cursor):
245
265
  partition_key
246
266
  ) # Remove the oldest partition
247
267
  logger.warning(
248
- f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
268
+ f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
249
269
  )
250
270
  break
251
271
  else:
@@ -254,7 +274,7 @@ class ConcurrentPerPartitionCursor(Cursor):
254
274
  1
255
275
  ] # Remove the oldest partition
256
276
  logger.warning(
257
- f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
277
+ f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
258
278
  )
259
279
 
260
280
  def _set_initial_state(self, stream_state: StreamState) -> None:
@@ -314,6 +334,7 @@ class ConcurrentPerPartitionCursor(Cursor):
314
334
  self._lookback_window = int(stream_state.get("lookback_window", 0))
315
335
 
316
336
  for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
337
+ self._number_of_partitions += 1
317
338
  self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
318
339
  self._create_cursor(state["cursor"])
319
340
  )
@@ -354,16 +375,26 @@ class ConcurrentPerPartitionCursor(Cursor):
354
375
  self._new_global_cursor = deepcopy(fixed_global_state)
355
376
 
356
377
  def observe(self, record: Record) -> None:
357
- if not self._use_global_cursor and self.limit_reached():
358
- self._use_global_cursor = True
359
-
360
378
  if not record.associated_slice:
361
379
  raise ValueError(
362
380
  "Invalid state as stream slices that are emitted should refer to an existing cursor"
363
381
  )
364
- self._cursor_per_partition[
365
- self._to_partition_key(record.associated_slice.partition)
366
- ].observe(record)
382
+
383
+ record_cursor = self._connector_state_converter.output_format(
384
+ self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
385
+ )
386
+ self._update_global_cursor(record_cursor)
387
+ if not self._use_global_cursor:
388
+ self._cursor_per_partition[
389
+ self._to_partition_key(record.associated_slice.partition)
390
+ ].observe(record)
391
+
392
+ def _update_global_cursor(self, value: Any) -> None:
393
+ if (
394
+ self._new_global_cursor is None
395
+ or self._new_global_cursor[self.cursor_field.cursor_field_key] < value
396
+ ):
397
+ self._new_global_cursor = {self.cursor_field.cursor_field_key: copy.deepcopy(value)}
367
398
 
368
399
  def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
369
400
  return self._partition_serializer.to_partition_key(partition)
@@ -397,4 +428,4 @@ class ConcurrentPerPartitionCursor(Cursor):
397
428
  return cursor
398
429
 
399
430
  def limit_reached(self) -> bool:
400
- return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
431
+ return self._number_of_partitions > self.SWITCH_TO_GLOBAL_LIMIT
@@ -21,6 +21,7 @@ from airbyte_cdk.sources.declarative.requesters.request_option import (
21
21
  )
22
22
  from airbyte_cdk.sources.message import MessageRepository
23
23
  from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
24
+ from airbyte_cdk.utils.mapping_helpers import _validate_component_request_option_paths
24
25
 
25
26
 
26
27
  @dataclass
@@ -122,6 +123,10 @@ class DatetimeBasedCursor(DeclarativeCursor):
122
123
  if not self.cursor_datetime_formats:
123
124
  self.cursor_datetime_formats = [self.datetime_format]
124
125
 
126
+ _validate_component_request_option_paths(
127
+ self.config, self.start_time_option, self.end_time_option
128
+ )
129
+
125
130
  def get_stream_state(self) -> StreamState:
126
131
  return {self.cursor_field.eval(self.config): self._cursor} if self._cursor else {} # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
127
132
 
@@ -365,14 +370,15 @@ class DatetimeBasedCursor(DeclarativeCursor):
365
370
  options: MutableMapping[str, Any] = {}
366
371
  if not stream_slice:
367
372
  return options
373
+
368
374
  if self.start_time_option and self.start_time_option.inject_into == option_type:
369
- options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore # field_name is always casted to an interpolated string
370
- self._partition_field_start.eval(self.config)
371
- )
375
+ start_time_value = stream_slice.get(self._partition_field_start.eval(self.config))
376
+ self.start_time_option.inject_into_request(options, start_time_value, self.config)
377
+
372
378
  if self.end_time_option and self.end_time_option.inject_into == option_type:
373
- options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore [union-attr]
374
- self._partition_field_end.eval(self.config)
375
- )
379
+ end_time_value = stream_slice.get(self._partition_field_end.eval(self.config))
380
+ self.end_time_option.inject_into_request(options, end_time_value, self.config)
381
+
376
382
  return options
377
383
 
378
384
  def should_be_synced(self, record: Record) -> bool:
@@ -115,7 +115,9 @@ class GlobalSubstreamCursor(DeclarativeCursor):
115
115
  * Yield the last slice. At that point, once there are as many slices yielded as closes, the global slice will be closed too
116
116
  """
117
117
  slice_generator = (
118
- StreamSlice(partition=partition, cursor_slice=cursor_slice)
118
+ StreamSlice(
119
+ partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
120
+ )
119
121
  for partition in self._partition_router.stream_slices()
120
122
  for cursor_slice in self._stream_cursor.stream_slices()
121
123
  )
@@ -131,7 +133,9 @@ class GlobalSubstreamCursor(DeclarativeCursor):
131
133
 
132
134
  def generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
133
135
  slice_generator = (
134
- StreamSlice(partition=partition, cursor_slice=cursor_slice)
136
+ StreamSlice(
137
+ partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
138
+ )
135
139
  for cursor_slice in self._stream_cursor.stream_slices()
136
140
  )
137
141
 
@@ -11,10 +11,12 @@ from jinja2.environment import Template
11
11
  from jinja2.exceptions import UndefinedError
12
12
  from jinja2.sandbox import SandboxedEnvironment
13
13
 
14
+ from airbyte_cdk.models import FailureType
14
15
  from airbyte_cdk.sources.declarative.interpolation.filters import filters
15
16
  from airbyte_cdk.sources.declarative.interpolation.interpolation import Interpolation
16
17
  from airbyte_cdk.sources.declarative.interpolation.macros import macros
17
18
  from airbyte_cdk.sources.types import Config
19
+ from airbyte_cdk.utils import AirbyteTracedException
18
20
 
19
21
 
20
22
  class StreamPartitionAccessEnvironment(SandboxedEnvironment):
@@ -36,6 +38,10 @@ _ALIASES = {
36
38
  "stream_partition": "stream_slice", # Use stream_partition to access partition router's values
37
39
  }
38
40
 
41
+ _UNSUPPORTED_INTERPOLATION_VARIABLES: Mapping[str, str] = {
42
+ "stream_state": "`stream_state` is no longer supported for interpolation. We recommend using `stream_interval` instead. Please reference the CDK Migration Guide for more information.",
43
+ }
44
+
39
45
  # These extensions are not installed so they're not currently a problem,
40
46
  # but we're still explicitly removing them from the jinja context.
41
47
  # At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
@@ -95,6 +101,13 @@ class JinjaInterpolation(Interpolation):
95
101
  elif equivalent in context:
96
102
  context[alias] = context[equivalent]
97
103
 
104
+ for variable_name in _UNSUPPORTED_INTERPOLATION_VARIABLES:
105
+ if variable_name in input_str:
106
+ raise AirbyteTracedException(
107
+ message=_UNSUPPORTED_INTERPOLATION_VARIABLES[variable_name],
108
+ internal_message=_UNSUPPORTED_INTERPOLATION_VARIABLES[variable_name],
109
+ failure_type=FailureType.config_error,
110
+ )
98
111
  try:
99
112
  if isinstance(input_str, str):
100
113
  result = self._eval(input_str, context)
@@ -137,6 +137,10 @@ class ManifestDeclarativeSource(DeclarativeSource):
137
137
  self._source_config, config
138
138
  )
139
139
 
140
+ api_budget_model = self._source_config.get("api_budget")
141
+ if api_budget_model:
142
+ self._constructor.set_api_budget(api_budget_model, config)
143
+
140
144
  source_streams = [
141
145
  self._constructor.create_component(
142
146
  DeclarativeStreamModel,
@@ -365,6 +369,11 @@ class ManifestDeclarativeSource(DeclarativeSource):
365
369
  # Ensure that each stream is created with a unique name
366
370
  name = dynamic_stream.get("name")
367
371
 
372
+ if not isinstance(name, str):
373
+ raise ValueError(
374
+ f"Expected stream name {name} to be a string, got {type(name)}."
375
+ )
376
+
368
377
  if name in seen_dynamic_streams:
369
378
  error_message = f"Dynamic streams list contains a duplicate name: {name}. Please contact Airbyte Support."
370
379
  failure_type = FailureType.system_error
@@ -642,6 +642,48 @@ class OAuthAuthenticator(BaseModel):
642
642
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
643
643
 
644
644
 
645
+ class Rate(BaseModel):
646
+ class Config:
647
+ extra = Extra.allow
648
+
649
+ limit: int = Field(
650
+ ...,
651
+ description="The maximum number of calls allowed within the interval.",
652
+ title="Limit",
653
+ )
654
+ interval: str = Field(
655
+ ...,
656
+ description="The time interval for the rate limit.",
657
+ examples=["PT1H", "P1D"],
658
+ title="Interval",
659
+ )
660
+
661
+
662
+ class HttpRequestRegexMatcher(BaseModel):
663
+ class Config:
664
+ extra = Extra.allow
665
+
666
+ method: Optional[str] = Field(
667
+ None, description="The HTTP method to match (e.g., GET, POST).", title="Method"
668
+ )
669
+ url_base: Optional[str] = Field(
670
+ None,
671
+ description='The base URL (scheme and host, e.g. "https://api.example.com") to match.',
672
+ title="URL Base",
673
+ )
674
+ url_path_pattern: Optional[str] = Field(
675
+ None,
676
+ description="A regular expression pattern to match the URL path.",
677
+ title="URL Path Pattern",
678
+ )
679
+ params: Optional[Dict[str, Any]] = Field(
680
+ None, description="The query parameters to match.", title="Parameters"
681
+ )
682
+ headers: Optional[Dict[str, Any]] = Field(
683
+ None, description="The headers to match.", title="Headers"
684
+ )
685
+
686
+
645
687
  class DpathExtractor(BaseModel):
646
688
  type: Literal["DpathExtractor"]
647
689
  field_path: List[str] = Field(
@@ -887,15 +929,6 @@ class CustomDecoder(BaseModel):
887
929
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
888
930
 
889
931
 
890
- class GzipJsonDecoder(BaseModel):
891
- class Config:
892
- extra = Extra.allow
893
-
894
- type: Literal["GzipJsonDecoder"]
895
- encoding: Optional[str] = "utf-8"
896
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
897
-
898
-
899
932
  class MinMaxDatetime(BaseModel):
900
933
  type: Literal["MinMaxDatetime"]
901
934
  datetime: str = Field(
@@ -1200,11 +1233,17 @@ class InjectInto(Enum):
1200
1233
 
1201
1234
  class RequestOption(BaseModel):
1202
1235
  type: Literal["RequestOption"]
1203
- field_name: str = Field(
1204
- ...,
1205
- description="Configures which key should be used in the location that the descriptor is being injected into",
1236
+ field_name: Optional[str] = Field(
1237
+ None,
1238
+ description="Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.",
1206
1239
  examples=["segment_id"],
1207
- title="Request Option",
1240
+ title="Field Name",
1241
+ )
1242
+ field_path: Optional[List[str]] = Field(
1243
+ None,
1244
+ description="Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)",
1245
+ examples=[["data", "viewer", "id"]],
1246
+ title="Field Path",
1208
1247
  )
1209
1248
  inject_into: InjectInto = Field(
1210
1249
  ...,
@@ -1268,18 +1307,8 @@ class LegacySessionTokenAuthenticator(BaseModel):
1268
1307
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1269
1308
 
1270
1309
 
1271
- class JsonParser(BaseModel):
1272
- type: Literal["JsonParser"]
1273
- encoding: Optional[str] = "utf-8"
1274
-
1275
-
1276
- class JsonLineParser(BaseModel):
1277
- type: Literal["JsonLineParser"]
1278
- encoding: Optional[str] = "utf-8"
1279
-
1280
-
1281
- class CsvParser(BaseModel):
1282
- type: Literal["CsvParser"]
1310
+ class CsvDecoder(BaseModel):
1311
+ type: Literal["CsvDecoder"]
1283
1312
  encoding: Optional[str] = "utf-8"
1284
1313
  delimiter: Optional[str] = ","
1285
1314
 
@@ -1578,6 +1607,55 @@ class DatetimeBasedCursor(BaseModel):
1578
1607
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1579
1608
 
1580
1609
 
1610
+ class FixedWindowCallRatePolicy(BaseModel):
1611
+ class Config:
1612
+ extra = Extra.allow
1613
+
1614
+ type: Literal["FixedWindowCallRatePolicy"]
1615
+ period: str = Field(
1616
+ ..., description="The time interval for the rate limit window.", title="Period"
1617
+ )
1618
+ call_limit: int = Field(
1619
+ ...,
1620
+ description="The maximum number of calls allowed within the period.",
1621
+ title="Call Limit",
1622
+ )
1623
+ matchers: List[HttpRequestRegexMatcher] = Field(
1624
+ ...,
1625
+ description="List of matchers that define which requests this policy applies to.",
1626
+ title="Matchers",
1627
+ )
1628
+
1629
+
1630
+ class MovingWindowCallRatePolicy(BaseModel):
1631
+ class Config:
1632
+ extra = Extra.allow
1633
+
1634
+ type: Literal["MovingWindowCallRatePolicy"]
1635
+ rates: List[Rate] = Field(
1636
+ ...,
1637
+ description="List of rates that define the call limits for different time intervals.",
1638
+ title="Rates",
1639
+ )
1640
+ matchers: List[HttpRequestRegexMatcher] = Field(
1641
+ ...,
1642
+ description="List of matchers that define which requests this policy applies to.",
1643
+ title="Matchers",
1644
+ )
1645
+
1646
+
1647
+ class UnlimitedCallRatePolicy(BaseModel):
1648
+ class Config:
1649
+ extra = Extra.allow
1650
+
1651
+ type: Literal["UnlimitedCallRatePolicy"]
1652
+ matchers: List[HttpRequestRegexMatcher] = Field(
1653
+ ...,
1654
+ description="List of matchers that define which requests this policy applies to.",
1655
+ title="Matchers",
1656
+ )
1657
+
1658
+
1581
1659
  class DefaultErrorHandler(BaseModel):
1582
1660
  type: Literal["DefaultErrorHandler"]
1583
1661
  backoff_strategies: Optional[
@@ -1674,9 +1752,9 @@ class RecordSelector(BaseModel):
1674
1752
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1675
1753
 
1676
1754
 
1677
- class GzipParser(BaseModel):
1678
- type: Literal["GzipParser"]
1679
- inner_parser: Union[JsonLineParser, CsvParser, JsonParser]
1755
+ class GzipDecoder(BaseModel):
1756
+ type: Literal["GzipDecoder"]
1757
+ decoder: Union[CsvDecoder, GzipDecoder, JsonDecoder, JsonlDecoder]
1680
1758
 
1681
1759
 
1682
1760
  class Spec(BaseModel):
@@ -1709,23 +1787,51 @@ class CompositeErrorHandler(BaseModel):
1709
1787
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1710
1788
 
1711
1789
 
1790
+ class HTTPAPIBudget(BaseModel):
1791
+ class Config:
1792
+ extra = Extra.allow
1793
+
1794
+ type: Literal["HTTPAPIBudget"]
1795
+ policies: List[
1796
+ Union[
1797
+ FixedWindowCallRatePolicy,
1798
+ MovingWindowCallRatePolicy,
1799
+ UnlimitedCallRatePolicy,
1800
+ ]
1801
+ ] = Field(
1802
+ ...,
1803
+ description="List of call rate policies that define how many calls are allowed.",
1804
+ title="Policies",
1805
+ )
1806
+ ratelimit_reset_header: Optional[str] = Field(
1807
+ "ratelimit-reset",
1808
+ description="The HTTP response header name that indicates when the rate limit resets.",
1809
+ title="Rate Limit Reset Header",
1810
+ )
1811
+ ratelimit_remaining_header: Optional[str] = Field(
1812
+ "ratelimit-remaining",
1813
+ description="The HTTP response header name that indicates the number of remaining allowed calls.",
1814
+ title="Rate Limit Remaining Header",
1815
+ )
1816
+ status_codes_for_ratelimit_hit: Optional[List[int]] = Field(
1817
+ [429],
1818
+ description="List of HTTP status codes that indicate a rate limit has been hit.",
1819
+ title="Status Codes for Rate Limit Hit",
1820
+ )
1821
+
1822
+
1712
1823
  class ZipfileDecoder(BaseModel):
1713
1824
  class Config:
1714
1825
  extra = Extra.allow
1715
1826
 
1716
1827
  type: Literal["ZipfileDecoder"]
1717
- parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser] = Field(
1828
+ decoder: Union[CsvDecoder, GzipDecoder, JsonDecoder, JsonlDecoder] = Field(
1718
1829
  ...,
1719
1830
  description="Parser to parse the decompressed data from the zipfile(s).",
1720
1831
  title="Parser",
1721
1832
  )
1722
1833
 
1723
1834
 
1724
- class CompositeRawDecoder(BaseModel):
1725
- type: Literal["CompositeRawDecoder"]
1726
- parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser]
1727
-
1728
-
1729
1835
  class DeclarativeSource1(BaseModel):
1730
1836
  class Config:
1731
1837
  extra = Extra.forbid
@@ -1742,6 +1848,7 @@ class DeclarativeSource1(BaseModel):
1742
1848
  definitions: Optional[Dict[str, Any]] = None
1743
1849
  spec: Optional[Spec] = None
1744
1850
  concurrency_level: Optional[ConcurrencyLevel] = None
1851
+ api_budget: Optional[HTTPAPIBudget] = None
1745
1852
  metadata: Optional[Dict[str, Any]] = Field(
1746
1853
  None,
1747
1854
  description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
@@ -1768,6 +1875,7 @@ class DeclarativeSource2(BaseModel):
1768
1875
  definitions: Optional[Dict[str, Any]] = None
1769
1876
  spec: Optional[Spec] = None
1770
1877
  concurrency_level: Optional[ConcurrencyLevel] = None
1878
+ api_budget: Optional[HTTPAPIBudget] = None
1771
1879
  metadata: Optional[Dict[str, Any]] = Field(
1772
1880
  None,
1773
1881
  description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
@@ -1927,7 +2035,7 @@ class SessionTokenAuthenticator(BaseModel):
1927
2035
  description="Authentication method to use for requests sent to the API, specifying how to inject the session token.",
1928
2036
  title="Data Request Authentication",
1929
2037
  )
1930
- decoder: Optional[Union[JsonDecoder, XmlDecoder, CompositeRawDecoder]] = Field(
2038
+ decoder: Optional[Union[JsonDecoder, XmlDecoder]] = Field(
1931
2039
  None, description="Component used to decode the response.", title="Decoder"
1932
2040
  )
1933
2041
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
@@ -2127,12 +2235,12 @@ class SimpleRetriever(BaseModel):
2127
2235
  decoder: Optional[
2128
2236
  Union[
2129
2237
  CustomDecoder,
2238
+ CsvDecoder,
2239
+ GzipDecoder,
2130
2240
  JsonDecoder,
2131
2241
  JsonlDecoder,
2132
2242
  IterableDecoder,
2133
2243
  XmlDecoder,
2134
- GzipJsonDecoder,
2135
- CompositeRawDecoder,
2136
2244
  ZipfileDecoder,
2137
2245
  ]
2138
2246
  ] = Field(
@@ -2205,12 +2313,12 @@ class AsyncRetriever(BaseModel):
2205
2313
  decoder: Optional[
2206
2314
  Union[
2207
2315
  CustomDecoder,
2316
+ CsvDecoder,
2317
+ GzipDecoder,
2208
2318
  JsonDecoder,
2209
2319
  JsonlDecoder,
2210
2320
  IterableDecoder,
2211
2321
  XmlDecoder,
2212
- GzipJsonDecoder,
2213
- CompositeRawDecoder,
2214
2322
  ZipfileDecoder,
2215
2323
  ]
2216
2324
  ] = Field(
@@ -2221,12 +2329,12 @@ class AsyncRetriever(BaseModel):
2221
2329
  download_decoder: Optional[
2222
2330
  Union[
2223
2331
  CustomDecoder,
2332
+ CsvDecoder,
2333
+ GzipDecoder,
2224
2334
  JsonDecoder,
2225
2335
  JsonlDecoder,
2226
2336
  IterableDecoder,
2227
2337
  XmlDecoder,
2228
- GzipJsonDecoder,
2229
- CompositeRawDecoder,
2230
2338
  ZipfileDecoder,
2231
2339
  ]
2232
2340
  ] = Field(
@@ -2271,6 +2379,7 @@ class DynamicDeclarativeStream(BaseModel):
2271
2379
 
2272
2380
 
2273
2381
  ComplexFieldType.update_forward_refs()
2382
+ GzipDecoder.update_forward_refs()
2274
2383
  CompositeErrorHandler.update_forward_refs()
2275
2384
  DeclarativeSource1.update_forward_refs()
2276
2385
  DeclarativeSource2.update_forward_refs()