airbyte-cdk 6.34.0.dev1__py3-none-any.whl → 6.34.1.dev0__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (53)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +12 -16
  2. airbyte_cdk/connector_builder/message_grouper.py +448 -0
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
  5. airbyte_cdk/sources/declarative/auth/oauth.py +1 -6
  6. airbyte_cdk/sources/declarative/auth/token.py +8 -3
  7. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +19 -30
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +85 -203
  9. airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
  10. airbyte_cdk/sources/declarative/decoders/__init__.py +4 -0
  11. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -7
  12. airbyte_cdk/sources/declarative/decoders/json_decoder.py +58 -12
  13. airbyte_cdk/sources/declarative/extractors/record_selector.py +3 -12
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +25 -56
  15. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -12
  16. airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -9
  17. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -150
  18. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +84 -234
  19. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  20. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
  21. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +18 -26
  22. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -8
  23. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -16
  24. airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
  25. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
  26. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
  27. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -4
  28. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
  29. airbyte_cdk/sources/file_based/file_based_source.py +37 -70
  30. airbyte_cdk/sources/file_based/file_based_stream_reader.py +12 -107
  31. airbyte_cdk/sources/file_based/stream/__init__.py +1 -10
  32. airbyte_cdk/sources/streams/call_rate.py +47 -185
  33. airbyte_cdk/sources/streams/http/http.py +2 -1
  34. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +56 -217
  35. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +73 -144
  36. airbyte_cdk/utils/datetime_helpers.py +66 -48
  37. airbyte_cdk/utils/mapping_helpers.py +26 -126
  38. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/METADATA +1 -1
  39. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/RECORD +43 -52
  40. airbyte_cdk/connector_builder/test_reader/__init__.py +0 -7
  41. airbyte_cdk/connector_builder/test_reader/helpers.py +0 -591
  42. airbyte_cdk/connector_builder/test_reader/message_grouper.py +0 -160
  43. airbyte_cdk/connector_builder/test_reader/reader.py +0 -441
  44. airbyte_cdk/connector_builder/test_reader/types.py +0 -75
  45. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +0 -81
  46. airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -47
  47. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +0 -85
  48. airbyte_cdk/sources/specs/transfer_modes.py +0 -26
  49. airbyte_cdk/sources/streams/permissions/identities_stream.py +0 -75
  50. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE.txt +0 -0
  51. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE_SHORT +0 -0
  52. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/WHEEL +0 -0
  53. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/extractors/record_selector.py

@@ -41,7 +41,6 @@ class RecordSelector(HttpSelector):
     _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="")
     record_filter: Optional[RecordFilter] = None
     transformations: List[RecordTransformation] = field(default_factory=lambda: [])
-    transform_before_filtering: bool = False
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._parameters = parameters
@@ -105,17 +104,9 @@ class RecordSelector(HttpSelector):
         Until we decide to move this logic away from the selector, we made this method public so that users like AsyncJobRetriever could
         share the logic of doing transformations on a set of records.
         """
-        if self.transform_before_filtering:
-            transformed_data = self._transform(all_data, stream_state, stream_slice)
-            transformed_filtered_data = self._filter(
-                transformed_data, stream_state, stream_slice, next_page_token
-            )
-        else:
-            filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token)
-            transformed_filtered_data = self._transform(filtered_data, stream_state, stream_slice)
-        normalized_data = self._normalize_by_schema(
-            transformed_filtered_data, schema=records_schema
-        )
+        filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token)
+        transformed_data = self._transform(filtered_data, stream_state, stream_slice)
+        normalized_data = self._normalize_by_schema(transformed_data, schema=records_schema)
         for data in normalized_data:
             yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)
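The hunk above drops the configurable `transform_before_filtering` ordering: records are now always filtered first, then transformed, then normalized against the schema. A minimal standalone sketch of the resulting fixed pipeline (the `filter_fn`/`transform_fn` callables are illustrative stand-ins, not the CDK's RecordFilter/RecordTransformation classes):

    from typing import Any, Callable, Iterable, Mapping

    Row = Mapping[str, Any]

    def filter_and_transform(
        all_data: Iterable[Row],
        filter_fn: Callable[[Row], bool],
        transform_fn: Callable[[Row], Row],
    ) -> Iterable[Row]:
        # Fixed order after this change: filter first, then transform the survivors.
        filtered = (row for row in all_data if filter_fn(row))
        return (transform_fn(row) for row in filtered)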
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py

@@ -5,7 +5,6 @@
 import copy
 import logging
 import threading
-import time
 from collections import OrderedDict
 from copy import deepcopy
 from datetime import timedelta
@@ -59,8 +58,7 @@ class ConcurrentPerPartitionCursor(Cursor):
     CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
     """
 
-    DEFAULT_MAX_PARTITIONS_NUMBER = 25_000
-    SWITCH_TO_GLOBAL_LIMIT = 10_000
+    DEFAULT_MAX_PARTITIONS_NUMBER = 10000
     _NO_STATE: Mapping[str, Any] = {}
     _NO_CURSOR_STATE: Mapping[str, Any] = {}
     _GLOBAL_STATE_KEY = "state"
@@ -101,11 +99,9 @@ class ConcurrentPerPartitionCursor(Cursor):
         self._new_global_cursor: Optional[StreamState] = None
         self._lookback_window: int = 0
         self._parent_state: Optional[StreamState] = None
-        self._number_of_partitions: int = 0
+        self._over_limit: int = 0
         self._use_global_cursor: bool = False
         self._partition_serializer = PerPartitionKeySerializer()
-        # Track the last time a state message was emitted
-        self._last_emission_time: float = 0.0
 
         self._set_initial_state(stream_state)
 
@@ -145,16 +141,21 @@ class ConcurrentPerPartitionCursor(Cursor):
             raise ValueError("stream_slice cannot be None")
 
         partition_key = self._to_partition_key(stream_slice.partition)
+        self._cursor_per_partition[partition_key].close_partition(partition=partition)
         with self._lock:
             self._semaphore_per_partition[partition_key].acquire()
-            if not self._use_global_cursor:
-                self._cursor_per_partition[partition_key].close_partition(partition=partition)
-                cursor = self._cursor_per_partition[partition_key]
+            cursor = self._cursor_per_partition[partition_key]
+            if (
+                partition_key in self._finished_partitions
+                and self._semaphore_per_partition[partition_key]._value == 0
+            ):
                 if (
-                    partition_key in self._finished_partitions
-                    and self._semaphore_per_partition[partition_key]._value == 0
+                    self._new_global_cursor is None
+                    or self._new_global_cursor[self.cursor_field.cursor_field_key]
+                    < cursor.state[self.cursor_field.cursor_field_key]
                 ):
-                    self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])
+                    self._new_global_cursor = copy.deepcopy(cursor.state)
+            if not self._use_global_cursor:
                 self._emit_state_message()
 
     def ensure_at_least_one_state_emitted(self) -> None:
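In the rewritten `close_partition` above, the global-cursor candidate is advanced inline once a partition has fully finished (its semaphore has drained), replacing the removed `_update_global_cursor` helper. A rough sketch of that comparison, with plain dicts standing in for cursor state and `cursor_key` as an illustrative name:

    import copy
    from typing import Any, Mapping, Optional

    def advance_global_cursor(
        candidate: Optional[Mapping[str, Any]],
        finished_partition_state: Mapping[str, Any],
        cursor_key: str,
    ) -> Mapping[str, Any]:
        # Adopt the finished partition's state only if it is ahead of the candidate.
        if candidate is None or candidate[cursor_key] < finished_partition_state[cursor_key]:
            return copy.deepcopy(finished_partition_state)
        return candidate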
@@ -168,23 +169,9 @@ class ConcurrentPerPartitionCursor(Cursor):
         self._global_cursor = self._new_global_cursor
         self._lookback_window = self._timer.finish()
         self._parent_state = self._partition_router.get_stream_state()
-        self._emit_state_message(throttle=False)
+        self._emit_state_message()
 
-    def _throttle_state_message(self) -> Optional[float]:
-        """
-        Throttles the state message emission to once every 60 seconds.
-        """
-        current_time = time.time()
-        if current_time - self._last_emission_time <= 60:
-            return None
-        return current_time
-
-    def _emit_state_message(self, throttle: bool = True) -> None:
-        if throttle:
-            current_time = self._throttle_state_message()
-            if current_time is None:
-                return
-            self._last_emission_time = current_time
+    def _emit_state_message(self) -> None:
         self._connector_state_manager.update_state_for_stream(
             self._stream_name,
             self._stream_namespace,
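For reference, the throttling removed here followed a simple "at most once per 60 seconds" pattern, so state is now emitted unconditionally on every call. A standalone sketch of the dropped pattern (not the CDK class itself):

    import time
    from typing import Optional

    class ThrottledEmitter:
        """Emit at most once per 60 seconds, mirroring the removed behavior."""

        def __init__(self) -> None:
            self._last_emission_time: float = 0.0

        def _throttle(self) -> Optional[float]:
            # Return the current time only if 60s have passed since the last emission.
            now = time.time()
            return None if now - self._last_emission_time <= 60 else now

        def emit(self, message: str, throttle: bool = True) -> None:
            if throttle:
                now = self._throttle()
                if now is None:
                    return  # suppressed: emitted too recently
                self._last_emission_time = now
            print(message)  # stand-in for sending the state message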
@@ -215,7 +202,6 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._lookback_window if self._global_cursor else 0,
         )
         with self._lock:
-            self._number_of_partitions += 1
             self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
             self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
                 threading.Semaphore(0)
@@ -246,15 +232,9 @@ class ConcurrentPerPartitionCursor(Cursor):
         - Logs a warning each time a partition is removed, indicating whether it was finished
           or removed due to being the oldest.
         """
-        if not self._use_global_cursor and self.limit_reached():
-            logger.info(
-                f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. "
-                f"Switching to global cursor for {self._stream_name}."
-            )
-            self._use_global_cursor = True
-
         with self._lock:
             while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
+                self._over_limit += 1
                 # Try removing finished partitions first
                 for partition_key in list(self._cursor_per_partition.keys()):
                     if (
@@ -265,7 +245,7 @@ class ConcurrentPerPartitionCursor(Cursor):
                             partition_key
                         )  # Remove the oldest partition
                         logger.warning(
-                            f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
+                            f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
                         )
                         break
                 else:
@@ -274,7 +254,7 @@ class ConcurrentPerPartitionCursor(Cursor):
                         1
                     ]  # Remove the oldest partition
                     logger.warning(
-                        f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
+                        f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
                     )
 
     def _set_initial_state(self, stream_state: StreamState) -> None:
@@ -334,7 +314,6 @@ class ConcurrentPerPartitionCursor(Cursor):
         self._lookback_window = int(stream_state.get("lookback_window", 0))
 
         for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
-            self._number_of_partitions += 1
             self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
                 self._create_cursor(state["cursor"])
             )
@@ -375,26 +354,16 @@ class ConcurrentPerPartitionCursor(Cursor):
         self._new_global_cursor = deepcopy(fixed_global_state)
 
     def observe(self, record: Record) -> None:
+        if not self._use_global_cursor and self.limit_reached():
+            self._use_global_cursor = True
+
         if not record.associated_slice:
             raise ValueError(
                 "Invalid state as stream slices that are emitted should refer to an existing cursor"
             )
-
-        record_cursor = self._connector_state_converter.output_format(
-            self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
-        )
-        self._update_global_cursor(record_cursor)
-        if not self._use_global_cursor:
-            self._cursor_per_partition[
-                self._to_partition_key(record.associated_slice.partition)
-            ].observe(record)
-
-    def _update_global_cursor(self, value: Any) -> None:
-        if (
-            self._new_global_cursor is None
-            or self._new_global_cursor[self.cursor_field.cursor_field_key] < value
-        ):
-            self._new_global_cursor = {self.cursor_field.cursor_field_key: copy.deepcopy(value)}
+        self._cursor_per_partition[
+            self._to_partition_key(record.associated_slice.partition)
+        ].observe(record)
 
     def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
         return self._partition_serializer.to_partition_key(partition)
@@ -428,4 +397,4 @@ class ConcurrentPerPartitionCursor(Cursor):
         return cursor
 
     def limit_reached(self) -> bool:
-        return self._number_of_partitions > self.SWITCH_TO_GLOBAL_LIMIT
+        return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
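Note the changed semantics of `limit_reached()`: 6.34.0.dev1 counted every partition ever created (`_number_of_partitions`) against a 10,000-partition `SWITCH_TO_GLOBAL_LIMIT`, while 6.34.1.dev0 reverts to counting evictions (`_over_limit`) and only trips after more than `DEFAULT_MAX_PARTITIONS_NUMBER` partitions have been dropped. A toy illustration of the reverted bookkeeping (standalone, not the CDK class):

    from collections import OrderedDict
    from typing import Any, Dict

    DEFAULT_MAX_PARTITIONS_NUMBER = 10000

    cursors: "OrderedDict[str, Dict[str, Any]]" = OrderedDict()
    over_limit = 0

    def ensure_partition_limit() -> None:
        """Evict the oldest partitions past the cap, counting each eviction."""
        global over_limit
        while len(cursors) > DEFAULT_MAX_PARTITIONS_NUMBER - 1:
            over_limit += 1               # counts drops, not total partitions seen
            cursors.popitem(last=False)   # drop the oldest partition

    def limit_reached() -> bool:
        # True only after the number of evictions itself exceeds the cap.
        return over_limit > DEFAULT_MAX_PARTITIONS_NUMBER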
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py

@@ -21,7 +21,6 @@ from airbyte_cdk.sources.declarative.requesters.request_option import (
 )
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
-from airbyte_cdk.utils.mapping_helpers import _validate_component_request_option_paths
 
 
 @dataclass
@@ -123,10 +122,6 @@ class DatetimeBasedCursor(DeclarativeCursor):
         if not self.cursor_datetime_formats:
             self.cursor_datetime_formats = [self.datetime_format]
 
-        _validate_component_request_option_paths(
-            self.config, self.start_time_option, self.end_time_option
-        )
-
     def get_stream_state(self) -> StreamState:
         return {self.cursor_field.eval(self.config): self._cursor} if self._cursor else {}  # type: ignore  # cursor_field is converted to an InterpolatedString in __post_init__
 
@@ -370,15 +365,14 @@ class DatetimeBasedCursor(DeclarativeCursor):
         options: MutableMapping[str, Any] = {}
         if not stream_slice:
             return options
-
         if self.start_time_option and self.start_time_option.inject_into == option_type:
-            start_time_value = stream_slice.get(self._partition_field_start.eval(self.config))
-            self.start_time_option.inject_into_request(options, start_time_value, self.config)
-
+            options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore # field_name is always casted to an interpolated string
+                self._partition_field_start.eval(self.config)
+            )
         if self.end_time_option and self.end_time_option.inject_into == option_type:
-            end_time_value = stream_slice.get(self._partition_field_end.eval(self.config))
-            self.end_time_option.inject_into_request(options, end_time_value, self.config)
-
+            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore [union-attr]
+                self._partition_field_end.eval(self.config)
+            )
         return options
 
     def should_be_synced(self, record: Record) -> bool:
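With `RequestOption.inject_into_request` gone in this version (`request_option.py` goes +4 -83 in the file list above), start and end values are written straight into the options mapping under the interpolated `field_name`; nested `field_path` injection is no longer available here. A simplified sketch of the reverted flat injection, using plain strings in place of interpolated fields (names are illustrative):

    from typing import Any, Mapping, MutableMapping, Optional

    def get_request_options(
        stream_slice: Optional[Mapping[str, Any]],
        start_field: str = "start_time",            # request parameter name
        partition_field_start: str = "start_time",  # key inside the stream slice
    ) -> MutableMapping[str, Any]:
        options: MutableMapping[str, Any] = {}
        if not stream_slice:
            return options
        # Flat injection: one top-level key per option, taken straight from the slice.
        options[start_field] = stream_slice.get(partition_field_start)
        return options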
airbyte_cdk/sources/declarative/manifest_declarative_source.py

@@ -137,10 +137,6 @@ class ManifestDeclarativeSource(DeclarativeSource):
             self._source_config, config
         )
 
-        api_budget_model = self._source_config.get("api_budget")
-        if api_budget_model:
-            self._constructor.set_api_budget(api_budget_model, config)
-
         source_streams = [
             self._constructor.create_component(
                 DeclarativeStreamModel,
@@ -369,11 +365,6 @@ class ManifestDeclarativeSource(DeclarativeSource):
             # Ensure that each stream is created with a unique name
             name = dynamic_stream.get("name")
 
-            if not isinstance(name, str):
-                raise ValueError(
-                    f"Expected stream name {name} to be a string, got {type(name)}."
-                )
-
             if name in seen_dynamic_streams:
                 error_message = f"Dynamic streams list contains a duplicate name: {name}. Please contact Airbyte Support."
                 failure_type = FailureType.system_error
airbyte_cdk/sources/declarative/models/declarative_component_schema.py

@@ -642,48 +642,6 @@ class OAuthAuthenticator(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
-class Rate(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    limit: int = Field(
-        ...,
-        description="The maximum number of calls allowed within the interval.",
-        title="Limit",
-    )
-    interval: str = Field(
-        ...,
-        description="The time interval for the rate limit.",
-        examples=["PT1H", "P1D"],
-        title="Interval",
-    )
-
-
-class HttpRequestRegexMatcher(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    method: Optional[str] = Field(
-        None, description="The HTTP method to match (e.g., GET, POST).", title="Method"
-    )
-    url_base: Optional[str] = Field(
-        None,
-        description='The base URL (scheme and host, e.g. "https://api.example.com") to match.',
-        title="URL Base",
-    )
-    url_path_pattern: Optional[str] = Field(
-        None,
-        description="A regular expression pattern to match the URL path.",
-        title="URL Path Pattern",
-    )
-    params: Optional[Dict[str, Any]] = Field(
-        None, description="The query parameters to match.", title="Parameters"
-    )
-    headers: Optional[Dict[str, Any]] = Field(
-        None, description="The headers to match.", title="Headers"
-    )
-
-
 class DpathExtractor(BaseModel):
     type: Literal["DpathExtractor"]
     field_path: List[str] = Field(
@@ -929,6 +887,15 @@ class CustomDecoder(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
+class GzipJsonDecoder(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    type: Literal["GzipJsonDecoder"]
+    encoding: Optional[str] = "utf-8"
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
 class MinMaxDatetime(BaseModel):
     type: Literal["MinMaxDatetime"]
     datetime: str = Field(
@@ -1233,17 +1200,11 @@ class InjectInto(Enum):
 
 
 class RequestOption(BaseModel):
     type: Literal["RequestOption"]
-    field_name: Optional[str] = Field(
-        None,
-        description="Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.",
+    field_name: str = Field(
+        ...,
+        description="Configures which key should be used in the location that the descriptor is being injected into",
         examples=["segment_id"],
-        title="Field Name",
-    )
-    field_path: Optional[List[str]] = Field(
-        None,
-        description="Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)",
-        examples=[["data", "viewer", "id"]],
-        title="Field Path",
+        title="Request Option",
     )
     inject_into: InjectInto = Field(
         ...,
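After this hunk, `field_name` is a required string again and `field_path` (nested JSON-body paths, e.g. for GraphQL payloads) no longer exists on the model. A hedged example of what still validates against the 6.34.1.dev0 generated model (import path taken from file 17 in the list above; pydantic coerces the enum from its string value):

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        RequestOption,
    )

    # Valid: a flat key injected into the query string.
    option = RequestOption(
        type="RequestOption",
        field_name="segment_id",
        inject_into="request_parameter",
    )

    # No longer expressible in this version:
    # RequestOption(type="RequestOption", field_path=["data", "viewer", "id"], ...)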
@@ -1307,8 +1268,18 @@ class LegacySessionTokenAuthenticator(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
-class CsvDecoder(BaseModel):
-    type: Literal["CsvDecoder"]
+class JsonParser(BaseModel):
+    type: Literal["JsonParser"]
+    encoding: Optional[str] = "utf-8"
+
+
+class JsonLineParser(BaseModel):
+    type: Literal["JsonLineParser"]
+    encoding: Optional[str] = "utf-8"
+
+
+class CsvParser(BaseModel):
+    type: Literal["CsvParser"]
     encoding: Optional[str] = "utf-8"
     delimiter: Optional[str] = ","
 
@@ -1607,55 +1578,6 @@ class DatetimeBasedCursor(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
-class FixedWindowCallRatePolicy(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    type: Literal["FixedWindowCallRatePolicy"]
-    period: str = Field(
-        ..., description="The time interval for the rate limit window.", title="Period"
-    )
-    call_limit: int = Field(
-        ...,
-        description="The maximum number of calls allowed within the period.",
-        title="Call Limit",
-    )
-    matchers: List[HttpRequestRegexMatcher] = Field(
-        ...,
-        description="List of matchers that define which requests this policy applies to.",
-        title="Matchers",
-    )
-
-
-class MovingWindowCallRatePolicy(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    type: Literal["MovingWindowCallRatePolicy"]
-    rates: List[Rate] = Field(
-        ...,
-        description="List of rates that define the call limits for different time intervals.",
-        title="Rates",
-    )
-    matchers: List[HttpRequestRegexMatcher] = Field(
-        ...,
-        description="List of matchers that define which requests this policy applies to.",
-        title="Matchers",
-    )
-
-
-class UnlimitedCallRatePolicy(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    type: Literal["UnlimitedCallRatePolicy"]
-    matchers: List[HttpRequestRegexMatcher] = Field(
-        ...,
-        description="List of matchers that define which requests this policy applies to.",
-        title="Matchers",
-    )
-
-
 class DefaultErrorHandler(BaseModel):
     type: Literal["DefaultErrorHandler"]
     backoff_strategies: Optional[
@@ -1752,9 +1674,9 @@ class RecordSelector(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
-class GzipDecoder(BaseModel):
-    type: Literal["GzipDecoder"]
-    decoder: Union[CsvDecoder, GzipDecoder, JsonDecoder, JsonlDecoder]
+class GzipParser(BaseModel):
+    type: Literal["GzipParser"]
+    inner_parser: Union[JsonLineParser, CsvParser, JsonParser]
 
 
 class Spec(BaseModel):
@@ -1787,51 +1709,23 @@ class CompositeErrorHandler(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
-class HTTPAPIBudget(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    type: Literal["HTTPAPIBudget"]
-    policies: List[
-        Union[
-            FixedWindowCallRatePolicy,
-            MovingWindowCallRatePolicy,
-            UnlimitedCallRatePolicy,
-        ]
-    ] = Field(
-        ...,
-        description="List of call rate policies that define how many calls are allowed.",
-        title="Policies",
-    )
-    ratelimit_reset_header: Optional[str] = Field(
-        "ratelimit-reset",
-        description="The HTTP response header name that indicates when the rate limit resets.",
-        title="Rate Limit Reset Header",
-    )
-    ratelimit_remaining_header: Optional[str] = Field(
-        "ratelimit-remaining",
-        description="The HTTP response header name that indicates the number of remaining allowed calls.",
-        title="Rate Limit Remaining Header",
-    )
-    status_codes_for_ratelimit_hit: Optional[List[int]] = Field(
-        [429],
-        description="List of HTTP status codes that indicate a rate limit has been hit.",
-        title="Status Codes for Rate Limit Hit",
-    )
-
-
 class ZipfileDecoder(BaseModel):
     class Config:
         extra = Extra.allow
 
     type: Literal["ZipfileDecoder"]
-    decoder: Union[CsvDecoder, GzipDecoder, JsonDecoder, JsonlDecoder] = Field(
+    parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser] = Field(
         ...,
         description="Parser to parse the decompressed data from the zipfile(s).",
         title="Parser",
     )
 
 
+class CompositeRawDecoder(BaseModel):
+    type: Literal["CompositeRawDecoder"]
+    parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser]
+
+
 class DeclarativeSource1(BaseModel):
     class Config:
         extra = Extra.forbid
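These parser models compose: `GzipParser` wraps an `inner_parser`, and `CompositeRawDecoder` (like `ZipfileDecoder`) accepts any of the parsers. A sketch of declaring a decoder for gzip-compressed JSON Lines responses with the generated models, under the same import-path assumption as above:

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        CompositeRawDecoder,
        GzipParser,
        JsonLineParser,
    )

    # Gunzip the response body first, then parse it line by line as JSON.
    decoder = CompositeRawDecoder(
        type="CompositeRawDecoder",
        parser=GzipParser(
            type="GzipParser",
            inner_parser=JsonLineParser(type="JsonLineParser", encoding="utf-8"),
        ),
    )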
@@ -1848,7 +1742,6 @@ class DeclarativeSource1(BaseModel):
     definitions: Optional[Dict[str, Any]] = None
     spec: Optional[Spec] = None
     concurrency_level: Optional[ConcurrencyLevel] = None
-    api_budget: Optional[HTTPAPIBudget] = None
     metadata: Optional[Dict[str, Any]] = Field(
         None,
         description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
@@ -1875,7 +1768,6 @@ class DeclarativeSource2(BaseModel):
     definitions: Optional[Dict[str, Any]] = None
     spec: Optional[Spec] = None
     concurrency_level: Optional[ConcurrencyLevel] = None
-    api_budget: Optional[HTTPAPIBudget] = None
     metadata: Optional[Dict[str, Any]] = Field(
         None,
         description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
@@ -2035,7 +1927,7 @@ class SessionTokenAuthenticator(BaseModel):
         description="Authentication method to use for requests sent to the API, specifying how to inject the session token.",
         title="Data Request Authentication",
     )
-    decoder: Optional[Union[JsonDecoder, XmlDecoder]] = Field(
+    decoder: Optional[Union[JsonDecoder, XmlDecoder, CompositeRawDecoder]] = Field(
         None, description="Component used to decode the response.", title="Decoder"
     )
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
@@ -2235,12 +2127,12 @@ class SimpleRetriever(BaseModel):
     decoder: Optional[
         Union[
             CustomDecoder,
-            CsvDecoder,
-            GzipDecoder,
             JsonDecoder,
             JsonlDecoder,
             IterableDecoder,
             XmlDecoder,
+            GzipJsonDecoder,
+            CompositeRawDecoder,
             ZipfileDecoder,
         ]
     ] = Field(
@@ -2313,12 +2205,12 @@ class AsyncRetriever(BaseModel):
     decoder: Optional[
         Union[
             CustomDecoder,
-            CsvDecoder,
-            GzipDecoder,
             JsonDecoder,
             JsonlDecoder,
             IterableDecoder,
             XmlDecoder,
+            GzipJsonDecoder,
+            CompositeRawDecoder,
             ZipfileDecoder,
         ]
     ] = Field(
@@ -2329,12 +2221,12 @@ class AsyncRetriever(BaseModel):
     download_decoder: Optional[
         Union[
             CustomDecoder,
-            CsvDecoder,
-            GzipDecoder,
             JsonDecoder,
             JsonlDecoder,
             IterableDecoder,
             XmlDecoder,
+            GzipJsonDecoder,
+            CompositeRawDecoder,
             ZipfileDecoder,
         ]
     ] = Field(
@@ -2379,7 +2271,6 @@ class DynamicDeclarativeStream(BaseModel):
 
 
 ComplexFieldType.update_forward_refs()
-GzipDecoder.update_forward_refs()
 CompositeErrorHandler.update_forward_refs()
 DeclarativeSource1.update_forward_refs()
 DeclarativeSource2.update_forward_refs()