airbyte-cdk 6.21.1.dev0__py3-none-any.whl → 6.26.0.dev4103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39):
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +6 -0
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +1 -0
  3. airbyte_cdk/sources/declarative/auth/oauth.py +68 -11
  4. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +81 -16
  5. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +58 -2
  6. airbyte_cdk/sources/declarative/decoders/__init__.py +9 -1
  7. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +59 -0
  8. airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
  9. airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
  10. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +334 -0
  11. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
  12. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
  13. airbyte_cdk/sources/declarative/manifest_declarative_source.py +15 -4
  14. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +50 -14
  15. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
  16. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +220 -22
  17. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +6 -2
  18. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
  19. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
  20. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +15 -0
  21. airbyte_cdk/sources/file_based/config/identities_based_stream_config.py +8 -0
  22. airbyte_cdk/sources/file_based/config/permissions.py +34 -0
  23. airbyte_cdk/sources/file_based/file_based_source.py +65 -1
  24. airbyte_cdk/sources/file_based/file_based_stream_reader.py +33 -0
  25. airbyte_cdk/sources/file_based/schema_helpers.py +25 -0
  26. airbyte_cdk/sources/file_based/stream/__init__.py +2 -1
  27. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +29 -0
  28. airbyte_cdk/sources/file_based/stream/identities_stream.py +99 -0
  29. airbyte_cdk/sources/http_logger.py +1 -1
  30. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  31. airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
  32. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  33. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +20 -20
  34. airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
  35. {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/METADATA +3 -3
  36. {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/RECORD +39 -31
  37. {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/LICENSE.txt +0 -0
  38. {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/WHEEL +0 -0
  39. {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/entry_points.txt +0 -0
@@ -171,6 +171,12 @@ def create_declarative_source(
171
171
  "Invalid config: `__injected_declarative_manifest` should be provided at the root "
172
172
  f"of the config but config only has keys: {list(config.keys() if config else [])}"
173
173
  )
174
+ if not isinstance(config["__injected_declarative_manifest"], dict):
175
+ raise ValueError(
176
+ "Invalid config: `__injected_declarative_manifest` should be a dictionary, "
177
+ f"but got type: {type(config['__injected_declarative_manifest'])}"
178
+ )
179
+
174
180
  return ConcurrentDeclarativeSource(
175
181
  config=config,
176
182
  catalog=catalog,
@@ -52,6 +52,7 @@ def get_limits(config: Mapping[str, Any]) -> TestReadLimits:
52
52
  def create_source(config: Mapping[str, Any], limits: TestReadLimits) -> ManifestDeclarativeSource:
53
53
  manifest = config["__injected_declarative_manifest"]
54
54
  return ManifestDeclarativeSource(
55
+ config=config,
55
56
  emit_connector_builder_messages=True,
56
57
  source_config=manifest,
57
58
  component_factory=ModelToComponentFactory(
@@ -3,11 +3,12 @@
3
3
  #
4
4
 
5
5
  from dataclasses import InitVar, dataclass, field
6
- from typing import Any, List, Mapping, Optional, Union
6
+ from typing import Any, List, Mapping, MutableMapping, Optional, Union
7
7
 
8
8
  import pendulum
9
9
 
10
10
  from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
11
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
11
12
  from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
12
13
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
13
14
  from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
@@ -44,10 +45,10 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
44
45
  message_repository (MessageRepository): the message repository used to emit logs on HTTP requests
45
46
  """
46
47
 
47
- client_id: Union[InterpolatedString, str]
48
- client_secret: Union[InterpolatedString, str]
49
48
  config: Mapping[str, Any]
50
49
  parameters: InitVar[Mapping[str, Any]]
50
+ client_id: Optional[Union[InterpolatedString, str]] = None
51
+ client_secret: Optional[Union[InterpolatedString, str]] = None
51
52
  token_refresh_endpoint: Optional[Union[InterpolatedString, str]] = None
52
53
  refresh_token: Optional[Union[InterpolatedString, str]] = None
53
54
  scopes: Optional[List[str]] = None
@@ -66,6 +67,8 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
66
67
  grant_type_name: Union[InterpolatedString, str] = "grant_type"
67
68
  grant_type: Union[InterpolatedString, str] = "refresh_token"
68
69
  message_repository: MessageRepository = NoopMessageRepository()
70
+ profile_assertion: Optional[DeclarativeAuthenticator] = None
71
+ use_profile_assertion: Optional[Union[InterpolatedBoolean, str, bool]] = False
69
72
 
70
73
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
71
74
  super().__init__()
@@ -76,11 +79,19 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
76
79
  else:
77
80
  self._token_refresh_endpoint = None
78
81
  self._client_id_name = InterpolatedString.create(self.client_id_name, parameters=parameters)
79
- self._client_id = InterpolatedString.create(self.client_id, parameters=parameters)
82
+ self._client_id = (
83
+ InterpolatedString.create(self.client_id, parameters=parameters)
84
+ if self.client_id
85
+ else self.client_id
86
+ )
80
87
  self._client_secret_name = InterpolatedString.create(
81
88
  self.client_secret_name, parameters=parameters
82
89
  )
83
- self._client_secret = InterpolatedString.create(self.client_secret, parameters=parameters)
90
+ self._client_secret = (
91
+ InterpolatedString.create(self.client_secret, parameters=parameters)
92
+ if self.client_secret
93
+ else self.client_secret
94
+ )
84
95
  self._refresh_token_name = InterpolatedString.create(
85
96
  self.refresh_token_name, parameters=parameters
86
97
  )
@@ -99,7 +110,12 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
99
110
  self.grant_type_name = InterpolatedString.create(
100
111
  self.grant_type_name, parameters=parameters
101
112
  )
102
- self.grant_type = InterpolatedString.create(self.grant_type, parameters=parameters)
113
+ self.grant_type = InterpolatedString.create(
114
+ "urn:ietf:params:oauth:grant-type:jwt-bearer"
115
+ if self.use_profile_assertion
116
+ else self.grant_type,
117
+ parameters=parameters,
118
+ )
103
119
  self._refresh_request_body = InterpolatedMapping(
104
120
  self.refresh_request_body or {}, parameters=parameters
105
121
  )
@@ -115,6 +131,13 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
115
131
  if self.token_expiry_date
116
132
  else pendulum.now().subtract(days=1) # type: ignore # substract does not have type hints
117
133
  )
134
+ self.use_profile_assertion = (
135
+ InterpolatedBoolean(self.use_profile_assertion, parameters=parameters)
136
+ if isinstance(self.use_profile_assertion, str)
137
+ else self.use_profile_assertion
138
+ )
139
+ self.assertion_name = "assertion"
140
+
118
141
  if self.access_token_value is not None:
119
142
  self._access_token_value = InterpolatedString.create(
120
143
  self.access_token_value, parameters=parameters
@@ -126,9 +149,20 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
126
149
  self._access_token_value if self.access_token_value else None
127
150
  )
128
151
 
152
+ if not self.use_profile_assertion and any(
153
+ client_creds is None for client_creds in [self.client_id, self.client_secret]
154
+ ):
155
+ raise ValueError(
156
+ "OAuthAuthenticator configuration error: Both 'client_id' and 'client_secret' are required for the "
157
+ "basic OAuth flow."
158
+ )
159
+ if self.profile_assertion is None and self.use_profile_assertion:
160
+ raise ValueError(
161
+ "OAuthAuthenticator configuration error: 'profile_assertion' is required when using the profile assertion flow."
162
+ )
129
163
  if self.get_grant_type() == "refresh_token" and self._refresh_token is None:
130
164
  raise ValueError(
131
- "OAuthAuthenticator needs a refresh_token parameter if grant_type is set to `refresh_token`"
165
+ "OAuthAuthenticator configuration error: A 'refresh_token' is required when the 'grant_type' is set to 'refresh_token'."
132
166
  )
133
167
 
134
168
  def get_token_refresh_endpoint(self) -> Optional[str]:
@@ -145,19 +179,21 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
145
179
  return self._client_id_name.eval(self.config) # type: ignore # eval returns a string in this context
146
180
 
147
181
  def get_client_id(self) -> str:
148
- client_id: str = self._client_id.eval(self.config)
182
+ client_id = self._client_id.eval(self.config) if self._client_id else self._client_id
149
183
  if not client_id:
150
184
  raise ValueError("OAuthAuthenticator was unable to evaluate client_id parameter")
151
- return client_id
185
+ return client_id # type: ignore # value will be returned as a string, or an error will be raised
152
186
 
153
187
  def get_client_secret_name(self) -> str:
154
188
  return self._client_secret_name.eval(self.config) # type: ignore # eval returns a string in this context
155
189
 
156
190
  def get_client_secret(self) -> str:
157
- client_secret: str = self._client_secret.eval(self.config)
191
+ client_secret = (
192
+ self._client_secret.eval(self.config) if self._client_secret else self._client_secret
193
+ )
158
194
  if not client_secret:
159
195
  raise ValueError("OAuthAuthenticator was unable to evaluate client_secret parameter")
160
- return client_secret
196
+ return client_secret # type: ignore # value will be returned as a string, or an error will be raised
161
197
 
162
198
  def get_refresh_token_name(self) -> str:
163
199
  return self._refresh_token_name.eval(self.config) # type: ignore # eval returns a string in this context
@@ -192,6 +228,27 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
192
228
  def set_token_expiry_date(self, value: Union[str, int]) -> None:
193
229
  self._token_expiry_date = self._parse_token_expiration_date(value)
194
230
 
231
+ def get_assertion_name(self) -> str:
232
+ return self.assertion_name
233
+
234
+ def get_assertion(self) -> str:
235
+ if self.profile_assertion is None:
236
+ raise ValueError("profile_assertion is not set")
237
+ return self.profile_assertion.token
238
+
239
+ def build_refresh_request_body(self) -> Mapping[str, Any]:
240
+ """
241
+ Returns the request body to set on the refresh request
242
+
243
+ Override to define additional parameters
244
+ """
245
+ if self.use_profile_assertion:
246
+ return {
247
+ self.get_grant_type_name(): self.get_grant_type(),
248
+ self.get_assertion_name(): self.get_assertion(),
249
+ }
250
+ return super().build_refresh_request_body()
251
+
195
252
  @property
196
253
  def access_token(self) -> str:
197
254
  if self._access_token is None:
@@ -20,6 +20,9 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
20
20
  ClientSideIncrementalRecordFilterDecorator,
21
21
  )
22
22
  from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
23
+ from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
24
+ PerPartitionWithGlobalCursor,
25
+ )
23
26
  from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
24
27
  from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
25
28
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -32,7 +35,7 @@ from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
32
35
  ModelToComponentFactory,
33
36
  )
34
37
  from airbyte_cdk.sources.declarative.requesters import HttpRequester
35
- from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
38
+ from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
36
39
  from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
37
40
  DeclarativePartitionFactory,
38
41
  StreamSlicerPartitionGenerator,
@@ -77,6 +80,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
77
80
 
78
81
  super().__init__(
79
82
  source_config=source_config,
83
+ config=config,
80
84
  debug=debug,
81
85
  emit_connector_builder_messages=emit_connector_builder_messages,
82
86
  component_factory=component_factory,
@@ -230,21 +234,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
230
234
  stream_state=stream_state,
231
235
  )
232
236
 
233
- retriever = declarative_stream.retriever
234
-
235
- # This is an optimization so that we don't invoke any cursor or state management flows within the
236
- # low-code framework because state management is handled through the ConcurrentCursor.
237
- if declarative_stream and isinstance(retriever, SimpleRetriever):
238
- # Also a temporary hack. In the legacy Stream implementation, as part of the read,
239
- # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
240
- # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
241
- # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
242
- # still rely on a DatetimeBasedCursor that is properly initialized with state.
243
- if retriever.cursor:
244
- retriever.cursor.set_initial_state(stream_state=stream_state)
245
- # We zero it out here, but since this is a cursor reference, the state is still properly
246
- # instantiated for the other components that reference it
247
- retriever.cursor = None
237
+ retriever = self._get_retriever(declarative_stream, stream_state)
248
238
 
249
239
  partition_generator = StreamSlicerPartitionGenerator(
250
240
  DeclarativePartitionFactory(
@@ -304,6 +294,60 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
304
294
  cursor=final_state_cursor,
305
295
  )
306
296
  )
297
+ elif (
298
+ incremental_sync_component_definition
299
+ and incremental_sync_component_definition.get("type", "")
300
+ == DatetimeBasedCursorModel.__name__
301
+ and self._stream_supports_concurrent_partition_processing(
302
+ declarative_stream=declarative_stream
303
+ )
304
+ and hasattr(declarative_stream.retriever, "stream_slicer")
305
+ and isinstance(
306
+ declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
307
+ )
308
+ ):
309
+ stream_state = state_manager.get_stream_state(
310
+ stream_name=declarative_stream.name, namespace=declarative_stream.namespace
311
+ )
312
+ partition_router = declarative_stream.retriever.stream_slicer._partition_router
313
+
314
+ perpartition_cursor = (
315
+ self._constructor.create_concurrent_cursor_from_perpartition_cursor(
316
+ state_manager=state_manager,
317
+ model_type=DatetimeBasedCursorModel,
318
+ component_definition=incremental_sync_component_definition,
319
+ stream_name=declarative_stream.name,
320
+ stream_namespace=declarative_stream.namespace,
321
+ config=config or {},
322
+ stream_state=stream_state,
323
+ partition_router=partition_router,
324
+ )
325
+ )
326
+
327
+ retriever = self._get_retriever(declarative_stream, stream_state)
328
+
329
+ partition_generator = StreamSlicerPartitionGenerator(
330
+ DeclarativePartitionFactory(
331
+ declarative_stream.name,
332
+ declarative_stream.get_json_schema(),
333
+ retriever,
334
+ self.message_repository,
335
+ ),
336
+ perpartition_cursor,
337
+ )
338
+
339
+ concurrent_streams.append(
340
+ DefaultStream(
341
+ partition_generator=partition_generator,
342
+ name=declarative_stream.name,
343
+ json_schema=declarative_stream.get_json_schema(),
344
+ availability_strategy=AlwaysAvailableAvailabilityStrategy(),
345
+ primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
346
+ cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
347
+ logger=self.logger,
348
+ cursor=perpartition_cursor,
349
+ )
350
+ )
307
351
  else:
308
352
  synchronous_streams.append(declarative_stream)
309
353
  else:
@@ -394,6 +438,27 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
394
438
  return False
395
439
  return True
396
440
 
441
+ def _get_retriever(
442
+ self, declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
443
+ ) -> Retriever:
444
+ retriever = declarative_stream.retriever
445
+
446
+ # This is an optimization so that we don't invoke any cursor or state management flows within the
447
+ # low-code framework because state management is handled through the ConcurrentCursor.
448
+ if declarative_stream and isinstance(retriever, SimpleRetriever):
449
+ # Also a temporary hack. In the legacy Stream implementation, as part of the read,
450
+ # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
451
+ # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
452
+ # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
453
+ # still rely on a DatetimeBasedCursor that is properly initialized with state.
454
+ if retriever.cursor:
455
+ retriever.cursor.set_initial_state(stream_state=stream_state)
456
+ # We zero it out here, but since this is a cursor reference, the state is still properly
457
+ # instantiated for the other components that reference it
458
+ retriever.cursor = None
459
+
460
+ return retriever
461
+
397
462
  @staticmethod
398
463
  def _select_streams(
399
464
  streams: List[AbstractStream], configured_catalog: ConfiguredAirbyteCatalog
@@ -784,6 +784,29 @@ definitions:
784
784
  type:
785
785
  type: string
786
786
  enum: [DatetimeBasedCursor]
787
+ clamping:
788
+ title: Date Range Clamping
789
+ description: This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)
790
+ type: object
791
+ required:
792
+ - target
793
+ properties:
794
+ target:
795
+ title: Target
796
+ description: The period of time that datetime windows will be clamped by
797
+ # This should ideally be an enum. However, we don't use an enum because we want to allow for connectors
798
+ # to support interpolation on the connector config to get the target which is an arbitrary string
799
+ type: string
800
+ interpolation_context:
801
+ - config
802
+ examples:
803
+ - "DAY"
804
+ - "WEEK"
805
+ - "MONTH"
806
+ - "{{ config['target'] }}"
807
+ target_details:
808
+ type: object
809
+ additionalProperties: true
787
810
  cursor_field:
788
811
  title: Cursor Field
789
812
  description: The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.
@@ -1058,8 +1081,6 @@ definitions:
1058
1081
  type: object
1059
1082
  required:
1060
1083
  - type
1061
- - client_id
1062
- - client_secret
1063
1084
  properties:
1064
1085
  type:
1065
1086
  type: string
@@ -1254,6 +1275,15 @@ definitions:
1254
1275
  default: []
1255
1276
  examples:
1256
1277
  - ["invalid_grant", "invalid_permissions"]
1278
+ profile_assertion:
1279
+ title: Profile Assertion
1280
+ description: The authenticator being used to authenticate the client authenticator.
1281
+ "$ref": "#/definitions/JwtAuthenticator"
1282
+ use_profile_assertion:
1283
+ title: Use Profile Assertion
1284
+ description: Enable using profile assertion as a flow for OAuth authorization.
1285
+ type: boolean
1286
+ default: false
1257
1287
  $parameters:
1258
1288
  type: object
1259
1289
  additionalProperties: true
@@ -1514,6 +1544,7 @@ definitions:
1514
1544
  anyOf:
1515
1545
  - "$ref": "#/definitions/JsonDecoder"
1516
1546
  - "$ref": "#/definitions/XmlDecoder"
1547
+ - "$ref": "#/definitions/CompositeRawDecoder"
1517
1548
  $parameters:
1518
1549
  type: object
1519
1550
  additionalProperties: true
@@ -2100,6 +2131,26 @@ definitions:
2100
2131
  $parameters:
2101
2132
  type: object
2102
2133
  additionalProperties: true
2134
+ ZipfileDecoder:
2135
+ title: Zipfile Decoder
2136
+ description: Decoder for response data that is returned as zipfile(s).
2137
+ type: object
2138
+ additionalProperties: true
2139
+ required:
2140
+ - type
2141
+ - parser
2142
+ properties:
2143
+ type:
2144
+ type: string
2145
+ enum: [ZipfileDecoder]
2146
+ parser:
2147
+ title: Parser
2148
+ description: Parser to parse the decompressed data from the zipfile(s).
2149
+ anyOf:
2150
+ - "$ref": "#/definitions/GzipParser"
2151
+ - "$ref": "#/definitions/JsonParser"
2152
+ - "$ref": "#/definitions/JsonLineParser"
2153
+ - "$ref": "#/definitions/CsvParser"
2103
2154
  ListPartitionRouter:
2104
2155
  title: List Partition Router
2105
2156
  description: A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. During a sync, each value is iterated over and can be used as input to outbound API requests.
@@ -2928,6 +2979,7 @@ definitions:
2928
2979
  - "$ref": "#/definitions/XmlDecoder"
2929
2980
  - "$ref": "#/definitions/GzipJsonDecoder"
2930
2981
  - "$ref": "#/definitions/CompositeRawDecoder"
2982
+ - "$ref": "#/definitions/ZipfileDecoder"
2931
2983
  $parameters:
2932
2984
  type: object
2933
2985
  additionalProperties: true
@@ -3126,6 +3178,8 @@ definitions:
3126
3178
  - "$ref": "#/definitions/IterableDecoder"
3127
3179
  - "$ref": "#/definitions/XmlDecoder"
3128
3180
  - "$ref": "#/definitions/GzipJsonDecoder"
3181
+ - "$ref": "#/definitions/CompositeRawDecoder"
3182
+ - "$ref": "#/definitions/ZipfileDecoder"
3129
3183
  download_decoder:
3130
3184
  title: Download Decoder
3131
3185
  description: Component decoding the download response so records can be extracted.
@@ -3136,6 +3190,8 @@ definitions:
3136
3190
  - "$ref": "#/definitions/IterableDecoder"
3137
3191
  - "$ref": "#/definitions/XmlDecoder"
3138
3192
  - "$ref": "#/definitions/GzipJsonDecoder"
3193
+ - "$ref": "#/definitions/CompositeRawDecoder"
3194
+ - "$ref": "#/definitions/ZipfileDecoder"
3139
3195
  $parameters:
3140
3196
  type: object
3141
3197
  additionalProperties: true
@@ -2,7 +2,12 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import CompositeRawDecoder
5
+ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
6
+ CompositeRawDecoder,
7
+ GzipParser,
8
+ JsonParser,
9
+ Parser,
10
+ )
6
11
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
7
12
  from airbyte_cdk.sources.declarative.decoders.json_decoder import (
8
13
  GzipJsonDecoder,
@@ -15,15 +20,18 @@ from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator impor
15
20
  PaginationDecoderDecorator,
16
21
  )
17
22
  from airbyte_cdk.sources.declarative.decoders.xml_decoder import XmlDecoder
23
+ from airbyte_cdk.sources.declarative.decoders.zipfile_decoder import ZipfileDecoder
18
24
 
19
25
  __all__ = [
20
26
  "Decoder",
21
27
  "CompositeRawDecoder",
22
28
  "JsonDecoder",
29
+ "JsonParser",
23
30
  "JsonlDecoder",
24
31
  "IterableDecoder",
25
32
  "GzipJsonDecoder",
26
33
  "NoopDecoder",
27
34
  "PaginationDecoderDecorator",
28
35
  "XmlDecoder",
36
+ "ZipfileDecoder",
29
37
  ]
@@ -0,0 +1,59 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import logging
6
+ import zipfile
7
+ from dataclasses import dataclass
8
+ from io import BytesIO
9
+ from typing import Any, Generator, MutableMapping
10
+
11
+ import orjson
12
+ import requests
13
+
14
+ from airbyte_cdk.models import FailureType
15
+ from airbyte_cdk.sources.declarative.decoders import Decoder
16
+ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
17
+ Parser,
18
+ )
19
+ from airbyte_cdk.utils import AirbyteTracedException
20
+
21
+ logger = logging.getLogger("airbyte")
22
+
23
+
24
+ @dataclass
25
+ class ZipfileDecoder(Decoder):
26
+ parser: Parser
27
+
28
+ def is_stream_response(self) -> bool:
29
+ return False
30
+
31
+ def decode(
32
+ self, response: requests.Response
33
+ ) -> Generator[MutableMapping[str, Any], None, None]:
34
+ try:
35
+ with zipfile.ZipFile(BytesIO(response.content)) as zip_file:
36
+ for file_name in zip_file.namelist():
37
+ unzipped_content = zip_file.read(file_name)
38
+ buffered_content = BytesIO(unzipped_content)
39
+ try:
40
+ yield from self.parser.parse(buffered_content)
41
+ except Exception as e:
42
+ logger.error(
43
+ f"Failed to parse file: {file_name} from zip file: {response.request.url} with exception {e}."
44
+ )
45
+ raise AirbyteTracedException(
46
+ message=f"Failed to parse file: {file_name} from zip file.",
47
+ internal_message=f"Failed to parse file: {file_name} from zip file: {response.request.url}.",
48
+ failure_type=FailureType.system_error,
49
+ ) from e
50
+ except zipfile.BadZipFile as e:
51
+ logger.error(
52
+ f"Received an invalid zip file in response to URL: {response.request.url}. "
53
+ f"The size of the response body is: {len(response.content)}"
54
+ )
55
+ raise AirbyteTracedException(
56
+ message="Received an invalid zip file in response.",
57
+ internal_message=f"Received an invalid zip file in response to URL: {response.request.url}.",
58
+ failure_type=FailureType.system_error,
59
+ ) from e
@@ -59,13 +59,11 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
59
59
 
60
60
  def __init__(
61
61
  self,
62
- date_time_based_cursor: DatetimeBasedCursor,
63
- substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
62
+ cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
64
63
  **kwargs: Any,
65
64
  ):
66
65
  super().__init__(**kwargs)
67
- self._date_time_based_cursor = date_time_based_cursor
68
- self._substream_cursor = substream_cursor
66
+ self._cursor = cursor
69
67
 
70
68
  def filter_records(
71
69
  self,
@@ -77,7 +75,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
77
75
  records = (
78
76
  record
79
77
  for record in records
80
- if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
78
+ if self._cursor.should_be_synced(
81
79
  # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
82
80
  # Record stream name is empty because it is not used during the filtering
83
81
  Record(data=record, associated_slice=stream_slice, stream_name="")
@@ -2,6 +2,10 @@
2
2
  # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
6
+ ConcurrentCursorFactory,
7
+ ConcurrentPerPartitionCursor,
8
+ )
5
9
  from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
6
10
  from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
7
11
  from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
@@ -21,6 +25,8 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i
21
25
 
22
26
  __all__ = [
23
27
  "CursorFactory",
28
+ "ConcurrentCursorFactory",
29
+ "ConcurrentPerPartitionCursor",
24
30
  "DatetimeBasedCursor",
25
31
  "DeclarativeCursor",
26
32
  "GlobalSubstreamCursor",