airbyte-cdk 6.34.0.dev2__py3-none-any.whl → 6.34.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +12 -16
  2. airbyte_cdk/connector_builder/message_grouper.py +448 -0
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
  5. airbyte_cdk/sources/declarative/auth/oauth.py +1 -6
  6. airbyte_cdk/sources/declarative/auth/token.py +8 -3
  7. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +19 -30
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +85 -203
  9. airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
  10. airbyte_cdk/sources/declarative/decoders/__init__.py +4 -0
  11. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -7
  12. airbyte_cdk/sources/declarative/decoders/json_decoder.py +58 -12
  13. airbyte_cdk/sources/declarative/extractors/record_selector.py +3 -12
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +38 -122
  15. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -12
  16. airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -9
  17. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -150
  18. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +84 -234
  19. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  20. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
  21. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +18 -26
  22. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -8
  23. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -16
  24. airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
  25. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
  26. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
  27. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -4
  28. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
  29. airbyte_cdk/sources/file_based/file_based_source.py +37 -70
  30. airbyte_cdk/sources/file_based/file_based_stream_reader.py +12 -107
  31. airbyte_cdk/sources/file_based/stream/__init__.py +1 -10
  32. airbyte_cdk/sources/streams/call_rate.py +47 -185
  33. airbyte_cdk/sources/streams/http/http.py +2 -1
  34. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +56 -217
  35. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +73 -144
  36. airbyte_cdk/test/mock_http/mocker.py +1 -9
  37. airbyte_cdk/test/mock_http/response.py +3 -6
  38. airbyte_cdk/utils/datetime_helpers.py +66 -48
  39. airbyte_cdk/utils/mapping_helpers.py +26 -126
  40. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/METADATA +1 -1
  41. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/RECORD +45 -54
  42. airbyte_cdk/connector_builder/test_reader/__init__.py +0 -7
  43. airbyte_cdk/connector_builder/test_reader/helpers.py +0 -591
  44. airbyte_cdk/connector_builder/test_reader/message_grouper.py +0 -160
  45. airbyte_cdk/connector_builder/test_reader/reader.py +0 -441
  46. airbyte_cdk/connector_builder/test_reader/types.py +0 -75
  47. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +0 -81
  48. airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -47
  49. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +0 -85
  50. airbyte_cdk/sources/specs/transfer_modes.py +0 -26
  51. airbyte_cdk/sources/streams/permissions/identities_stream.py +0 -75
  52. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE.txt +0 -0
  53. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE_SHORT +0 -0
  54. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/WHEEL +0 -0
  55. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/entry_points.txt +0 -0
@@ -3,7 +3,7 @@
3
3
  #
4
4
 
5
5
  import logging
6
- from typing import Any, Generic, Iterator, List, Mapping, MutableMapping, Optional, Tuple
6
+ from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple
7
7
 
8
8
  from airbyte_cdk.models import (
9
9
  AirbyteCatalog,
@@ -19,7 +19,6 @@ from airbyte_cdk.sources.declarative.extractors import RecordSelector
19
19
  from airbyte_cdk.sources.declarative.extractors.record_filter import (
20
20
  ClientSideIncrementalRecordFilterDecorator,
21
21
  )
22
- from airbyte_cdk.sources.declarative.incremental import ConcurrentPerPartitionCursor
23
22
  from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
24
23
  from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
25
24
  PerPartitionWithGlobalCursor,
@@ -47,6 +46,7 @@ from airbyte_cdk.sources.declarative.types import ConnectionDefinition
47
46
  from airbyte_cdk.sources.source import TState
48
47
  from airbyte_cdk.sources.streams import Stream
49
48
  from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
49
+ from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
50
50
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
51
51
  AlwaysAvailableAvailabilityStrategy,
52
52
  )
@@ -121,6 +121,12 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
121
121
  message_repository=self.message_repository,
122
122
  )
123
123
 
124
+ # TODO: Remove this. This property is necessary to safely migrate Stripe during the transition state.
125
+ @property
126
+ def is_partially_declarative(self) -> bool:
127
+ """This flag used to avoid unexpected AbstractStreamFacade processing as concurrent streams."""
128
+ return False
129
+
124
130
  def read(
125
131
  self,
126
132
  logger: logging.Logger,
@@ -224,7 +230,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
224
230
  stream_state = self._connector_state_manager.get_stream_state(
225
231
  stream_name=declarative_stream.name, namespace=declarative_stream.namespace
226
232
  )
227
- stream_state = self._migrate_state(declarative_stream, stream_state)
228
233
 
229
234
  retriever = self._get_retriever(declarative_stream, stream_state)
230
235
 
@@ -233,7 +238,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
233
238
  ):
234
239
  cursor = declarative_stream.retriever.stream_slicer.stream_slicer
235
240
 
236
- if not isinstance(cursor, ConcurrentCursor | ConcurrentPerPartitionCursor):
241
+ if not isinstance(cursor, ConcurrentCursor):
237
242
  # This should never happen since we instantiate ConcurrentCursor in
238
243
  # model_to_component_factory.py
239
244
  raise ValueError(
@@ -332,8 +337,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
332
337
  stream_state = self._connector_state_manager.get_stream_state(
333
338
  stream_name=declarative_stream.name, namespace=declarative_stream.namespace
334
339
  )
335
- stream_state = self._migrate_state(declarative_stream, stream_state)
336
-
337
340
  partition_router = declarative_stream.retriever.stream_slicer._partition_router
338
341
 
339
342
  perpartition_cursor = (
@@ -375,6 +378,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
375
378
  )
376
379
  else:
377
380
  synchronous_streams.append(declarative_stream)
381
+ # TODO: Remove this. This check is necessary to safely migrate Stripe during the transition state.
382
+ # Condition below needs to ensure that concurrent support is not lost for sources that already support
383
+ # it before migration, but now are only partially migrated to declarative implementation (e.g., Stripe).
384
+ elif (
385
+ isinstance(declarative_stream, AbstractStreamFacade)
386
+ and self.is_partially_declarative
387
+ ):
388
+ concurrent_streams.append(declarative_stream.get_underlying_stream())
378
389
  else:
379
390
  synchronous_streams.append(declarative_stream)
380
391
 
@@ -478,21 +489,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
478
489
  # Also a temporary hack. In the legacy Stream implementation, as part of the read,
479
490
  # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
480
491
  # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
481
- # like StopConditionPaginationStrategyDecorator still rely on a DatetimeBasedCursor that is
482
- # properly initialized with state.
492
+ # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
493
+ # still rely on a DatetimeBasedCursor that is properly initialized with state.
483
494
  if retriever.cursor:
484
495
  retriever.cursor.set_initial_state(stream_state=stream_state)
485
-
486
- # Similar to above, the ClientSideIncrementalRecordFilterDecorator cursor is a separate instance
487
- # from the one initialized on the SimpleRetriever, so it also must also have state initialized
488
- # for semi-incremental streams using is_client_side_incremental to filter properly
489
- if isinstance(retriever.record_selector, RecordSelector) and isinstance(
490
- retriever.record_selector.record_filter, ClientSideIncrementalRecordFilterDecorator
491
- ):
492
- retriever.record_selector.record_filter._cursor.set_initial_state(
493
- stream_state=stream_state
494
- ) # type: ignore # After non-concurrent cursors are deprecated we can remove these cursor workarounds
495
-
496
496
  # We zero it out here, but since this is a cursor reference, the state is still properly
497
497
  # instantiated for the other components that reference it
498
498
  retriever.cursor = None
@@ -524,14 +524,3 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
524
524
  if stream.stream.name not in concurrent_stream_names
525
525
  ]
526
526
  )
527
-
528
- @staticmethod
529
- def _migrate_state(
530
- declarative_stream: DeclarativeStream, stream_state: MutableMapping[str, Any]
531
- ) -> MutableMapping[str, Any]:
532
- for state_migration in declarative_stream.state_migrations:
533
- if state_migration.should_migrate(stream_state):
534
- # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
535
- stream_state = dict(state_migration.migrate(stream_state))
536
-
537
- return stream_state
@@ -40,8 +40,6 @@ properties:
40
40
  "$ref": "#/definitions/Spec"
41
41
  concurrency_level:
42
42
  "$ref": "#/definitions/ConcurrencyLevel"
43
- api_budget:
44
- "$ref": "#/definitions/HTTPAPIBudget"
45
43
  metadata:
46
44
  type: object
47
45
  description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
@@ -796,7 +794,7 @@ definitions:
796
794
  description: This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)
797
795
  type: object
798
796
  required:
799
- - target
797
+ - target
800
798
  properties:
801
799
  target:
802
800
  title: Target
@@ -1367,170 +1365,6 @@ definitions:
1367
1365
  $parameters:
1368
1366
  type: object
1369
1367
  additional_properties: true
1370
- HTTPAPIBudget:
1371
- title: HTTP API Budget
1372
- description: >
1373
- Defines how many requests can be made to the API in a given time frame. `HTTPAPIBudget` extracts the remaining
1374
- call count and the reset time from HTTP response headers using the header names provided by
1375
- `ratelimit_remaining_header` and `ratelimit_reset_header`. Only requests using `HttpRequester`
1376
- are rate-limited; custom components that bypass `HttpRequester` are not covered by this budget.
1377
- type: object
1378
- required:
1379
- - type
1380
- - policies
1381
- properties:
1382
- type:
1383
- type: string
1384
- enum: [HTTPAPIBudget]
1385
- policies:
1386
- title: Policies
1387
- description: List of call rate policies that define how many calls are allowed.
1388
- type: array
1389
- items:
1390
- anyOf:
1391
- - "$ref": "#/definitions/FixedWindowCallRatePolicy"
1392
- - "$ref": "#/definitions/MovingWindowCallRatePolicy"
1393
- - "$ref": "#/definitions/UnlimitedCallRatePolicy"
1394
- ratelimit_reset_header:
1395
- title: Rate Limit Reset Header
1396
- description: The HTTP response header name that indicates when the rate limit resets.
1397
- type: string
1398
- default: "ratelimit-reset"
1399
- ratelimit_remaining_header:
1400
- title: Rate Limit Remaining Header
1401
- description: The HTTP response header name that indicates the number of remaining allowed calls.
1402
- type: string
1403
- default: "ratelimit-remaining"
1404
- status_codes_for_ratelimit_hit:
1405
- title: Status Codes for Rate Limit Hit
1406
- description: List of HTTP status codes that indicate a rate limit has been hit.
1407
- type: array
1408
- items:
1409
- type: integer
1410
- default: [429]
1411
- additionalProperties: true
1412
- FixedWindowCallRatePolicy:
1413
- title: Fixed Window Call Rate Policy
1414
- description: A policy that allows a fixed number of calls within a specific time window.
1415
- type: object
1416
- required:
1417
- - type
1418
- - period
1419
- - call_limit
1420
- - matchers
1421
- properties:
1422
- type:
1423
- type: string
1424
- enum: [FixedWindowCallRatePolicy]
1425
- period:
1426
- title: Period
1427
- description: The time interval for the rate limit window.
1428
- type: string
1429
- call_limit:
1430
- title: Call Limit
1431
- description: The maximum number of calls allowed within the period.
1432
- type: integer
1433
- matchers:
1434
- title: Matchers
1435
- description: List of matchers that define which requests this policy applies to.
1436
- type: array
1437
- items:
1438
- "$ref": "#/definitions/HttpRequestRegexMatcher"
1439
- additionalProperties: true
1440
- MovingWindowCallRatePolicy:
1441
- title: Moving Window Call Rate Policy
1442
- description: A policy that allows a fixed number of calls within a moving time window.
1443
- type: object
1444
- required:
1445
- - type
1446
- - rates
1447
- - matchers
1448
- properties:
1449
- type:
1450
- type: string
1451
- enum: [MovingWindowCallRatePolicy]
1452
- rates:
1453
- title: Rates
1454
- description: List of rates that define the call limits for different time intervals.
1455
- type: array
1456
- items:
1457
- "$ref": "#/definitions/Rate"
1458
- matchers:
1459
- title: Matchers
1460
- description: List of matchers that define which requests this policy applies to.
1461
- type: array
1462
- items:
1463
- "$ref": "#/definitions/HttpRequestRegexMatcher"
1464
- additionalProperties: true
1465
- UnlimitedCallRatePolicy:
1466
- title: Unlimited Call Rate Policy
1467
- description: A policy that allows unlimited calls for specific requests.
1468
- type: object
1469
- required:
1470
- - type
1471
- - matchers
1472
- properties:
1473
- type:
1474
- type: string
1475
- enum: [UnlimitedCallRatePolicy]
1476
- matchers:
1477
- title: Matchers
1478
- description: List of matchers that define which requests this policy applies to.
1479
- type: array
1480
- items:
1481
- "$ref": "#/definitions/HttpRequestRegexMatcher"
1482
- additionalProperties: true
1483
- Rate:
1484
- title: Rate
1485
- description: Defines a rate limit with a specific number of calls allowed within a time interval.
1486
- type: object
1487
- required:
1488
- - limit
1489
- - interval
1490
- properties:
1491
- limit:
1492
- title: Limit
1493
- description: The maximum number of calls allowed within the interval.
1494
- type: integer
1495
- interval:
1496
- title: Interval
1497
- description: The time interval for the rate limit.
1498
- type: string
1499
- examples:
1500
- - "PT1H"
1501
- - "P1D"
1502
- additionalProperties: true
1503
- HttpRequestRegexMatcher:
1504
- title: HTTP Request Matcher
1505
- description: >
1506
- Matches HTTP requests based on method, base URL, URL path pattern, query parameters, and headers.
1507
- Use `url_base` to specify the scheme and host (without trailing slash) and
1508
- `url_path_pattern` to apply a regex to the request path.
1509
- type: object
1510
- properties:
1511
- method:
1512
- title: Method
1513
- description: The HTTP method to match (e.g., GET, POST).
1514
- type: string
1515
- url_base:
1516
- title: URL Base
1517
- description: The base URL (scheme and host, e.g. "https://api.example.com") to match.
1518
- type: string
1519
- url_path_pattern:
1520
- title: URL Path Pattern
1521
- description: A regular expression pattern to match the URL path.
1522
- type: string
1523
- params:
1524
- title: Parameters
1525
- description: The query parameters to match.
1526
- type: object
1527
- additionalProperties: true
1528
- headers:
1529
- title: Headers
1530
- description: The headers to match.
1531
- type: object
1532
- additionalProperties: true
1533
- additionalProperties: true
1534
1368
  DefaultErrorHandler:
1535
1369
  title: Default Error Handler
1536
1370
  description: Component defining how to handle errors. Default behavior includes only retrying server errors (HTTP 5XX) and too many requests (HTTP 429) with an exponential backoff.
@@ -1715,6 +1549,7 @@ definitions:
1715
1549
  anyOf:
1716
1550
  - "$ref": "#/definitions/JsonDecoder"
1717
1551
  - "$ref": "#/definitions/XmlDecoder"
1552
+ - "$ref": "#/definitions/CompositeRawDecoder"
1718
1553
  $parameters:
1719
1554
  type: object
1720
1555
  additionalProperties: true
@@ -2298,6 +2133,23 @@ definitions:
2298
2133
  $parameters:
2299
2134
  type: object
2300
2135
  additionalProperties: true
2136
+ GzipJsonDecoder:
2137
+ title: GzipJson Decoder
2138
+ description: Use this if the response is Gzip compressed Json.
2139
+ type: object
2140
+ additionalProperties: true
2141
+ required:
2142
+ - type
2143
+ properties:
2144
+ type:
2145
+ type: string
2146
+ enum: [GzipJsonDecoder]
2147
+ encoding:
2148
+ type: string
2149
+ default: utf-8
2150
+ $parameters:
2151
+ type: object
2152
+ additionalProperties: true
2301
2153
  ZipfileDecoder:
2302
2154
  title: Zipfile Decoder
2303
2155
  description: Decoder for response data that is returned as zipfile(s).
@@ -2305,19 +2157,19 @@ definitions:
2305
2157
  additionalProperties: true
2306
2158
  required:
2307
2159
  - type
2308
- - decoder
2160
+ - parser
2309
2161
  properties:
2310
2162
  type:
2311
2163
  type: string
2312
2164
  enum: [ZipfileDecoder]
2313
- decoder:
2165
+ parser:
2314
2166
  title: Parser
2315
2167
  description: Parser to parse the decompressed data from the zipfile(s).
2316
2168
  anyOf:
2317
- - "$ref": "#/definitions/CsvDecoder"
2318
- - "$ref": "#/definitions/GzipDecoder"
2319
- - "$ref": "#/definitions/JsonDecoder"
2320
- - "$ref": "#/definitions/JsonlDecoder"
2169
+ - "$ref": "#/definitions/GzipParser"
2170
+ - "$ref": "#/definitions/JsonParser"
2171
+ - "$ref": "#/definitions/JsonLineParser"
2172
+ - "$ref": "#/definitions/CsvParser"
2321
2173
  ListPartitionRouter:
2322
2174
  title: List Partition Router
2323
2175
  description: A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. During a sync, each value is iterated over and can be used as input to outbound API requests.
@@ -2995,35 +2847,25 @@ definitions:
2995
2847
  enum: [RequestPath]
2996
2848
  RequestOption:
2997
2849
  title: Request Option
2998
- description: Specifies the key field or path and where in the request a component's value should be injected.
2850
+ description: Specifies the key field and where in the request a component's value should be injected.
2999
2851
  type: object
3000
2852
  required:
3001
2853
  - type
2854
+ - field_name
3002
2855
  - inject_into
3003
2856
  properties:
3004
2857
  type:
3005
2858
  type: string
3006
2859
  enum: [RequestOption]
3007
2860
  field_name:
3008
- title: Field Name
3009
- description: Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.
2861
+ title: Request Option
2862
+ description: Configures which key should be used in the location that the descriptor is being injected into
3010
2863
  type: string
3011
2864
  examples:
3012
2865
  - segment_id
3013
2866
  interpolation_context:
3014
2867
  - config
3015
2868
  - parameters
3016
- field_path:
3017
- title: Field Path
3018
- description: Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)
3019
- type: array
3020
- items:
3021
- type: string
3022
- examples:
3023
- - ["data", "viewer", "id"]
3024
- interpolation_context:
3025
- - config
3026
- - parameters
3027
2869
  inject_into:
3028
2870
  title: Inject Into
3029
2871
  description: Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.
@@ -3150,39 +2992,79 @@ definitions:
3150
2992
  description: Component decoding the response so records can be extracted.
3151
2993
  anyOf:
3152
2994
  - "$ref": "#/definitions/CustomDecoder"
3153
- - "$ref": "#/definitions/CsvDecoder"
3154
- - "$ref": "#/definitions/GzipDecoder"
3155
2995
  - "$ref": "#/definitions/JsonDecoder"
3156
2996
  - "$ref": "#/definitions/JsonlDecoder"
3157
2997
  - "$ref": "#/definitions/IterableDecoder"
3158
2998
  - "$ref": "#/definitions/XmlDecoder"
2999
+ - "$ref": "#/definitions/GzipJsonDecoder"
3000
+ - "$ref": "#/definitions/CompositeRawDecoder"
3159
3001
  - "$ref": "#/definitions/ZipfileDecoder"
3160
3002
  $parameters:
3161
3003
  type: object
3162
3004
  additionalProperties: true
3163
- GzipDecoder:
3005
+ CompositeRawDecoder:
3006
+ description: "(This is experimental, use at your own risk)"
3164
3007
  type: object
3165
3008
  required:
3166
3009
  - type
3167
- - decoder
3010
+ - parser
3168
3011
  properties:
3169
3012
  type:
3170
3013
  type: string
3171
- enum: [GzipDecoder]
3172
- decoder:
3014
+ enum: [CompositeRawDecoder]
3015
+ parser:
3173
3016
  anyOf:
3174
- - "$ref": "#/definitions/CsvDecoder"
3175
- - "$ref": "#/definitions/GzipDecoder"
3176
- - "$ref": "#/definitions/JsonDecoder"
3177
- - "$ref": "#/definitions/JsonlDecoder"
3178
- CsvDecoder:
3017
+ - "$ref": "#/definitions/GzipParser"
3018
+ - "$ref": "#/definitions/JsonParser"
3019
+ - "$ref": "#/definitions/JsonLineParser"
3020
+ - "$ref": "#/definitions/CsvParser"
3021
+ # PARSERS
3022
+ GzipParser:
3023
+ type: object
3024
+ required:
3025
+ - type
3026
+ - inner_parser
3027
+ properties:
3028
+ type:
3029
+ type: string
3030
+ enum: [GzipParser]
3031
+ inner_parser:
3032
+ anyOf:
3033
+ - "$ref": "#/definitions/JsonLineParser"
3034
+ - "$ref": "#/definitions/CsvParser"
3035
+ - "$ref": "#/definitions/JsonParser"
3036
+ JsonParser:
3037
+ title: JsonParser
3038
+ description: Parser used for parsing str, bytes, or bytearray data and returning data in a dictionary format.
3039
+ type: object
3040
+ required:
3041
+ - type
3042
+ properties:
3043
+ type:
3044
+ type: string
3045
+ enum: [JsonParser]
3046
+ encoding:
3047
+ type: string
3048
+ default: utf-8
3049
+ JsonLineParser:
3050
+ type: object
3051
+ required:
3052
+ - type
3053
+ properties:
3054
+ type:
3055
+ type: string
3056
+ enum: [JsonLineParser]
3057
+ encoding:
3058
+ type: string
3059
+ default: utf-8
3060
+ CsvParser:
3179
3061
  type: object
3180
3062
  required:
3181
3063
  - type
3182
3064
  properties:
3183
3065
  type:
3184
3066
  type: string
3185
- enum: [CsvDecoder]
3067
+ enum: [CsvParser]
3186
3068
  encoding:
3187
3069
  type: string
3188
3070
  default: utf-8
@@ -3310,24 +3192,24 @@ definitions:
3310
3192
  description: Component decoding the response so records can be extracted.
3311
3193
  anyOf:
3312
3194
  - "$ref": "#/definitions/CustomDecoder"
3313
- - "$ref": "#/definitions/CsvDecoder"
3314
- - "$ref": "#/definitions/GzipDecoder"
3315
3195
  - "$ref": "#/definitions/JsonDecoder"
3316
3196
  - "$ref": "#/definitions/JsonlDecoder"
3317
3197
  - "$ref": "#/definitions/IterableDecoder"
3318
3198
  - "$ref": "#/definitions/XmlDecoder"
3199
+ - "$ref": "#/definitions/GzipJsonDecoder"
3200
+ - "$ref": "#/definitions/CompositeRawDecoder"
3319
3201
  - "$ref": "#/definitions/ZipfileDecoder"
3320
3202
  download_decoder:
3321
3203
  title: Download Decoder
3322
3204
  description: Component decoding the download response so records can be extracted.
3323
3205
  anyOf:
3324
3206
  - "$ref": "#/definitions/CustomDecoder"
3325
- - "$ref": "#/definitions/CsvDecoder"
3326
- - "$ref": "#/definitions/GzipDecoder"
3327
3207
  - "$ref": "#/definitions/JsonDecoder"
3328
3208
  - "$ref": "#/definitions/JsonlDecoder"
3329
3209
  - "$ref": "#/definitions/IterableDecoder"
3330
3210
  - "$ref": "#/definitions/XmlDecoder"
3211
+ - "$ref": "#/definitions/GzipJsonDecoder"
3212
+ - "$ref": "#/definitions/CompositeRawDecoder"
3331
3213
  - "$ref": "#/definitions/ZipfileDecoder"
3332
3214
  $parameters:
3333
3215
  type: object
@@ -138,9 +138,7 @@ class DeclarativeStream(Stream):
138
138
  """
139
139
  :param: stream_state We knowingly avoid using stream_state as we want cursors to manage their own state.
140
140
  """
141
- if stream_slice is None or (
142
- not isinstance(stream_slice, StreamSlice) and stream_slice == {}
143
- ):
141
+ if stream_slice is None or stream_slice == {}:
144
142
  # As the parameter is Optional, many would just call `read_records(sync_mode)` during testing without specifying the field
145
143
  # As part of the declarative model without custom components, this should never happen as the CDK would wire up a
146
144
  # SinglePartitionRouter that would create this StreamSlice properly
@@ -10,8 +10,10 @@ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
10
10
  )
11
11
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
12
12
  from airbyte_cdk.sources.declarative.decoders.json_decoder import (
13
+ GzipJsonDecoder,
13
14
  IterableDecoder,
14
15
  JsonDecoder,
16
+ JsonlDecoder,
15
17
  )
16
18
  from airbyte_cdk.sources.declarative.decoders.noop_decoder import NoopDecoder
17
19
  from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import (
@@ -25,7 +27,9 @@ __all__ = [
25
27
  "CompositeRawDecoder",
26
28
  "JsonDecoder",
27
29
  "JsonParser",
30
+ "JsonlDecoder",
28
31
  "IterableDecoder",
32
+ "GzipJsonDecoder",
29
33
  "NoopDecoder",
30
34
  "PaginationDecoderDecorator",
31
35
  "XmlDecoder",
@@ -1,6 +1,5 @@
1
1
  import csv
2
2
  import gzip
3
- import io
4
3
  import json
5
4
  import logging
6
5
  from abc import ABC, abstractmethod
@@ -131,15 +130,11 @@ class CompositeRawDecoder(Decoder):
131
130
  """
132
131
 
133
132
  parser: Parser
134
- stream_response: bool = True
135
133
 
136
134
  def is_stream_response(self) -> bool:
137
- return self.stream_response
135
+ return True
138
136
 
139
137
  def decode(
140
138
  self, response: requests.Response
141
139
  ) -> Generator[MutableMapping[str, Any], None, None]:
142
- if self.is_stream_response():
143
- yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
144
- else:
145
- yield from self.parser.parse(data=io.BytesIO(response.content))
140
+ yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
@@ -10,24 +10,21 @@ from typing import Any, Generator, List, Mapping, MutableMapping, Optional
10
10
  import orjson
11
11
  import requests
12
12
 
13
- from airbyte_cdk.sources.declarative.decoders import CompositeRawDecoder, JsonParser
14
13
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
15
14
 
16
15
  logger = logging.getLogger("airbyte")
17
16
 
18
17
 
18
+ @dataclass
19
19
  class JsonDecoder(Decoder):
20
20
  """
21
21
  Decoder strategy that returns the json-encoded content of a response, if any.
22
-
23
- Usually, we would try to instantiate the equivalent `CompositeRawDecoder(parser=JsonParser(), stream_response=False)` but there were specific historical behaviors related to the JsonDecoder that we didn't know if we could remove like the fallback on {} in case of errors.
24
22
  """
25
23
 
26
- def __init__(self, parameters: Mapping[str, Any]):
27
- self._decoder = CompositeRawDecoder(parser=JsonParser(), stream_response=False)
24
+ parameters: InitVar[Mapping[str, Any]]
28
25
 
29
26
  def is_stream_response(self) -> bool:
30
- return self._decoder.is_stream_response()
27
+ return False
31
28
 
32
29
  def decode(
33
30
  self, response: requests.Response
@@ -35,16 +32,25 @@ class JsonDecoder(Decoder):
35
32
  """
36
33
  Given the response is an empty string or an emtpy list, the function will return a generator with an empty mapping.
37
34
  """
38
- has_yielded = False
39
35
  try:
40
- for element in self._decoder.decode(response):
41
- yield element
42
- has_yielded = True
43
- except Exception:
36
+ body_json = response.json()
37
+ yield from self.parse_body_json(body_json)
38
+ except requests.exceptions.JSONDecodeError:
39
+ logger.warning(
40
+ f"Response cannot be parsed into json: {response.status_code=}, {response.text=}"
41
+ )
44
42
  yield {}
45
43
 
46
- if not has_yielded:
44
+ @staticmethod
45
+ def parse_body_json(
46
+ body_json: MutableMapping[str, Any] | List[MutableMapping[str, Any]],
47
+ ) -> Generator[MutableMapping[str, Any], None, None]:
48
+ if not isinstance(body_json, list):
49
+ body_json = [body_json]
50
+ if len(body_json) == 0:
47
51
  yield {}
52
+ else:
53
+ yield from body_json
48
54
 
49
55
 
50
56
  @dataclass
@@ -63,3 +69,43 @@ class IterableDecoder(Decoder):
63
69
  ) -> Generator[MutableMapping[str, Any], None, None]:
64
70
  for line in response.iter_lines():
65
71
  yield {"record": line.decode()}
72
+
73
+
74
+ @dataclass
75
+ class JsonlDecoder(Decoder):
76
+ """
77
+ Decoder strategy that returns the json-encoded content of the response, if any.
78
+ """
79
+
80
+ parameters: InitVar[Mapping[str, Any]]
81
+
82
+ def is_stream_response(self) -> bool:
83
+ return True
84
+
85
+ def decode(
86
+ self, response: requests.Response
87
+ ) -> Generator[MutableMapping[str, Any], None, None]:
88
+ # TODO???: set delimiter? usually it is `\n` but maybe it would be useful to set optional?
89
+ # https://github.com/airbytehq/airbyte-internal-issues/issues/8436
90
+ for record in response.iter_lines():
91
+ yield orjson.loads(record)
92
+
93
+
94
+ @dataclass
95
+ class GzipJsonDecoder(JsonDecoder):
96
+ encoding: Optional[str]
97
+
98
+ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
99
+ if self.encoding:
100
+ try:
101
+ codecs.lookup(self.encoding)
102
+ except LookupError:
103
+ raise ValueError(
104
+ f"Invalid encoding '{self.encoding}'. Please check provided encoding"
105
+ )
106
+
107
+ def decode(
108
+ self, response: requests.Response
109
+ ) -> Generator[MutableMapping[str, Any], None, None]:
110
+ raw_string = decompress(response.content).decode(encoding=self.encoding or "utf-8")
111
+ yield from self.parse_body_json(orjson.loads(raw_string))