airbyte-cdk 6.34.1.dev0__py3-none-any.whl → 6.34.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +16 -12
  2. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  3. airbyte_cdk/connector_builder/test_reader/helpers.py +591 -0
  4. airbyte_cdk/connector_builder/test_reader/message_grouper.py +160 -0
  5. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  6. airbyte_cdk/connector_builder/test_reader/types.py +75 -0
  7. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  8. airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
  9. airbyte_cdk/sources/declarative/auth/oauth.py +6 -1
  10. airbyte_cdk/sources/declarative/auth/token.py +3 -8
  11. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +30 -79
  12. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +203 -100
  13. airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
  14. airbyte_cdk/sources/declarative/decoders/__init__.py +0 -4
  15. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +7 -2
  16. airbyte_cdk/sources/declarative/decoders/json_decoder.py +12 -58
  17. airbyte_cdk/sources/declarative/extractors/record_selector.py +12 -3
  18. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +56 -25
  19. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +12 -6
  20. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +6 -2
  21. airbyte_cdk/sources/declarative/interpolation/jinja.py +13 -0
  22. airbyte_cdk/sources/declarative/manifest_declarative_source.py +9 -0
  23. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +150 -41
  24. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +234 -84
  25. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  26. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
  27. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +26 -18
  28. airbyte_cdk/sources/declarative/requesters/http_requester.py +8 -2
  29. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +16 -5
  30. airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
  31. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
  32. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +1 -4
  33. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +0 -3
  34. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -47
  35. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
  36. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -3
  37. airbyte_cdk/sources/declarative/transformations/add_fields.py +4 -4
  38. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
  39. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  40. airbyte_cdk/sources/file_based/file_based_source.py +70 -37
  41. airbyte_cdk/sources/file_based/file_based_stream_reader.py +107 -12
  42. airbyte_cdk/sources/file_based/stream/__init__.py +10 -1
  43. airbyte_cdk/sources/file_based/stream/identities_stream.py +47 -0
  44. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +85 -0
  45. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  46. airbyte_cdk/sources/streams/call_rate.py +185 -47
  47. airbyte_cdk/sources/streams/http/http.py +1 -2
  48. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +217 -56
  49. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +144 -73
  50. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  51. airbyte_cdk/test/mock_http/mocker.py +9 -1
  52. airbyte_cdk/test/mock_http/response.py +6 -3
  53. airbyte_cdk/utils/datetime_helpers.py +48 -66
  54. airbyte_cdk/utils/mapping_helpers.py +126 -26
  55. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/METADATA +1 -1
  56. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/RECORD +60 -51
  57. airbyte_cdk/connector_builder/message_grouper.py +0 -448
  58. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/LICENSE.txt +0 -0
  59. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/LICENSE_SHORT +0 -0
  60. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/WHEEL +0 -0
  61. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/entry_points.txt +0 -0
@@ -40,6 +40,8 @@ properties:
40
40
  "$ref": "#/definitions/Spec"
41
41
  concurrency_level:
42
42
  "$ref": "#/definitions/ConcurrencyLevel"
43
+ api_budget:
44
+ "$ref": "#/definitions/HTTPAPIBudget"
43
45
  metadata:
44
46
  type: object
45
47
  description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
@@ -80,7 +82,6 @@ definitions:
80
82
  - stream_interval
81
83
  - stream_partition
82
84
  - stream_slice
83
- - stream_state
84
85
  examples:
85
86
  - "{{ record['updates'] }}"
86
87
  - "{{ record['MetaData']['LastUpdatedTime'] }}"
@@ -794,7 +795,7 @@ definitions:
794
795
  description: This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)
795
796
  type: object
796
797
  required:
797
- - target
798
+ - target
798
799
  properties:
799
800
  target:
800
801
  title: Target
@@ -1365,6 +1366,170 @@ definitions:
1365
1366
  $parameters:
1366
1367
  type: object
1367
1368
  additional_properties: true
1369
+ HTTPAPIBudget:
1370
+ title: HTTP API Budget
1371
+ description: >
1372
+ Defines how many requests can be made to the API in a given time frame. `HTTPAPIBudget` extracts the remaining
1373
+ call count and the reset time from HTTP response headers using the header names provided by
1374
+ `ratelimit_remaining_header` and `ratelimit_reset_header`. Only requests using `HttpRequester`
1375
+ are rate-limited; custom components that bypass `HttpRequester` are not covered by this budget.
1376
+ type: object
1377
+ required:
1378
+ - type
1379
+ - policies
1380
+ properties:
1381
+ type:
1382
+ type: string
1383
+ enum: [HTTPAPIBudget]
1384
+ policies:
1385
+ title: Policies
1386
+ description: List of call rate policies that define how many calls are allowed.
1387
+ type: array
1388
+ items:
1389
+ anyOf:
1390
+ - "$ref": "#/definitions/FixedWindowCallRatePolicy"
1391
+ - "$ref": "#/definitions/MovingWindowCallRatePolicy"
1392
+ - "$ref": "#/definitions/UnlimitedCallRatePolicy"
1393
+ ratelimit_reset_header:
1394
+ title: Rate Limit Reset Header
1395
+ description: The HTTP response header name that indicates when the rate limit resets.
1396
+ type: string
1397
+ default: "ratelimit-reset"
1398
+ ratelimit_remaining_header:
1399
+ title: Rate Limit Remaining Header
1400
+ description: The HTTP response header name that indicates the number of remaining allowed calls.
1401
+ type: string
1402
+ default: "ratelimit-remaining"
1403
+ status_codes_for_ratelimit_hit:
1404
+ title: Status Codes for Rate Limit Hit
1405
+ description: List of HTTP status codes that indicate a rate limit has been hit.
1406
+ type: array
1407
+ items:
1408
+ type: integer
1409
+ default: [429]
1410
+ additionalProperties: true
1411
+ FixedWindowCallRatePolicy:
1412
+ title: Fixed Window Call Rate Policy
1413
+ description: A policy that allows a fixed number of calls within a specific time window.
1414
+ type: object
1415
+ required:
1416
+ - type
1417
+ - period
1418
+ - call_limit
1419
+ - matchers
1420
+ properties:
1421
+ type:
1422
+ type: string
1423
+ enum: [FixedWindowCallRatePolicy]
1424
+ period:
1425
+ title: Period
1426
+ description: The time interval for the rate limit window.
1427
+ type: string
1428
+ call_limit:
1429
+ title: Call Limit
1430
+ description: The maximum number of calls allowed within the period.
1431
+ type: integer
1432
+ matchers:
1433
+ title: Matchers
1434
+ description: List of matchers that define which requests this policy applies to.
1435
+ type: array
1436
+ items:
1437
+ "$ref": "#/definitions/HttpRequestRegexMatcher"
1438
+ additionalProperties: true
1439
+ MovingWindowCallRatePolicy:
1440
+ title: Moving Window Call Rate Policy
1441
+ description: A policy that allows a fixed number of calls within a moving time window.
1442
+ type: object
1443
+ required:
1444
+ - type
1445
+ - rates
1446
+ - matchers
1447
+ properties:
1448
+ type:
1449
+ type: string
1450
+ enum: [MovingWindowCallRatePolicy]
1451
+ rates:
1452
+ title: Rates
1453
+ description: List of rates that define the call limits for different time intervals.
1454
+ type: array
1455
+ items:
1456
+ "$ref": "#/definitions/Rate"
1457
+ matchers:
1458
+ title: Matchers
1459
+ description: List of matchers that define which requests this policy applies to.
1460
+ type: array
1461
+ items:
1462
+ "$ref": "#/definitions/HttpRequestRegexMatcher"
1463
+ additionalProperties: true
1464
+ UnlimitedCallRatePolicy:
1465
+ title: Unlimited Call Rate Policy
1466
+ description: A policy that allows unlimited calls for specific requests.
1467
+ type: object
1468
+ required:
1469
+ - type
1470
+ - matchers
1471
+ properties:
1472
+ type:
1473
+ type: string
1474
+ enum: [UnlimitedCallRatePolicy]
1475
+ matchers:
1476
+ title: Matchers
1477
+ description: List of matchers that define which requests this policy applies to.
1478
+ type: array
1479
+ items:
1480
+ "$ref": "#/definitions/HttpRequestRegexMatcher"
1481
+ additionalProperties: true
1482
+ Rate:
1483
+ title: Rate
1484
+ description: Defines a rate limit with a specific number of calls allowed within a time interval.
1485
+ type: object
1486
+ required:
1487
+ - limit
1488
+ - interval
1489
+ properties:
1490
+ limit:
1491
+ title: Limit
1492
+ description: The maximum number of calls allowed within the interval.
1493
+ type: integer
1494
+ interval:
1495
+ title: Interval
1496
+ description: The time interval for the rate limit.
1497
+ type: string
1498
+ examples:
1499
+ - "PT1H"
1500
+ - "P1D"
1501
+ additionalProperties: true
1502
+ HttpRequestRegexMatcher:
1503
+ title: HTTP Request Matcher
1504
+ description: >
1505
+ Matches HTTP requests based on method, base URL, URL path pattern, query parameters, and headers.
1506
+ Use `url_base` to specify the scheme and host (without trailing slash) and
1507
+ `url_path_pattern` to apply a regex to the request path.
1508
+ type: object
1509
+ properties:
1510
+ method:
1511
+ title: Method
1512
+ description: The HTTP method to match (e.g., GET, POST).
1513
+ type: string
1514
+ url_base:
1515
+ title: URL Base
1516
+ description: The base URL (scheme and host, e.g. "https://api.example.com") to match.
1517
+ type: string
1518
+ url_path_pattern:
1519
+ title: URL Path Pattern
1520
+ description: A regular expression pattern to match the URL path.
1521
+ type: string
1522
+ params:
1523
+ title: Parameters
1524
+ description: The query parameters to match.
1525
+ type: object
1526
+ additionalProperties: true
1527
+ headers:
1528
+ title: Headers
1529
+ description: The headers to match.
1530
+ type: object
1531
+ additionalProperties: true
1532
+ additionalProperties: true
1368
1533
  DefaultErrorHandler:
1369
1534
  title: Default Error Handler
1370
1535
  description: Component defining how to handle errors. Default behavior includes only retrying server errors (HTTP 5XX) and too many requests (HTTP 429) with an exponential backoff.
@@ -1549,7 +1714,6 @@ definitions:
1549
1714
  anyOf:
1550
1715
  - "$ref": "#/definitions/JsonDecoder"
1551
1716
  - "$ref": "#/definitions/XmlDecoder"
1552
- - "$ref": "#/definitions/CompositeRawDecoder"
1553
1717
  $parameters:
1554
1718
  type: object
1555
1719
  additionalProperties: true
@@ -1611,7 +1775,6 @@ definitions:
1611
1775
  - stream_interval
1612
1776
  - stream_partition
1613
1777
  - stream_slice
1614
- - stream_state
1615
1778
  examples:
1616
1779
  - "/products"
1617
1780
  - "/quotes/{{ stream_partition['id'] }}/quote_line_groups"
@@ -1661,7 +1824,6 @@ definitions:
1661
1824
  - stream_interval
1662
1825
  - stream_partition
1663
1826
  - stream_slice
1664
- - stream_state
1665
1827
  examples:
1666
1828
  - |
1667
1829
  [{"clause": {"type": "timestamp", "operator": 10, "parameters":
@@ -1679,7 +1841,6 @@ definitions:
1679
1841
  - stream_interval
1680
1842
  - stream_partition
1681
1843
  - stream_slice
1682
- - stream_state
1683
1844
  examples:
1684
1845
  - sort_order: "ASC"
1685
1846
  sort_field: "CREATED_AT"
@@ -1700,7 +1861,6 @@ definitions:
1700
1861
  - stream_interval
1701
1862
  - stream_partition
1702
1863
  - stream_slice
1703
- - stream_state
1704
1864
  examples:
1705
1865
  - Output-Format: JSON
1706
1866
  - Version: "{{ config['version'] }}"
@@ -1717,7 +1877,6 @@ definitions:
1717
1877
  - stream_interval
1718
1878
  - stream_partition
1719
1879
  - stream_slice
1720
- - stream_state
1721
1880
  examples:
1722
1881
  - unit: "day"
1723
1882
  - query: 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"'
@@ -2072,7 +2231,6 @@ definitions:
2072
2231
  interpolation_context:
2073
2232
  - config
2074
2233
  - record
2075
- - stream_state
2076
2234
  - stream_slice
2077
2235
  new:
2078
2236
  type: string
@@ -2086,7 +2244,6 @@ definitions:
2086
2244
  interpolation_context:
2087
2245
  - config
2088
2246
  - record
2089
- - stream_state
2090
2247
  - stream_slice
2091
2248
  $parameters:
2092
2249
  type: object
@@ -2133,23 +2290,6 @@ definitions:
2133
2290
  $parameters:
2134
2291
  type: object
2135
2292
  additionalProperties: true
2136
- GzipJsonDecoder:
2137
- title: GzipJson Decoder
2138
- description: Use this if the response is Gzip compressed Json.
2139
- type: object
2140
- additionalProperties: true
2141
- required:
2142
- - type
2143
- properties:
2144
- type:
2145
- type: string
2146
- enum: [GzipJsonDecoder]
2147
- encoding:
2148
- type: string
2149
- default: utf-8
2150
- $parameters:
2151
- type: object
2152
- additionalProperties: true
2153
2293
  ZipfileDecoder:
2154
2294
  title: Zipfile Decoder
2155
2295
  description: Decoder for response data that is returned as zipfile(s).
@@ -2157,19 +2297,19 @@ definitions:
2157
2297
  additionalProperties: true
2158
2298
  required:
2159
2299
  - type
2160
- - parser
2300
+ - decoder
2161
2301
  properties:
2162
2302
  type:
2163
2303
  type: string
2164
2304
  enum: [ZipfileDecoder]
2165
- parser:
2305
+ decoder:
2166
2306
  title: Parser
2167
2307
  description: Parser to parse the decompressed data from the zipfile(s).
2168
2308
  anyOf:
2169
- - "$ref": "#/definitions/GzipParser"
2170
- - "$ref": "#/definitions/JsonParser"
2171
- - "$ref": "#/definitions/JsonLineParser"
2172
- - "$ref": "#/definitions/CsvParser"
2309
+ - "$ref": "#/definitions/CsvDecoder"
2310
+ - "$ref": "#/definitions/GzipDecoder"
2311
+ - "$ref": "#/definitions/JsonDecoder"
2312
+ - "$ref": "#/definitions/JsonlDecoder"
2173
2313
  ListPartitionRouter:
2174
2314
  title: List Partition Router
2175
2315
  description: A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. During a sync, each value is iterated over and can be used as input to outbound API requests.
@@ -2753,7 +2893,6 @@ definitions:
2753
2893
  - stream_interval
2754
2894
  - stream_partition
2755
2895
  - stream_slice
2756
- - stream_state
2757
2896
  examples:
2758
2897
  - "{{ record['created_at'] >= stream_interval['start_time'] }}"
2759
2898
  - "{{ record.status in ['active', 'expired'] }}"
@@ -2847,25 +2986,35 @@ definitions:
2847
2986
  enum: [RequestPath]
2848
2987
  RequestOption:
2849
2988
  title: Request Option
2850
- description: Specifies the key field and where in the request a component's value should be injected.
2989
+ description: Specifies the key field or path and where in the request a component's value should be injected.
2851
2990
  type: object
2852
2991
  required:
2853
2992
  - type
2854
- - field_name
2855
2993
  - inject_into
2856
2994
  properties:
2857
2995
  type:
2858
2996
  type: string
2859
2997
  enum: [RequestOption]
2860
2998
  field_name:
2861
- title: Request Option
2862
- description: Configures which key should be used in the location that the descriptor is being injected into
2999
+ title: Field Name
3000
+ description: Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.
2863
3001
  type: string
2864
3002
  examples:
2865
3003
  - segment_id
2866
3004
  interpolation_context:
2867
3005
  - config
2868
3006
  - parameters
3007
+ field_path:
3008
+ title: Field Path
3009
+ description: Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)
3010
+ type: array
3011
+ items:
3012
+ type: string
3013
+ examples:
3014
+ - ["data", "viewer", "id"]
3015
+ interpolation_context:
3016
+ - config
3017
+ - parameters
2869
3018
  inject_into:
2870
3019
  title: Inject Into
2871
3020
  description: Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.
@@ -2992,79 +3141,39 @@ definitions:
2992
3141
  description: Component decoding the response so records can be extracted.
2993
3142
  anyOf:
2994
3143
  - "$ref": "#/definitions/CustomDecoder"
3144
+ - "$ref": "#/definitions/CsvDecoder"
3145
+ - "$ref": "#/definitions/GzipDecoder"
2995
3146
  - "$ref": "#/definitions/JsonDecoder"
2996
3147
  - "$ref": "#/definitions/JsonlDecoder"
2997
3148
  - "$ref": "#/definitions/IterableDecoder"
2998
3149
  - "$ref": "#/definitions/XmlDecoder"
2999
- - "$ref": "#/definitions/GzipJsonDecoder"
3000
- - "$ref": "#/definitions/CompositeRawDecoder"
3001
3150
  - "$ref": "#/definitions/ZipfileDecoder"
3002
3151
  $parameters:
3003
3152
  type: object
3004
3153
  additionalProperties: true
3005
- CompositeRawDecoder:
3006
- description: "(This is experimental, use at your own risk)"
3154
+ GzipDecoder:
3007
3155
  type: object
3008
3156
  required:
3009
3157
  - type
3010
- - parser
3158
+ - decoder
3011
3159
  properties:
3012
3160
  type:
3013
3161
  type: string
3014
- enum: [CompositeRawDecoder]
3015
- parser:
3016
- anyOf:
3017
- - "$ref": "#/definitions/GzipParser"
3018
- - "$ref": "#/definitions/JsonParser"
3019
- - "$ref": "#/definitions/JsonLineParser"
3020
- - "$ref": "#/definitions/CsvParser"
3021
- # PARSERS
3022
- GzipParser:
3023
- type: object
3024
- required:
3025
- - type
3026
- - inner_parser
3027
- properties:
3028
- type:
3029
- type: string
3030
- enum: [GzipParser]
3031
- inner_parser:
3162
+ enum: [GzipDecoder]
3163
+ decoder:
3032
3164
  anyOf:
3033
- - "$ref": "#/definitions/JsonLineParser"
3034
- - "$ref": "#/definitions/CsvParser"
3035
- - "$ref": "#/definitions/JsonParser"
3036
- JsonParser:
3037
- title: JsonParser
3038
- description: Parser used for parsing str, bytes, or bytearray data and returning data in a dictionary format.
3039
- type: object
3040
- required:
3041
- - type
3042
- properties:
3043
- type:
3044
- type: string
3045
- enum: [JsonParser]
3046
- encoding:
3047
- type: string
3048
- default: utf-8
3049
- JsonLineParser:
3050
- type: object
3051
- required:
3052
- - type
3053
- properties:
3054
- type:
3055
- type: string
3056
- enum: [JsonLineParser]
3057
- encoding:
3058
- type: string
3059
- default: utf-8
3060
- CsvParser:
3165
+ - "$ref": "#/definitions/CsvDecoder"
3166
+ - "$ref": "#/definitions/GzipDecoder"
3167
+ - "$ref": "#/definitions/JsonDecoder"
3168
+ - "$ref": "#/definitions/JsonlDecoder"
3169
+ CsvDecoder:
3061
3170
  type: object
3062
3171
  required:
3063
3172
  - type
3064
3173
  properties:
3065
3174
  type:
3066
3175
  type: string
3067
- enum: [CsvParser]
3176
+ enum: [CsvDecoder]
3068
3177
  encoding:
3069
3178
  type: string
3070
3179
  default: utf-8
@@ -3192,24 +3301,24 @@ definitions:
3192
3301
  description: Component decoding the response so records can be extracted.
3193
3302
  anyOf:
3194
3303
  - "$ref": "#/definitions/CustomDecoder"
3304
+ - "$ref": "#/definitions/CsvDecoder"
3305
+ - "$ref": "#/definitions/GzipDecoder"
3195
3306
  - "$ref": "#/definitions/JsonDecoder"
3196
3307
  - "$ref": "#/definitions/JsonlDecoder"
3197
3308
  - "$ref": "#/definitions/IterableDecoder"
3198
3309
  - "$ref": "#/definitions/XmlDecoder"
3199
- - "$ref": "#/definitions/GzipJsonDecoder"
3200
- - "$ref": "#/definitions/CompositeRawDecoder"
3201
3310
  - "$ref": "#/definitions/ZipfileDecoder"
3202
3311
  download_decoder:
3203
3312
  title: Download Decoder
3204
3313
  description: Component decoding the download response so records can be extracted.
3205
3314
  anyOf:
3206
3315
  - "$ref": "#/definitions/CustomDecoder"
3316
+ - "$ref": "#/definitions/CsvDecoder"
3317
+ - "$ref": "#/definitions/GzipDecoder"
3207
3318
  - "$ref": "#/definitions/JsonDecoder"
3208
3319
  - "$ref": "#/definitions/JsonlDecoder"
3209
3320
  - "$ref": "#/definitions/IterableDecoder"
3210
3321
  - "$ref": "#/definitions/XmlDecoder"
3211
- - "$ref": "#/definitions/GzipJsonDecoder"
3212
- - "$ref": "#/definitions/CompositeRawDecoder"
3213
3322
  - "$ref": "#/definitions/ZipfileDecoder"
3214
3323
  $parameters:
3215
3324
  type: object
@@ -3571,12 +3680,6 @@ interpolation:
3571
3680
  - title: stream_slice
3572
3681
  description: This variable is deprecated. Use stream_interval or stream_partition instead.
3573
3682
  type: object
3574
- - title: stream_state
3575
- description: The current state of the stream. The object's keys are defined by the incremental sync's cursor_field and the partition router's values.
3576
- type: object
3577
- examples:
3578
- - created_at: "2020-01-01 00:00:00.000+00:00"
3579
- - updated_at: "2020-01-02 00:00:00.000+00:00"
3580
3683
  macros:
3581
3684
  - title: now_utc
3582
3685
  description: Returns the current date and time in the UTC timezone.
@@ -138,7 +138,9 @@ class DeclarativeStream(Stream):
138
138
  """
139
139
  :param: stream_state We knowingly avoid using stream_state as we want cursors to manage their own state.
140
140
  """
141
- if stream_slice is None or stream_slice == {}:
141
+ if stream_slice is None or (
142
+ not isinstance(stream_slice, StreamSlice) and stream_slice == {}
143
+ ):
142
144
  # As the parameter is Optional, many would just call `read_records(sync_mode)` during testing without specifying the field
143
145
  # As part of the declarative model without custom components, this should never happen as the CDK would wire up a
144
146
  # SinglePartitionRouter that would create this StreamSlice properly
@@ -10,10 +10,8 @@ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
10
10
  )
11
11
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
12
12
  from airbyte_cdk.sources.declarative.decoders.json_decoder import (
13
- GzipJsonDecoder,
14
13
  IterableDecoder,
15
14
  JsonDecoder,
16
- JsonlDecoder,
17
15
  )
18
16
  from airbyte_cdk.sources.declarative.decoders.noop_decoder import NoopDecoder
19
17
  from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import (
@@ -27,9 +25,7 @@ __all__ = [
27
25
  "CompositeRawDecoder",
28
26
  "JsonDecoder",
29
27
  "JsonParser",
30
- "JsonlDecoder",
31
28
  "IterableDecoder",
32
- "GzipJsonDecoder",
33
29
  "NoopDecoder",
34
30
  "PaginationDecoderDecorator",
35
31
  "XmlDecoder",
@@ -1,5 +1,6 @@
1
1
  import csv
2
2
  import gzip
3
+ import io
3
4
  import json
4
5
  import logging
5
6
  from abc import ABC, abstractmethod
@@ -130,11 +131,15 @@ class CompositeRawDecoder(Decoder):
130
131
  """
131
132
 
132
133
  parser: Parser
134
+ stream_response: bool = True
133
135
 
134
136
  def is_stream_response(self) -> bool:
135
- return True
137
+ return self.stream_response
136
138
 
137
139
  def decode(
138
140
  self, response: requests.Response
139
141
  ) -> Generator[MutableMapping[str, Any], None, None]:
140
- yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
142
+ if self.is_stream_response():
143
+ yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
144
+ else:
145
+ yield from self.parser.parse(data=io.BytesIO(response.content))
@@ -10,21 +10,24 @@ from typing import Any, Generator, List, Mapping, MutableMapping, Optional
10
10
  import orjson
11
11
  import requests
12
12
 
13
+ from airbyte_cdk.sources.declarative.decoders import CompositeRawDecoder, JsonParser
13
14
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
14
15
 
15
16
  logger = logging.getLogger("airbyte")
16
17
 
17
18
 
18
- @dataclass
19
19
  class JsonDecoder(Decoder):
20
20
  """
21
21
  Decoder strategy that returns the json-encoded content of a response, if any.
22
+
23
+ Usually, we would try to instantiate the equivalent `CompositeRawDecoder(parser=JsonParser(), stream_response=False)` but there were specific historical behaviors related to the JsonDecoder that we didn't know if we could remove like the fallback on {} in case of errors.
22
24
  """
23
25
 
24
- parameters: InitVar[Mapping[str, Any]]
26
+ def __init__(self, parameters: Mapping[str, Any]):
27
+ self._decoder = CompositeRawDecoder(parser=JsonParser(), stream_response=False)
25
28
 
26
29
  def is_stream_response(self) -> bool:
27
- return False
30
+ return self._decoder.is_stream_response()
28
31
 
29
32
  def decode(
30
33
  self, response: requests.Response
@@ -32,25 +35,16 @@ class JsonDecoder(Decoder):
32
35
  """
33
36
  Given the response is an empty string or an empty list, the function will return a generator with an empty mapping.
34
37
  """
38
+ has_yielded = False
35
39
  try:
36
- body_json = response.json()
37
- yield from self.parse_body_json(body_json)
38
- except requests.exceptions.JSONDecodeError:
39
- logger.warning(
40
- f"Response cannot be parsed into json: {response.status_code=}, {response.text=}"
41
- )
40
+ for element in self._decoder.decode(response):
41
+ yield element
42
+ has_yielded = True
43
+ except Exception:
42
44
  yield {}
43
45
 
44
- @staticmethod
45
- def parse_body_json(
46
- body_json: MutableMapping[str, Any] | List[MutableMapping[str, Any]],
47
- ) -> Generator[MutableMapping[str, Any], None, None]:
48
- if not isinstance(body_json, list):
49
- body_json = [body_json]
50
- if len(body_json) == 0:
46
+ if not has_yielded:
51
47
  yield {}
52
- else:
53
- yield from body_json
54
48
 
55
49
 
56
50
  @dataclass
@@ -69,43 +63,3 @@ class IterableDecoder(Decoder):
69
63
  ) -> Generator[MutableMapping[str, Any], None, None]:
70
64
  for line in response.iter_lines():
71
65
  yield {"record": line.decode()}
72
-
73
-
74
- @dataclass
75
- class JsonlDecoder(Decoder):
76
- """
77
- Decoder strategy that returns the json-encoded content of the response, if any.
78
- """
79
-
80
- parameters: InitVar[Mapping[str, Any]]
81
-
82
- def is_stream_response(self) -> bool:
83
- return True
84
-
85
- def decode(
86
- self, response: requests.Response
87
- ) -> Generator[MutableMapping[str, Any], None, None]:
88
- # TODO???: set delimiter? usually it is `\n` but maybe it would be useful to set optional?
89
- # https://github.com/airbytehq/airbyte-internal-issues/issues/8436
90
- for record in response.iter_lines():
91
- yield orjson.loads(record)
92
-
93
-
94
- @dataclass
95
- class GzipJsonDecoder(JsonDecoder):
96
- encoding: Optional[str]
97
-
98
- def __post_init__(self, parameters: Mapping[str, Any]) -> None:
99
- if self.encoding:
100
- try:
101
- codecs.lookup(self.encoding)
102
- except LookupError:
103
- raise ValueError(
104
- f"Invalid encoding '{self.encoding}'. Please check provided encoding"
105
- )
106
-
107
- def decode(
108
- self, response: requests.Response
109
- ) -> Generator[MutableMapping[str, Any], None, None]:
110
- raw_string = decompress(response.content).decode(encoding=self.encoding or "utf-8")
111
- yield from self.parse_body_json(orjson.loads(raw_string))
@@ -41,6 +41,7 @@ class RecordSelector(HttpSelector):
41
41
  _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="")
42
42
  record_filter: Optional[RecordFilter] = None
43
43
  transformations: List[RecordTransformation] = field(default_factory=lambda: [])
44
+ transform_before_filtering: bool = False
44
45
 
45
46
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
46
47
  self._parameters = parameters
@@ -104,9 +105,17 @@ class RecordSelector(HttpSelector):
104
105
  Until we decide to move this logic away from the selector, we made this method public so that users like AsyncJobRetriever could
105
106
  share the logic of doing transformations on a set of records.
106
107
  """
107
- filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token)
108
- transformed_data = self._transform(filtered_data, stream_state, stream_slice)
109
- normalized_data = self._normalize_by_schema(transformed_data, schema=records_schema)
108
+ if self.transform_before_filtering:
109
+ transformed_data = self._transform(all_data, stream_state, stream_slice)
110
+ transformed_filtered_data = self._filter(
111
+ transformed_data, stream_state, stream_slice, next_page_token
112
+ )
113
+ else:
114
+ filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token)
115
+ transformed_filtered_data = self._transform(filtered_data, stream_state, stream_slice)
116
+ normalized_data = self._normalize_by_schema(
117
+ transformed_filtered_data, schema=records_schema
118
+ )
110
119
  for data in normalized_data:
111
120
  yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)
112
121