airbyte-cdk 6.34.1.dev0__py3-none-any.whl → 6.35.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +16 -12
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +591 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +160 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +75 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
- airbyte_cdk/sources/declarative/auth/oauth.py +6 -1
- airbyte_cdk/sources/declarative/auth/token.py +3 -8
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +30 -79
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +213 -100
- airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
- airbyte_cdk/sources/declarative/decoders/__init__.py +0 -4
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +18 -3
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +12 -58
- airbyte_cdk/sources/declarative/extractors/record_selector.py +12 -3
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +56 -25
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +12 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/__init__.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/filters.py +2 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +2 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +14 -1
- airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +9 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +150 -41
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +234 -84
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +26 -18
- airbyte_cdk/sources/declarative/requesters/http_requester.py +8 -2
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +16 -5
- airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +1 -4
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +0 -3
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -47
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -3
- airbyte_cdk/sources/declarative/transformations/add_fields.py +4 -4
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/file_based_source.py +70 -37
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +107 -12
- airbyte_cdk/sources/file_based/stream/__init__.py +10 -1
- airbyte_cdk/sources/file_based/stream/identities_stream.py +47 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +85 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/call_rate.py +185 -47
- airbyte_cdk/sources/streams/http/http.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +217 -56
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +144 -73
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/test/mock_http/mocker.py +9 -1
- airbyte_cdk/test/mock_http/response.py +6 -3
- airbyte_cdk/utils/datetime_helpers.py +48 -66
- airbyte_cdk/utils/mapping_helpers.py +126 -26
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/RECORD +68 -59
- airbyte_cdk/connector_builder/message_grouper.py +0 -448
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/entry_points.txt +0 -0
@@ -40,6 +40,8 @@ properties:
|
|
40
40
|
"$ref": "#/definitions/Spec"
|
41
41
|
concurrency_level:
|
42
42
|
"$ref": "#/definitions/ConcurrencyLevel"
|
43
|
+
api_budget:
|
44
|
+
"$ref": "#/definitions/HTTPAPIBudget"
|
43
45
|
metadata:
|
44
46
|
type: object
|
45
47
|
description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
|
@@ -80,7 +82,6 @@ definitions:
|
|
80
82
|
- stream_interval
|
81
83
|
- stream_partition
|
82
84
|
- stream_slice
|
83
|
-
- stream_state
|
84
85
|
examples:
|
85
86
|
- "{{ record['updates'] }}"
|
86
87
|
- "{{ record['MetaData']['LastUpdatedTime'] }}"
|
@@ -794,7 +795,7 @@ definitions:
|
|
794
795
|
description: This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)
|
795
796
|
type: object
|
796
797
|
required:
|
797
|
-
|
798
|
+
- target
|
798
799
|
properties:
|
799
800
|
target:
|
800
801
|
title: Target
|
@@ -1365,6 +1366,170 @@ definitions:
|
|
1365
1366
|
$parameters:
|
1366
1367
|
type: object
|
1367
1368
|
additional_properties: true
|
1369
|
+
HTTPAPIBudget:
|
1370
|
+
title: HTTP API Budget
|
1371
|
+
description: >
|
1372
|
+
Defines how many requests can be made to the API in a given time frame. `HTTPAPIBudget` extracts the remaining
|
1373
|
+
call count and the reset time from HTTP response headers using the header names provided by
|
1374
|
+
`ratelimit_remaining_header` and `ratelimit_reset_header`. Only requests using `HttpRequester`
|
1375
|
+
are rate-limited; custom components that bypass `HttpRequester` are not covered by this budget.
|
1376
|
+
type: object
|
1377
|
+
required:
|
1378
|
+
- type
|
1379
|
+
- policies
|
1380
|
+
properties:
|
1381
|
+
type:
|
1382
|
+
type: string
|
1383
|
+
enum: [HTTPAPIBudget]
|
1384
|
+
policies:
|
1385
|
+
title: Policies
|
1386
|
+
description: List of call rate policies that define how many calls are allowed.
|
1387
|
+
type: array
|
1388
|
+
items:
|
1389
|
+
anyOf:
|
1390
|
+
- "$ref": "#/definitions/FixedWindowCallRatePolicy"
|
1391
|
+
- "$ref": "#/definitions/MovingWindowCallRatePolicy"
|
1392
|
+
- "$ref": "#/definitions/UnlimitedCallRatePolicy"
|
1393
|
+
ratelimit_reset_header:
|
1394
|
+
title: Rate Limit Reset Header
|
1395
|
+
description: The HTTP response header name that indicates when the rate limit resets.
|
1396
|
+
type: string
|
1397
|
+
default: "ratelimit-reset"
|
1398
|
+
ratelimit_remaining_header:
|
1399
|
+
title: Rate Limit Remaining Header
|
1400
|
+
description: The HTTP response header name that indicates the number of remaining allowed calls.
|
1401
|
+
type: string
|
1402
|
+
default: "ratelimit-remaining"
|
1403
|
+
status_codes_for_ratelimit_hit:
|
1404
|
+
title: Status Codes for Rate Limit Hit
|
1405
|
+
description: List of HTTP status codes that indicate a rate limit has been hit.
|
1406
|
+
type: array
|
1407
|
+
items:
|
1408
|
+
type: integer
|
1409
|
+
default: [429]
|
1410
|
+
additionalProperties: true
|
1411
|
+
FixedWindowCallRatePolicy:
|
1412
|
+
title: Fixed Window Call Rate Policy
|
1413
|
+
description: A policy that allows a fixed number of calls within a specific time window.
|
1414
|
+
type: object
|
1415
|
+
required:
|
1416
|
+
- type
|
1417
|
+
- period
|
1418
|
+
- call_limit
|
1419
|
+
- matchers
|
1420
|
+
properties:
|
1421
|
+
type:
|
1422
|
+
type: string
|
1423
|
+
enum: [FixedWindowCallRatePolicy]
|
1424
|
+
period:
|
1425
|
+
title: Period
|
1426
|
+
description: The time interval for the rate limit window.
|
1427
|
+
type: string
|
1428
|
+
call_limit:
|
1429
|
+
title: Call Limit
|
1430
|
+
description: The maximum number of calls allowed within the period.
|
1431
|
+
type: integer
|
1432
|
+
matchers:
|
1433
|
+
title: Matchers
|
1434
|
+
description: List of matchers that define which requests this policy applies to.
|
1435
|
+
type: array
|
1436
|
+
items:
|
1437
|
+
"$ref": "#/definitions/HttpRequestRegexMatcher"
|
1438
|
+
additionalProperties: true
|
1439
|
+
MovingWindowCallRatePolicy:
|
1440
|
+
title: Moving Window Call Rate Policy
|
1441
|
+
description: A policy that allows a fixed number of calls within a moving time window.
|
1442
|
+
type: object
|
1443
|
+
required:
|
1444
|
+
- type
|
1445
|
+
- rates
|
1446
|
+
- matchers
|
1447
|
+
properties:
|
1448
|
+
type:
|
1449
|
+
type: string
|
1450
|
+
enum: [MovingWindowCallRatePolicy]
|
1451
|
+
rates:
|
1452
|
+
title: Rates
|
1453
|
+
description: List of rates that define the call limits for different time intervals.
|
1454
|
+
type: array
|
1455
|
+
items:
|
1456
|
+
"$ref": "#/definitions/Rate"
|
1457
|
+
matchers:
|
1458
|
+
title: Matchers
|
1459
|
+
description: List of matchers that define which requests this policy applies to.
|
1460
|
+
type: array
|
1461
|
+
items:
|
1462
|
+
"$ref": "#/definitions/HttpRequestRegexMatcher"
|
1463
|
+
additionalProperties: true
|
1464
|
+
UnlimitedCallRatePolicy:
|
1465
|
+
title: Unlimited Call Rate Policy
|
1466
|
+
description: A policy that allows unlimited calls for specific requests.
|
1467
|
+
type: object
|
1468
|
+
required:
|
1469
|
+
- type
|
1470
|
+
- matchers
|
1471
|
+
properties:
|
1472
|
+
type:
|
1473
|
+
type: string
|
1474
|
+
enum: [UnlimitedCallRatePolicy]
|
1475
|
+
matchers:
|
1476
|
+
title: Matchers
|
1477
|
+
description: List of matchers that define which requests this policy applies to.
|
1478
|
+
type: array
|
1479
|
+
items:
|
1480
|
+
"$ref": "#/definitions/HttpRequestRegexMatcher"
|
1481
|
+
additionalProperties: true
|
1482
|
+
Rate:
|
1483
|
+
title: Rate
|
1484
|
+
description: Defines a rate limit with a specific number of calls allowed within a time interval.
|
1485
|
+
type: object
|
1486
|
+
required:
|
1487
|
+
- limit
|
1488
|
+
- interval
|
1489
|
+
properties:
|
1490
|
+
limit:
|
1491
|
+
title: Limit
|
1492
|
+
description: The maximum number of calls allowed within the interval.
|
1493
|
+
type: integer
|
1494
|
+
interval:
|
1495
|
+
title: Interval
|
1496
|
+
description: The time interval for the rate limit.
|
1497
|
+
type: string
|
1498
|
+
examples:
|
1499
|
+
- "PT1H"
|
1500
|
+
- "P1D"
|
1501
|
+
additionalProperties: true
|
1502
|
+
HttpRequestRegexMatcher:
|
1503
|
+
title: HTTP Request Matcher
|
1504
|
+
description: >
|
1505
|
+
Matches HTTP requests based on method, base URL, URL path pattern, query parameters, and headers.
|
1506
|
+
Use `url_base` to specify the scheme and host (without trailing slash) and
|
1507
|
+
`url_path_pattern` to apply a regex to the request path.
|
1508
|
+
type: object
|
1509
|
+
properties:
|
1510
|
+
method:
|
1511
|
+
title: Method
|
1512
|
+
description: The HTTP method to match (e.g., GET, POST).
|
1513
|
+
type: string
|
1514
|
+
url_base:
|
1515
|
+
title: URL Base
|
1516
|
+
description: The base URL (scheme and host, e.g. "https://api.example.com") to match.
|
1517
|
+
type: string
|
1518
|
+
url_path_pattern:
|
1519
|
+
title: URL Path Pattern
|
1520
|
+
description: A regular expression pattern to match the URL path.
|
1521
|
+
type: string
|
1522
|
+
params:
|
1523
|
+
title: Parameters
|
1524
|
+
description: The query parameters to match.
|
1525
|
+
type: object
|
1526
|
+
additionalProperties: true
|
1527
|
+
headers:
|
1528
|
+
title: Headers
|
1529
|
+
description: The headers to match.
|
1530
|
+
type: object
|
1531
|
+
additionalProperties: true
|
1532
|
+
additionalProperties: true
|
1368
1533
|
DefaultErrorHandler:
|
1369
1534
|
title: Default Error Handler
|
1370
1535
|
description: Component defining how to handle errors. Default behavior includes only retrying server errors (HTTP 5XX) and too many requests (HTTP 429) with an exponential backoff.
|
@@ -1549,7 +1714,6 @@ definitions:
|
|
1549
1714
|
anyOf:
|
1550
1715
|
- "$ref": "#/definitions/JsonDecoder"
|
1551
1716
|
- "$ref": "#/definitions/XmlDecoder"
|
1552
|
-
- "$ref": "#/definitions/CompositeRawDecoder"
|
1553
1717
|
$parameters:
|
1554
1718
|
type: object
|
1555
1719
|
additionalProperties: true
|
@@ -1611,7 +1775,6 @@ definitions:
|
|
1611
1775
|
- stream_interval
|
1612
1776
|
- stream_partition
|
1613
1777
|
- stream_slice
|
1614
|
-
- stream_state
|
1615
1778
|
examples:
|
1616
1779
|
- "/products"
|
1617
1780
|
- "/quotes/{{ stream_partition['id'] }}/quote_line_groups"
|
@@ -1661,7 +1824,6 @@ definitions:
|
|
1661
1824
|
- stream_interval
|
1662
1825
|
- stream_partition
|
1663
1826
|
- stream_slice
|
1664
|
-
- stream_state
|
1665
1827
|
examples:
|
1666
1828
|
- |
|
1667
1829
|
[{"clause": {"type": "timestamp", "operator": 10, "parameters":
|
@@ -1679,7 +1841,6 @@ definitions:
|
|
1679
1841
|
- stream_interval
|
1680
1842
|
- stream_partition
|
1681
1843
|
- stream_slice
|
1682
|
-
- stream_state
|
1683
1844
|
examples:
|
1684
1845
|
- sort_order: "ASC"
|
1685
1846
|
sort_field: "CREATED_AT"
|
@@ -1700,7 +1861,6 @@ definitions:
|
|
1700
1861
|
- stream_interval
|
1701
1862
|
- stream_partition
|
1702
1863
|
- stream_slice
|
1703
|
-
- stream_state
|
1704
1864
|
examples:
|
1705
1865
|
- Output-Format: JSON
|
1706
1866
|
- Version: "{{ config['version'] }}"
|
@@ -1717,7 +1877,6 @@ definitions:
|
|
1717
1877
|
- stream_interval
|
1718
1878
|
- stream_partition
|
1719
1879
|
- stream_slice
|
1720
|
-
- stream_state
|
1721
1880
|
examples:
|
1722
1881
|
- unit: "day"
|
1723
1882
|
- query: 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"'
|
@@ -2072,7 +2231,6 @@ definitions:
|
|
2072
2231
|
interpolation_context:
|
2073
2232
|
- config
|
2074
2233
|
- record
|
2075
|
-
- stream_state
|
2076
2234
|
- stream_slice
|
2077
2235
|
new:
|
2078
2236
|
type: string
|
@@ -2086,7 +2244,6 @@ definitions:
|
|
2086
2244
|
interpolation_context:
|
2087
2245
|
- config
|
2088
2246
|
- record
|
2089
|
-
- stream_state
|
2090
2247
|
- stream_slice
|
2091
2248
|
$parameters:
|
2092
2249
|
type: object
|
@@ -2133,23 +2290,6 @@ definitions:
|
|
2133
2290
|
$parameters:
|
2134
2291
|
type: object
|
2135
2292
|
additionalProperties: true
|
2136
|
-
GzipJsonDecoder:
|
2137
|
-
title: GzipJson Decoder
|
2138
|
-
description: Use this if the response is Gzip compressed Json.
|
2139
|
-
type: object
|
2140
|
-
additionalProperties: true
|
2141
|
-
required:
|
2142
|
-
- type
|
2143
|
-
properties:
|
2144
|
-
type:
|
2145
|
-
type: string
|
2146
|
-
enum: [GzipJsonDecoder]
|
2147
|
-
encoding:
|
2148
|
-
type: string
|
2149
|
-
default: utf-8
|
2150
|
-
$parameters:
|
2151
|
-
type: object
|
2152
|
-
additionalProperties: true
|
2153
2293
|
ZipfileDecoder:
|
2154
2294
|
title: Zipfile Decoder
|
2155
2295
|
description: Decoder for response data that is returned as zipfile(s).
|
@@ -2157,19 +2297,19 @@ definitions:
|
|
2157
2297
|
additionalProperties: true
|
2158
2298
|
required:
|
2159
2299
|
- type
|
2160
|
-
-
|
2300
|
+
- decoder
|
2161
2301
|
properties:
|
2162
2302
|
type:
|
2163
2303
|
type: string
|
2164
2304
|
enum: [ZipfileDecoder]
|
2165
|
-
|
2305
|
+
decoder:
|
2166
2306
|
title: Parser
|
2167
2307
|
description: Parser to parse the decompressed data from the zipfile(s).
|
2168
2308
|
anyOf:
|
2169
|
-
- "$ref": "#/definitions/
|
2170
|
-
- "$ref": "#/definitions/
|
2171
|
-
- "$ref": "#/definitions/
|
2172
|
-
- "$ref": "#/definitions/
|
2309
|
+
- "$ref": "#/definitions/CsvDecoder"
|
2310
|
+
- "$ref": "#/definitions/GzipDecoder"
|
2311
|
+
- "$ref": "#/definitions/JsonDecoder"
|
2312
|
+
- "$ref": "#/definitions/JsonlDecoder"
|
2173
2313
|
ListPartitionRouter:
|
2174
2314
|
title: List Partition Router
|
2175
2315
|
description: A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. During a sync, each value is iterated over and can be used as input to outbound API requests.
|
@@ -2753,7 +2893,6 @@ definitions:
|
|
2753
2893
|
- stream_interval
|
2754
2894
|
- stream_partition
|
2755
2895
|
- stream_slice
|
2756
|
-
- stream_state
|
2757
2896
|
examples:
|
2758
2897
|
- "{{ record['created_at'] >= stream_interval['start_time'] }}"
|
2759
2898
|
- "{{ record.status in ['active', 'expired'] }}"
|
@@ -2847,25 +2986,35 @@ definitions:
|
|
2847
2986
|
enum: [RequestPath]
|
2848
2987
|
RequestOption:
|
2849
2988
|
title: Request Option
|
2850
|
-
description: Specifies the key field and where in the request a component's value should be injected.
|
2989
|
+
description: Specifies the key field or path and where in the request a component's value should be injected.
|
2851
2990
|
type: object
|
2852
2991
|
required:
|
2853
2992
|
- type
|
2854
|
-
- field_name
|
2855
2993
|
- inject_into
|
2856
2994
|
properties:
|
2857
2995
|
type:
|
2858
2996
|
type: string
|
2859
2997
|
enum: [RequestOption]
|
2860
2998
|
field_name:
|
2861
|
-
title:
|
2862
|
-
description: Configures which key should be used in the location that the descriptor is being injected into
|
2999
|
+
title: Field Name
|
3000
|
+
description: Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.
|
2863
3001
|
type: string
|
2864
3002
|
examples:
|
2865
3003
|
- segment_id
|
2866
3004
|
interpolation_context:
|
2867
3005
|
- config
|
2868
3006
|
- parameters
|
3007
|
+
field_path:
|
3008
|
+
title: Field Path
|
3009
|
+
description: Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)
|
3010
|
+
type: array
|
3011
|
+
items:
|
3012
|
+
type: string
|
3013
|
+
examples:
|
3014
|
+
- ["data", "viewer", "id"]
|
3015
|
+
interpolation_context:
|
3016
|
+
- config
|
3017
|
+
- parameters
|
2869
3018
|
inject_into:
|
2870
3019
|
title: Inject Into
|
2871
3020
|
description: Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.
|
@@ -2992,79 +3141,39 @@ definitions:
|
|
2992
3141
|
description: Component decoding the response so records can be extracted.
|
2993
3142
|
anyOf:
|
2994
3143
|
- "$ref": "#/definitions/CustomDecoder"
|
3144
|
+
- "$ref": "#/definitions/CsvDecoder"
|
3145
|
+
- "$ref": "#/definitions/GzipDecoder"
|
2995
3146
|
- "$ref": "#/definitions/JsonDecoder"
|
2996
3147
|
- "$ref": "#/definitions/JsonlDecoder"
|
2997
3148
|
- "$ref": "#/definitions/IterableDecoder"
|
2998
3149
|
- "$ref": "#/definitions/XmlDecoder"
|
2999
|
-
- "$ref": "#/definitions/GzipJsonDecoder"
|
3000
|
-
- "$ref": "#/definitions/CompositeRawDecoder"
|
3001
3150
|
- "$ref": "#/definitions/ZipfileDecoder"
|
3002
3151
|
$parameters:
|
3003
3152
|
type: object
|
3004
3153
|
additionalProperties: true
|
3005
|
-
|
3006
|
-
description: "(This is experimental, use at your own risk)"
|
3154
|
+
GzipDecoder:
|
3007
3155
|
type: object
|
3008
3156
|
required:
|
3009
3157
|
- type
|
3010
|
-
-
|
3158
|
+
- decoder
|
3011
3159
|
properties:
|
3012
3160
|
type:
|
3013
3161
|
type: string
|
3014
|
-
enum: [
|
3015
|
-
|
3016
|
-
anyOf:
|
3017
|
-
- "$ref": "#/definitions/GzipParser"
|
3018
|
-
- "$ref": "#/definitions/JsonParser"
|
3019
|
-
- "$ref": "#/definitions/JsonLineParser"
|
3020
|
-
- "$ref": "#/definitions/CsvParser"
|
3021
|
-
# PARSERS
|
3022
|
-
GzipParser:
|
3023
|
-
type: object
|
3024
|
-
required:
|
3025
|
-
- type
|
3026
|
-
- inner_parser
|
3027
|
-
properties:
|
3028
|
-
type:
|
3029
|
-
type: string
|
3030
|
-
enum: [GzipParser]
|
3031
|
-
inner_parser:
|
3162
|
+
enum: [GzipDecoder]
|
3163
|
+
decoder:
|
3032
3164
|
anyOf:
|
3033
|
-
- "$ref": "#/definitions/
|
3034
|
-
- "$ref": "#/definitions/
|
3035
|
-
- "$ref": "#/definitions/
|
3036
|
-
|
3037
|
-
|
3038
|
-
description: Parser used for parsing str, bytes, or bytearray data and returning data in a dictionary format.
|
3039
|
-
type: object
|
3040
|
-
required:
|
3041
|
-
- type
|
3042
|
-
properties:
|
3043
|
-
type:
|
3044
|
-
type: string
|
3045
|
-
enum: [JsonParser]
|
3046
|
-
encoding:
|
3047
|
-
type: string
|
3048
|
-
default: utf-8
|
3049
|
-
JsonLineParser:
|
3050
|
-
type: object
|
3051
|
-
required:
|
3052
|
-
- type
|
3053
|
-
properties:
|
3054
|
-
type:
|
3055
|
-
type: string
|
3056
|
-
enum: [JsonLineParser]
|
3057
|
-
encoding:
|
3058
|
-
type: string
|
3059
|
-
default: utf-8
|
3060
|
-
CsvParser:
|
3165
|
+
- "$ref": "#/definitions/CsvDecoder"
|
3166
|
+
- "$ref": "#/definitions/GzipDecoder"
|
3167
|
+
- "$ref": "#/definitions/JsonDecoder"
|
3168
|
+
- "$ref": "#/definitions/JsonlDecoder"
|
3169
|
+
CsvDecoder:
|
3061
3170
|
type: object
|
3062
3171
|
required:
|
3063
3172
|
- type
|
3064
3173
|
properties:
|
3065
3174
|
type:
|
3066
3175
|
type: string
|
3067
|
-
enum: [
|
3176
|
+
enum: [CsvDecoder]
|
3068
3177
|
encoding:
|
3069
3178
|
type: string
|
3070
3179
|
default: utf-8
|
@@ -3192,24 +3301,24 @@ definitions:
|
|
3192
3301
|
description: Component decoding the response so records can be extracted.
|
3193
3302
|
anyOf:
|
3194
3303
|
- "$ref": "#/definitions/CustomDecoder"
|
3304
|
+
- "$ref": "#/definitions/CsvDecoder"
|
3305
|
+
- "$ref": "#/definitions/GzipDecoder"
|
3195
3306
|
- "$ref": "#/definitions/JsonDecoder"
|
3196
3307
|
- "$ref": "#/definitions/JsonlDecoder"
|
3197
3308
|
- "$ref": "#/definitions/IterableDecoder"
|
3198
3309
|
- "$ref": "#/definitions/XmlDecoder"
|
3199
|
-
- "$ref": "#/definitions/GzipJsonDecoder"
|
3200
|
-
- "$ref": "#/definitions/CompositeRawDecoder"
|
3201
3310
|
- "$ref": "#/definitions/ZipfileDecoder"
|
3202
3311
|
download_decoder:
|
3203
3312
|
title: Download Decoder
|
3204
3313
|
description: Component decoding the download response so records can be extracted.
|
3205
3314
|
anyOf:
|
3206
3315
|
- "$ref": "#/definitions/CustomDecoder"
|
3316
|
+
- "$ref": "#/definitions/CsvDecoder"
|
3317
|
+
- "$ref": "#/definitions/GzipDecoder"
|
3207
3318
|
- "$ref": "#/definitions/JsonDecoder"
|
3208
3319
|
- "$ref": "#/definitions/JsonlDecoder"
|
3209
3320
|
- "$ref": "#/definitions/IterableDecoder"
|
3210
3321
|
- "$ref": "#/definitions/XmlDecoder"
|
3211
|
-
- "$ref": "#/definitions/GzipJsonDecoder"
|
3212
|
-
- "$ref": "#/definitions/CompositeRawDecoder"
|
3213
3322
|
- "$ref": "#/definitions/ZipfileDecoder"
|
3214
3323
|
$parameters:
|
3215
3324
|
type: object
|
@@ -3571,12 +3680,6 @@ interpolation:
|
|
3571
3680
|
- title: stream_slice
|
3572
3681
|
description: This variable is deprecated. Use stream_interval or stream_partition instead.
|
3573
3682
|
type: object
|
3574
|
-
- title: stream_state
|
3575
|
-
description: The current state of the stream. The object's keys are defined by the incremental sync's cursor_field the and partition router's values.
|
3576
|
-
type: object
|
3577
|
-
examples:
|
3578
|
-
- created_at: "2020-01-01 00:00:00.000+00:00"
|
3579
|
-
- updated_at: "2020-01-02 00:00:00.000+00:00"
|
3580
3683
|
macros:
|
3581
3684
|
- title: now_utc
|
3582
3685
|
description: Returns the current date and time in the UTC timezone.
|
@@ -3641,6 +3744,16 @@ interpolation:
|
|
3641
3744
|
- "{{ format_datetime(config['start_time'], '%Y-%m-%d') }}"
|
3642
3745
|
- "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ') }}"
|
3643
3746
|
- "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ', '%a, %d %b %Y %H:%M:%S %z') }}"
|
3747
|
+
- title: str_to_datetime
|
3748
|
+
description: Converts a string to a datetime object with UTC timezone.
|
3749
|
+
arguments:
|
3750
|
+
s: The string to convert.
|
3751
|
+
return_type: datetime.datetime
|
3752
|
+
examples:
|
3753
|
+
- "{{ str_to_datetime('2022-01-14') }}"
|
3754
|
+
- "{{ str_to_datetime('2022-01-01 13:45:30') }}"
|
3755
|
+
- "{{ str_to_datetime('2022-01-01T13:45:30+00:00') }}"
|
3756
|
+
- "{{ str_to_datetime('2022-01-01T13:45:30.123456Z') }}"
|
3644
3757
|
filters:
|
3645
3758
|
- title: hash
|
3646
3759
|
description: Convert the specified value to a hashed string.
|
@@ -138,7 +138,9 @@ class DeclarativeStream(Stream):
|
|
138
138
|
"""
|
139
139
|
:param: stream_state We knowingly avoid using stream_state as we want cursors to manage their own state.
|
140
140
|
"""
|
141
|
-
if stream_slice is None or
|
141
|
+
if stream_slice is None or (
|
142
|
+
not isinstance(stream_slice, StreamSlice) and stream_slice == {}
|
143
|
+
):
|
142
144
|
# As the parameter is Optional, many would just call `read_records(sync_mode)` during testing without specifying the field
|
143
145
|
# As part of the declarative model without custom components, this should never happen as the CDK would wire up a
|
144
146
|
# SinglePartitionRouter that would create this StreamSlice properly
|
@@ -10,10 +10,8 @@ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
|
|
10
10
|
)
|
11
11
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
12
12
|
from airbyte_cdk.sources.declarative.decoders.json_decoder import (
|
13
|
-
GzipJsonDecoder,
|
14
13
|
IterableDecoder,
|
15
14
|
JsonDecoder,
|
16
|
-
JsonlDecoder,
|
17
15
|
)
|
18
16
|
from airbyte_cdk.sources.declarative.decoders.noop_decoder import NoopDecoder
|
19
17
|
from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import (
|
@@ -27,9 +25,7 @@ __all__ = [
|
|
27
25
|
"CompositeRawDecoder",
|
28
26
|
"JsonDecoder",
|
29
27
|
"JsonParser",
|
30
|
-
"JsonlDecoder",
|
31
28
|
"IterableDecoder",
|
32
|
-
"GzipJsonDecoder",
|
33
29
|
"NoopDecoder",
|
34
30
|
"PaginationDecoderDecorator",
|
35
31
|
"XmlDecoder",
|
@@ -1,5 +1,6 @@
|
|
1
1
|
import csv
|
2
2
|
import gzip
|
3
|
+
import io
|
3
4
|
import json
|
4
5
|
import logging
|
5
6
|
from abc import ABC, abstractmethod
|
@@ -106,6 +107,16 @@ class CsvParser(Parser):
|
|
106
107
|
encoding: Optional[str] = "utf-8"
|
107
108
|
delimiter: Optional[str] = ","
|
108
109
|
|
110
|
+
def _get_delimiter(self) -> Optional[str]:
|
111
|
+
"""
|
112
|
+
Get delimiter from the configuration. Check for the escape character and decode it.
|
113
|
+
"""
|
114
|
+
if self.delimiter is not None:
|
115
|
+
if self.delimiter.startswith("\\"):
|
116
|
+
self.delimiter = self.delimiter.encode("utf-8").decode("unicode_escape")
|
117
|
+
|
118
|
+
return self.delimiter
|
119
|
+
|
109
120
|
def parse(
|
110
121
|
self,
|
111
122
|
data: BufferedIOBase,
|
@@ -114,7 +125,7 @@ class CsvParser(Parser):
|
|
114
125
|
Parse CSV data from decompressed bytes.
|
115
126
|
"""
|
116
127
|
text_data = TextIOWrapper(data, encoding=self.encoding) # type: ignore
|
117
|
-
reader = csv.DictReader(text_data, delimiter=self.
|
128
|
+
reader = csv.DictReader(text_data, delimiter=self._get_delimiter() or ",")
|
118
129
|
yield from reader
|
119
130
|
|
120
131
|
|
@@ -130,11 +141,15 @@ class CompositeRawDecoder(Decoder):
|
|
130
141
|
"""
|
131
142
|
|
132
143
|
parser: Parser
|
144
|
+
stream_response: bool = True
|
133
145
|
|
134
146
|
def is_stream_response(self) -> bool:
|
135
|
-
return
|
147
|
+
return self.stream_response
|
136
148
|
|
137
149
|
def decode(
|
138
150
|
self, response: requests.Response
|
139
151
|
) -> Generator[MutableMapping[str, Any], None, None]:
|
140
|
-
|
152
|
+
if self.is_stream_response():
|
153
|
+
yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
|
154
|
+
else:
|
155
|
+
yield from self.parser.parse(data=io.BytesIO(response.content))
|
@@ -10,21 +10,24 @@ from typing import Any, Generator, List, Mapping, MutableMapping, Optional
|
|
10
10
|
import orjson
|
11
11
|
import requests
|
12
12
|
|
13
|
+
from airbyte_cdk.sources.declarative.decoders import CompositeRawDecoder, JsonParser
|
13
14
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
14
15
|
|
15
16
|
logger = logging.getLogger("airbyte")
|
16
17
|
|
17
18
|
|
18
|
-
@dataclass
|
19
19
|
class JsonDecoder(Decoder):
|
20
20
|
"""
|
21
21
|
Decoder strategy that returns the json-encoded content of a response, if any.
|
22
|
+
|
23
|
+
Usually, we would instantiate the equivalent `CompositeRawDecoder(parser=JsonParser(), stream_response=False)`, but JsonDecoder has specific historical behaviors, such as the fallback on {} in case of errors, that we were not sure we could remove.
|
22
24
|
"""
|
23
25
|
|
24
|
-
parameters:
|
26
|
+
def __init__(self, parameters: Mapping[str, Any]):
|
27
|
+
self._decoder = CompositeRawDecoder(parser=JsonParser(), stream_response=False)
|
25
28
|
|
26
29
|
def is_stream_response(self) -> bool:
|
27
|
-
return
|
30
|
+
return self._decoder.is_stream_response()
|
28
31
|
|
29
32
|
def decode(
|
30
33
|
self, response: requests.Response
|
@@ -32,25 +35,16 @@ class JsonDecoder(Decoder):
|
|
32
35
|
"""
|
33
36
|
Given the response is an empty string or an empty list, the function will return a generator with an empty mapping.
|
34
37
|
"""
|
38
|
+
has_yielded = False
|
35
39
|
try:
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
f"Response cannot be parsed into json: {response.status_code=}, {response.text=}"
|
41
|
-
)
|
40
|
+
for element in self._decoder.decode(response):
|
41
|
+
yield element
|
42
|
+
has_yielded = True
|
43
|
+
except Exception:
|
42
44
|
yield {}
|
43
45
|
|
44
|
-
|
45
|
-
def parse_body_json(
|
46
|
-
body_json: MutableMapping[str, Any] | List[MutableMapping[str, Any]],
|
47
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
48
|
-
if not isinstance(body_json, list):
|
49
|
-
body_json = [body_json]
|
50
|
-
if len(body_json) == 0:
|
46
|
+
if not has_yielded:
|
51
47
|
yield {}
|
52
|
-
else:
|
53
|
-
yield from body_json
|
54
48
|
|
55
49
|
|
56
50
|
@dataclass
|
@@ -69,43 +63,3 @@ class IterableDecoder(Decoder):
|
|
69
63
|
) -> Generator[MutableMapping[str, Any], None, None]:
|
70
64
|
for line in response.iter_lines():
|
71
65
|
yield {"record": line.decode()}
|
72
|
-
|
73
|
-
|
74
|
-
@dataclass
|
75
|
-
class JsonlDecoder(Decoder):
|
76
|
-
"""
|
77
|
-
Decoder strategy that returns the json-encoded content of the response, if any.
|
78
|
-
"""
|
79
|
-
|
80
|
-
parameters: InitVar[Mapping[str, Any]]
|
81
|
-
|
82
|
-
def is_stream_response(self) -> bool:
|
83
|
-
return True
|
84
|
-
|
85
|
-
def decode(
|
86
|
-
self, response: requests.Response
|
87
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
88
|
-
# TODO???: set delimiter? usually it is `\n` but maybe it would be useful to set optional?
|
89
|
-
# https://github.com/airbytehq/airbyte-internal-issues/issues/8436
|
90
|
-
for record in response.iter_lines():
|
91
|
-
yield orjson.loads(record)
|
92
|
-
|
93
|
-
|
94
|
-
@dataclass
|
95
|
-
class GzipJsonDecoder(JsonDecoder):
|
96
|
-
encoding: Optional[str]
|
97
|
-
|
98
|
-
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
99
|
-
if self.encoding:
|
100
|
-
try:
|
101
|
-
codecs.lookup(self.encoding)
|
102
|
-
except LookupError:
|
103
|
-
raise ValueError(
|
104
|
-
f"Invalid encoding '{self.encoding}'. Please check provided encoding"
|
105
|
-
)
|
106
|
-
|
107
|
-
def decode(
|
108
|
-
self, response: requests.Response
|
109
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
110
|
-
raw_string = decompress(response.content).decode(encoding=self.encoding or "utf-8")
|
111
|
-
yield from self.parse_body_json(orjson.loads(raw_string))
|