airbyte-cdk 6.34.0.dev2__py3-none-any.whl → 6.34.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +12 -16
- airbyte_cdk/connector_builder/message_grouper.py +448 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
- airbyte_cdk/sources/declarative/auth/oauth.py +1 -6
- airbyte_cdk/sources/declarative/auth/token.py +8 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +19 -30
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +85 -203
- airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
- airbyte_cdk/sources/declarative/decoders/__init__.py +4 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -7
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +58 -12
- airbyte_cdk/sources/declarative/extractors/record_selector.py +3 -12
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +38 -122
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -12
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -9
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -150
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +84 -234
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +18 -26
- airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -8
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -16
- airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -4
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
- airbyte_cdk/sources/file_based/file_based_source.py +37 -70
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +12 -107
- airbyte_cdk/sources/file_based/stream/__init__.py +1 -10
- airbyte_cdk/sources/streams/call_rate.py +47 -185
- airbyte_cdk/sources/streams/http/http.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +56 -217
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +73 -144
- airbyte_cdk/test/mock_http/mocker.py +1 -9
- airbyte_cdk/test/mock_http/response.py +3 -6
- airbyte_cdk/utils/datetime_helpers.py +66 -48
- airbyte_cdk/utils/mapping_helpers.py +26 -126
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/RECORD +45 -54
- airbyte_cdk/connector_builder/test_reader/__init__.py +0 -7
- airbyte_cdk/connector_builder/test_reader/helpers.py +0 -591
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +0 -160
- airbyte_cdk/connector_builder/test_reader/reader.py +0 -441
- airbyte_cdk/connector_builder/test_reader/types.py +0 -75
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +0 -81
- airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -47
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +0 -85
- airbyte_cdk/sources/specs/transfer_modes.py +0 -26
- airbyte_cdk/sources/streams/permissions/identities_stream.py +0 -75
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/entry_points.txt +0 -0
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import Any, Generic, Iterator, List, Mapping,
|
6
|
+
from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple
|
7
7
|
|
8
8
|
from airbyte_cdk.models import (
|
9
9
|
AirbyteCatalog,
|
@@ -19,7 +19,6 @@ from airbyte_cdk.sources.declarative.extractors import RecordSelector
|
|
19
19
|
from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
20
20
|
ClientSideIncrementalRecordFilterDecorator,
|
21
21
|
)
|
22
|
-
from airbyte_cdk.sources.declarative.incremental import ConcurrentPerPartitionCursor
|
23
22
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
24
23
|
from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
|
25
24
|
PerPartitionWithGlobalCursor,
|
@@ -47,6 +46,7 @@ from airbyte_cdk.sources.declarative.types import ConnectionDefinition
|
|
47
46
|
from airbyte_cdk.sources.source import TState
|
48
47
|
from airbyte_cdk.sources.streams import Stream
|
49
48
|
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
|
49
|
+
from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
|
50
50
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
|
51
51
|
AlwaysAvailableAvailabilityStrategy,
|
52
52
|
)
|
@@ -121,6 +121,12 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
121
121
|
message_repository=self.message_repository,
|
122
122
|
)
|
123
123
|
|
124
|
+
# TODO: Remove this. This property is necessary to safely migrate Stripe during the transition state.
|
125
|
+
@property
|
126
|
+
def is_partially_declarative(self) -> bool:
|
127
|
+
"""This flag used to avoid unexpected AbstractStreamFacade processing as concurrent streams."""
|
128
|
+
return False
|
129
|
+
|
124
130
|
def read(
|
125
131
|
self,
|
126
132
|
logger: logging.Logger,
|
@@ -224,7 +230,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
224
230
|
stream_state = self._connector_state_manager.get_stream_state(
|
225
231
|
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
226
232
|
)
|
227
|
-
stream_state = self._migrate_state(declarative_stream, stream_state)
|
228
233
|
|
229
234
|
retriever = self._get_retriever(declarative_stream, stream_state)
|
230
235
|
|
@@ -233,7 +238,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
233
238
|
):
|
234
239
|
cursor = declarative_stream.retriever.stream_slicer.stream_slicer
|
235
240
|
|
236
|
-
if not isinstance(cursor, ConcurrentCursor
|
241
|
+
if not isinstance(cursor, ConcurrentCursor):
|
237
242
|
# This should never happen since we instantiate ConcurrentCursor in
|
238
243
|
# model_to_component_factory.py
|
239
244
|
raise ValueError(
|
@@ -332,8 +337,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
332
337
|
stream_state = self._connector_state_manager.get_stream_state(
|
333
338
|
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
334
339
|
)
|
335
|
-
stream_state = self._migrate_state(declarative_stream, stream_state)
|
336
|
-
|
337
340
|
partition_router = declarative_stream.retriever.stream_slicer._partition_router
|
338
341
|
|
339
342
|
perpartition_cursor = (
|
@@ -375,6 +378,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
375
378
|
)
|
376
379
|
else:
|
377
380
|
synchronous_streams.append(declarative_stream)
|
381
|
+
# TODO: Remove this. This check is necessary to safely migrate Stripe during the transition state.
|
382
|
+
# The condition below ensures that concurrent support is not lost for sources that already support
|
383
|
+
# it before migration, but now are only partially migrated to declarative implementation (e.g., Stripe).
|
384
|
+
elif (
|
385
|
+
isinstance(declarative_stream, AbstractStreamFacade)
|
386
|
+
and self.is_partially_declarative
|
387
|
+
):
|
388
|
+
concurrent_streams.append(declarative_stream.get_underlying_stream())
|
378
389
|
else:
|
379
390
|
synchronous_streams.append(declarative_stream)
|
380
391
|
|
@@ -478,21 +489,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
478
489
|
# Also a temporary hack. In the legacy Stream implementation, as part of the read,
|
479
490
|
# set_initial_state() is called to instantiate incoming state on the cursor. Although we no
|
480
491
|
# longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
|
481
|
-
# like StopConditionPaginationStrategyDecorator
|
482
|
-
# properly initialized with state.
|
492
|
+
# like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
|
493
|
+
# still rely on a DatetimeBasedCursor that is properly initialized with state.
|
483
494
|
if retriever.cursor:
|
484
495
|
retriever.cursor.set_initial_state(stream_state=stream_state)
|
485
|
-
|
486
|
-
# Similar to above, the ClientSideIncrementalRecordFilterDecorator cursor is a separate instance
|
487
|
-
# from the one initialized on the SimpleRetriever, so it must also have state initialized
|
488
|
-
# for semi-incremental streams using is_client_side_incremental to filter properly
|
489
|
-
if isinstance(retriever.record_selector, RecordSelector) and isinstance(
|
490
|
-
retriever.record_selector.record_filter, ClientSideIncrementalRecordFilterDecorator
|
491
|
-
):
|
492
|
-
retriever.record_selector.record_filter._cursor.set_initial_state(
|
493
|
-
stream_state=stream_state
|
494
|
-
) # type: ignore # After non-concurrent cursors are deprecated we can remove these cursor workarounds
|
495
|
-
|
496
496
|
# We zero it out here, but since this is a cursor reference, the state is still properly
|
497
497
|
# instantiated for the other components that reference it
|
498
498
|
retriever.cursor = None
|
@@ -524,14 +524,3 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
524
524
|
if stream.stream.name not in concurrent_stream_names
|
525
525
|
]
|
526
526
|
)
|
527
|
-
|
528
|
-
@staticmethod
|
529
|
-
def _migrate_state(
|
530
|
-
declarative_stream: DeclarativeStream, stream_state: MutableMapping[str, Any]
|
531
|
-
) -> MutableMapping[str, Any]:
|
532
|
-
for state_migration in declarative_stream.state_migrations:
|
533
|
-
if state_migration.should_migrate(stream_state):
|
534
|
-
# The state variable is expected to be mutable but the migrate method returns an immutable mapping.
|
535
|
-
stream_state = dict(state_migration.migrate(stream_state))
|
536
|
-
|
537
|
-
return stream_state
|
@@ -40,8 +40,6 @@ properties:
|
|
40
40
|
"$ref": "#/definitions/Spec"
|
41
41
|
concurrency_level:
|
42
42
|
"$ref": "#/definitions/ConcurrencyLevel"
|
43
|
-
api_budget:
|
44
|
-
"$ref": "#/definitions/HTTPAPIBudget"
|
45
43
|
metadata:
|
46
44
|
type: object
|
47
45
|
description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
|
@@ -796,7 +794,7 @@ definitions:
|
|
796
794
|
description: This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)
|
797
795
|
type: object
|
798
796
|
required:
|
799
|
-
|
797
|
+
- target
|
800
798
|
properties:
|
801
799
|
target:
|
802
800
|
title: Target
|
@@ -1367,170 +1365,6 @@ definitions:
|
|
1367
1365
|
$parameters:
|
1368
1366
|
type: object
|
1369
1367
|
additional_properties: true
|
1370
|
-
HTTPAPIBudget:
|
1371
|
-
title: HTTP API Budget
|
1372
|
-
description: >
|
1373
|
-
Defines how many requests can be made to the API in a given time frame. `HTTPAPIBudget` extracts the remaining
|
1374
|
-
call count and the reset time from HTTP response headers using the header names provided by
|
1375
|
-
`ratelimit_remaining_header` and `ratelimit_reset_header`. Only requests using `HttpRequester`
|
1376
|
-
are rate-limited; custom components that bypass `HttpRequester` are not covered by this budget.
|
1377
|
-
type: object
|
1378
|
-
required:
|
1379
|
-
- type
|
1380
|
-
- policies
|
1381
|
-
properties:
|
1382
|
-
type:
|
1383
|
-
type: string
|
1384
|
-
enum: [HTTPAPIBudget]
|
1385
|
-
policies:
|
1386
|
-
title: Policies
|
1387
|
-
description: List of call rate policies that define how many calls are allowed.
|
1388
|
-
type: array
|
1389
|
-
items:
|
1390
|
-
anyOf:
|
1391
|
-
- "$ref": "#/definitions/FixedWindowCallRatePolicy"
|
1392
|
-
- "$ref": "#/definitions/MovingWindowCallRatePolicy"
|
1393
|
-
- "$ref": "#/definitions/UnlimitedCallRatePolicy"
|
1394
|
-
ratelimit_reset_header:
|
1395
|
-
title: Rate Limit Reset Header
|
1396
|
-
description: The HTTP response header name that indicates when the rate limit resets.
|
1397
|
-
type: string
|
1398
|
-
default: "ratelimit-reset"
|
1399
|
-
ratelimit_remaining_header:
|
1400
|
-
title: Rate Limit Remaining Header
|
1401
|
-
description: The HTTP response header name that indicates the number of remaining allowed calls.
|
1402
|
-
type: string
|
1403
|
-
default: "ratelimit-remaining"
|
1404
|
-
status_codes_for_ratelimit_hit:
|
1405
|
-
title: Status Codes for Rate Limit Hit
|
1406
|
-
description: List of HTTP status codes that indicate a rate limit has been hit.
|
1407
|
-
type: array
|
1408
|
-
items:
|
1409
|
-
type: integer
|
1410
|
-
default: [429]
|
1411
|
-
additionalProperties: true
|
1412
|
-
FixedWindowCallRatePolicy:
|
1413
|
-
title: Fixed Window Call Rate Policy
|
1414
|
-
description: A policy that allows a fixed number of calls within a specific time window.
|
1415
|
-
type: object
|
1416
|
-
required:
|
1417
|
-
- type
|
1418
|
-
- period
|
1419
|
-
- call_limit
|
1420
|
-
- matchers
|
1421
|
-
properties:
|
1422
|
-
type:
|
1423
|
-
type: string
|
1424
|
-
enum: [FixedWindowCallRatePolicy]
|
1425
|
-
period:
|
1426
|
-
title: Period
|
1427
|
-
description: The time interval for the rate limit window.
|
1428
|
-
type: string
|
1429
|
-
call_limit:
|
1430
|
-
title: Call Limit
|
1431
|
-
description: The maximum number of calls allowed within the period.
|
1432
|
-
type: integer
|
1433
|
-
matchers:
|
1434
|
-
title: Matchers
|
1435
|
-
description: List of matchers that define which requests this policy applies to.
|
1436
|
-
type: array
|
1437
|
-
items:
|
1438
|
-
"$ref": "#/definitions/HttpRequestRegexMatcher"
|
1439
|
-
additionalProperties: true
|
1440
|
-
MovingWindowCallRatePolicy:
|
1441
|
-
title: Moving Window Call Rate Policy
|
1442
|
-
description: A policy that allows a fixed number of calls within a moving time window.
|
1443
|
-
type: object
|
1444
|
-
required:
|
1445
|
-
- type
|
1446
|
-
- rates
|
1447
|
-
- matchers
|
1448
|
-
properties:
|
1449
|
-
type:
|
1450
|
-
type: string
|
1451
|
-
enum: [MovingWindowCallRatePolicy]
|
1452
|
-
rates:
|
1453
|
-
title: Rates
|
1454
|
-
description: List of rates that define the call limits for different time intervals.
|
1455
|
-
type: array
|
1456
|
-
items:
|
1457
|
-
"$ref": "#/definitions/Rate"
|
1458
|
-
matchers:
|
1459
|
-
title: Matchers
|
1460
|
-
description: List of matchers that define which requests this policy applies to.
|
1461
|
-
type: array
|
1462
|
-
items:
|
1463
|
-
"$ref": "#/definitions/HttpRequestRegexMatcher"
|
1464
|
-
additionalProperties: true
|
1465
|
-
UnlimitedCallRatePolicy:
|
1466
|
-
title: Unlimited Call Rate Policy
|
1467
|
-
description: A policy that allows unlimited calls for specific requests.
|
1468
|
-
type: object
|
1469
|
-
required:
|
1470
|
-
- type
|
1471
|
-
- matchers
|
1472
|
-
properties:
|
1473
|
-
type:
|
1474
|
-
type: string
|
1475
|
-
enum: [UnlimitedCallRatePolicy]
|
1476
|
-
matchers:
|
1477
|
-
title: Matchers
|
1478
|
-
description: List of matchers that define which requests this policy applies to.
|
1479
|
-
type: array
|
1480
|
-
items:
|
1481
|
-
"$ref": "#/definitions/HttpRequestRegexMatcher"
|
1482
|
-
additionalProperties: true
|
1483
|
-
Rate:
|
1484
|
-
title: Rate
|
1485
|
-
description: Defines a rate limit with a specific number of calls allowed within a time interval.
|
1486
|
-
type: object
|
1487
|
-
required:
|
1488
|
-
- limit
|
1489
|
-
- interval
|
1490
|
-
properties:
|
1491
|
-
limit:
|
1492
|
-
title: Limit
|
1493
|
-
description: The maximum number of calls allowed within the interval.
|
1494
|
-
type: integer
|
1495
|
-
interval:
|
1496
|
-
title: Interval
|
1497
|
-
description: The time interval for the rate limit.
|
1498
|
-
type: string
|
1499
|
-
examples:
|
1500
|
-
- "PT1H"
|
1501
|
-
- "P1D"
|
1502
|
-
additionalProperties: true
|
1503
|
-
HttpRequestRegexMatcher:
|
1504
|
-
title: HTTP Request Matcher
|
1505
|
-
description: >
|
1506
|
-
Matches HTTP requests based on method, base URL, URL path pattern, query parameters, and headers.
|
1507
|
-
Use `url_base` to specify the scheme and host (without trailing slash) and
|
1508
|
-
`url_path_pattern` to apply a regex to the request path.
|
1509
|
-
type: object
|
1510
|
-
properties:
|
1511
|
-
method:
|
1512
|
-
title: Method
|
1513
|
-
description: The HTTP method to match (e.g., GET, POST).
|
1514
|
-
type: string
|
1515
|
-
url_base:
|
1516
|
-
title: URL Base
|
1517
|
-
description: The base URL (scheme and host, e.g. "https://api.example.com") to match.
|
1518
|
-
type: string
|
1519
|
-
url_path_pattern:
|
1520
|
-
title: URL Path Pattern
|
1521
|
-
description: A regular expression pattern to match the URL path.
|
1522
|
-
type: string
|
1523
|
-
params:
|
1524
|
-
title: Parameters
|
1525
|
-
description: The query parameters to match.
|
1526
|
-
type: object
|
1527
|
-
additionalProperties: true
|
1528
|
-
headers:
|
1529
|
-
title: Headers
|
1530
|
-
description: The headers to match.
|
1531
|
-
type: object
|
1532
|
-
additionalProperties: true
|
1533
|
-
additionalProperties: true
|
1534
1368
|
DefaultErrorHandler:
|
1535
1369
|
title: Default Error Handler
|
1536
1370
|
description: Component defining how to handle errors. Default behavior includes only retrying server errors (HTTP 5XX) and too many requests (HTTP 429) with an exponential backoff.
|
@@ -1715,6 +1549,7 @@ definitions:
|
|
1715
1549
|
anyOf:
|
1716
1550
|
- "$ref": "#/definitions/JsonDecoder"
|
1717
1551
|
- "$ref": "#/definitions/XmlDecoder"
|
1552
|
+
- "$ref": "#/definitions/CompositeRawDecoder"
|
1718
1553
|
$parameters:
|
1719
1554
|
type: object
|
1720
1555
|
additionalProperties: true
|
@@ -2298,6 +2133,23 @@ definitions:
|
|
2298
2133
|
$parameters:
|
2299
2134
|
type: object
|
2300
2135
|
additionalProperties: true
|
2136
|
+
GzipJsonDecoder:
|
2137
|
+
title: GzipJson Decoder
|
2138
|
+
description: Use this if the response is Gzip compressed Json.
|
2139
|
+
type: object
|
2140
|
+
additionalProperties: true
|
2141
|
+
required:
|
2142
|
+
- type
|
2143
|
+
properties:
|
2144
|
+
type:
|
2145
|
+
type: string
|
2146
|
+
enum: [GzipJsonDecoder]
|
2147
|
+
encoding:
|
2148
|
+
type: string
|
2149
|
+
default: utf-8
|
2150
|
+
$parameters:
|
2151
|
+
type: object
|
2152
|
+
additionalProperties: true
|
2301
2153
|
ZipfileDecoder:
|
2302
2154
|
title: Zipfile Decoder
|
2303
2155
|
description: Decoder for response data that is returned as zipfile(s).
|
@@ -2305,19 +2157,19 @@ definitions:
|
|
2305
2157
|
additionalProperties: true
|
2306
2158
|
required:
|
2307
2159
|
- type
|
2308
|
-
-
|
2160
|
+
- parser
|
2309
2161
|
properties:
|
2310
2162
|
type:
|
2311
2163
|
type: string
|
2312
2164
|
enum: [ZipfileDecoder]
|
2313
|
-
|
2165
|
+
parser:
|
2314
2166
|
title: Parser
|
2315
2167
|
description: Parser to parse the decompressed data from the zipfile(s).
|
2316
2168
|
anyOf:
|
2317
|
-
- "$ref": "#/definitions/
|
2318
|
-
- "$ref": "#/definitions/
|
2319
|
-
- "$ref": "#/definitions/
|
2320
|
-
- "$ref": "#/definitions/
|
2169
|
+
- "$ref": "#/definitions/GzipParser"
|
2170
|
+
- "$ref": "#/definitions/JsonParser"
|
2171
|
+
- "$ref": "#/definitions/JsonLineParser"
|
2172
|
+
- "$ref": "#/definitions/CsvParser"
|
2321
2173
|
ListPartitionRouter:
|
2322
2174
|
title: List Partition Router
|
2323
2175
|
description: A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. During a sync, each value is iterated over and can be used as input to outbound API requests.
|
@@ -2995,35 +2847,25 @@ definitions:
|
|
2995
2847
|
enum: [RequestPath]
|
2996
2848
|
RequestOption:
|
2997
2849
|
title: Request Option
|
2998
|
-
description: Specifies the key field
|
2850
|
+
description: Specifies the key field and where in the request a component's value should be injected.
|
2999
2851
|
type: object
|
3000
2852
|
required:
|
3001
2853
|
- type
|
2854
|
+
- field_name
|
3002
2855
|
- inject_into
|
3003
2856
|
properties:
|
3004
2857
|
type:
|
3005
2858
|
type: string
|
3006
2859
|
enum: [RequestOption]
|
3007
2860
|
field_name:
|
3008
|
-
title:
|
3009
|
-
description: Configures which key should be used in the location that the descriptor is being injected into
|
2861
|
+
title: Request Option
|
2862
|
+
description: Configures which key should be used in the location that the descriptor is being injected into
|
3010
2863
|
type: string
|
3011
2864
|
examples:
|
3012
2865
|
- segment_id
|
3013
2866
|
interpolation_context:
|
3014
2867
|
- config
|
3015
2868
|
- parameters
|
3016
|
-
field_path:
|
3017
|
-
title: Field Path
|
3018
|
-
description: Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)
|
3019
|
-
type: array
|
3020
|
-
items:
|
3021
|
-
type: string
|
3022
|
-
examples:
|
3023
|
-
- ["data", "viewer", "id"]
|
3024
|
-
interpolation_context:
|
3025
|
-
- config
|
3026
|
-
- parameters
|
3027
2869
|
inject_into:
|
3028
2870
|
title: Inject Into
|
3029
2871
|
description: Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.
|
@@ -3150,39 +2992,79 @@ definitions:
|
|
3150
2992
|
description: Component decoding the response so records can be extracted.
|
3151
2993
|
anyOf:
|
3152
2994
|
- "$ref": "#/definitions/CustomDecoder"
|
3153
|
-
- "$ref": "#/definitions/CsvDecoder"
|
3154
|
-
- "$ref": "#/definitions/GzipDecoder"
|
3155
2995
|
- "$ref": "#/definitions/JsonDecoder"
|
3156
2996
|
- "$ref": "#/definitions/JsonlDecoder"
|
3157
2997
|
- "$ref": "#/definitions/IterableDecoder"
|
3158
2998
|
- "$ref": "#/definitions/XmlDecoder"
|
2999
|
+
- "$ref": "#/definitions/GzipJsonDecoder"
|
3000
|
+
- "$ref": "#/definitions/CompositeRawDecoder"
|
3159
3001
|
- "$ref": "#/definitions/ZipfileDecoder"
|
3160
3002
|
$parameters:
|
3161
3003
|
type: object
|
3162
3004
|
additionalProperties: true
|
3163
|
-
|
3005
|
+
CompositeRawDecoder:
|
3006
|
+
description: "(This is experimental, use at your own risk)"
|
3164
3007
|
type: object
|
3165
3008
|
required:
|
3166
3009
|
- type
|
3167
|
-
-
|
3010
|
+
- parser
|
3168
3011
|
properties:
|
3169
3012
|
type:
|
3170
3013
|
type: string
|
3171
|
-
enum: [
|
3172
|
-
|
3014
|
+
enum: [CompositeRawDecoder]
|
3015
|
+
parser:
|
3173
3016
|
anyOf:
|
3174
|
-
- "$ref": "#/definitions/
|
3175
|
-
- "$ref": "#/definitions/
|
3176
|
-
- "$ref": "#/definitions/
|
3177
|
-
- "$ref": "#/definitions/
|
3178
|
-
|
3017
|
+
- "$ref": "#/definitions/GzipParser"
|
3018
|
+
- "$ref": "#/definitions/JsonParser"
|
3019
|
+
- "$ref": "#/definitions/JsonLineParser"
|
3020
|
+
- "$ref": "#/definitions/CsvParser"
|
3021
|
+
# PARSERS
|
3022
|
+
GzipParser:
|
3023
|
+
type: object
|
3024
|
+
required:
|
3025
|
+
- type
|
3026
|
+
- inner_parser
|
3027
|
+
properties:
|
3028
|
+
type:
|
3029
|
+
type: string
|
3030
|
+
enum: [GzipParser]
|
3031
|
+
inner_parser:
|
3032
|
+
anyOf:
|
3033
|
+
- "$ref": "#/definitions/JsonLineParser"
|
3034
|
+
- "$ref": "#/definitions/CsvParser"
|
3035
|
+
- "$ref": "#/definitions/JsonParser"
|
3036
|
+
JsonParser:
|
3037
|
+
title: JsonParser
|
3038
|
+
description: Parser used for parsing str, bytes, or bytearray data and returning data in a dictionary format.
|
3039
|
+
type: object
|
3040
|
+
required:
|
3041
|
+
- type
|
3042
|
+
properties:
|
3043
|
+
type:
|
3044
|
+
type: string
|
3045
|
+
enum: [JsonParser]
|
3046
|
+
encoding:
|
3047
|
+
type: string
|
3048
|
+
default: utf-8
|
3049
|
+
JsonLineParser:
|
3050
|
+
type: object
|
3051
|
+
required:
|
3052
|
+
- type
|
3053
|
+
properties:
|
3054
|
+
type:
|
3055
|
+
type: string
|
3056
|
+
enum: [JsonLineParser]
|
3057
|
+
encoding:
|
3058
|
+
type: string
|
3059
|
+
default: utf-8
|
3060
|
+
CsvParser:
|
3179
3061
|
type: object
|
3180
3062
|
required:
|
3181
3063
|
- type
|
3182
3064
|
properties:
|
3183
3065
|
type:
|
3184
3066
|
type: string
|
3185
|
-
enum: [
|
3067
|
+
enum: [CsvParser]
|
3186
3068
|
encoding:
|
3187
3069
|
type: string
|
3188
3070
|
default: utf-8
|
@@ -3310,24 +3192,24 @@ definitions:
|
|
3310
3192
|
description: Component decoding the response so records can be extracted.
|
3311
3193
|
anyOf:
|
3312
3194
|
- "$ref": "#/definitions/CustomDecoder"
|
3313
|
-
- "$ref": "#/definitions/CsvDecoder"
|
3314
|
-
- "$ref": "#/definitions/GzipDecoder"
|
3315
3195
|
- "$ref": "#/definitions/JsonDecoder"
|
3316
3196
|
- "$ref": "#/definitions/JsonlDecoder"
|
3317
3197
|
- "$ref": "#/definitions/IterableDecoder"
|
3318
3198
|
- "$ref": "#/definitions/XmlDecoder"
|
3199
|
+
- "$ref": "#/definitions/GzipJsonDecoder"
|
3200
|
+
- "$ref": "#/definitions/CompositeRawDecoder"
|
3319
3201
|
- "$ref": "#/definitions/ZipfileDecoder"
|
3320
3202
|
download_decoder:
|
3321
3203
|
title: Download Decoder
|
3322
3204
|
description: Component decoding the download response so records can be extracted.
|
3323
3205
|
anyOf:
|
3324
3206
|
- "$ref": "#/definitions/CustomDecoder"
|
3325
|
-
- "$ref": "#/definitions/CsvDecoder"
|
3326
|
-
- "$ref": "#/definitions/GzipDecoder"
|
3327
3207
|
- "$ref": "#/definitions/JsonDecoder"
|
3328
3208
|
- "$ref": "#/definitions/JsonlDecoder"
|
3329
3209
|
- "$ref": "#/definitions/IterableDecoder"
|
3330
3210
|
- "$ref": "#/definitions/XmlDecoder"
|
3211
|
+
- "$ref": "#/definitions/GzipJsonDecoder"
|
3212
|
+
- "$ref": "#/definitions/CompositeRawDecoder"
|
3331
3213
|
- "$ref": "#/definitions/ZipfileDecoder"
|
3332
3214
|
$parameters:
|
3333
3215
|
type: object
|
@@ -138,9 +138,7 @@ class DeclarativeStream(Stream):
|
|
138
138
|
"""
|
139
139
|
:param: stream_state We knowingly avoid using stream_state as we want cursors to manage their own state.
|
140
140
|
"""
|
141
|
-
if stream_slice is None or
|
142
|
-
not isinstance(stream_slice, StreamSlice) and stream_slice == {}
|
143
|
-
):
|
141
|
+
if stream_slice is None or stream_slice == {}:
|
144
142
|
# As the parameter is Optional, many would just call `read_records(sync_mode)` during testing without specifying the field
|
145
143
|
# As part of the declarative model without custom components, this should never happen as the CDK would wire up a
|
146
144
|
# SinglePartitionRouter that would create this StreamSlice properly
|
@@ -10,8 +10,10 @@ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
|
|
10
10
|
)
|
11
11
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
12
12
|
from airbyte_cdk.sources.declarative.decoders.json_decoder import (
|
13
|
+
GzipJsonDecoder,
|
13
14
|
IterableDecoder,
|
14
15
|
JsonDecoder,
|
16
|
+
JsonlDecoder,
|
15
17
|
)
|
16
18
|
from airbyte_cdk.sources.declarative.decoders.noop_decoder import NoopDecoder
|
17
19
|
from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import (
|
@@ -25,7 +27,9 @@ __all__ = [
|
|
25
27
|
"CompositeRawDecoder",
|
26
28
|
"JsonDecoder",
|
27
29
|
"JsonParser",
|
30
|
+
"JsonlDecoder",
|
28
31
|
"IterableDecoder",
|
32
|
+
"GzipJsonDecoder",
|
29
33
|
"NoopDecoder",
|
30
34
|
"PaginationDecoderDecorator",
|
31
35
|
"XmlDecoder",
|
@@ -1,6 +1,5 @@
|
|
1
1
|
import csv
|
2
2
|
import gzip
|
3
|
-
import io
|
4
3
|
import json
|
5
4
|
import logging
|
6
5
|
from abc import ABC, abstractmethod
|
@@ -131,15 +130,11 @@ class CompositeRawDecoder(Decoder):
|
|
131
130
|
"""
|
132
131
|
|
133
132
|
parser: Parser
|
134
|
-
stream_response: bool = True
|
135
133
|
|
136
134
|
def is_stream_response(self) -> bool:
|
137
|
-
return
|
135
|
+
return True
|
138
136
|
|
139
137
|
def decode(
|
140
138
|
self, response: requests.Response
|
141
139
|
) -> Generator[MutableMapping[str, Any], None, None]:
|
142
|
-
|
143
|
-
yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
|
144
|
-
else:
|
145
|
-
yield from self.parser.parse(data=io.BytesIO(response.content))
|
140
|
+
yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
|
@@ -10,24 +10,21 @@ from typing import Any, Generator, List, Mapping, MutableMapping, Optional
|
|
10
10
|
import orjson
|
11
11
|
import requests
|
12
12
|
|
13
|
-
from airbyte_cdk.sources.declarative.decoders import CompositeRawDecoder, JsonParser
|
14
13
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
15
14
|
|
16
15
|
logger = logging.getLogger("airbyte")
|
17
16
|
|
18
17
|
|
18
|
+
@dataclass
|
19
19
|
class JsonDecoder(Decoder):
|
20
20
|
"""
|
21
21
|
Decoder strategy that returns the json-encoded content of a response, if any.
|
22
|
-
|
23
|
-
Usually, we would try to instantiate the equivalent `CompositeRawDecoder(parser=JsonParser(), stream_response=False)` but there were specific historical behaviors related to the JsonDecoder that we didn't know if we could remove like the fallback on {} in case of errors.
|
24
22
|
"""
|
25
23
|
|
26
|
-
|
27
|
-
self._decoder = CompositeRawDecoder(parser=JsonParser(), stream_response=False)
|
24
|
+
parameters: InitVar[Mapping[str, Any]]
|
28
25
|
|
29
26
|
def is_stream_response(self) -> bool:
|
30
|
-
return
|
27
|
+
return False
|
31
28
|
|
32
29
|
def decode(
|
33
30
|
self, response: requests.Response
|
@@ -35,16 +32,25 @@ class JsonDecoder(Decoder):
|
|
35
32
|
"""
|
36
33
|
Given the response is an empty string or an empty list, the function will return a generator with an empty mapping.
|
37
34
|
"""
|
38
|
-
has_yielded = False
|
39
35
|
try:
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
36
|
+
body_json = response.json()
|
37
|
+
yield from self.parse_body_json(body_json)
|
38
|
+
except requests.exceptions.JSONDecodeError:
|
39
|
+
logger.warning(
|
40
|
+
f"Response cannot be parsed into json: {response.status_code=}, {response.text=}"
|
41
|
+
)
|
44
42
|
yield {}
|
45
43
|
|
46
|
-
|
44
|
+
@staticmethod
|
45
|
+
def parse_body_json(
|
46
|
+
body_json: MutableMapping[str, Any] | List[MutableMapping[str, Any]],
|
47
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
48
|
+
if not isinstance(body_json, list):
|
49
|
+
body_json = [body_json]
|
50
|
+
if len(body_json) == 0:
|
47
51
|
yield {}
|
52
|
+
else:
|
53
|
+
yield from body_json
|
48
54
|
|
49
55
|
|
50
56
|
@dataclass
|
@@ -63,3 +69,43 @@ class IterableDecoder(Decoder):
|
|
63
69
|
) -> Generator[MutableMapping[str, Any], None, None]:
|
64
70
|
for line in response.iter_lines():
|
65
71
|
yield {"record": line.decode()}
|
72
|
+
|
73
|
+
|
74
|
+
@dataclass
|
75
|
+
class JsonlDecoder(Decoder):
|
76
|
+
"""
|
77
|
+
Decoder strategy that returns the json-encoded content of the response, if any.
|
78
|
+
"""
|
79
|
+
|
80
|
+
parameters: InitVar[Mapping[str, Any]]
|
81
|
+
|
82
|
+
def is_stream_response(self) -> bool:
|
83
|
+
return True
|
84
|
+
|
85
|
+
def decode(
|
86
|
+
self, response: requests.Response
|
87
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
88
|
+
# TODO: make the delimiter configurable? It is usually `\n`, but an optional override might be useful.
|
89
|
+
# https://github.com/airbytehq/airbyte-internal-issues/issues/8436
|
90
|
+
for record in response.iter_lines():
|
91
|
+
yield orjson.loads(record)
|
92
|
+
|
93
|
+
|
94
|
+
@dataclass
|
95
|
+
class GzipJsonDecoder(JsonDecoder):
|
96
|
+
encoding: Optional[str]
|
97
|
+
|
98
|
+
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
99
|
+
if self.encoding:
|
100
|
+
try:
|
101
|
+
codecs.lookup(self.encoding)
|
102
|
+
except LookupError:
|
103
|
+
raise ValueError(
|
104
|
+
f"Invalid encoding '{self.encoding}'. Please check provided encoding"
|
105
|
+
)
|
106
|
+
|
107
|
+
def decode(
|
108
|
+
self, response: requests.Response
|
109
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
110
|
+
raw_string = decompress(response.content).decode(encoding=self.encoding or "utf-8")
|
111
|
+
yield from self.parse_body_json(orjson.loads(raw_string))
|