airbyte-cdk 6.33.2.dev0__py3-none-any.whl → 6.33.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/auth/oauth.py +6 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +15 -1
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +23 -288
- airbyte_cdk/sources/declarative/decoders/__init__.py +0 -4
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +7 -2
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +12 -58
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +6 -11
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -4
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +14 -202
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +53 -196
- airbyte_cdk/sources/declarative/requesters/http_requester.py +0 -3
- airbyte_cdk/sources/streams/call_rate.py +40 -116
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +3 -0
- {airbyte_cdk-6.33.2.dev0.dist-info → airbyte_cdk-6.33.3.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.33.2.dev0.dist-info → airbyte_cdk-6.33.3.dist-info}/RECORD +19 -19
- {airbyte_cdk-6.33.2.dev0.dist-info → airbyte_cdk-6.33.3.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.33.2.dev0.dist-info → airbyte_cdk-6.33.3.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.33.2.dev0.dist-info → airbyte_cdk-6.33.3.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.33.2.dev0.dist-info → airbyte_cdk-6.33.3.dist-info}/entry_points.txt +0 -0
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from dataclasses import InitVar, dataclass, field
|
6
|
-
from datetime import timedelta
|
6
|
+
from datetime import datetime, timedelta
|
7
7
|
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
8
8
|
|
9
9
|
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
|
@@ -232,8 +232,13 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
232
232
|
return self._refresh_request_headers.eval(self.config)
|
233
233
|
|
234
234
|
def get_token_expiry_date(self) -> AirbyteDateTime:
|
235
|
+
if not self._has_access_token_been_initialized():
|
236
|
+
return AirbyteDateTime.from_datetime(datetime.min)
|
235
237
|
return self._token_expiry_date # type: ignore # _token_expiry_date is an AirbyteDateTime. It is never None despite what mypy thinks
|
236
238
|
|
239
|
+
def _has_access_token_been_initialized(self) -> bool:
|
240
|
+
return self._access_token is not None
|
241
|
+
|
237
242
|
def set_token_expiry_date(self, value: Union[str, int]) -> None:
|
238
243
|
self._token_expiry_date = self._parse_token_expiration_date(value)
|
239
244
|
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple
|
6
|
+
from typing import Any, Generic, Iterator, List, Mapping, MutableMapping, Optional, Tuple
|
7
7
|
|
8
8
|
from airbyte_cdk.models import (
|
9
9
|
AirbyteCatalog,
|
@@ -224,6 +224,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
224
224
|
stream_state = self._connector_state_manager.get_stream_state(
|
225
225
|
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
226
226
|
)
|
227
|
+
stream_state = self._migrate_state(declarative_stream, stream_state)
|
227
228
|
|
228
229
|
retriever = self._get_retriever(declarative_stream, stream_state)
|
229
230
|
|
@@ -331,6 +332,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
331
332
|
stream_state = self._connector_state_manager.get_stream_state(
|
332
333
|
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
333
334
|
)
|
335
|
+
stream_state = self._migrate_state(declarative_stream, stream_state)
|
336
|
+
|
334
337
|
partition_router = declarative_stream.retriever.stream_slicer._partition_router
|
335
338
|
|
336
339
|
perpartition_cursor = (
|
@@ -521,3 +524,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
521
524
|
if stream.stream.name not in concurrent_stream_names
|
522
525
|
]
|
523
526
|
)
|
527
|
+
|
528
|
+
@staticmethod
|
529
|
+
def _migrate_state(
|
530
|
+
declarative_stream: DeclarativeStream, stream_state: MutableMapping[str, Any]
|
531
|
+
) -> MutableMapping[str, Any]:
|
532
|
+
for state_migration in declarative_stream.state_migrations:
|
533
|
+
if state_migration.should_migrate(stream_state):
|
534
|
+
# The state variable is expected to be mutable but the migrate method returns an immutable mapping.
|
535
|
+
stream_state = dict(state_migration.migrate(stream_state))
|
536
|
+
|
537
|
+
return stream_state
|
@@ -40,12 +40,6 @@ properties:
|
|
40
40
|
"$ref": "#/definitions/Spec"
|
41
41
|
concurrency_level:
|
42
42
|
"$ref": "#/definitions/ConcurrencyLevel"
|
43
|
-
api_budget:
|
44
|
-
title: API Budget
|
45
|
-
description: Defines how many requests can be made to the API in a given time frame. This field accepts either a generic APIBudget or an HTTP-specific configuration (HTTPAPIBudget) to be applied across all streams.
|
46
|
-
anyOf:
|
47
|
-
- "$ref": "#/definitions/APIBudget"
|
48
|
-
- "$ref": "#/definitions/HTTPAPIBudget"
|
49
43
|
metadata:
|
50
44
|
type: object
|
51
45
|
description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
|
@@ -800,7 +794,7 @@ definitions:
|
|
800
794
|
description: This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)
|
801
795
|
type: object
|
802
796
|
required:
|
803
|
-
|
797
|
+
- target
|
804
798
|
properties:
|
805
799
|
target:
|
806
800
|
title: Target
|
@@ -1371,207 +1365,6 @@ definitions:
|
|
1371
1365
|
$parameters:
|
1372
1366
|
type: object
|
1373
1367
|
additional_properties: true
|
1374
|
-
APIBudget:
|
1375
|
-
title: API Budget
|
1376
|
-
description: >
|
1377
|
-
A generic API budget configuration that defines the policies (rate limiting rules)
|
1378
|
-
and the maximum number of attempts to acquire a call credit. This budget does not automatically
|
1379
|
-
update itself based on HTTP response headers.
|
1380
|
-
type: object
|
1381
|
-
required:
|
1382
|
-
- type
|
1383
|
-
- policies
|
1384
|
-
properties:
|
1385
|
-
type:
|
1386
|
-
type: string
|
1387
|
-
enum: [APIBudget]
|
1388
|
-
policies:
|
1389
|
-
title: Policies
|
1390
|
-
description: List of call rate policies that define how many calls are allowed.
|
1391
|
-
type: array
|
1392
|
-
items:
|
1393
|
-
anyOf:
|
1394
|
-
- "$ref": "#/definitions/FixedWindowCallRatePolicy"
|
1395
|
-
- "$ref": "#/definitions/MovingWindowCallRatePolicy"
|
1396
|
-
- "$ref": "#/definitions/UnlimitedCallRatePolicy"
|
1397
|
-
maximum_attempts_to_acquire:
|
1398
|
-
title: Maximum Attempts to Acquire
|
1399
|
-
description: The maximum number of attempts to acquire a call before giving up.
|
1400
|
-
type: integer
|
1401
|
-
default: 100000
|
1402
|
-
additionalProperties: true
|
1403
|
-
HTTPAPIBudget:
|
1404
|
-
title: HTTP API Budget
|
1405
|
-
description: >
|
1406
|
-
An HTTP-specific API budget that extends APIBudget by updating rate limiting information based
|
1407
|
-
on HTTP response headers. It extracts available calls and the next reset timestamp from the HTTP responses.
|
1408
|
-
type: object
|
1409
|
-
required:
|
1410
|
-
- type
|
1411
|
-
- policies
|
1412
|
-
properties:
|
1413
|
-
type:
|
1414
|
-
type: string
|
1415
|
-
enum: [HTTPAPIBudget]
|
1416
|
-
policies:
|
1417
|
-
title: Policies
|
1418
|
-
description: List of call rate policies that define how many calls are allowed.
|
1419
|
-
type: array
|
1420
|
-
items:
|
1421
|
-
anyOf:
|
1422
|
-
- "$ref": "#/definitions/FixedWindowCallRatePolicy"
|
1423
|
-
- "$ref": "#/definitions/MovingWindowCallRatePolicy"
|
1424
|
-
- "$ref": "#/definitions/UnlimitedCallRatePolicy"
|
1425
|
-
ratelimit_reset_header:
|
1426
|
-
title: Rate Limit Reset Header
|
1427
|
-
description: The HTTP response header name that indicates when the rate limit resets.
|
1428
|
-
type: string
|
1429
|
-
default: "ratelimit-reset"
|
1430
|
-
ratelimit_remaining_header:
|
1431
|
-
title: Rate Limit Remaining Header
|
1432
|
-
description: The HTTP response header name that indicates the number of remaining allowed calls.
|
1433
|
-
type: string
|
1434
|
-
default: "ratelimit-remaining"
|
1435
|
-
status_codes_for_ratelimit_hit:
|
1436
|
-
title: Status Codes for Rate Limit Hit
|
1437
|
-
description: List of HTTP status codes that indicate a rate limit has been hit.
|
1438
|
-
type: array
|
1439
|
-
items:
|
1440
|
-
type: integer
|
1441
|
-
default: [429]
|
1442
|
-
maximum_attempts_to_acquire:
|
1443
|
-
title: Maximum Attempts to Acquire
|
1444
|
-
description: The maximum number of attempts to acquire a call before giving up.
|
1445
|
-
type: integer
|
1446
|
-
default: 100000
|
1447
|
-
additionalProperties: true
|
1448
|
-
FixedWindowCallRatePolicy:
|
1449
|
-
title: Fixed Window Call Rate Policy
|
1450
|
-
description: A policy that allows a fixed number of calls within a specific time window.
|
1451
|
-
type: object
|
1452
|
-
required:
|
1453
|
-
- type
|
1454
|
-
- next_reset_ts
|
1455
|
-
- period
|
1456
|
-
- call_limit
|
1457
|
-
- matchers
|
1458
|
-
properties:
|
1459
|
-
type:
|
1460
|
-
type: string
|
1461
|
-
enum: [FixedWindowCallRatePolicy]
|
1462
|
-
next_reset_ts:
|
1463
|
-
title: Next Reset Timestamp
|
1464
|
-
description: The timestamp when the rate limit will reset.
|
1465
|
-
type: string
|
1466
|
-
format: date-time
|
1467
|
-
period:
|
1468
|
-
title: Period
|
1469
|
-
description: The time interval for the rate limit window.
|
1470
|
-
type: string
|
1471
|
-
format: duration
|
1472
|
-
call_limit:
|
1473
|
-
title: Call Limit
|
1474
|
-
description: The maximum number of calls allowed within the period.
|
1475
|
-
type: integer
|
1476
|
-
matchers:
|
1477
|
-
title: Matchers
|
1478
|
-
description: List of matchers that define which requests this policy applies to.
|
1479
|
-
type: array
|
1480
|
-
items:
|
1481
|
-
"$ref": "#/definitions/HttpRequestRegexMatcher"
|
1482
|
-
additionalProperties: true
|
1483
|
-
MovingWindowCallRatePolicy:
|
1484
|
-
title: Moving Window Call Rate Policy
|
1485
|
-
description: A policy that allows a fixed number of calls within a moving time window.
|
1486
|
-
type: object
|
1487
|
-
required:
|
1488
|
-
- type
|
1489
|
-
- rates
|
1490
|
-
- matchers
|
1491
|
-
properties:
|
1492
|
-
type:
|
1493
|
-
type: string
|
1494
|
-
enum: [MovingWindowCallRatePolicy]
|
1495
|
-
rates:
|
1496
|
-
title: Rates
|
1497
|
-
description: List of rates that define the call limits for different time intervals.
|
1498
|
-
type: array
|
1499
|
-
items:
|
1500
|
-
"$ref": "#/definitions/Rate"
|
1501
|
-
matchers:
|
1502
|
-
title: Matchers
|
1503
|
-
description: List of matchers that define which requests this policy applies to.
|
1504
|
-
type: array
|
1505
|
-
items:
|
1506
|
-
"$ref": "#/definitions/HttpRequestRegexMatcher"
|
1507
|
-
additionalProperties: true
|
1508
|
-
UnlimitedCallRatePolicy:
|
1509
|
-
title: Unlimited Call Rate Policy
|
1510
|
-
description: A policy that allows unlimited calls for specific requests.
|
1511
|
-
type: object
|
1512
|
-
required:
|
1513
|
-
- type
|
1514
|
-
- matchers
|
1515
|
-
properties:
|
1516
|
-
type:
|
1517
|
-
type: string
|
1518
|
-
enum: [UnlimitedCallRatePolicy]
|
1519
|
-
matchers:
|
1520
|
-
title: Matchers
|
1521
|
-
description: List of matchers that define which requests this policy applies to.
|
1522
|
-
type: array
|
1523
|
-
items:
|
1524
|
-
"$ref": "#/definitions/HttpRequestRegexMatcher"
|
1525
|
-
additionalProperties: true
|
1526
|
-
Rate:
|
1527
|
-
title: Rate
|
1528
|
-
description: Defines a rate limit with a specific number of calls allowed within a time interval.
|
1529
|
-
type: object
|
1530
|
-
required:
|
1531
|
-
- limit
|
1532
|
-
- interval
|
1533
|
-
properties:
|
1534
|
-
limit:
|
1535
|
-
title: Limit
|
1536
|
-
description: The maximum number of calls allowed within the interval.
|
1537
|
-
type: integer
|
1538
|
-
interval:
|
1539
|
-
title: Interval
|
1540
|
-
description: The time interval for the rate limit.
|
1541
|
-
type: string
|
1542
|
-
format: duration
|
1543
|
-
additionalProperties: true
|
1544
|
-
HttpRequestRegexMatcher:
|
1545
|
-
title: HTTP Request Matcher
|
1546
|
-
description: >
|
1547
|
-
Matches HTTP requests based on method, base URL, URL path pattern, query parameters, and headers.
|
1548
|
-
Use `url_base` to specify the scheme and host (without trailing slash) and
|
1549
|
-
`url_path_pattern` to apply a regex to the request path.
|
1550
|
-
type: object
|
1551
|
-
properties:
|
1552
|
-
method:
|
1553
|
-
title: Method
|
1554
|
-
description: The HTTP method to match (e.g., GET, POST).
|
1555
|
-
type: string
|
1556
|
-
url_base:
|
1557
|
-
title: URL Base
|
1558
|
-
description: The base URL (scheme and host, e.g. "https://api.example.com") to match.
|
1559
|
-
type: string
|
1560
|
-
url_path_pattern:
|
1561
|
-
title: URL Path Pattern
|
1562
|
-
description: A regular expression pattern to match the URL path.
|
1563
|
-
type: string
|
1564
|
-
params:
|
1565
|
-
title: Parameters
|
1566
|
-
description: The query parameters to match.
|
1567
|
-
type: object
|
1568
|
-
additionalProperties: true
|
1569
|
-
headers:
|
1570
|
-
title: Headers
|
1571
|
-
description: The headers to match.
|
1572
|
-
type: object
|
1573
|
-
additionalProperties: true
|
1574
|
-
additionalProperties: true
|
1575
1368
|
DefaultErrorHandler:
|
1576
1369
|
title: Default Error Handler
|
1577
1370
|
description: Component defining how to handle errors. Default behavior includes only retrying server errors (HTTP 5XX) and too many requests (HTTP 429) with an exponential backoff.
|
@@ -1756,7 +1549,6 @@ definitions:
|
|
1756
1549
|
anyOf:
|
1757
1550
|
- "$ref": "#/definitions/JsonDecoder"
|
1758
1551
|
- "$ref": "#/definitions/XmlDecoder"
|
1759
|
-
- "$ref": "#/definitions/CompositeRawDecoder"
|
1760
1552
|
$parameters:
|
1761
1553
|
type: object
|
1762
1554
|
additionalProperties: true
|
@@ -2340,23 +2132,6 @@ definitions:
|
|
2340
2132
|
$parameters:
|
2341
2133
|
type: object
|
2342
2134
|
additionalProperties: true
|
2343
|
-
GzipJsonDecoder:
|
2344
|
-
title: GzipJson Decoder
|
2345
|
-
description: Use this if the response is Gzip compressed Json.
|
2346
|
-
type: object
|
2347
|
-
additionalProperties: true
|
2348
|
-
required:
|
2349
|
-
- type
|
2350
|
-
properties:
|
2351
|
-
type:
|
2352
|
-
type: string
|
2353
|
-
enum: [GzipJsonDecoder]
|
2354
|
-
encoding:
|
2355
|
-
type: string
|
2356
|
-
default: utf-8
|
2357
|
-
$parameters:
|
2358
|
-
type: object
|
2359
|
-
additionalProperties: true
|
2360
2135
|
ZipfileDecoder:
|
2361
2136
|
title: Zipfile Decoder
|
2362
2137
|
description: Decoder for response data that is returned as zipfile(s).
|
@@ -2364,19 +2139,19 @@ definitions:
|
|
2364
2139
|
additionalProperties: true
|
2365
2140
|
required:
|
2366
2141
|
- type
|
2367
|
-
-
|
2142
|
+
- decoder
|
2368
2143
|
properties:
|
2369
2144
|
type:
|
2370
2145
|
type: string
|
2371
2146
|
enum: [ZipfileDecoder]
|
2372
|
-
|
2147
|
+
decoder:
|
2373
2148
|
title: Parser
|
2374
2149
|
description: Parser to parse the decompressed data from the zipfile(s).
|
2375
2150
|
anyOf:
|
2376
|
-
- "$ref": "#/definitions/
|
2377
|
-
- "$ref": "#/definitions/
|
2378
|
-
- "$ref": "#/definitions/
|
2379
|
-
- "$ref": "#/definitions/
|
2151
|
+
- "$ref": "#/definitions/CsvDecoder"
|
2152
|
+
- "$ref": "#/definitions/GzipDecoder"
|
2153
|
+
- "$ref": "#/definitions/JsonDecoder"
|
2154
|
+
- "$ref": "#/definitions/JsonlDecoder"
|
2380
2155
|
ListPartitionRouter:
|
2381
2156
|
title: List Partition Router
|
2382
2157
|
description: A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. During a sync, each value is iterated over and can be used as input to outbound API requests.
|
@@ -3209,79 +2984,39 @@ definitions:
|
|
3209
2984
|
description: Component decoding the response so records can be extracted.
|
3210
2985
|
anyOf:
|
3211
2986
|
- "$ref": "#/definitions/CustomDecoder"
|
2987
|
+
- "$ref": "#/definitions/CsvDecoder"
|
2988
|
+
- "$ref": "#/definitions/GzipDecoder"
|
3212
2989
|
- "$ref": "#/definitions/JsonDecoder"
|
3213
2990
|
- "$ref": "#/definitions/JsonlDecoder"
|
3214
2991
|
- "$ref": "#/definitions/IterableDecoder"
|
3215
2992
|
- "$ref": "#/definitions/XmlDecoder"
|
3216
|
-
- "$ref": "#/definitions/GzipJsonDecoder"
|
3217
|
-
- "$ref": "#/definitions/CompositeRawDecoder"
|
3218
2993
|
- "$ref": "#/definitions/ZipfileDecoder"
|
3219
2994
|
$parameters:
|
3220
2995
|
type: object
|
3221
2996
|
additionalProperties: true
|
3222
|
-
|
3223
|
-
description: "(This is experimental, use at your own risk)"
|
2997
|
+
GzipDecoder:
|
3224
2998
|
type: object
|
3225
2999
|
required:
|
3226
3000
|
- type
|
3227
|
-
-
|
3001
|
+
- decoder
|
3228
3002
|
properties:
|
3229
3003
|
type:
|
3230
3004
|
type: string
|
3231
|
-
enum: [
|
3232
|
-
|
3233
|
-
anyOf:
|
3234
|
-
- "$ref": "#/definitions/GzipParser"
|
3235
|
-
- "$ref": "#/definitions/JsonParser"
|
3236
|
-
- "$ref": "#/definitions/JsonLineParser"
|
3237
|
-
- "$ref": "#/definitions/CsvParser"
|
3238
|
-
# PARSERS
|
3239
|
-
GzipParser:
|
3240
|
-
type: object
|
3241
|
-
required:
|
3242
|
-
- type
|
3243
|
-
- inner_parser
|
3244
|
-
properties:
|
3245
|
-
type:
|
3246
|
-
type: string
|
3247
|
-
enum: [GzipParser]
|
3248
|
-
inner_parser:
|
3005
|
+
enum: [GzipDecoder]
|
3006
|
+
decoder:
|
3249
3007
|
anyOf:
|
3250
|
-
- "$ref": "#/definitions/
|
3251
|
-
- "$ref": "#/definitions/
|
3252
|
-
- "$ref": "#/definitions/
|
3253
|
-
|
3254
|
-
|
3255
|
-
description: Parser used for parsing str, bytes, or bytearray data and returning data in a dictionary format.
|
3256
|
-
type: object
|
3257
|
-
required:
|
3258
|
-
- type
|
3259
|
-
properties:
|
3260
|
-
type:
|
3261
|
-
type: string
|
3262
|
-
enum: [JsonParser]
|
3263
|
-
encoding:
|
3264
|
-
type: string
|
3265
|
-
default: utf-8
|
3266
|
-
JsonLineParser:
|
3267
|
-
type: object
|
3268
|
-
required:
|
3269
|
-
- type
|
3270
|
-
properties:
|
3271
|
-
type:
|
3272
|
-
type: string
|
3273
|
-
enum: [JsonLineParser]
|
3274
|
-
encoding:
|
3275
|
-
type: string
|
3276
|
-
default: utf-8
|
3277
|
-
CsvParser:
|
3008
|
+
- "$ref": "#/definitions/CsvDecoder"
|
3009
|
+
- "$ref": "#/definitions/GzipDecoder"
|
3010
|
+
- "$ref": "#/definitions/JsonDecoder"
|
3011
|
+
- "$ref": "#/definitions/JsonlDecoder"
|
3012
|
+
CsvDecoder:
|
3278
3013
|
type: object
|
3279
3014
|
required:
|
3280
3015
|
- type
|
3281
3016
|
properties:
|
3282
3017
|
type:
|
3283
3018
|
type: string
|
3284
|
-
enum: [
|
3019
|
+
enum: [CsvDecoder]
|
3285
3020
|
encoding:
|
3286
3021
|
type: string
|
3287
3022
|
default: utf-8
|
@@ -3409,24 +3144,24 @@ definitions:
|
|
3409
3144
|
description: Component decoding the response so records can be extracted.
|
3410
3145
|
anyOf:
|
3411
3146
|
- "$ref": "#/definitions/CustomDecoder"
|
3147
|
+
- "$ref": "#/definitions/CsvDecoder"
|
3148
|
+
- "$ref": "#/definitions/GzipDecoder"
|
3412
3149
|
- "$ref": "#/definitions/JsonDecoder"
|
3413
3150
|
- "$ref": "#/definitions/JsonlDecoder"
|
3414
3151
|
- "$ref": "#/definitions/IterableDecoder"
|
3415
3152
|
- "$ref": "#/definitions/XmlDecoder"
|
3416
|
-
- "$ref": "#/definitions/GzipJsonDecoder"
|
3417
|
-
- "$ref": "#/definitions/CompositeRawDecoder"
|
3418
3153
|
- "$ref": "#/definitions/ZipfileDecoder"
|
3419
3154
|
download_decoder:
|
3420
3155
|
title: Download Decoder
|
3421
3156
|
description: Component decoding the download response so records can be extracted.
|
3422
3157
|
anyOf:
|
3423
3158
|
- "$ref": "#/definitions/CustomDecoder"
|
3159
|
+
- "$ref": "#/definitions/CsvDecoder"
|
3160
|
+
- "$ref": "#/definitions/GzipDecoder"
|
3424
3161
|
- "$ref": "#/definitions/JsonDecoder"
|
3425
3162
|
- "$ref": "#/definitions/JsonlDecoder"
|
3426
3163
|
- "$ref": "#/definitions/IterableDecoder"
|
3427
3164
|
- "$ref": "#/definitions/XmlDecoder"
|
3428
|
-
- "$ref": "#/definitions/GzipJsonDecoder"
|
3429
|
-
- "$ref": "#/definitions/CompositeRawDecoder"
|
3430
3165
|
- "$ref": "#/definitions/ZipfileDecoder"
|
3431
3166
|
$parameters:
|
3432
3167
|
type: object
|
@@ -10,10 +10,8 @@ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
|
|
10
10
|
)
|
11
11
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
12
12
|
from airbyte_cdk.sources.declarative.decoders.json_decoder import (
|
13
|
-
GzipJsonDecoder,
|
14
13
|
IterableDecoder,
|
15
14
|
JsonDecoder,
|
16
|
-
JsonlDecoder,
|
17
15
|
)
|
18
16
|
from airbyte_cdk.sources.declarative.decoders.noop_decoder import NoopDecoder
|
19
17
|
from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import (
|
@@ -27,9 +25,7 @@ __all__ = [
|
|
27
25
|
"CompositeRawDecoder",
|
28
26
|
"JsonDecoder",
|
29
27
|
"JsonParser",
|
30
|
-
"JsonlDecoder",
|
31
28
|
"IterableDecoder",
|
32
|
-
"GzipJsonDecoder",
|
33
29
|
"NoopDecoder",
|
34
30
|
"PaginationDecoderDecorator",
|
35
31
|
"XmlDecoder",
|
@@ -1,5 +1,6 @@
|
|
1
1
|
import csv
|
2
2
|
import gzip
|
3
|
+
import io
|
3
4
|
import json
|
4
5
|
import logging
|
5
6
|
from abc import ABC, abstractmethod
|
@@ -130,11 +131,15 @@ class CompositeRawDecoder(Decoder):
|
|
130
131
|
"""
|
131
132
|
|
132
133
|
parser: Parser
|
134
|
+
stream_response: bool = True
|
133
135
|
|
134
136
|
def is_stream_response(self) -> bool:
|
135
|
-
return
|
137
|
+
return self.stream_response
|
136
138
|
|
137
139
|
def decode(
|
138
140
|
self, response: requests.Response
|
139
141
|
) -> Generator[MutableMapping[str, Any], None, None]:
|
140
|
-
|
142
|
+
if self.is_stream_response():
|
143
|
+
yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
|
144
|
+
else:
|
145
|
+
yield from self.parser.parse(data=io.BytesIO(response.content))
|
@@ -10,21 +10,24 @@ from typing import Any, Generator, List, Mapping, MutableMapping, Optional
|
|
10
10
|
import orjson
|
11
11
|
import requests
|
12
12
|
|
13
|
+
from airbyte_cdk.sources.declarative.decoders import CompositeRawDecoder, JsonParser
|
13
14
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
14
15
|
|
15
16
|
logger = logging.getLogger("airbyte")
|
16
17
|
|
17
18
|
|
18
|
-
@dataclass
|
19
19
|
class JsonDecoder(Decoder):
|
20
20
|
"""
|
21
21
|
Decoder strategy that returns the json-encoded content of a response, if any.
|
22
|
+
|
23
|
+
Usually, we would try to instantiate the equivalent `CompositeRawDecoder(parser=JsonParser(), stream_response=False)` but there were specific historical behaviors related to the JsonDecoder that we didn't know if we could remove like the fallback on {} in case of errors.
|
22
24
|
"""
|
23
25
|
|
24
|
-
parameters:
|
26
|
+
def __init__(self, parameters: Mapping[str, Any]):
|
27
|
+
self._decoder = CompositeRawDecoder(parser=JsonParser(), stream_response=False)
|
25
28
|
|
26
29
|
def is_stream_response(self) -> bool:
|
27
|
-
return
|
30
|
+
return self._decoder.is_stream_response()
|
28
31
|
|
29
32
|
def decode(
|
30
33
|
self, response: requests.Response
|
@@ -32,25 +35,16 @@ class JsonDecoder(Decoder):
|
|
32
35
|
"""
|
33
36
|
Given the response is an empty string or an empty list, the function will return a generator with an empty mapping.
|
34
37
|
"""
|
38
|
+
has_yielded = False
|
35
39
|
try:
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
f"Response cannot be parsed into json: {response.status_code=}, {response.text=}"
|
41
|
-
)
|
40
|
+
for element in self._decoder.decode(response):
|
41
|
+
yield element
|
42
|
+
has_yielded = True
|
43
|
+
except Exception:
|
42
44
|
yield {}
|
43
45
|
|
44
|
-
|
45
|
-
def parse_body_json(
|
46
|
-
body_json: MutableMapping[str, Any] | List[MutableMapping[str, Any]],
|
47
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
48
|
-
if not isinstance(body_json, list):
|
49
|
-
body_json = [body_json]
|
50
|
-
if len(body_json) == 0:
|
46
|
+
if not has_yielded:
|
51
47
|
yield {}
|
52
|
-
else:
|
53
|
-
yield from body_json
|
54
48
|
|
55
49
|
|
56
50
|
@dataclass
|
@@ -69,43 +63,3 @@ class IterableDecoder(Decoder):
|
|
69
63
|
) -> Generator[MutableMapping[str, Any], None, None]:
|
70
64
|
for line in response.iter_lines():
|
71
65
|
yield {"record": line.decode()}
|
72
|
-
|
73
|
-
|
74
|
-
@dataclass
|
75
|
-
class JsonlDecoder(Decoder):
|
76
|
-
"""
|
77
|
-
Decoder strategy that returns the json-encoded content of the response, if any.
|
78
|
-
"""
|
79
|
-
|
80
|
-
parameters: InitVar[Mapping[str, Any]]
|
81
|
-
|
82
|
-
def is_stream_response(self) -> bool:
|
83
|
-
return True
|
84
|
-
|
85
|
-
def decode(
|
86
|
-
self, response: requests.Response
|
87
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
88
|
-
# TODO???: set delimiter? usually it is `\n` but maybe it would be useful to set optional?
|
89
|
-
# https://github.com/airbytehq/airbyte-internal-issues/issues/8436
|
90
|
-
for record in response.iter_lines():
|
91
|
-
yield orjson.loads(record)
|
92
|
-
|
93
|
-
|
94
|
-
@dataclass
|
95
|
-
class GzipJsonDecoder(JsonDecoder):
|
96
|
-
encoding: Optional[str]
|
97
|
-
|
98
|
-
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
99
|
-
if self.encoding:
|
100
|
-
try:
|
101
|
-
codecs.lookup(self.encoding)
|
102
|
-
except LookupError:
|
103
|
-
raise ValueError(
|
104
|
-
f"Invalid encoding '{self.encoding}'. Please check provided encoding"
|
105
|
-
)
|
106
|
-
|
107
|
-
def decode(
|
108
|
-
self, response: requests.Response
|
109
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
110
|
-
raw_string = decompress(response.content).decode(encoding=self.encoding or "utf-8")
|
111
|
-
yield from self.parse_body_json(orjson.loads(raw_string))
|
@@ -58,8 +58,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
58
58
|
ConcurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
|
59
59
|
"""
|
60
60
|
|
61
|
-
DEFAULT_MAX_PARTITIONS_NUMBER =
|
62
|
-
SWITCH_TO_GLOBAL_LIMIT = 1000
|
61
|
+
DEFAULT_MAX_PARTITIONS_NUMBER = 10000
|
63
62
|
_NO_STATE: Mapping[str, Any] = {}
|
64
63
|
_NO_CURSOR_STATE: Mapping[str, Any] = {}
|
65
64
|
_GLOBAL_STATE_KEY = "state"
|
@@ -100,7 +99,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
100
99
|
self._new_global_cursor: Optional[StreamState] = None
|
101
100
|
self._lookback_window: int = 0
|
102
101
|
self._parent_state: Optional[StreamState] = None
|
103
|
-
self.
|
102
|
+
self._over_limit: int = 0
|
104
103
|
self._use_global_cursor: bool = False
|
105
104
|
self._partition_serializer = PerPartitionKeySerializer()
|
106
105
|
|
@@ -234,8 +233,8 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
234
233
|
or removed due to being the oldest.
|
235
234
|
"""
|
236
235
|
with self._lock:
|
237
|
-
self._number_of_partitions += 1
|
238
236
|
while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
|
237
|
+
self._over_limit += 1
|
239
238
|
# Try removing finished partitions first
|
240
239
|
for partition_key in list(self._cursor_per_partition.keys()):
|
241
240
|
if (
|
@@ -246,7 +245,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
246
245
|
partition_key
|
247
246
|
) # Remove the oldest partition
|
248
247
|
logger.warning(
|
249
|
-
f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self.
|
248
|
+
f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
|
250
249
|
)
|
251
250
|
break
|
252
251
|
else:
|
@@ -255,7 +254,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
255
254
|
1
|
256
255
|
] # Remove the oldest partition
|
257
256
|
logger.warning(
|
258
|
-
f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self.
|
257
|
+
f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
|
259
258
|
)
|
260
259
|
|
261
260
|
def _set_initial_state(self, stream_state: StreamState) -> None:
|
@@ -356,10 +355,6 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
356
355
|
|
357
356
|
def observe(self, record: Record) -> None:
|
358
357
|
if not self._use_global_cursor and self.limit_reached():
|
359
|
-
logger.info(
|
360
|
-
f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. "
|
361
|
-
f"Switching to global cursor for {self._stream_name}."
|
362
|
-
)
|
363
358
|
self._use_global_cursor = True
|
364
359
|
|
365
360
|
if not record.associated_slice:
|
@@ -402,4 +397,4 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
402
397
|
return cursor
|
403
398
|
|
404
399
|
def limit_reached(self) -> bool:
|
405
|
-
return self.
|
400
|
+
return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
|