airbyte-cdk 6.33.3__py3-none-any.whl → 6.34.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/entrypoint.py +6 -6
- airbyte_cdk/logger.py +4 -1
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +167 -1
- airbyte_cdk/sources/declarative/extractors/record_selector.py +12 -3
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +4 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +126 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +123 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +3 -0
- airbyte_cdk/sources/streams/call_rate.py +185 -47
- airbyte_cdk/utils/print_buffer.py +4 -0
- {airbyte_cdk-6.33.3.dist-info → airbyte_cdk-6.34.0.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.33.3.dist-info → airbyte_cdk-6.34.0.dev0.dist-info}/RECORD +16 -16
- {airbyte_cdk-6.33.3.dist-info → airbyte_cdk-6.34.0.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.33.3.dist-info → airbyte_cdk-6.34.0.dev0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.33.3.dist-info → airbyte_cdk-6.34.0.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.33.3.dist-info → airbyte_cdk-6.34.0.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/entrypoint.py
CHANGED
@@ -22,7 +22,7 @@ from requests import PreparedRequest, Response, Session
|
|
22
22
|
|
23
23
|
from airbyte_cdk.connector import TConfig
|
24
24
|
from airbyte_cdk.exception_handler import init_uncaught_exception_handler
|
25
|
-
from airbyte_cdk.logger import init_logger
|
25
|
+
from airbyte_cdk.logger import PRINT_BUFFER, init_logger
|
26
26
|
from airbyte_cdk.models import (
|
27
27
|
AirbyteConnectionStatus,
|
28
28
|
AirbyteMessage,
|
@@ -337,11 +337,11 @@ def launch(source: Source, args: List[str]) -> None:
|
|
337
337
|
parsed_args = source_entrypoint.parse_args(args)
|
338
338
|
# temporarily removes the PrintBuffer because we're seeing weird print behavior for concurrent syncs
|
339
339
|
# Refer to: https://github.com/airbytehq/oncall/issues/6235
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
340
|
+
with PRINT_BUFFER:
|
341
|
+
for message in source_entrypoint.run(parsed_args):
|
342
|
+
# simply printing is creating issues for concurrent CDK as Python uses two different instructions to print: one for the message and
|
343
|
+
# the other for the line break. Adding `\n` to the message ensures that both are printed at the same time
|
344
|
+
print(f"{message}\n", end="")
|
345
345
|
|
346
346
|
|
347
347
|
def _init_internal_request_filter() -> None:
|
airbyte_cdk/logger.py
CHANGED
@@ -16,8 +16,11 @@ from airbyte_cdk.models import (
|
|
16
16
|
Level,
|
17
17
|
Type,
|
18
18
|
)
|
19
|
+
from airbyte_cdk.utils import PrintBuffer
|
19
20
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
20
21
|
|
22
|
+
PRINT_BUFFER = PrintBuffer(flush_interval=0.1)
|
23
|
+
|
21
24
|
LOGGING_CONFIG = {
|
22
25
|
"version": 1,
|
23
26
|
"disable_existing_loggers": False,
|
@@ -27,7 +30,7 @@ LOGGING_CONFIG = {
|
|
27
30
|
"handlers": {
|
28
31
|
"console": {
|
29
32
|
"class": "logging.StreamHandler",
|
30
|
-
"stream":
|
33
|
+
"stream": PRINT_BUFFER,
|
31
34
|
"formatter": "airbyte",
|
32
35
|
},
|
33
36
|
},
|
@@ -40,6 +40,8 @@ properties:
|
|
40
40
|
"$ref": "#/definitions/Spec"
|
41
41
|
concurrency_level:
|
42
42
|
"$ref": "#/definitions/ConcurrencyLevel"
|
43
|
+
api_budget:
|
44
|
+
"$ref": "#/definitions/HTTPAPIBudget"
|
43
45
|
metadata:
|
44
46
|
type: object
|
45
47
|
description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
|
@@ -794,7 +796,7 @@ definitions:
|
|
794
796
|
description: This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)
|
795
797
|
type: object
|
796
798
|
required:
|
797
|
-
|
799
|
+
- target
|
798
800
|
properties:
|
799
801
|
target:
|
800
802
|
title: Target
|
@@ -1365,6 +1367,170 @@ definitions:
|
|
1365
1367
|
$parameters:
|
1366
1368
|
type: object
|
1367
1369
|
additional_properties: true
|
1370
|
+
HTTPAPIBudget:
|
1371
|
+
title: HTTP API Budget
|
1372
|
+
description: >
|
1373
|
+
Defines how many requests can be made to the API in a given time frame. `HTTPAPIBudget` extracts the remaining
|
1374
|
+
call count and the reset time from HTTP response headers using the header names provided by
|
1375
|
+
`ratelimit_remaining_header` and `ratelimit_reset_header`. Only requests using `HttpRequester`
|
1376
|
+
are rate-limited; custom components that bypass `HttpRequester` are not covered by this budget.
|
1377
|
+
type: object
|
1378
|
+
required:
|
1379
|
+
- type
|
1380
|
+
- policies
|
1381
|
+
properties:
|
1382
|
+
type:
|
1383
|
+
type: string
|
1384
|
+
enum: [HTTPAPIBudget]
|
1385
|
+
policies:
|
1386
|
+
title: Policies
|
1387
|
+
description: List of call rate policies that define how many calls are allowed.
|
1388
|
+
type: array
|
1389
|
+
items:
|
1390
|
+
anyOf:
|
1391
|
+
- "$ref": "#/definitions/FixedWindowCallRatePolicy"
|
1392
|
+
- "$ref": "#/definitions/MovingWindowCallRatePolicy"
|
1393
|
+
- "$ref": "#/definitions/UnlimitedCallRatePolicy"
|
1394
|
+
ratelimit_reset_header:
|
1395
|
+
title: Rate Limit Reset Header
|
1396
|
+
description: The HTTP response header name that indicates when the rate limit resets.
|
1397
|
+
type: string
|
1398
|
+
default: "ratelimit-reset"
|
1399
|
+
ratelimit_remaining_header:
|
1400
|
+
title: Rate Limit Remaining Header
|
1401
|
+
description: The HTTP response header name that indicates the number of remaining allowed calls.
|
1402
|
+
type: string
|
1403
|
+
default: "ratelimit-remaining"
|
1404
|
+
status_codes_for_ratelimit_hit:
|
1405
|
+
title: Status Codes for Rate Limit Hit
|
1406
|
+
description: List of HTTP status codes that indicate a rate limit has been hit.
|
1407
|
+
type: array
|
1408
|
+
items:
|
1409
|
+
type: integer
|
1410
|
+
default: [429]
|
1411
|
+
additionalProperties: true
|
1412
|
+
FixedWindowCallRatePolicy:
|
1413
|
+
title: Fixed Window Call Rate Policy
|
1414
|
+
description: A policy that allows a fixed number of calls within a specific time window.
|
1415
|
+
type: object
|
1416
|
+
required:
|
1417
|
+
- type
|
1418
|
+
- period
|
1419
|
+
- call_limit
|
1420
|
+
- matchers
|
1421
|
+
properties:
|
1422
|
+
type:
|
1423
|
+
type: string
|
1424
|
+
enum: [FixedWindowCallRatePolicy]
|
1425
|
+
period:
|
1426
|
+
title: Period
|
1427
|
+
description: The time interval for the rate limit window.
|
1428
|
+
type: string
|
1429
|
+
call_limit:
|
1430
|
+
title: Call Limit
|
1431
|
+
description: The maximum number of calls allowed within the period.
|
1432
|
+
type: integer
|
1433
|
+
matchers:
|
1434
|
+
title: Matchers
|
1435
|
+
description: List of matchers that define which requests this policy applies to.
|
1436
|
+
type: array
|
1437
|
+
items:
|
1438
|
+
"$ref": "#/definitions/HttpRequestRegexMatcher"
|
1439
|
+
additionalProperties: true
|
1440
|
+
MovingWindowCallRatePolicy:
|
1441
|
+
title: Moving Window Call Rate Policy
|
1442
|
+
description: A policy that allows a fixed number of calls within a moving time window.
|
1443
|
+
type: object
|
1444
|
+
required:
|
1445
|
+
- type
|
1446
|
+
- rates
|
1447
|
+
- matchers
|
1448
|
+
properties:
|
1449
|
+
type:
|
1450
|
+
type: string
|
1451
|
+
enum: [MovingWindowCallRatePolicy]
|
1452
|
+
rates:
|
1453
|
+
title: Rates
|
1454
|
+
description: List of rates that define the call limits for different time intervals.
|
1455
|
+
type: array
|
1456
|
+
items:
|
1457
|
+
"$ref": "#/definitions/Rate"
|
1458
|
+
matchers:
|
1459
|
+
title: Matchers
|
1460
|
+
description: List of matchers that define which requests this policy applies to.
|
1461
|
+
type: array
|
1462
|
+
items:
|
1463
|
+
"$ref": "#/definitions/HttpRequestRegexMatcher"
|
1464
|
+
additionalProperties: true
|
1465
|
+
UnlimitedCallRatePolicy:
|
1466
|
+
title: Unlimited Call Rate Policy
|
1467
|
+
description: A policy that allows unlimited calls for specific requests.
|
1468
|
+
type: object
|
1469
|
+
required:
|
1470
|
+
- type
|
1471
|
+
- matchers
|
1472
|
+
properties:
|
1473
|
+
type:
|
1474
|
+
type: string
|
1475
|
+
enum: [UnlimitedCallRatePolicy]
|
1476
|
+
matchers:
|
1477
|
+
title: Matchers
|
1478
|
+
description: List of matchers that define which requests this policy applies to.
|
1479
|
+
type: array
|
1480
|
+
items:
|
1481
|
+
"$ref": "#/definitions/HttpRequestRegexMatcher"
|
1482
|
+
additionalProperties: true
|
1483
|
+
Rate:
|
1484
|
+
title: Rate
|
1485
|
+
description: Defines a rate limit with a specific number of calls allowed within a time interval.
|
1486
|
+
type: object
|
1487
|
+
required:
|
1488
|
+
- limit
|
1489
|
+
- interval
|
1490
|
+
properties:
|
1491
|
+
limit:
|
1492
|
+
title: Limit
|
1493
|
+
description: The maximum number of calls allowed within the interval.
|
1494
|
+
type: integer
|
1495
|
+
interval:
|
1496
|
+
title: Interval
|
1497
|
+
description: The time interval for the rate limit.
|
1498
|
+
type: string
|
1499
|
+
examples:
|
1500
|
+
- "PT1H"
|
1501
|
+
- "P1D"
|
1502
|
+
additionalProperties: true
|
1503
|
+
HttpRequestRegexMatcher:
|
1504
|
+
title: HTTP Request Matcher
|
1505
|
+
description: >
|
1506
|
+
Matches HTTP requests based on method, base URL, URL path pattern, query parameters, and headers.
|
1507
|
+
Use `url_base` to specify the scheme and host (without trailing slash) and
|
1508
|
+
`url_path_pattern` to apply a regex to the request path.
|
1509
|
+
type: object
|
1510
|
+
properties:
|
1511
|
+
method:
|
1512
|
+
title: Method
|
1513
|
+
description: The HTTP method to match (e.g., GET, POST).
|
1514
|
+
type: string
|
1515
|
+
url_base:
|
1516
|
+
title: URL Base
|
1517
|
+
description: The base URL (scheme and host, e.g. "https://api.example.com") to match.
|
1518
|
+
type: string
|
1519
|
+
url_path_pattern:
|
1520
|
+
title: URL Path Pattern
|
1521
|
+
description: A regular expression pattern to match the URL path.
|
1522
|
+
type: string
|
1523
|
+
params:
|
1524
|
+
title: Parameters
|
1525
|
+
description: The query parameters to match.
|
1526
|
+
type: object
|
1527
|
+
additionalProperties: true
|
1528
|
+
headers:
|
1529
|
+
title: Headers
|
1530
|
+
description: The headers to match.
|
1531
|
+
type: object
|
1532
|
+
additionalProperties: true
|
1533
|
+
additionalProperties: true
|
1368
1534
|
DefaultErrorHandler:
|
1369
1535
|
title: Default Error Handler
|
1370
1536
|
description: Component defining how to handle errors. Default behavior includes only retrying server errors (HTTP 5XX) and too many requests (HTTP 429) with an exponential backoff.
|
@@ -41,6 +41,7 @@ class RecordSelector(HttpSelector):
|
|
41
41
|
_name: Union[InterpolatedString, str] = field(init=False, repr=False, default="")
|
42
42
|
record_filter: Optional[RecordFilter] = None
|
43
43
|
transformations: List[RecordTransformation] = field(default_factory=lambda: [])
|
44
|
+
transform_before_filtering: bool = False
|
44
45
|
|
45
46
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
46
47
|
self._parameters = parameters
|
@@ -104,9 +105,17 @@ class RecordSelector(HttpSelector):
|
|
104
105
|
Until we decide to move this logic away from the selector, we made this method public so that users like AsyncJobRetriever could
|
105
106
|
share the logic of doing transformations on a set of records.
|
106
107
|
"""
|
107
|
-
|
108
|
-
|
109
|
-
|
108
|
+
if self.transform_before_filtering:
|
109
|
+
transformed_data = self._transform(all_data, stream_state, stream_slice)
|
110
|
+
transformed_filtered_data = self._filter(
|
111
|
+
transformed_data, stream_state, stream_slice, next_page_token
|
112
|
+
)
|
113
|
+
else:
|
114
|
+
filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token)
|
115
|
+
transformed_filtered_data = self._transform(filtered_data, stream_state, stream_slice)
|
116
|
+
normalized_data = self._normalize_by_schema(
|
117
|
+
transformed_filtered_data, schema=records_schema
|
118
|
+
)
|
110
119
|
for data in normalized_data:
|
111
120
|
yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)
|
112
121
|
|
@@ -137,6 +137,10 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
137
137
|
self._source_config, config
|
138
138
|
)
|
139
139
|
|
140
|
+
api_budget_model = self._source_config.get("api_budget")
|
141
|
+
if api_budget_model:
|
142
|
+
self._constructor.set_api_budget(api_budget_model, config)
|
143
|
+
|
140
144
|
source_streams = [
|
141
145
|
self._constructor.create_component(
|
142
146
|
DeclarativeStreamModel,
|
@@ -642,6 +642,48 @@ class OAuthAuthenticator(BaseModel):
|
|
642
642
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
643
643
|
|
644
644
|
|
645
|
+
class Rate(BaseModel):
|
646
|
+
class Config:
|
647
|
+
extra = Extra.allow
|
648
|
+
|
649
|
+
limit: int = Field(
|
650
|
+
...,
|
651
|
+
description="The maximum number of calls allowed within the interval.",
|
652
|
+
title="Limit",
|
653
|
+
)
|
654
|
+
interval: str = Field(
|
655
|
+
...,
|
656
|
+
description="The time interval for the rate limit.",
|
657
|
+
examples=["PT1H", "P1D"],
|
658
|
+
title="Interval",
|
659
|
+
)
|
660
|
+
|
661
|
+
|
662
|
+
class HttpRequestRegexMatcher(BaseModel):
|
663
|
+
class Config:
|
664
|
+
extra = Extra.allow
|
665
|
+
|
666
|
+
method: Optional[str] = Field(
|
667
|
+
None, description="The HTTP method to match (e.g., GET, POST).", title="Method"
|
668
|
+
)
|
669
|
+
url_base: Optional[str] = Field(
|
670
|
+
None,
|
671
|
+
description='The base URL (scheme and host, e.g. "https://api.example.com") to match.',
|
672
|
+
title="URL Base",
|
673
|
+
)
|
674
|
+
url_path_pattern: Optional[str] = Field(
|
675
|
+
None,
|
676
|
+
description="A regular expression pattern to match the URL path.",
|
677
|
+
title="URL Path Pattern",
|
678
|
+
)
|
679
|
+
params: Optional[Dict[str, Any]] = Field(
|
680
|
+
None, description="The query parameters to match.", title="Parameters"
|
681
|
+
)
|
682
|
+
headers: Optional[Dict[str, Any]] = Field(
|
683
|
+
None, description="The headers to match.", title="Headers"
|
684
|
+
)
|
685
|
+
|
686
|
+
|
645
687
|
class DpathExtractor(BaseModel):
|
646
688
|
type: Literal["DpathExtractor"]
|
647
689
|
field_path: List[str] = Field(
|
@@ -1565,6 +1607,55 @@ class DatetimeBasedCursor(BaseModel):
|
|
1565
1607
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1566
1608
|
|
1567
1609
|
|
1610
|
+
class FixedWindowCallRatePolicy(BaseModel):
|
1611
|
+
class Config:
|
1612
|
+
extra = Extra.allow
|
1613
|
+
|
1614
|
+
type: Literal["FixedWindowCallRatePolicy"]
|
1615
|
+
period: str = Field(
|
1616
|
+
..., description="The time interval for the rate limit window.", title="Period"
|
1617
|
+
)
|
1618
|
+
call_limit: int = Field(
|
1619
|
+
...,
|
1620
|
+
description="The maximum number of calls allowed within the period.",
|
1621
|
+
title="Call Limit",
|
1622
|
+
)
|
1623
|
+
matchers: List[HttpRequestRegexMatcher] = Field(
|
1624
|
+
...,
|
1625
|
+
description="List of matchers that define which requests this policy applies to.",
|
1626
|
+
title="Matchers",
|
1627
|
+
)
|
1628
|
+
|
1629
|
+
|
1630
|
+
class MovingWindowCallRatePolicy(BaseModel):
|
1631
|
+
class Config:
|
1632
|
+
extra = Extra.allow
|
1633
|
+
|
1634
|
+
type: Literal["MovingWindowCallRatePolicy"]
|
1635
|
+
rates: List[Rate] = Field(
|
1636
|
+
...,
|
1637
|
+
description="List of rates that define the call limits for different time intervals.",
|
1638
|
+
title="Rates",
|
1639
|
+
)
|
1640
|
+
matchers: List[HttpRequestRegexMatcher] = Field(
|
1641
|
+
...,
|
1642
|
+
description="List of matchers that define which requests this policy applies to.",
|
1643
|
+
title="Matchers",
|
1644
|
+
)
|
1645
|
+
|
1646
|
+
|
1647
|
+
class UnlimitedCallRatePolicy(BaseModel):
|
1648
|
+
class Config:
|
1649
|
+
extra = Extra.allow
|
1650
|
+
|
1651
|
+
type: Literal["UnlimitedCallRatePolicy"]
|
1652
|
+
matchers: List[HttpRequestRegexMatcher] = Field(
|
1653
|
+
...,
|
1654
|
+
description="List of matchers that define which requests this policy applies to.",
|
1655
|
+
title="Matchers",
|
1656
|
+
)
|
1657
|
+
|
1658
|
+
|
1568
1659
|
class DefaultErrorHandler(BaseModel):
|
1569
1660
|
type: Literal["DefaultErrorHandler"]
|
1570
1661
|
backoff_strategies: Optional[
|
@@ -1696,6 +1787,39 @@ class CompositeErrorHandler(BaseModel):
|
|
1696
1787
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1697
1788
|
|
1698
1789
|
|
1790
|
+
class HTTPAPIBudget(BaseModel):
|
1791
|
+
class Config:
|
1792
|
+
extra = Extra.allow
|
1793
|
+
|
1794
|
+
type: Literal["HTTPAPIBudget"]
|
1795
|
+
policies: List[
|
1796
|
+
Union[
|
1797
|
+
FixedWindowCallRatePolicy,
|
1798
|
+
MovingWindowCallRatePolicy,
|
1799
|
+
UnlimitedCallRatePolicy,
|
1800
|
+
]
|
1801
|
+
] = Field(
|
1802
|
+
...,
|
1803
|
+
description="List of call rate policies that define how many calls are allowed.",
|
1804
|
+
title="Policies",
|
1805
|
+
)
|
1806
|
+
ratelimit_reset_header: Optional[str] = Field(
|
1807
|
+
"ratelimit-reset",
|
1808
|
+
description="The HTTP response header name that indicates when the rate limit resets.",
|
1809
|
+
title="Rate Limit Reset Header",
|
1810
|
+
)
|
1811
|
+
ratelimit_remaining_header: Optional[str] = Field(
|
1812
|
+
"ratelimit-remaining",
|
1813
|
+
description="The HTTP response header name that indicates the number of remaining allowed calls.",
|
1814
|
+
title="Rate Limit Remaining Header",
|
1815
|
+
)
|
1816
|
+
status_codes_for_ratelimit_hit: Optional[List[int]] = Field(
|
1817
|
+
[429],
|
1818
|
+
description="List of HTTP status codes that indicate a rate limit has been hit.",
|
1819
|
+
title="Status Codes for Rate Limit Hit",
|
1820
|
+
)
|
1821
|
+
|
1822
|
+
|
1699
1823
|
class ZipfileDecoder(BaseModel):
|
1700
1824
|
class Config:
|
1701
1825
|
extra = Extra.allow
|
@@ -1724,6 +1848,7 @@ class DeclarativeSource1(BaseModel):
|
|
1724
1848
|
definitions: Optional[Dict[str, Any]] = None
|
1725
1849
|
spec: Optional[Spec] = None
|
1726
1850
|
concurrency_level: Optional[ConcurrencyLevel] = None
|
1851
|
+
api_budget: Optional[HTTPAPIBudget] = None
|
1727
1852
|
metadata: Optional[Dict[str, Any]] = Field(
|
1728
1853
|
None,
|
1729
1854
|
description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
|
@@ -1750,6 +1875,7 @@ class DeclarativeSource2(BaseModel):
|
|
1750
1875
|
definitions: Optional[Dict[str, Any]] = None
|
1751
1876
|
spec: Optional[Spec] = None
|
1752
1877
|
concurrency_level: Optional[ConcurrencyLevel] = None
|
1878
|
+
api_budget: Optional[HTTPAPIBudget] = None
|
1753
1879
|
metadata: Optional[Dict[str, Any]] = Field(
|
1754
1880
|
None,
|
1755
1881
|
description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
|
@@ -221,18 +221,27 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
221
221
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
222
222
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
223
223
|
)
|
224
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
225
|
+
FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
|
226
|
+
)
|
224
227
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
225
228
|
FlattenFields as FlattenFieldsModel,
|
226
229
|
)
|
227
230
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
228
231
|
GzipDecoder as GzipDecoderModel,
|
229
232
|
)
|
233
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
234
|
+
HTTPAPIBudget as HTTPAPIBudgetModel,
|
235
|
+
)
|
230
236
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
231
237
|
HttpComponentsResolver as HttpComponentsResolverModel,
|
232
238
|
)
|
233
239
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
234
240
|
HttpRequester as HttpRequesterModel,
|
235
241
|
)
|
242
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
243
|
+
HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
|
244
|
+
)
|
236
245
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
237
246
|
HttpResponseFilter as HttpResponseFilterModel,
|
238
247
|
)
|
@@ -281,6 +290,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
281
290
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
282
291
|
MinMaxDatetime as MinMaxDatetimeModel,
|
283
292
|
)
|
293
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
294
|
+
MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
|
295
|
+
)
|
284
296
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
285
297
|
NoAuth as NoAuthModel,
|
286
298
|
)
|
@@ -299,6 +311,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
299
311
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
300
312
|
ParentStreamConfig as ParentStreamConfigModel,
|
301
313
|
)
|
314
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
315
|
+
Rate as RateModel,
|
316
|
+
)
|
302
317
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
303
318
|
RecordFilter as RecordFilterModel,
|
304
319
|
)
|
@@ -342,6 +357,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
342
357
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
343
358
|
TypesMap as TypesMapModel,
|
344
359
|
)
|
360
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
361
|
+
UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
|
362
|
+
)
|
345
363
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
|
346
364
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
347
365
|
WaitTimeFromHeader as WaitTimeFromHeaderModel,
|
@@ -455,6 +473,15 @@ from airbyte_cdk.sources.message import (
|
|
455
473
|
MessageRepository,
|
456
474
|
NoopMessageRepository,
|
457
475
|
)
|
476
|
+
from airbyte_cdk.sources.streams.call_rate import (
|
477
|
+
APIBudget,
|
478
|
+
FixedWindowCallRatePolicy,
|
479
|
+
HttpAPIBudget,
|
480
|
+
HttpRequestRegexMatcher,
|
481
|
+
MovingWindowCallRatePolicy,
|
482
|
+
Rate,
|
483
|
+
UnlimitedCallRatePolicy,
|
484
|
+
)
|
458
485
|
from airbyte_cdk.sources.streams.concurrent.clamping import (
|
459
486
|
ClampingEndProvider,
|
460
487
|
ClampingStrategy,
|
@@ -506,6 +533,7 @@ class ModelToComponentFactory:
|
|
506
533
|
self._evaluate_log_level(emit_connector_builder_messages)
|
507
534
|
)
|
508
535
|
self._connector_state_manager = connector_state_manager or ConnectorStateManager()
|
536
|
+
self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
|
509
537
|
|
510
538
|
def _init_mappings(self) -> None:
|
511
539
|
self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
|
@@ -590,6 +618,12 @@ class ModelToComponentFactory:
|
|
590
618
|
StreamConfigModel: self.create_stream_config,
|
591
619
|
ComponentMappingDefinitionModel: self.create_components_mapping_definition,
|
592
620
|
ZipfileDecoderModel: self.create_zipfile_decoder,
|
621
|
+
HTTPAPIBudgetModel: self.create_http_api_budget,
|
622
|
+
FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
|
623
|
+
MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
|
624
|
+
UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
|
625
|
+
RateModel: self.create_rate,
|
626
|
+
HttpRequestRegexMatcherModel: self.create_http_request_matcher,
|
593
627
|
}
|
594
628
|
|
595
629
|
# Needed for the case where we need to perform a second parse on the fields of a custom component
|
@@ -1902,6 +1936,8 @@ class ModelToComponentFactory:
|
|
1902
1936
|
)
|
1903
1937
|
)
|
1904
1938
|
|
1939
|
+
api_budget = self._api_budget
|
1940
|
+
|
1905
1941
|
request_options_provider = InterpolatedRequestOptionsProvider(
|
1906
1942
|
request_body_data=model.request_body_data,
|
1907
1943
|
request_body_json=model.request_body_json,
|
@@ -1922,6 +1958,7 @@ class ModelToComponentFactory:
|
|
1922
1958
|
path=model.path,
|
1923
1959
|
authenticator=authenticator,
|
1924
1960
|
error_handler=error_handler,
|
1961
|
+
api_budget=api_budget,
|
1925
1962
|
http_method=HttpMethod[model.http_method.value],
|
1926
1963
|
request_options_provider=request_options_provider,
|
1927
1964
|
config=config,
|
@@ -2378,6 +2415,8 @@ class ModelToComponentFactory:
|
|
2378
2415
|
if model.record_filter
|
2379
2416
|
else None
|
2380
2417
|
)
|
2418
|
+
|
2419
|
+
transform_before_filtering = False
|
2381
2420
|
if client_side_incremental_sync:
|
2382
2421
|
record_filter = ClientSideIncrementalRecordFilterDecorator(
|
2383
2422
|
config=config,
|
@@ -2387,6 +2426,8 @@ class ModelToComponentFactory:
|
|
2387
2426
|
else None,
|
2388
2427
|
**client_side_incremental_sync,
|
2389
2428
|
)
|
2429
|
+
transform_before_filtering = True
|
2430
|
+
|
2390
2431
|
schema_normalization = (
|
2391
2432
|
TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
|
2392
2433
|
if isinstance(model.schema_normalization, SchemaNormalizationModel)
|
@@ -2401,6 +2442,7 @@ class ModelToComponentFactory:
|
|
2401
2442
|
transformations=transformations or [],
|
2402
2443
|
schema_normalization=schema_normalization,
|
2403
2444
|
parameters=model.parameters or {},
|
2445
|
+
transform_before_filtering=transform_before_filtering,
|
2404
2446
|
)
|
2405
2447
|
|
2406
2448
|
@staticmethod
|
@@ -2921,3 +2963,84 @@ class ModelToComponentFactory:
|
|
2921
2963
|
return isinstance(parser.inner_parser, JsonParser)
|
2922
2964
|
else:
|
2923
2965
|
return False
|
2966
|
+
|
2967
|
+
def create_http_api_budget(
|
2968
|
+
self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
|
2969
|
+
) -> HttpAPIBudget:
|
2970
|
+
policies = [
|
2971
|
+
self._create_component_from_model(model=policy, config=config)
|
2972
|
+
for policy in model.policies
|
2973
|
+
]
|
2974
|
+
|
2975
|
+
return HttpAPIBudget(
|
2976
|
+
policies=policies,
|
2977
|
+
ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
|
2978
|
+
ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
|
2979
|
+
status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
|
2980
|
+
)
|
2981
|
+
|
2982
|
+
def create_fixed_window_call_rate_policy(
|
2983
|
+
self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
|
2984
|
+
) -> FixedWindowCallRatePolicy:
|
2985
|
+
matchers = [
|
2986
|
+
self._create_component_from_model(model=matcher, config=config)
|
2987
|
+
for matcher in model.matchers
|
2988
|
+
]
|
2989
|
+
|
2990
|
+
# Set the initial reset timestamp to 10 days from now.
|
2991
|
+
# This value will be updated by the first request.
|
2992
|
+
return FixedWindowCallRatePolicy(
|
2993
|
+
next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
|
2994
|
+
period=parse_duration(model.period),
|
2995
|
+
call_limit=model.call_limit,
|
2996
|
+
matchers=matchers,
|
2997
|
+
)
|
2998
|
+
|
2999
|
+
def create_moving_window_call_rate_policy(
|
3000
|
+
self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
|
3001
|
+
) -> MovingWindowCallRatePolicy:
|
3002
|
+
rates = [
|
3003
|
+
self._create_component_from_model(model=rate, config=config) for rate in model.rates
|
3004
|
+
]
|
3005
|
+
matchers = [
|
3006
|
+
self._create_component_from_model(model=matcher, config=config)
|
3007
|
+
for matcher in model.matchers
|
3008
|
+
]
|
3009
|
+
return MovingWindowCallRatePolicy(
|
3010
|
+
rates=rates,
|
3011
|
+
matchers=matchers,
|
3012
|
+
)
|
3013
|
+
|
3014
|
+
def create_unlimited_call_rate_policy(
|
3015
|
+
self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
|
3016
|
+
) -> UnlimitedCallRatePolicy:
|
3017
|
+
matchers = [
|
3018
|
+
self._create_component_from_model(model=matcher, config=config)
|
3019
|
+
for matcher in model.matchers
|
3020
|
+
]
|
3021
|
+
|
3022
|
+
return UnlimitedCallRatePolicy(
|
3023
|
+
matchers=matchers,
|
3024
|
+
)
|
3025
|
+
|
3026
|
+
def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
|
3027
|
+
return Rate(
|
3028
|
+
limit=model.limit,
|
3029
|
+
interval=parse_duration(model.interval),
|
3030
|
+
)
|
3031
|
+
|
3032
|
+
def create_http_request_matcher(
|
3033
|
+
self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
|
3034
|
+
) -> HttpRequestRegexMatcher:
|
3035
|
+
return HttpRequestRegexMatcher(
|
3036
|
+
method=model.method,
|
3037
|
+
url_base=model.url_base,
|
3038
|
+
url_path_pattern=model.url_path_pattern,
|
3039
|
+
params=model.params,
|
3040
|
+
headers=model.headers,
|
3041
|
+
)
|
3042
|
+
|
3043
|
+
def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None:
|
3044
|
+
self._api_budget = self.create_component(
|
3045
|
+
model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
|
3046
|
+
)
|
@@ -22,6 +22,7 @@ from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_req
|
|
22
22
|
)
|
23
23
|
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester
|
24
24
|
from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
|
25
|
+
from airbyte_cdk.sources.streams.call_rate import APIBudget
|
25
26
|
from airbyte_cdk.sources.streams.http import HttpClient
|
26
27
|
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
|
27
28
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
@@ -55,6 +56,7 @@ class HttpRequester(Requester):
|
|
55
56
|
http_method: Union[str, HttpMethod] = HttpMethod.GET
|
56
57
|
request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None
|
57
58
|
error_handler: Optional[ErrorHandler] = None
|
59
|
+
api_budget: Optional[APIBudget] = None
|
58
60
|
disable_retries: bool = False
|
59
61
|
message_repository: MessageRepository = NoopMessageRepository()
|
60
62
|
use_cache: bool = False
|
@@ -91,6 +93,7 @@ class HttpRequester(Requester):
|
|
91
93
|
name=self.name,
|
92
94
|
logger=self.logger,
|
93
95
|
error_handler=self.error_handler,
|
96
|
+
api_budget=self.api_budget,
|
94
97
|
authenticator=self._authenticator,
|
95
98
|
use_cache=self.use_cache,
|
96
99
|
backoff_strategy=backoff_strategies,
|
@@ -6,6 +6,7 @@ import abc
|
|
6
6
|
import dataclasses
|
7
7
|
import datetime
|
8
8
|
import logging
|
9
|
+
import re
|
9
10
|
import time
|
10
11
|
from datetime import timedelta
|
11
12
|
from threading import RLock
|
@@ -25,6 +26,7 @@ else:
|
|
25
26
|
MIXIN_BASE = object
|
26
27
|
|
27
28
|
logger = logging.getLogger("airbyte")
|
29
|
+
logging.getLogger("pyrate_limiter").setLevel(logging.WARNING)
|
28
30
|
|
29
31
|
|
30
32
|
@dataclasses.dataclass
|
@@ -98,7 +100,7 @@ class RequestMatcher(abc.ABC):
|
|
98
100
|
|
99
101
|
|
100
102
|
class HttpRequestMatcher(RequestMatcher):
|
101
|
-
"""Simple implementation of RequestMatcher for
|
103
|
+
"""Simple implementation of RequestMatcher for HTTP requests using HttpRequestRegexMatcher under the hood."""
|
102
104
|
|
103
105
|
def __init__(
|
104
106
|
self,
|
@@ -109,32 +111,94 @@ class HttpRequestMatcher(RequestMatcher):
|
|
109
111
|
):
|
110
112
|
"""Constructor
|
111
113
|
|
112
|
-
:param method:
|
113
|
-
:param url:
|
114
|
-
:param params:
|
115
|
-
:param headers:
|
114
|
+
:param method: HTTP method (e.g., "GET", "POST").
|
115
|
+
:param url: Full URL to match.
|
116
|
+
:param params: Dictionary of query parameters to match.
|
117
|
+
:param headers: Dictionary of headers to match.
|
116
118
|
"""
|
117
|
-
|
118
|
-
|
119
|
+
# Parse the URL to extract the base and path
|
120
|
+
if url:
|
121
|
+
parsed_url = parse.urlsplit(url)
|
122
|
+
url_base = f"{parsed_url.scheme}://{parsed_url.netloc}"
|
123
|
+
url_path = parsed_url.path if parsed_url.path != "/" else None
|
124
|
+
else:
|
125
|
+
url_base = None
|
126
|
+
url_path = None
|
127
|
+
|
128
|
+
# Use HttpRequestRegexMatcher under the hood
|
129
|
+
self._regex_matcher = HttpRequestRegexMatcher(
|
130
|
+
method=method,
|
131
|
+
url_base=url_base,
|
132
|
+
url_path_pattern=re.escape(url_path) if url_path else None,
|
133
|
+
params=params,
|
134
|
+
headers=headers,
|
135
|
+
)
|
136
|
+
|
137
|
+
def __call__(self, request: Any) -> bool:
|
138
|
+
"""
|
139
|
+
:param request: A requests.Request or requests.PreparedRequest instance.
|
140
|
+
:return: True if the request matches all provided criteria; False otherwise.
|
141
|
+
"""
|
142
|
+
return self._regex_matcher(request)
|
143
|
+
|
144
|
+
def __str__(self) -> str:
|
145
|
+
return (
|
146
|
+
f"HttpRequestMatcher(method={self._regex_matcher._method}, "
|
147
|
+
f"url={self._regex_matcher._url_base}{self._regex_matcher._url_path_pattern.pattern if self._regex_matcher._url_path_pattern else ''}, "
|
148
|
+
f"params={self._regex_matcher._params}, headers={self._regex_matcher._headers})"
|
149
|
+
)
|
150
|
+
|
151
|
+
|
152
|
+
class HttpRequestRegexMatcher(RequestMatcher):
|
153
|
+
"""
|
154
|
+
Extended RequestMatcher for HTTP requests that supports matching on:
|
155
|
+
- HTTP method (case-insensitive)
|
156
|
+
- URL base (scheme + netloc) optionally
|
157
|
+
- URL path pattern (a regex applied to the path portion of the URL)
|
158
|
+
- Query parameters (must be present)
|
159
|
+
- Headers (header names compared case-insensitively)
|
160
|
+
"""
|
161
|
+
|
162
|
+
def __init__(
|
163
|
+
self,
|
164
|
+
method: Optional[str] = None,
|
165
|
+
url_base: Optional[str] = None,
|
166
|
+
url_path_pattern: Optional[str] = None,
|
167
|
+
params: Optional[Mapping[str, Any]] = None,
|
168
|
+
headers: Optional[Mapping[str, Any]] = None,
|
169
|
+
):
|
170
|
+
"""
|
171
|
+
:param method: HTTP method (e.g. "GET", "POST"); compared case-insensitively.
|
172
|
+
:param url_base: Base URL (scheme://host) that must match.
|
173
|
+
:param url_path_pattern: A regex pattern that will be applied to the path portion of the URL.
|
174
|
+
:param params: Dictionary of query parameters that must be present in the request.
|
175
|
+
:param headers: Dictionary of headers that must be present (header keys are compared case-insensitively).
|
176
|
+
"""
|
177
|
+
self._method = method.upper() if method else None
|
178
|
+
|
179
|
+
# Normalize the url_base if provided: remove trailing slash.
|
180
|
+
self._url_base = url_base.rstrip("/") if url_base else None
|
181
|
+
|
182
|
+
# Compile the URL path pattern if provided.
|
183
|
+
self._url_path_pattern = re.compile(url_path_pattern) if url_path_pattern else None
|
184
|
+
|
185
|
+
# Normalize query parameters to strings.
|
119
186
|
self._params = {str(k): str(v) for k, v in (params or {}).items()}
|
120
|
-
|
187
|
+
|
188
|
+
# Normalize header keys to lowercase.
|
189
|
+
self._headers = {str(k).lower(): str(v) for k, v in (headers or {}).items()}
|
121
190
|
|
122
191
|
@staticmethod
|
123
192
|
def _match_dict(obj: Mapping[str, Any], pattern: Mapping[str, Any]) -> bool:
|
124
|
-
"""Check that
|
125
|
-
|
126
|
-
:param obj:
|
127
|
-
:param pattern:
|
128
|
-
:return:
|
129
|
-
"""
|
193
|
+
"""Check that every key/value in the pattern exists in the object."""
|
130
194
|
return pattern.items() <= obj.items()
|
131
195
|
|
132
196
|
def __call__(self, request: Any) -> bool:
|
133
197
|
"""
|
134
|
-
|
135
|
-
:
|
136
|
-
:return: True if matches the provided request object, False - otherwise
|
198
|
+
:param request: A requests.Request or requests.PreparedRequest instance.
|
199
|
+
:return: True if the request matches all provided criteria; False otherwise.
|
137
200
|
"""
|
201
|
+
# Prepare the request (if needed) and extract the URL details.
|
138
202
|
if isinstance(request, requests.Request):
|
139
203
|
prepared_request = request.prepare()
|
140
204
|
elif isinstance(request, requests.PreparedRequest):
|
@@ -142,23 +206,49 @@ class HttpRequestMatcher(RequestMatcher):
|
|
142
206
|
else:
|
143
207
|
return False
|
144
208
|
|
209
|
+
# Check HTTP method.
|
145
210
|
if self._method is not None:
|
146
211
|
if prepared_request.method != self._method:
|
147
212
|
return False
|
148
|
-
|
149
|
-
|
150
|
-
|
213
|
+
|
214
|
+
# Parse the URL.
|
215
|
+
parsed_url = parse.urlsplit(prepared_request.url)
|
216
|
+
# Reconstruct the base: scheme://netloc
|
217
|
+
request_url_base = f"{str(parsed_url.scheme)}://{str(parsed_url.netloc)}"
|
218
|
+
# The path (without query parameters)
|
219
|
+
request_path = str(parsed_url.path).rstrip("/")
|
220
|
+
|
221
|
+
# If a base URL is provided, check that it matches.
|
222
|
+
if self._url_base is not None:
|
223
|
+
if request_url_base != self._url_base:
|
224
|
+
return False
|
225
|
+
|
226
|
+
# If a URL path pattern is provided, ensure the path matches the regex.
|
227
|
+
if self._url_path_pattern is not None:
|
228
|
+
if not self._url_path_pattern.search(request_path):
|
151
229
|
return False
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
230
|
+
|
231
|
+
# Check query parameters.
|
232
|
+
if self._params:
|
233
|
+
query_params = dict(parse.parse_qsl(str(parsed_url.query)))
|
234
|
+
if not self._match_dict(query_params, self._params):
|
156
235
|
return False
|
157
|
-
|
158
|
-
|
236
|
+
|
237
|
+
# Check headers (normalize keys to lower-case).
|
238
|
+
if self._headers:
|
239
|
+
req_headers = {k.lower(): v for k, v in prepared_request.headers.items()}
|
240
|
+
if not self._match_dict(req_headers, self._headers):
|
159
241
|
return False
|
242
|
+
|
160
243
|
return True
|
161
244
|
|
245
|
+
def __str__(self) -> str:
|
246
|
+
regex = self._url_path_pattern.pattern if self._url_path_pattern else None
|
247
|
+
return (
|
248
|
+
f"HttpRequestRegexMatcher(method={self._method}, url_base={self._url_base}, "
|
249
|
+
f"url_path_pattern={regex}, params={self._params}, headers={self._headers})"
|
250
|
+
)
|
251
|
+
|
162
252
|
|
163
253
|
class BaseCallRatePolicy(AbstractCallRatePolicy, abc.ABC):
|
164
254
|
def __init__(self, matchers: list[RequestMatcher]):
|
@@ -257,6 +347,14 @@ class FixedWindowCallRatePolicy(BaseCallRatePolicy):
|
|
257
347
|
|
258
348
|
self._calls_num += weight
|
259
349
|
|
350
|
+
def __str__(self) -> str:
|
351
|
+
matcher_str = ", ".join(f"{matcher}" for matcher in self._matchers)
|
352
|
+
return (
|
353
|
+
f"FixedWindowCallRatePolicy(call_limit={self._call_limit}, period={self._offset}, "
|
354
|
+
f"calls_used={self._calls_num}, next_reset={self._next_reset_ts}, "
|
355
|
+
f"matchers=[{matcher_str}])"
|
356
|
+
)
|
357
|
+
|
260
358
|
def update(
|
261
359
|
self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]
|
262
360
|
) -> None:
|
@@ -363,6 +461,19 @@ class MovingWindowCallRatePolicy(BaseCallRatePolicy):
|
|
363
461
|
# if available_calls is not None and call_reset_ts is not None:
|
364
462
|
# ts = call_reset_ts.timestamp()
|
365
463
|
|
464
|
+
def __str__(self) -> str:
|
465
|
+
"""Return a human-friendly description of the moving window rate policy for logging purposes."""
|
466
|
+
rates_info = ", ".join(
|
467
|
+
f"{rate.limit} per {timedelta(milliseconds=rate.interval)}"
|
468
|
+
for rate in self._bucket.rates
|
469
|
+
)
|
470
|
+
current_bucket_count = self._bucket.count()
|
471
|
+
matcher_str = ", ".join(f"{matcher}" for matcher in self._matchers)
|
472
|
+
return (
|
473
|
+
f"MovingWindowCallRatePolicy(rates=[{rates_info}], current_bucket_count={current_bucket_count}, "
|
474
|
+
f"matchers=[{matcher_str}])"
|
475
|
+
)
|
476
|
+
|
366
477
|
|
367
478
|
class AbstractAPIBudget(abc.ABC):
|
368
479
|
"""Interface to some API where a client allowed to have N calls per T interval.
|
@@ -415,6 +526,23 @@ class APIBudget(AbstractAPIBudget):
|
|
415
526
|
self._policies = policies
|
416
527
|
self._maximum_attempts_to_acquire = maximum_attempts_to_acquire
|
417
528
|
|
529
|
+
def _extract_endpoint(self, request: Any) -> str:
    """Return the URL of ``request`` when one can be read from it.

    :param request: Object to inspect; only requests.PreparedRequest and
        requests.Request instances yield a URL.
    :return: The request URL, or "unknown endpoint" when no non-empty URL was obtained.
    """
    url = None
    try:
        if isinstance(request, requests.PreparedRequest):
            # A prepared request exposes its URL directly.
            url = request.url
        elif isinstance(request, requests.Request):
            # An unprepared request must be prepared before its URL can be read.
            url = request.prepare().url
    except Exception as e:
        # Best effort only: a failure here is logged and the fallback is used.
        logger.debug(f"Error extracting endpoint: {e}")
    return url if url else "unknown endpoint"
|
545
|
+
|
418
546
|
def get_matching_policy(self, request: Any) -> Optional[AbstractCallRatePolicy]:
|
419
547
|
for policy in self._policies:
|
420
548
|
if policy.matches(request):
|
@@ -428,20 +556,24 @@ class APIBudget(AbstractAPIBudget):
|
|
428
556
|
Matchers will be called sequentially in the same order they were added.
|
429
557
|
The first matcher that returns True will
|
430
558
|
|
431
|
-
:param request:
|
432
|
-
:param block: when
|
433
|
-
:param timeout: if provided
|
434
|
-
:raises: CallRateLimitHit
|
559
|
+
:param request: the API request
|
560
|
+
:param block: when True (default) will block until a call credit is available
|
561
|
+
:param timeout: if provided, limits maximum waiting time; otherwise, waits indefinitely
|
562
|
+
:raises: CallRateLimitHit if the call credit cannot be acquired within the timeout
|
435
563
|
"""
|
436
564
|
|
437
565
|
policy = self.get_matching_policy(request)
|
566
|
+
endpoint = self._extract_endpoint(request)
|
438
567
|
if policy:
|
568
|
+
logger.debug(f"Acquiring call for endpoint {endpoint} using policy: {policy}")
|
439
569
|
self._do_acquire(request=request, policy=policy, block=block, timeout=timeout)
|
440
570
|
elif self._policies:
|
441
|
-
logger.
|
571
|
+
logger.debug(
|
572
|
+
f"No policies matched for endpoint {endpoint} (request: {request}). Allowing call by default."
|
573
|
+
)
|
442
574
|
|
443
575
|
def update_from_response(self, request: Any, response: Any) -> None:
|
444
|
-
"""Update budget information based on
|
576
|
+
"""Update budget information based on the API response.
|
445
577
|
|
446
578
|
:param request: the initial request that triggered this response
|
447
579
|
:param response: response from the API
|
@@ -451,15 +583,17 @@ class APIBudget(AbstractAPIBudget):
|
|
451
583
|
def _do_acquire(
|
452
584
|
self, request: Any, policy: AbstractCallRatePolicy, block: bool, timeout: Optional[float]
|
453
585
|
) -> None:
|
454
|
-
"""Internal method to try to acquire a call credit
|
586
|
+
"""Internal method to try to acquire a call credit.
|
455
587
|
|
456
|
-
:param request:
|
457
|
-
:param policy:
|
458
|
-
:param block:
|
459
|
-
:param timeout:
|
588
|
+
:param request: the API request
|
589
|
+
:param policy: the matching rate-limiting policy
|
590
|
+
:param block: indicates whether to block until a call credit is available
|
591
|
+
:param timeout: maximum time to wait if blocking
|
592
|
+
:raises: CallRateLimitHit if unable to acquire a call credit
|
460
593
|
"""
|
461
594
|
last_exception = None
|
462
|
-
|
595
|
+
endpoint = self._extract_endpoint(request)
|
596
|
+
# sometimes we spend all budget before a second attempt, so we have a few more attempts
|
463
597
|
for attempt in range(1, self._maximum_attempts_to_acquire):
|
464
598
|
try:
|
465
599
|
policy.try_acquire(request, weight=1)
|
@@ -471,20 +605,24 @@ class APIBudget(AbstractAPIBudget):
|
|
471
605
|
time_to_wait = min(timedelta(seconds=timeout), exc.time_to_wait)
|
472
606
|
else:
|
473
607
|
time_to_wait = exc.time_to_wait
|
474
|
-
|
475
|
-
time_to_wait = max(
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
"reached call limit %s. going to sleep for %s", exc.rate, time_to_wait
|
608
|
+
# Ensure we never sleep for a negative duration.
|
609
|
+
time_to_wait = max(timedelta(0), time_to_wait)
|
610
|
+
logger.debug(
|
611
|
+
f"Policy {policy} reached call limit for endpoint {endpoint} ({exc.rate}). "
|
612
|
+
f"Sleeping for {time_to_wait} on attempt {attempt}."
|
480
613
|
)
|
481
614
|
time.sleep(time_to_wait.total_seconds())
|
482
615
|
else:
|
616
|
+
logger.debug(
|
617
|
+
f"Policy {policy} reached call limit for endpoint {endpoint} ({exc.rate}) "
|
618
|
+
f"and blocking is disabled."
|
619
|
+
)
|
483
620
|
raise
|
484
621
|
|
485
622
|
if last_exception:
|
486
|
-
logger.
|
487
|
-
"
|
623
|
+
logger.debug(
|
624
|
+
f"Exhausted all {self._maximum_attempts_to_acquire} attempts to acquire a call for endpoint {endpoint} "
|
625
|
+
f"using policy: {policy}"
|
488
626
|
)
|
489
627
|
raise last_exception
|
490
628
|
|
@@ -496,7 +634,7 @@ class HttpAPIBudget(APIBudget):
|
|
496
634
|
self,
|
497
635
|
ratelimit_reset_header: str = "ratelimit-reset",
|
498
636
|
ratelimit_remaining_header: str = "ratelimit-remaining",
|
499
|
-
status_codes_for_ratelimit_hit:
|
637
|
+
status_codes_for_ratelimit_hit: list[int] = [429],
|
500
638
|
**kwargs: Any,
|
501
639
|
):
|
502
640
|
"""Constructor
|
@@ -22,9 +22,9 @@ airbyte_cdk/destinations/vector_db_based/indexer.py,sha256=beiSi2Uu67EoTr7yQSaCJ
|
|
22
22
|
airbyte_cdk/destinations/vector_db_based/test_utils.py,sha256=MkqLiOJ5QyKbV4rNiJhe-BHM7FD-ADHQ4bQGf4c5lRY,1932
|
23
23
|
airbyte_cdk/destinations/vector_db_based/utils.py,sha256=FOyEo8Lc-fY8UyhpCivhZtIqBRyxf3cUt6anmK03fUY,1127
|
24
24
|
airbyte_cdk/destinations/vector_db_based/writer.py,sha256=nZ00xPiohElJmYktEZZIhr0m5EDETCHGhg0Lb2S7A20,5095
|
25
|
-
airbyte_cdk/entrypoint.py,sha256=
|
25
|
+
airbyte_cdk/entrypoint.py,sha256=NRJv5BNZRSUEVTmNBa9N7ih6fW5sg4DwL0nkB9kI99Y,18570
|
26
26
|
airbyte_cdk/exception_handler.py,sha256=D_doVl3Dt60ASXlJsfviOCswxGyKF2q0RL6rif3fNks,2013
|
27
|
-
airbyte_cdk/logger.py,sha256=
|
27
|
+
airbyte_cdk/logger.py,sha256=1cURbvawbunCAV178q-XhTHcbAQZTSf07WhU7U9AXWU,3744
|
28
28
|
airbyte_cdk/models/__init__.py,sha256=MOTiuML2wShBaMSIwikdjyye2uUWBjo4J1QFSbnoiM4,2075
|
29
29
|
airbyte_cdk/models/airbyte_protocol.py,sha256=MCmLir67-hF12YM5OKzeGbWrlxr7ChG_OQSE1xG8EIU,3748
|
30
30
|
airbyte_cdk/models/airbyte_protocol_serializers.py,sha256=s6SaFB2CMrG_7jTQGn_fhFbQ1FUxhCxf5kq2RWGHMVI,1749
|
@@ -67,7 +67,7 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=ThOqmaaq
|
|
67
67
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
|
68
68
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
|
69
69
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
70
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
70
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=LExB78FzoQ1ueR5GRyEO-r4HqdghiywvvfyVUXhvU4I,144561
|
71
71
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
72
72
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=venZjfpvtqr3oFSuvMBWtn4h9ayLhD4L65ACuXCDZ64,10445
|
73
73
|
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
|
@@ -84,7 +84,7 @@ airbyte_cdk/sources/declarative/extractors/dpath_extractor.py,sha256=wR4Ol4MG2lt
|
|
84
84
|
airbyte_cdk/sources/declarative/extractors/http_selector.py,sha256=2zWZ4ewTqQC8VwkjS0xD_u350Km3SiYP7hpOOgiLg5o,1169
|
85
85
|
airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=XJELMjahAsaomlvQgN2zrNO0DJX0G0fr9r682gUz7Pg,691
|
86
86
|
airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=yTdEkyDUSW2KbFkEwJJMlS963C955LgCCOVfTmmScpQ,3367
|
87
|
-
airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=
|
87
|
+
airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=HCqx7IyENM_aRF4it2zJN26_vDu6WeP8XgCxQWHUvcY,6934
|
88
88
|
airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
|
89
89
|
airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
|
90
90
|
airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
|
@@ -104,18 +104,18 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZ
|
|
104
104
|
airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
|
105
105
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=BtsY_jtT4MihFqeQgc05HXj3Ndt-e2ESQgGwbg3Sdxc,6430
|
106
106
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=Y5AWYxbJTUtJ_Jm7DV9qrZDiymFR9LST7fBt4piT2-U,4585
|
107
|
-
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=
|
107
|
+
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=TN6GCgLXaWDONTaJwQ3A5ELqC-sxwKz-UYSraJYB-dI,17078
|
108
108
|
airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
109
109
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
|
110
110
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
111
111
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
|
112
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
112
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=peEl_gQUK6Lu98BntStmtVh0BoBI7OBTUNg9ftMGhEA,101946
|
113
113
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
114
114
|
airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=958MMX6_ZOJUlDDdNr9Krosgi2bCKGx2Z765M2Woz18,5505
|
115
115
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
116
116
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
|
117
117
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
118
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
118
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=YAM05AejFafdtx2deEuxEjU3GtebyYVr-MnsIY8eiZU,133750
|
119
119
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
|
120
120
|
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
|
121
121
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
@@ -139,7 +139,7 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_
|
|
139
139
|
airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py,sha256=Tan66odx8VHzfdyyXMQkXz2pJYksllGqvxmpoajgcK4,669
|
140
140
|
airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py,sha256=E-fQbt4ShfxZVoqfnmOx69C6FUPWZz8BIqI3DN9Kcjs,7935
|
141
141
|
airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=3GtOefPH08evlSUxaILkiKLTHbIspFY4qd5B3ZqNE60,10063
|
142
|
-
airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=
|
142
|
+
airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=Ek5hS60-CYjvEaFD-bI7qA-bPgbOPb9hTbMBU4n5zNs,14994
|
143
143
|
airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
|
144
144
|
airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=dSm_pKGOZjzvg-X_Vif-MjrnlUG23fCa69bocq8dVIs,11693
|
145
145
|
airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py,sha256=j6j9QRPaTbKQ2N661RFVKthhkWiodEp6ut0tKeEd0Ng,2019
|
@@ -249,7 +249,7 @@ airbyte_cdk/sources/message/repository.py,sha256=SG7avgti_-dj8FcRHTTrhgLLGJbElv1
|
|
249
249
|
airbyte_cdk/sources/source.py,sha256=KIBBH5VLEb8BZ8B9aROlfaI6OLoJqKDPMJ10jkAR7nk,3611
|
250
250
|
airbyte_cdk/sources/streams/__init__.py,sha256=8fzTKpRTnSx5PggXgQPKJzHNZUV2BCA40N-dI6JM1xI,256
|
251
251
|
airbyte_cdk/sources/streams/availability_strategy.py,sha256=_RU4JITrxMEN36g1RDHMu0iSw0I_3yWGfo5N8_YRvOg,3247
|
252
|
-
airbyte_cdk/sources/streams/call_rate.py,sha256=
|
252
|
+
airbyte_cdk/sources/streams/call_rate.py,sha256=jRsGp1PDZBCDQNxzcGVnVmVzLk0wLHxS1JnJwMAgy9U,27568
|
253
253
|
airbyte_cdk/sources/streams/checkpoint/__init__.py,sha256=3oy7Hd4ivVWTZlN6dKAf4Fv_G7U5iZrvhO9hT871UIo,712
|
254
254
|
airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py,sha256=6HMT2NI-FQuaW0nt95NcyWrt5rZN4gF-Arx0sxdgbv4,15221
|
255
255
|
airbyte_cdk/sources/streams/checkpoint/cursor.py,sha256=3e-3c-54k8U7Awno7DMmAD9ndbnl9OM48EnbEgeDUO0,3499
|
@@ -345,15 +345,15 @@ airbyte_cdk/utils/is_cloud_environment.py,sha256=DayV32Irh-SdnJ0MnjvstwCJ66_l5oE
|
|
345
345
|
airbyte_cdk/utils/mapping_helpers.py,sha256=4EOyUzNAGkq-M0QF5rPeBfT4v_eV7qBrEaAtsTH1k8Y,4309
|
346
346
|
airbyte_cdk/utils/message_utils.py,sha256=OTzbkwN7AdMDA3iKYq1LKwfPFxpyEDfdgEF9BED3dkU,1366
|
347
347
|
airbyte_cdk/utils/oneof_option_config.py,sha256=N8EmWdYdwt0FM7fuShh6H8nj_r4KEL9tb2DJJtwsPow,1180
|
348
|
-
airbyte_cdk/utils/print_buffer.py,sha256=
|
348
|
+
airbyte_cdk/utils/print_buffer.py,sha256=0jhRBruCDPOwrytm03iVJp7C-2XPmaliMyENYJJEiac,2962
|
349
349
|
airbyte_cdk/utils/schema_inferrer.py,sha256=_jLzL9PzE4gfR44OSavkIqZNFM9t08c3LuRrkR7PZbk,9861
|
350
350
|
airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7GrKcg,1264
|
351
351
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
352
352
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
353
353
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
354
|
-
airbyte_cdk-6.
|
355
|
-
airbyte_cdk-6.
|
356
|
-
airbyte_cdk-6.
|
357
|
-
airbyte_cdk-6.
|
358
|
-
airbyte_cdk-6.
|
359
|
-
airbyte_cdk-6.
|
354
|
+
airbyte_cdk-6.34.0.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
355
|
+
airbyte_cdk-6.34.0.dev0.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
356
|
+
airbyte_cdk-6.34.0.dev0.dist-info/METADATA,sha256=ru0Rv9rx2U3dGuLAtDmJoCJbgSqiIV2aVpGTYOxdUz0,6015
|
357
|
+
airbyte_cdk-6.34.0.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
358
|
+
airbyte_cdk-6.34.0.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
359
|
+
airbyte_cdk-6.34.0.dev0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|