airbyte-cdk 6.34.0.dev1__py3-none-any.whl → 6.34.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +12 -16
  2. airbyte_cdk/connector_builder/message_grouper.py +448 -0
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
  5. airbyte_cdk/sources/declarative/auth/oauth.py +1 -6
  6. airbyte_cdk/sources/declarative/auth/token.py +8 -3
  7. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +19 -30
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +85 -203
  9. airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
  10. airbyte_cdk/sources/declarative/decoders/__init__.py +4 -0
  11. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -7
  12. airbyte_cdk/sources/declarative/decoders/json_decoder.py +58 -12
  13. airbyte_cdk/sources/declarative/extractors/record_selector.py +3 -12
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +25 -56
  15. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -12
  16. airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -9
  17. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -150
  18. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +84 -234
  19. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  20. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
  21. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +18 -26
  22. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -8
  23. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -16
  24. airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
  25. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
  26. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
  27. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -4
  28. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
  29. airbyte_cdk/sources/file_based/file_based_source.py +37 -70
  30. airbyte_cdk/sources/file_based/file_based_stream_reader.py +12 -107
  31. airbyte_cdk/sources/file_based/stream/__init__.py +1 -10
  32. airbyte_cdk/sources/streams/call_rate.py +47 -185
  33. airbyte_cdk/sources/streams/http/http.py +2 -1
  34. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +56 -217
  35. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +73 -144
  36. airbyte_cdk/utils/datetime_helpers.py +66 -48
  37. airbyte_cdk/utils/mapping_helpers.py +26 -126
  38. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/METADATA +1 -1
  39. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/RECORD +43 -52
  40. airbyte_cdk/connector_builder/test_reader/__init__.py +0 -7
  41. airbyte_cdk/connector_builder/test_reader/helpers.py +0 -591
  42. airbyte_cdk/connector_builder/test_reader/message_grouper.py +0 -160
  43. airbyte_cdk/connector_builder/test_reader/reader.py +0 -441
  44. airbyte_cdk/connector_builder/test_reader/types.py +0 -75
  45. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +0 -81
  46. airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -47
  47. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +0 -85
  48. airbyte_cdk/sources/specs/transfer_modes.py +0 -26
  49. airbyte_cdk/sources/streams/permissions/identities_stream.py +0 -75
  50. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE.txt +0 -0
  51. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE_SHORT +0 -0
  52. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/WHEEL +0 -0
  53. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/entry_points.txt +0 -0
@@ -13,11 +13,6 @@ from typing import Any, Dict, Iterable, List, Optional, Set
13
13
  from wcmatch.glob import GLOBSTAR, globmatch
14
14
 
15
15
  from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
16
- from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
17
- include_identities_stream,
18
- preserve_directory_structure,
19
- use_file_transfer,
20
- )
21
16
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
22
17
 
23
18
 
@@ -133,20 +128,24 @@ class AbstractFileBasedStreamReader(ABC):
133
128
 
134
129
  def use_file_transfer(self) -> bool:
135
130
  if self.config:
136
- return use_file_transfer(self.config)
131
+ use_file_transfer = (
132
+ hasattr(self.config.delivery_method, "delivery_type")
133
+ and self.config.delivery_method.delivery_type == "use_file_transfer"
134
+ )
135
+ return use_file_transfer
137
136
  return False
138
137
 
139
138
  def preserve_directory_structure(self) -> bool:
140
139
  # fall back to preserve subdirectories if config is not present or incomplete
141
- if self.config:
142
- return preserve_directory_structure(self.config)
140
+ if (
141
+ self.use_file_transfer()
142
+ and self.config
143
+ and hasattr(self.config.delivery_method, "preserve_directory_structure")
144
+ and self.config.delivery_method.preserve_directory_structure is not None
145
+ ):
146
+ return self.config.delivery_method.preserve_directory_structure
143
147
  return True
144
148
 
145
- def include_identities_stream(self) -> bool:
146
- if self.config:
147
- return include_identities_stream(self.config)
148
- return False
149
-
150
149
  @abstractmethod
151
150
  def get_file(
152
151
  self, file: RemoteFile, local_directory: str, logger: logging.Logger
@@ -184,97 +183,3 @@ class AbstractFileBasedStreamReader(ABC):
184
183
  makedirs(path.dirname(local_file_path), exist_ok=True)
185
184
  absolute_file_path = path.abspath(local_file_path)
186
185
  return [file_relative_path, local_file_path, absolute_file_path]
187
-
188
- @abstractmethod
189
- def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]:
190
- """
191
- This function should return the allow list for a given file, i.e. the list of all identities and their permission levels associated with it
192
-
193
- e.g.
194
- def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger):
195
- api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
196
- result = api_conn.get_file_permissions_info(file.id)
197
- return MyPermissionsModel(
198
- id=result["id"],
199
- access_control_list = result["access_control_list"],
200
- is_public = result["is_public"],
201
- ).dict()
202
- """
203
- raise NotImplementedError(
204
- f"{self.__class__.__name__} does not implement get_file_acl_permissions(). To support ACL permissions, implement this method and update file_permissions_schema."
205
- )
206
-
207
- @abstractmethod
208
- def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
209
- """
210
- This function should return the Identities in a determined "space" or "domain" where the file metadata (ACLs) are fetched and ACLs items (Identities) exists.
211
-
212
- e.g.
213
- def load_identity_groups(self, logger: logging.Logger) -> Dict[str, Any]:
214
- api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
215
- users_api = api_conn.users()
216
- groups_api = api_conn.groups()
217
- members_api = self.google_directory_service.members()
218
- for user in users_api.list():
219
- yield my_identity_model(id=user.id, name=user.name, email_address=user.email, type="user").dict()
220
- for group in groups_api.list():
221
- group_obj = my_identity_model(id=group.id, name=groups.name, email_address=user.email, type="group").dict()
222
- for member in members_api.list(group=group):
223
- group_obj.member_email_addresses = group_obj.member_email_addresses or []
224
- group_obj.member_email_addresses.append(member.email)
225
- yield group_obj.dict()
226
- """
227
- raise NotImplementedError(
228
- f"{self.__class__.__name__} does not implement load_identity_groups(). To support identities, implement this method and update identities_schema."
229
- )
230
-
231
- @property
232
- @abstractmethod
233
- def file_permissions_schema(self) -> Dict[str, Any]:
234
- """
235
- This function should return the permissions schema for file permissions stream.
236
-
237
- e.g.
238
- def file_permissions_schema(self) -> Dict[str, Any]:
239
- # you can also follow the patter we have for python connectors and have a json file and read from there e.g. schemas/identities.json
240
- return {
241
- "type": "object",
242
- "properties": {
243
- "id": { "type": "string" },
244
- "file_path": { "type": "string" },
245
- "access_control_list": {
246
- "type": "array",
247
- "items": { "type": "string" }
248
- },
249
- "publicly_accessible": { "type": "boolean" }
250
- }
251
- }
252
- """
253
- raise NotImplementedError(
254
- f"{self.__class__.__name__} does not implement file_permissions_schema, please return json schema for your permissions streams."
255
- )
256
-
257
- @property
258
- @abstractmethod
259
- def identities_schema(self) -> Dict[str, Any]:
260
- """
261
- This function should return the identities schema for file identity stream.
262
-
263
- e.g.
264
- def identities_schema(self) -> Dict[str, Any]:
265
- # you can also follow the patter we have for python connectors and have a json file and read from there e.g. schemas/identities.json
266
- return {
267
- "type": "object",
268
- "properties": {
269
- "id": { "type": "string" },
270
- "remote_id": { "type": "string" },
271
- "name": { "type": ["null", "string"] },
272
- "email_address": { "type": ["null", "string"] },
273
- "member_email_addresses": { "type": ["null", "array"] },
274
- "type": { "type": "string" },
275
- }
276
- }
277
- """
278
- raise NotImplementedError(
279
- f"{self.__class__.__name__} does not implement identities_schema, please return json schema for your identities stream."
280
- )
@@ -1,13 +1,4 @@
1
1
  from airbyte_cdk.sources.file_based.stream.abstract_file_based_stream import AbstractFileBasedStream
2
2
  from airbyte_cdk.sources.file_based.stream.default_file_based_stream import DefaultFileBasedStream
3
- from airbyte_cdk.sources.file_based.stream.identities_stream import FileIdentitiesStream
4
- from airbyte_cdk.sources.file_based.stream.permissions_file_based_stream import (
5
- PermissionsFileBasedStream,
6
- )
7
3
 
8
- __all__ = [
9
- "AbstractFileBasedStream",
10
- "DefaultFileBasedStream",
11
- "FileIdentitiesStream",
12
- "PermissionsFileBasedStream",
13
- ]
4
+ __all__ = ["AbstractFileBasedStream", "DefaultFileBasedStream"]
@@ -6,7 +6,6 @@ import abc
6
6
  import dataclasses
7
7
  import datetime
8
8
  import logging
9
- import re
10
9
  import time
11
10
  from datetime import timedelta
12
11
  from threading import RLock
@@ -26,7 +25,6 @@ else:
26
25
  MIXIN_BASE = object
27
26
 
28
27
  logger = logging.getLogger("airbyte")
29
- logging.getLogger("pyrate_limiter").setLevel(logging.WARNING)
30
28
 
31
29
 
32
30
  @dataclasses.dataclass
@@ -100,7 +98,7 @@ class RequestMatcher(abc.ABC):
100
98
 
101
99
 
102
100
  class HttpRequestMatcher(RequestMatcher):
103
- """Simple implementation of RequestMatcher for HTTP requests using HttpRequestRegexMatcher under the hood."""
101
+ """Simple implementation of RequestMatcher for http requests case"""
104
102
 
105
103
  def __init__(
106
104
  self,
@@ -111,94 +109,32 @@ class HttpRequestMatcher(RequestMatcher):
111
109
  ):
112
110
  """Constructor
113
111
 
114
- :param method: HTTP method (e.g., "GET", "POST").
115
- :param url: Full URL to match.
116
- :param params: Dictionary of query parameters to match.
117
- :param headers: Dictionary of headers to match.
112
+ :param method:
113
+ :param url:
114
+ :param params:
115
+ :param headers:
118
116
  """
119
- # Parse the URL to extract the base and path
120
- if url:
121
- parsed_url = parse.urlsplit(url)
122
- url_base = f"{parsed_url.scheme}://{parsed_url.netloc}"
123
- url_path = parsed_url.path if parsed_url.path != "/" else None
124
- else:
125
- url_base = None
126
- url_path = None
127
-
128
- # Use HttpRequestRegexMatcher under the hood
129
- self._regex_matcher = HttpRequestRegexMatcher(
130
- method=method,
131
- url_base=url_base,
132
- url_path_pattern=re.escape(url_path) if url_path else None,
133
- params=params,
134
- headers=headers,
135
- )
136
-
137
- def __call__(self, request: Any) -> bool:
138
- """
139
- :param request: A requests.Request or requests.PreparedRequest instance.
140
- :return: True if the request matches all provided criteria; False otherwise.
141
- """
142
- return self._regex_matcher(request)
143
-
144
- def __str__(self) -> str:
145
- return (
146
- f"HttpRequestMatcher(method={self._regex_matcher._method}, "
147
- f"url={self._regex_matcher._url_base}{self._regex_matcher._url_path_pattern.pattern if self._regex_matcher._url_path_pattern else ''}, "
148
- f"params={self._regex_matcher._params}, headers={self._regex_matcher._headers})"
149
- )
150
-
151
-
152
- class HttpRequestRegexMatcher(RequestMatcher):
153
- """
154
- Extended RequestMatcher for HTTP requests that supports matching on:
155
- - HTTP method (case-insensitive)
156
- - URL base (scheme + netloc) optionally
157
- - URL path pattern (a regex applied to the path portion of the URL)
158
- - Query parameters (must be present)
159
- - Headers (header names compared case-insensitively)
160
- """
161
-
162
- def __init__(
163
- self,
164
- method: Optional[str] = None,
165
- url_base: Optional[str] = None,
166
- url_path_pattern: Optional[str] = None,
167
- params: Optional[Mapping[str, Any]] = None,
168
- headers: Optional[Mapping[str, Any]] = None,
169
- ):
170
- """
171
- :param method: HTTP method (e.g. "GET", "POST"); compared case-insensitively.
172
- :param url_base: Base URL (scheme://host) that must match.
173
- :param url_path_pattern: A regex pattern that will be applied to the path portion of the URL.
174
- :param params: Dictionary of query parameters that must be present in the request.
175
- :param headers: Dictionary of headers that must be present (header keys are compared case-insensitively).
176
- """
177
- self._method = method.upper() if method else None
178
-
179
- # Normalize the url_base if provided: remove trailing slash.
180
- self._url_base = url_base.rstrip("/") if url_base else None
181
-
182
- # Compile the URL path pattern if provided.
183
- self._url_path_pattern = re.compile(url_path_pattern) if url_path_pattern else None
184
-
185
- # Normalize query parameters to strings.
117
+ self._method = method
118
+ self._url = url
186
119
  self._params = {str(k): str(v) for k, v in (params or {}).items()}
187
-
188
- # Normalize header keys to lowercase.
189
- self._headers = {str(k).lower(): str(v) for k, v in (headers or {}).items()}
120
+ self._headers = {str(k): str(v) for k, v in (headers or {}).items()}
190
121
 
191
122
  @staticmethod
192
123
  def _match_dict(obj: Mapping[str, Any], pattern: Mapping[str, Any]) -> bool:
193
- """Check that every key/value in the pattern exists in the object."""
124
+ """Check that all elements from pattern dict present and have the same values in obj dict
125
+
126
+ :param obj:
127
+ :param pattern:
128
+ :return:
129
+ """
194
130
  return pattern.items() <= obj.items()
195
131
 
196
132
  def __call__(self, request: Any) -> bool:
197
133
  """
198
- :param request: A requests.Request or requests.PreparedRequest instance.
199
- :return: True if the request matches all provided criteria; False otherwise.
134
+
135
+ :param request:
136
+ :return: True if matches the provided request object, False - otherwise
200
137
  """
201
- # Prepare the request (if needed) and extract the URL details.
202
138
  if isinstance(request, requests.Request):
203
139
  prepared_request = request.prepare()
204
140
  elif isinstance(request, requests.PreparedRequest):
@@ -206,49 +142,23 @@ class HttpRequestRegexMatcher(RequestMatcher):
206
142
  else:
207
143
  return False
208
144
 
209
- # Check HTTP method.
210
145
  if self._method is not None:
211
146
  if prepared_request.method != self._method:
212
147
  return False
213
-
214
- # Parse the URL.
215
- parsed_url = parse.urlsplit(prepared_request.url)
216
- # Reconstruct the base: scheme://netloc
217
- request_url_base = f"{str(parsed_url.scheme)}://{str(parsed_url.netloc)}"
218
- # The path (without query parameters)
219
- request_path = str(parsed_url.path).rstrip("/")
220
-
221
- # If a base URL is provided, check that it matches.
222
- if self._url_base is not None:
223
- if request_url_base != self._url_base:
224
- return False
225
-
226
- # If a URL path pattern is provided, ensure the path matches the regex.
227
- if self._url_path_pattern is not None:
228
- if not self._url_path_pattern.search(request_path):
148
+ if self._url is not None and prepared_request.url is not None:
149
+ url_without_params = prepared_request.url.split("?")[0]
150
+ if url_without_params != self._url:
229
151
  return False
230
-
231
- # Check query parameters.
232
- if self._params:
233
- query_params = dict(parse.parse_qsl(str(parsed_url.query)))
234
- if not self._match_dict(query_params, self._params):
152
+ if self._params is not None:
153
+ parsed_url = parse.urlsplit(prepared_request.url)
154
+ params = dict(parse.parse_qsl(str(parsed_url.query)))
155
+ if not self._match_dict(params, self._params):
235
156
  return False
236
-
237
- # Check headers (normalize keys to lower-case).
238
- if self._headers:
239
- req_headers = {k.lower(): v for k, v in prepared_request.headers.items()}
240
- if not self._match_dict(req_headers, self._headers):
157
+ if self._headers is not None:
158
+ if not self._match_dict(prepared_request.headers, self._headers):
241
159
  return False
242
-
243
160
  return True
244
161
 
245
- def __str__(self) -> str:
246
- regex = self._url_path_pattern.pattern if self._url_path_pattern else None
247
- return (
248
- f"HttpRequestRegexMatcher(method={self._method}, url_base={self._url_base}, "
249
- f"url_path_pattern={regex}, params={self._params}, headers={self._headers})"
250
- )
251
-
252
162
 
253
163
  class BaseCallRatePolicy(AbstractCallRatePolicy, abc.ABC):
254
164
  def __init__(self, matchers: list[RequestMatcher]):
@@ -347,14 +257,6 @@ class FixedWindowCallRatePolicy(BaseCallRatePolicy):
347
257
 
348
258
  self._calls_num += weight
349
259
 
350
- def __str__(self) -> str:
351
- matcher_str = ", ".join(f"{matcher}" for matcher in self._matchers)
352
- return (
353
- f"FixedWindowCallRatePolicy(call_limit={self._call_limit}, period={self._offset}, "
354
- f"calls_used={self._calls_num}, next_reset={self._next_reset_ts}, "
355
- f"matchers=[{matcher_str}])"
356
- )
357
-
358
260
  def update(
359
261
  self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]
360
262
  ) -> None:
@@ -461,19 +363,6 @@ class MovingWindowCallRatePolicy(BaseCallRatePolicy):
461
363
  # if available_calls is not None and call_reset_ts is not None:
462
364
  # ts = call_reset_ts.timestamp()
463
365
 
464
- def __str__(self) -> str:
465
- """Return a human-friendly description of the moving window rate policy for logging purposes."""
466
- rates_info = ", ".join(
467
- f"{rate.limit} per {timedelta(milliseconds=rate.interval)}"
468
- for rate in self._bucket.rates
469
- )
470
- current_bucket_count = self._bucket.count()
471
- matcher_str = ", ".join(f"{matcher}" for matcher in self._matchers)
472
- return (
473
- f"MovingWindowCallRatePolicy(rates=[{rates_info}], current_bucket_count={current_bucket_count}, "
474
- f"matchers=[{matcher_str}])"
475
- )
476
-
477
366
 
478
367
  class AbstractAPIBudget(abc.ABC):
479
368
  """Interface to some API where a client allowed to have N calls per T interval.
@@ -526,23 +415,6 @@ class APIBudget(AbstractAPIBudget):
526
415
  self._policies = policies
527
416
  self._maximum_attempts_to_acquire = maximum_attempts_to_acquire
528
417
 
529
- def _extract_endpoint(self, request: Any) -> str:
530
- """Extract the endpoint URL from the request if available."""
531
- endpoint = None
532
- try:
533
- # If the request is already a PreparedRequest, it should have a URL.
534
- if isinstance(request, requests.PreparedRequest):
535
- endpoint = request.url
536
- # If it's a requests.Request, we call prepare() to extract the URL.
537
- elif isinstance(request, requests.Request):
538
- prepared = request.prepare()
539
- endpoint = prepared.url
540
- except Exception as e:
541
- logger.debug(f"Error extracting endpoint: {e}")
542
- if endpoint:
543
- return endpoint
544
- return "unknown endpoint"
545
-
546
418
  def get_matching_policy(self, request: Any) -> Optional[AbstractCallRatePolicy]:
547
419
  for policy in self._policies:
548
420
  if policy.matches(request):
@@ -556,24 +428,20 @@ class APIBudget(AbstractAPIBudget):
556
428
  Matchers will be called sequentially in the same order they were added.
557
429
  The first matcher that returns True will
558
430
 
559
- :param request: the API request
560
- :param block: when True (default) will block until a call credit is available
561
- :param timeout: if provided, limits maximum waiting time; otherwise, waits indefinitely
562
- :raises: CallRateLimitHit if the call credit cannot be acquired within the timeout
431
+ :param request:
432
+ :param block: when true (default) will block the current thread until call credit is available
433
+ :param timeout: if provided will limit maximum time in block, otherwise will wait until credit is available
434
+ :raises: CallRateLimitHit - when no calls left and if timeout was set the waiting time exceed the timeout
563
435
  """
564
436
 
565
437
  policy = self.get_matching_policy(request)
566
- endpoint = self._extract_endpoint(request)
567
438
  if policy:
568
- logger.debug(f"Acquiring call for endpoint {endpoint} using policy: {policy}")
569
439
  self._do_acquire(request=request, policy=policy, block=block, timeout=timeout)
570
440
  elif self._policies:
571
- logger.debug(
572
- f"No policies matched for endpoint {endpoint} (request: {request}). Allowing call by default."
573
- )
441
+ logger.info("no policies matched with requests, allow call by default")
574
442
 
575
443
  def update_from_response(self, request: Any, response: Any) -> None:
576
- """Update budget information based on the API response.
444
+ """Update budget information based on response from API
577
445
 
578
446
  :param request: the initial request that triggered this response
579
447
  :param response: response from the API
@@ -583,17 +451,15 @@ class APIBudget(AbstractAPIBudget):
583
451
  def _do_acquire(
584
452
  self, request: Any, policy: AbstractCallRatePolicy, block: bool, timeout: Optional[float]
585
453
  ) -> None:
586
- """Internal method to try to acquire a call credit.
454
+ """Internal method to try to acquire a call credit
587
455
 
588
- :param request: the API request
589
- :param policy: the matching rate-limiting policy
590
- :param block: indicates whether to block until a call credit is available
591
- :param timeout: maximum time to wait if blocking
592
- :raises: CallRateLimitHit if unable to acquire a call credit
456
+ :param request:
457
+ :param policy:
458
+ :param block:
459
+ :param timeout:
593
460
  """
594
461
  last_exception = None
595
- endpoint = self._extract_endpoint(request)
596
- # sometimes we spend all budget before a second attempt, so we have a few more attempts
462
+ # sometimes we spend all budget before a second attempt, so we have few more here
597
463
  for attempt in range(1, self._maximum_attempts_to_acquire):
598
464
  try:
599
465
  policy.try_acquire(request, weight=1)
@@ -605,24 +471,20 @@ class APIBudget(AbstractAPIBudget):
605
471
  time_to_wait = min(timedelta(seconds=timeout), exc.time_to_wait)
606
472
  else:
607
473
  time_to_wait = exc.time_to_wait
608
- # Ensure we never sleep for a negative duration.
609
- time_to_wait = max(timedelta(0), time_to_wait)
610
- logger.debug(
611
- f"Policy {policy} reached call limit for endpoint {endpoint} ({exc.rate}). "
612
- f"Sleeping for {time_to_wait} on attempt {attempt}."
474
+
475
+ time_to_wait = max(
476
+ timedelta(0), time_to_wait
477
+ ) # sometimes we get negative duration
478
+ logger.info(
479
+ "reached call limit %s. going to sleep for %s", exc.rate, time_to_wait
613
480
  )
614
481
  time.sleep(time_to_wait.total_seconds())
615
482
  else:
616
- logger.debug(
617
- f"Policy {policy} reached call limit for endpoint {endpoint} ({exc.rate}) "
618
- f"and blocking is disabled."
619
- )
620
483
  raise
621
484
 
622
485
  if last_exception:
623
- logger.debug(
624
- f"Exhausted all {self._maximum_attempts_to_acquire} attempts to acquire a call for endpoint {endpoint} "
625
- f"using policy: {policy}"
486
+ logger.info(
487
+ "we used all %s attempts to acquire and failed", self._maximum_attempts_to_acquire
626
488
  )
627
489
  raise last_exception
628
490
 
@@ -634,7 +496,7 @@ class HttpAPIBudget(APIBudget):
634
496
  self,
635
497
  ratelimit_reset_header: str = "ratelimit-reset",
636
498
  ratelimit_remaining_header: str = "ratelimit-remaining",
637
- status_codes_for_ratelimit_hit: list[int] = [429],
499
+ status_codes_for_ratelimit_hit: tuple[int] = (429,),
638
500
  **kwargs: Any,
639
501
  ):
640
502
  """Constructor
@@ -423,6 +423,8 @@ class HttpStream(Stream, CheckpointMixin, ABC):
423
423
  stream_slice: Optional[Mapping[str, Any]] = None,
424
424
  stream_state: Optional[Mapping[str, Any]] = None,
425
425
  ) -> Iterable[StreamData]:
426
+ partition, _, _ = self._extract_slice_fields(stream_slice=stream_slice)
427
+
426
428
  stream_state = stream_state or {}
427
429
  pagination_complete = False
428
430
  next_page_token = None
@@ -436,7 +438,6 @@ class HttpStream(Stream, CheckpointMixin, ABC):
436
438
 
437
439
  cursor = self.get_cursor()
438
440
  if cursor and isinstance(cursor, SubstreamResumableFullRefreshCursor):
439
- partition, _, _ = self._extract_slice_fields(stream_slice=stream_slice)
440
441
  # Substreams checkpoint state by marking an entire parent partition as completed so that on the subsequent attempt
441
442
  # after a failure, completed parents are skipped and the sync can make progress
442
443
  cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition))