airbyte-cdk 6.33.2.dev0__py3-none-any.whl → 6.33.3__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -3,7 +3,7 @@
3
3
  #
4
4
 
5
5
  from dataclasses import InitVar, dataclass, field
6
- from datetime import timedelta
6
+ from datetime import datetime, timedelta
7
7
  from typing import Any, List, Mapping, MutableMapping, Optional, Union
8
8
 
9
9
  from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
@@ -232,8 +232,13 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
232
232
  return self._refresh_request_headers.eval(self.config)
233
233
 
234
234
  def get_token_expiry_date(self) -> AirbyteDateTime:
235
+ if not self._has_access_token_been_initialized():
236
+ return AirbyteDateTime.from_datetime(datetime.min)
235
237
  return self._token_expiry_date # type: ignore # _token_expiry_date is an AirbyteDateTime. It is never None despite what mypy thinks
236
238
 
239
+ def _has_access_token_been_initialized(self) -> bool:
240
+ return self._access_token is not None
241
+
237
242
  def set_token_expiry_date(self, value: Union[str, int]) -> None:
238
243
  self._token_expiry_date = self._parse_token_expiration_date(value)
239
244
 
@@ -3,7 +3,7 @@
3
3
  #
4
4
 
5
5
  import logging
6
- from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple
6
+ from typing import Any, Generic, Iterator, List, Mapping, MutableMapping, Optional, Tuple
7
7
 
8
8
  from airbyte_cdk.models import (
9
9
  AirbyteCatalog,
@@ -224,6 +224,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
224
224
  stream_state = self._connector_state_manager.get_stream_state(
225
225
  stream_name=declarative_stream.name, namespace=declarative_stream.namespace
226
226
  )
227
+ stream_state = self._migrate_state(declarative_stream, stream_state)
227
228
 
228
229
  retriever = self._get_retriever(declarative_stream, stream_state)
229
230
 
@@ -331,6 +332,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
331
332
  stream_state = self._connector_state_manager.get_stream_state(
332
333
  stream_name=declarative_stream.name, namespace=declarative_stream.namespace
333
334
  )
335
+ stream_state = self._migrate_state(declarative_stream, stream_state)
336
+
334
337
  partition_router = declarative_stream.retriever.stream_slicer._partition_router
335
338
 
336
339
  perpartition_cursor = (
@@ -521,3 +524,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
521
524
  if stream.stream.name not in concurrent_stream_names
522
525
  ]
523
526
  )
527
+
528
+ @staticmethod
529
+ def _migrate_state(
530
+ declarative_stream: DeclarativeStream, stream_state: MutableMapping[str, Any]
531
+ ) -> MutableMapping[str, Any]:
532
+ for state_migration in declarative_stream.state_migrations:
533
+ if state_migration.should_migrate(stream_state):
534
+ # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
535
+ stream_state = dict(state_migration.migrate(stream_state))
536
+
537
+ return stream_state
@@ -40,12 +40,6 @@ properties:
40
40
  "$ref": "#/definitions/Spec"
41
41
  concurrency_level:
42
42
  "$ref": "#/definitions/ConcurrencyLevel"
43
- api_budget:
44
- title: API Budget
45
- description: Defines how many requests can be made to the API in a given time frame. This field accepts either a generic APIBudget or an HTTP-specific configuration (HTTPAPIBudget) to be applied across all streams.
46
- anyOf:
47
- - "$ref": "#/definitions/APIBudget"
48
- - "$ref": "#/definitions/HTTPAPIBudget"
49
43
  metadata:
50
44
  type: object
51
45
  description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
@@ -800,7 +794,7 @@ definitions:
800
794
  description: This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)
801
795
  type: object
802
796
  required:
803
- - target
797
+ - target
804
798
  properties:
805
799
  target:
806
800
  title: Target
@@ -1371,207 +1365,6 @@ definitions:
1371
1365
  $parameters:
1372
1366
  type: object
1373
1367
  additional_properties: true
1374
- APIBudget:
1375
- title: API Budget
1376
- description: >
1377
- A generic API budget configuration that defines the policies (rate limiting rules)
1378
- and the maximum number of attempts to acquire a call credit. This budget does not automatically
1379
- update itself based on HTTP response headers.
1380
- type: object
1381
- required:
1382
- - type
1383
- - policies
1384
- properties:
1385
- type:
1386
- type: string
1387
- enum: [APIBudget]
1388
- policies:
1389
- title: Policies
1390
- description: List of call rate policies that define how many calls are allowed.
1391
- type: array
1392
- items:
1393
- anyOf:
1394
- - "$ref": "#/definitions/FixedWindowCallRatePolicy"
1395
- - "$ref": "#/definitions/MovingWindowCallRatePolicy"
1396
- - "$ref": "#/definitions/UnlimitedCallRatePolicy"
1397
- maximum_attempts_to_acquire:
1398
- title: Maximum Attempts to Acquire
1399
- description: The maximum number of attempts to acquire a call before giving up.
1400
- type: integer
1401
- default: 100000
1402
- additionalProperties: true
1403
- HTTPAPIBudget:
1404
- title: HTTP API Budget
1405
- description: >
1406
- An HTTP-specific API budget that extends APIBudget by updating rate limiting information based
1407
- on HTTP response headers. It extracts available calls and the next reset timestamp from the HTTP responses.
1408
- type: object
1409
- required:
1410
- - type
1411
- - policies
1412
- properties:
1413
- type:
1414
- type: string
1415
- enum: [HTTPAPIBudget]
1416
- policies:
1417
- title: Policies
1418
- description: List of call rate policies that define how many calls are allowed.
1419
- type: array
1420
- items:
1421
- anyOf:
1422
- - "$ref": "#/definitions/FixedWindowCallRatePolicy"
1423
- - "$ref": "#/definitions/MovingWindowCallRatePolicy"
1424
- - "$ref": "#/definitions/UnlimitedCallRatePolicy"
1425
- ratelimit_reset_header:
1426
- title: Rate Limit Reset Header
1427
- description: The HTTP response header name that indicates when the rate limit resets.
1428
- type: string
1429
- default: "ratelimit-reset"
1430
- ratelimit_remaining_header:
1431
- title: Rate Limit Remaining Header
1432
- description: The HTTP response header name that indicates the number of remaining allowed calls.
1433
- type: string
1434
- default: "ratelimit-remaining"
1435
- status_codes_for_ratelimit_hit:
1436
- title: Status Codes for Rate Limit Hit
1437
- description: List of HTTP status codes that indicate a rate limit has been hit.
1438
- type: array
1439
- items:
1440
- type: integer
1441
- default: [429]
1442
- maximum_attempts_to_acquire:
1443
- title: Maximum Attempts to Acquire
1444
- description: The maximum number of attempts to acquire a call before giving up.
1445
- type: integer
1446
- default: 100000
1447
- additionalProperties: true
1448
- FixedWindowCallRatePolicy:
1449
- title: Fixed Window Call Rate Policy
1450
- description: A policy that allows a fixed number of calls within a specific time window.
1451
- type: object
1452
- required:
1453
- - type
1454
- - next_reset_ts
1455
- - period
1456
- - call_limit
1457
- - matchers
1458
- properties:
1459
- type:
1460
- type: string
1461
- enum: [FixedWindowCallRatePolicy]
1462
- next_reset_ts:
1463
- title: Next Reset Timestamp
1464
- description: The timestamp when the rate limit will reset.
1465
- type: string
1466
- format: date-time
1467
- period:
1468
- title: Period
1469
- description: The time interval for the rate limit window.
1470
- type: string
1471
- format: duration
1472
- call_limit:
1473
- title: Call Limit
1474
- description: The maximum number of calls allowed within the period.
1475
- type: integer
1476
- matchers:
1477
- title: Matchers
1478
- description: List of matchers that define which requests this policy applies to.
1479
- type: array
1480
- items:
1481
- "$ref": "#/definitions/HttpRequestRegexMatcher"
1482
- additionalProperties: true
1483
- MovingWindowCallRatePolicy:
1484
- title: Moving Window Call Rate Policy
1485
- description: A policy that allows a fixed number of calls within a moving time window.
1486
- type: object
1487
- required:
1488
- - type
1489
- - rates
1490
- - matchers
1491
- properties:
1492
- type:
1493
- type: string
1494
- enum: [MovingWindowCallRatePolicy]
1495
- rates:
1496
- title: Rates
1497
- description: List of rates that define the call limits for different time intervals.
1498
- type: array
1499
- items:
1500
- "$ref": "#/definitions/Rate"
1501
- matchers:
1502
- title: Matchers
1503
- description: List of matchers that define which requests this policy applies to.
1504
- type: array
1505
- items:
1506
- "$ref": "#/definitions/HttpRequestRegexMatcher"
1507
- additionalProperties: true
1508
- UnlimitedCallRatePolicy:
1509
- title: Unlimited Call Rate Policy
1510
- description: A policy that allows unlimited calls for specific requests.
1511
- type: object
1512
- required:
1513
- - type
1514
- - matchers
1515
- properties:
1516
- type:
1517
- type: string
1518
- enum: [UnlimitedCallRatePolicy]
1519
- matchers:
1520
- title: Matchers
1521
- description: List of matchers that define which requests this policy applies to.
1522
- type: array
1523
- items:
1524
- "$ref": "#/definitions/HttpRequestRegexMatcher"
1525
- additionalProperties: true
1526
- Rate:
1527
- title: Rate
1528
- description: Defines a rate limit with a specific number of calls allowed within a time interval.
1529
- type: object
1530
- required:
1531
- - limit
1532
- - interval
1533
- properties:
1534
- limit:
1535
- title: Limit
1536
- description: The maximum number of calls allowed within the interval.
1537
- type: integer
1538
- interval:
1539
- title: Interval
1540
- description: The time interval for the rate limit.
1541
- type: string
1542
- format: duration
1543
- additionalProperties: true
1544
- HttpRequestRegexMatcher:
1545
- title: HTTP Request Matcher
1546
- description: >
1547
- Matches HTTP requests based on method, base URL, URL path pattern, query parameters, and headers.
1548
- Use `url_base` to specify the scheme and host (without trailing slash) and
1549
- `url_path_pattern` to apply a regex to the request path.
1550
- type: object
1551
- properties:
1552
- method:
1553
- title: Method
1554
- description: The HTTP method to match (e.g., GET, POST).
1555
- type: string
1556
- url_base:
1557
- title: URL Base
1558
- description: The base URL (scheme and host, e.g. "https://api.example.com") to match.
1559
- type: string
1560
- url_path_pattern:
1561
- title: URL Path Pattern
1562
- description: A regular expression pattern to match the URL path.
1563
- type: string
1564
- params:
1565
- title: Parameters
1566
- description: The query parameters to match.
1567
- type: object
1568
- additionalProperties: true
1569
- headers:
1570
- title: Headers
1571
- description: The headers to match.
1572
- type: object
1573
- additionalProperties: true
1574
- additionalProperties: true
1575
1368
  DefaultErrorHandler:
1576
1369
  title: Default Error Handler
1577
1370
  description: Component defining how to handle errors. Default behavior includes only retrying server errors (HTTP 5XX) and too many requests (HTTP 429) with an exponential backoff.
@@ -1756,7 +1549,6 @@ definitions:
1756
1549
  anyOf:
1757
1550
  - "$ref": "#/definitions/JsonDecoder"
1758
1551
  - "$ref": "#/definitions/XmlDecoder"
1759
- - "$ref": "#/definitions/CompositeRawDecoder"
1760
1552
  $parameters:
1761
1553
  type: object
1762
1554
  additionalProperties: true
@@ -2340,23 +2132,6 @@ definitions:
2340
2132
  $parameters:
2341
2133
  type: object
2342
2134
  additionalProperties: true
2343
- GzipJsonDecoder:
2344
- title: GzipJson Decoder
2345
- description: Use this if the response is Gzip compressed Json.
2346
- type: object
2347
- additionalProperties: true
2348
- required:
2349
- - type
2350
- properties:
2351
- type:
2352
- type: string
2353
- enum: [GzipJsonDecoder]
2354
- encoding:
2355
- type: string
2356
- default: utf-8
2357
- $parameters:
2358
- type: object
2359
- additionalProperties: true
2360
2135
  ZipfileDecoder:
2361
2136
  title: Zipfile Decoder
2362
2137
  description: Decoder for response data that is returned as zipfile(s).
@@ -2364,19 +2139,19 @@ definitions:
2364
2139
  additionalProperties: true
2365
2140
  required:
2366
2141
  - type
2367
- - parser
2142
+ - decoder
2368
2143
  properties:
2369
2144
  type:
2370
2145
  type: string
2371
2146
  enum: [ZipfileDecoder]
2372
- parser:
2147
+ decoder:
2373
2148
  title: Parser
2374
2149
  description: Parser to parse the decompressed data from the zipfile(s).
2375
2150
  anyOf:
2376
- - "$ref": "#/definitions/GzipParser"
2377
- - "$ref": "#/definitions/JsonParser"
2378
- - "$ref": "#/definitions/JsonLineParser"
2379
- - "$ref": "#/definitions/CsvParser"
2151
+ - "$ref": "#/definitions/CsvDecoder"
2152
+ - "$ref": "#/definitions/GzipDecoder"
2153
+ - "$ref": "#/definitions/JsonDecoder"
2154
+ - "$ref": "#/definitions/JsonlDecoder"
2380
2155
  ListPartitionRouter:
2381
2156
  title: List Partition Router
2382
2157
  description: A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. During a sync, each value is iterated over and can be used as input to outbound API requests.
@@ -3209,79 +2984,39 @@ definitions:
3209
2984
  description: Component decoding the response so records can be extracted.
3210
2985
  anyOf:
3211
2986
  - "$ref": "#/definitions/CustomDecoder"
2987
+ - "$ref": "#/definitions/CsvDecoder"
2988
+ - "$ref": "#/definitions/GzipDecoder"
3212
2989
  - "$ref": "#/definitions/JsonDecoder"
3213
2990
  - "$ref": "#/definitions/JsonlDecoder"
3214
2991
  - "$ref": "#/definitions/IterableDecoder"
3215
2992
  - "$ref": "#/definitions/XmlDecoder"
3216
- - "$ref": "#/definitions/GzipJsonDecoder"
3217
- - "$ref": "#/definitions/CompositeRawDecoder"
3218
2993
  - "$ref": "#/definitions/ZipfileDecoder"
3219
2994
  $parameters:
3220
2995
  type: object
3221
2996
  additionalProperties: true
3222
- CompositeRawDecoder:
3223
- description: "(This is experimental, use at your own risk)"
2997
+ GzipDecoder:
3224
2998
  type: object
3225
2999
  required:
3226
3000
  - type
3227
- - parser
3001
+ - decoder
3228
3002
  properties:
3229
3003
  type:
3230
3004
  type: string
3231
- enum: [CompositeRawDecoder]
3232
- parser:
3233
- anyOf:
3234
- - "$ref": "#/definitions/GzipParser"
3235
- - "$ref": "#/definitions/JsonParser"
3236
- - "$ref": "#/definitions/JsonLineParser"
3237
- - "$ref": "#/definitions/CsvParser"
3238
- # PARSERS
3239
- GzipParser:
3240
- type: object
3241
- required:
3242
- - type
3243
- - inner_parser
3244
- properties:
3245
- type:
3246
- type: string
3247
- enum: [GzipParser]
3248
- inner_parser:
3005
+ enum: [GzipDecoder]
3006
+ decoder:
3249
3007
  anyOf:
3250
- - "$ref": "#/definitions/JsonLineParser"
3251
- - "$ref": "#/definitions/CsvParser"
3252
- - "$ref": "#/definitions/JsonParser"
3253
- JsonParser:
3254
- title: JsonParser
3255
- description: Parser used for parsing str, bytes, or bytearray data and returning data in a dictionary format.
3256
- type: object
3257
- required:
3258
- - type
3259
- properties:
3260
- type:
3261
- type: string
3262
- enum: [JsonParser]
3263
- encoding:
3264
- type: string
3265
- default: utf-8
3266
- JsonLineParser:
3267
- type: object
3268
- required:
3269
- - type
3270
- properties:
3271
- type:
3272
- type: string
3273
- enum: [JsonLineParser]
3274
- encoding:
3275
- type: string
3276
- default: utf-8
3277
- CsvParser:
3008
+ - "$ref": "#/definitions/CsvDecoder"
3009
+ - "$ref": "#/definitions/GzipDecoder"
3010
+ - "$ref": "#/definitions/JsonDecoder"
3011
+ - "$ref": "#/definitions/JsonlDecoder"
3012
+ CsvDecoder:
3278
3013
  type: object
3279
3014
  required:
3280
3015
  - type
3281
3016
  properties:
3282
3017
  type:
3283
3018
  type: string
3284
- enum: [CsvParser]
3019
+ enum: [CsvDecoder]
3285
3020
  encoding:
3286
3021
  type: string
3287
3022
  default: utf-8
@@ -3409,24 +3144,24 @@ definitions:
3409
3144
  description: Component decoding the response so records can be extracted.
3410
3145
  anyOf:
3411
3146
  - "$ref": "#/definitions/CustomDecoder"
3147
+ - "$ref": "#/definitions/CsvDecoder"
3148
+ - "$ref": "#/definitions/GzipDecoder"
3412
3149
  - "$ref": "#/definitions/JsonDecoder"
3413
3150
  - "$ref": "#/definitions/JsonlDecoder"
3414
3151
  - "$ref": "#/definitions/IterableDecoder"
3415
3152
  - "$ref": "#/definitions/XmlDecoder"
3416
- - "$ref": "#/definitions/GzipJsonDecoder"
3417
- - "$ref": "#/definitions/CompositeRawDecoder"
3418
3153
  - "$ref": "#/definitions/ZipfileDecoder"
3419
3154
  download_decoder:
3420
3155
  title: Download Decoder
3421
3156
  description: Component decoding the download response so records can be extracted.
3422
3157
  anyOf:
3423
3158
  - "$ref": "#/definitions/CustomDecoder"
3159
+ - "$ref": "#/definitions/CsvDecoder"
3160
+ - "$ref": "#/definitions/GzipDecoder"
3424
3161
  - "$ref": "#/definitions/JsonDecoder"
3425
3162
  - "$ref": "#/definitions/JsonlDecoder"
3426
3163
  - "$ref": "#/definitions/IterableDecoder"
3427
3164
  - "$ref": "#/definitions/XmlDecoder"
3428
- - "$ref": "#/definitions/GzipJsonDecoder"
3429
- - "$ref": "#/definitions/CompositeRawDecoder"
3430
3165
  - "$ref": "#/definitions/ZipfileDecoder"
3431
3166
  $parameters:
3432
3167
  type: object
@@ -10,10 +10,8 @@ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
10
10
  )
11
11
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
12
12
  from airbyte_cdk.sources.declarative.decoders.json_decoder import (
13
- GzipJsonDecoder,
14
13
  IterableDecoder,
15
14
  JsonDecoder,
16
- JsonlDecoder,
17
15
  )
18
16
  from airbyte_cdk.sources.declarative.decoders.noop_decoder import NoopDecoder
19
17
  from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import (
@@ -27,9 +25,7 @@ __all__ = [
27
25
  "CompositeRawDecoder",
28
26
  "JsonDecoder",
29
27
  "JsonParser",
30
- "JsonlDecoder",
31
28
  "IterableDecoder",
32
- "GzipJsonDecoder",
33
29
  "NoopDecoder",
34
30
  "PaginationDecoderDecorator",
35
31
  "XmlDecoder",
@@ -1,5 +1,6 @@
1
1
  import csv
2
2
  import gzip
3
+ import io
3
4
  import json
4
5
  import logging
5
6
  from abc import ABC, abstractmethod
@@ -130,11 +131,15 @@ class CompositeRawDecoder(Decoder):
130
131
  """
131
132
 
132
133
  parser: Parser
134
+ stream_response: bool = True
133
135
 
134
136
  def is_stream_response(self) -> bool:
135
- return True
137
+ return self.stream_response
136
138
 
137
139
  def decode(
138
140
  self, response: requests.Response
139
141
  ) -> Generator[MutableMapping[str, Any], None, None]:
140
- yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
142
+ if self.is_stream_response():
143
+ yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
144
+ else:
145
+ yield from self.parser.parse(data=io.BytesIO(response.content))
@@ -10,21 +10,24 @@ from typing import Any, Generator, List, Mapping, MutableMapping, Optional
10
10
  import orjson
11
11
  import requests
12
12
 
13
+ from airbyte_cdk.sources.declarative.decoders import CompositeRawDecoder, JsonParser
13
14
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
14
15
 
15
16
  logger = logging.getLogger("airbyte")
16
17
 
17
18
 
18
- @dataclass
19
19
  class JsonDecoder(Decoder):
20
20
  """
21
21
  Decoder strategy that returns the json-encoded content of a response, if any.
22
+
23
+ Usually, we would try to instantiate the equivalent `CompositeRawDecoder(parser=JsonParser(), stream_response=False)` but there were specific historical behaviors related to the JsonDecoder that we didn't know if we could remove like the fallback on {} in case of errors.
22
24
  """
23
25
 
24
- parameters: InitVar[Mapping[str, Any]]
26
+ def __init__(self, parameters: Mapping[str, Any]):
27
+ self._decoder = CompositeRawDecoder(parser=JsonParser(), stream_response=False)
25
28
 
26
29
  def is_stream_response(self) -> bool:
27
- return False
30
+ return self._decoder.is_stream_response()
28
31
 
29
32
  def decode(
30
33
  self, response: requests.Response
@@ -32,25 +35,16 @@ class JsonDecoder(Decoder):
32
35
  """
33
36
  Given the response is an empty string or an emtpy list, the function will return a generator with an empty mapping.
34
37
  """
38
+ has_yielded = False
35
39
  try:
36
- body_json = response.json()
37
- yield from self.parse_body_json(body_json)
38
- except requests.exceptions.JSONDecodeError:
39
- logger.warning(
40
- f"Response cannot be parsed into json: {response.status_code=}, {response.text=}"
41
- )
40
+ for element in self._decoder.decode(response):
41
+ yield element
42
+ has_yielded = True
43
+ except Exception:
42
44
  yield {}
43
45
 
44
- @staticmethod
45
- def parse_body_json(
46
- body_json: MutableMapping[str, Any] | List[MutableMapping[str, Any]],
47
- ) -> Generator[MutableMapping[str, Any], None, None]:
48
- if not isinstance(body_json, list):
49
- body_json = [body_json]
50
- if len(body_json) == 0:
46
+ if not has_yielded:
51
47
  yield {}
52
- else:
53
- yield from body_json
54
48
 
55
49
 
56
50
  @dataclass
@@ -69,43 +63,3 @@ class IterableDecoder(Decoder):
69
63
  ) -> Generator[MutableMapping[str, Any], None, None]:
70
64
  for line in response.iter_lines():
71
65
  yield {"record": line.decode()}
72
-
73
-
74
- @dataclass
75
- class JsonlDecoder(Decoder):
76
- """
77
- Decoder strategy that returns the json-encoded content of the response, if any.
78
- """
79
-
80
- parameters: InitVar[Mapping[str, Any]]
81
-
82
- def is_stream_response(self) -> bool:
83
- return True
84
-
85
- def decode(
86
- self, response: requests.Response
87
- ) -> Generator[MutableMapping[str, Any], None, None]:
88
- # TODO???: set delimiter? usually it is `\n` but maybe it would be useful to set optional?
89
- # https://github.com/airbytehq/airbyte-internal-issues/issues/8436
90
- for record in response.iter_lines():
91
- yield orjson.loads(record)
92
-
93
-
94
- @dataclass
95
- class GzipJsonDecoder(JsonDecoder):
96
- encoding: Optional[str]
97
-
98
- def __post_init__(self, parameters: Mapping[str, Any]) -> None:
99
- if self.encoding:
100
- try:
101
- codecs.lookup(self.encoding)
102
- except LookupError:
103
- raise ValueError(
104
- f"Invalid encoding '{self.encoding}'. Please check provided encoding"
105
- )
106
-
107
- def decode(
108
- self, response: requests.Response
109
- ) -> Generator[MutableMapping[str, Any], None, None]:
110
- raw_string = decompress(response.content).decode(encoding=self.encoding or "utf-8")
111
- yield from self.parse_body_json(orjson.loads(raw_string))
@@ -58,8 +58,7 @@ class ConcurrentPerPartitionCursor(Cursor):
58
58
  CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
59
59
  """
60
60
 
61
- DEFAULT_MAX_PARTITIONS_NUMBER = 10_000
62
- SWITCH_TO_GLOBAL_LIMIT = 1000
61
+ DEFAULT_MAX_PARTITIONS_NUMBER = 10000
63
62
  _NO_STATE: Mapping[str, Any] = {}
64
63
  _NO_CURSOR_STATE: Mapping[str, Any] = {}
65
64
  _GLOBAL_STATE_KEY = "state"
@@ -100,7 +99,7 @@ class ConcurrentPerPartitionCursor(Cursor):
100
99
  self._new_global_cursor: Optional[StreamState] = None
101
100
  self._lookback_window: int = 0
102
101
  self._parent_state: Optional[StreamState] = None
103
- self._number_of_partitions: int = 0
102
+ self._over_limit: int = 0
104
103
  self._use_global_cursor: bool = False
105
104
  self._partition_serializer = PerPartitionKeySerializer()
106
105
 
@@ -234,8 +233,8 @@ class ConcurrentPerPartitionCursor(Cursor):
234
233
  or removed due to being the oldest.
235
234
  """
236
235
  with self._lock:
237
- self._number_of_partitions += 1
238
236
  while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
237
+ self._over_limit += 1
239
238
  # Try removing finished partitions first
240
239
  for partition_key in list(self._cursor_per_partition.keys()):
241
240
  if (
@@ -246,7 +245,7 @@ class ConcurrentPerPartitionCursor(Cursor):
246
245
  partition_key
247
246
  ) # Remove the oldest partition
248
247
  logger.warning(
249
- f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions}."
248
+ f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
250
249
  )
251
250
  break
252
251
  else:
@@ -255,7 +254,7 @@ class ConcurrentPerPartitionCursor(Cursor):
255
254
  1
256
255
  ] # Remove the oldest partition
257
256
  logger.warning(
258
- f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._number_of_partitions}."
257
+ f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
259
258
  )
260
259
 
261
260
  def _set_initial_state(self, stream_state: StreamState) -> None:
@@ -356,10 +355,6 @@ class ConcurrentPerPartitionCursor(Cursor):
356
355
 
357
356
  def observe(self, record: Record) -> None:
358
357
  if not self._use_global_cursor and self.limit_reached():
359
- logger.info(
360
- f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. "
361
- f"Switching to global cursor for {self._stream_name}."
362
- )
363
358
  self._use_global_cursor = True
364
359
 
365
360
  if not record.associated_slice:
@@ -402,4 +397,4 @@ class ConcurrentPerPartitionCursor(Cursor):
402
397
  return cursor
403
398
 
404
399
  def limit_reached(self) -> bool:
405
- return self._number_of_partitions > self.SWITCH_TO_GLOBAL_LIMIT
400
+ return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER