airbyte-cdk 6.37.2.dev1__py3-none-any.whl → 6.38.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/entrypoint.py +6 -6
- airbyte_cdk/logger.py +4 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +28 -7
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +7 -1
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +62 -3
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -3
- airbyte_cdk/sources/declarative/interpolation/macros.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +27 -3
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +18 -4
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +119 -7
- airbyte_cdk/sources/declarative/requesters/README.md +5 -5
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +18 -13
- airbyte_cdk/sources/declarative/requesters/http_requester.py +7 -1
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +2 -2
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +2 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dev0.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dev0.dist-info}/RECORD +22 -21
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dev0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/entrypoint.py
CHANGED
@@ -22,7 +22,7 @@ from requests import PreparedRequest, Response, Session
 
 from airbyte_cdk.connector import TConfig
 from airbyte_cdk.exception_handler import init_uncaught_exception_handler
-from airbyte_cdk.logger import init_logger
+from airbyte_cdk.logger import PRINT_BUFFER, init_logger
 from airbyte_cdk.models import (
     AirbyteConnectionStatus,
     AirbyteMessage,
@@ -337,11 +337,11 @@ def launch(source: Source, args: List[str]) -> None:
     parsed_args = source_entrypoint.parse_args(args)
     # temporarily removes the PrintBuffer because we're seeing weird print behavior for concurrent syncs
     # Refer to: https://github.com/airbytehq/oncall/issues/6235
-
-
-
-
-
+    with PRINT_BUFFER:
+        for message in source_entrypoint.run(parsed_args):
+            # simply printing is creating issues for concurrent CDK as Python uses different two instructions to print: one for the message and
+            # the other for the break line. Adding `\n` to the message ensure that both are printed at the same time
+            print(f"{message}\n", end="")
 
 
 def _init_internal_request_filter() -> None:
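The new `launch` body leans on the fact that a single `print` call reaches the stream as one write, while `print(message)` performs two (the message, then the newline) that can interleave across threads. A minimal sketch of the difference, outside the CDK (thread count and messages are invented):

```python
# Sketch only: contrasting the two print styles under concurrency.
import threading


def noisy(msg: str) -> None:
    for _ in range(3):
        # Two stream writes per call; another thread can slip in between them.
        print(msg)


def atomic(msg: str) -> None:
    for _ in range(3):
        # One stream write per call; the newline travels with the message.
        print(f"{msg}\n", end="")


threads = [threading.Thread(target=atomic, args=(f"message-{i}",)) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```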
airbyte_cdk/logger.py
CHANGED
@@ -16,8 +16,11 @@ from airbyte_cdk.models import (
     Level,
     Type,
 )
+from airbyte_cdk.utils import PrintBuffer
 from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
 
+PRINT_BUFFER = PrintBuffer(flush_interval=0.1)
+
 LOGGING_CONFIG = {
     "version": 1,
     "disable_existing_loggers": False,
@@ -27,7 +30,7 @@ LOGGING_CONFIG = {
     "handlers": {
         "console": {
             "class": "logging.StreamHandler",
-            "stream":
+            "stream": PRINT_BUFFER,
             "formatter": "airbyte",
         },
     },
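`PRINT_BUFFER` is passed directly as the handler's `stream`, which works because `logging.config.dictConfig` hands non-string values straight to `logging.StreamHandler`, and that handler only needs `write()` and `flush()`. A minimal stand-in (not the CDK's `PrintBuffer`) illustrating the duck type:

```python
# Sketch only: a buffered file-like object wired into dictConfig the same way
# the CDK wires PRINT_BUFFER. The class below is invented for illustration.
import logging
import logging.config


class BufferedStream:
    def __init__(self) -> None:
        self._chunks: list[str] = []

    def write(self, text: str) -> None:
        self._chunks.append(text)  # buffer instead of writing through immediately

    def flush(self) -> None:
        print("".join(self._chunks), end="", flush=True)
        self._chunks.clear()


BUFFER = BufferedStream()
logging.config.dictConfig(
    {
        "version": 1,
        "handlers": {
            # dictConfig passes `stream` through to StreamHandler unchanged,
            # so any object with write()/flush() works here.
            "console": {"class": "logging.StreamHandler", "stream": BUFFER},
        },
        "root": {"handlers": ["console"], "level": "INFO"},
    }
)
logging.getLogger().info("buffered")
BUFFER.flush()
```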
airbyte_cdk/sources/declarative/concurrent_declarative_source.py
CHANGED
@@ -31,6 +31,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     DatetimeBasedCursor as DatetimeBasedCursorModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    IncrementingCountCursor as IncrementingCountCursorModel,
+)
 from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
     ModelToComponentFactory,
 )
@@ -222,7 +225,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 and not incremental_sync_component_definition
             )
 
-            if self.
+            if self._is_concurrent_cursor_incremental_without_partition_routing(
                 declarative_stream, incremental_sync_component_definition
             ):
                 stream_state = self._connector_state_manager.get_stream_state(
@@ -254,15 +257,26 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                            stream_slicer=declarative_stream.retriever.stream_slicer,
                        )
                    else:
-
-
+                        if (
+                            incremental_sync_component_definition
+                            and incremental_sync_component_definition.get("type")
+                            == IncrementingCountCursorModel.__name__
+                        ):
+                            cursor = self._constructor.create_concurrent_cursor_from_incrementing_count_cursor(
+                                model_type=IncrementingCountCursorModel,
+                                component_definition=incremental_sync_component_definition,  # type: ignore  # Not None because of the if condition above
+                                stream_name=declarative_stream.name,
+                                stream_namespace=declarative_stream.namespace,
+                                config=config or {},
+                            )
+                        else:
+                            cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
                                model_type=DatetimeBasedCursorModel,
                                component_definition=incremental_sync_component_definition,  # type: ignore  # Not None because of the if condition above
                                stream_name=declarative_stream.name,
                                stream_namespace=declarative_stream.namespace,
                                config=config or {},
                            )
-                        )
                    partition_generator = StreamSlicerPartitionGenerator(
                        partition_factory=DeclarativePartitionFactory(
                            declarative_stream.name,
@@ -389,7 +403,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
 
         return concurrent_streams, synchronous_streams
 
-    def
+    def _is_concurrent_cursor_incremental_without_partition_routing(
         self,
         declarative_stream: DeclarativeStream,
         incremental_sync_component_definition: Mapping[str, Any] | None,
@@ -397,11 +411,18 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         return (
             incremental_sync_component_definition is not None
             and bool(incremental_sync_component_definition)
-            and
-
+            and (
+                incremental_sync_component_definition.get("type", "")
+                in (DatetimeBasedCursorModel.__name__, IncrementingCountCursorModel.__name__)
+            )
             and hasattr(declarative_stream.retriever, "stream_slicer")
             and (
                 isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
+                # IncrementingCountCursorModel is hardcoded to be of type DatetimeBasedCursor
+                # add isintance check here if we want to create a Declarative IncrementingCountCursor
+                # or isinstance(
+                #     declarative_stream.retriever.stream_slicer, IncrementingCountCursor
+                # )
                 or isinstance(declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter)
             )
         )
airbyte_cdk/sources/declarative/datetime/datetime_parser.py
CHANGED
@@ -31,7 +31,8 @@ class DatetimeParser:
             return datetime.datetime.fromtimestamp(float(date), tz=datetime.timezone.utc)
         elif format == "%ms":
             return self._UNIX_EPOCH + datetime.timedelta(milliseconds=int(date))
-
+        elif "%_ms" in format:
+            format = format.replace("%_ms", "%f")
         parsed_datetime = datetime.datetime.strptime(str(date), format)
         if self._is_naive(parsed_datetime):
             return parsed_datetime.replace(tzinfo=datetime.timezone.utc)
@@ -48,6 +49,11 @@ class DatetimeParser:
         if format == "%ms":
             # timstamp() returns a float representing the number of seconds since the unix epoch
             return str(int(dt.timestamp() * 1000))
+        if "%_ms" in format:
+            _format = format.replace("%_ms", "%f")
+            milliseconds = int(dt.microsecond / 1000)
+            formatted_dt = dt.strftime(_format).replace(dt.strftime("%f"), "%03d" % milliseconds)
+            return formatted_dt
         else:
             return dt.strftime(format)
 
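A quick round-trip with the new `%_ms` millisecond token, assuming the `parse`/`format` methods shown above; the timestamp is invented:

```python
# Sketch only: `%_ms` is mapped to `%f` for parsing, and re-rendered as a
# 3-digit millisecond field when formatting.
from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser

parser = DatetimeParser()
fmt = "%Y-%m-%dT%H:%M:%S.%_ms"

dt = parser.parse("2024-01-01T12:00:00.123", fmt)  # parsed via %f, pinned to UTC
print(parser.format(dt, fmt))  # -> 2024-01-01T12:00:00.123
```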
airbyte_cdk/sources/declarative/declarative_component_schema.yaml
CHANGED
@@ -777,6 +777,44 @@ definitions:
       type:
         type: string
         enum: [LegacyToPerPartitionStateMigration]
+  IncrementingCountCursor:
+    title: Incrementing Count Cursor
+    description: Cursor that allows for incremental sync according to a continuously increasing integer.
+    type: object
+    required:
+      - type
+      - cursor_field
+    properties:
+      type:
+        type: string
+        enum: [IncrementingCountCursor]
+      cursor_field:
+        title: Cursor Field
+        description: The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.
+        type: string
+        interpolation_context:
+          - config
+        examples:
+          - "created_at"
+          - "{{ config['record_cursor'] }}"
+      start_value:
+        title: Start Value
+        description: The value that determines the earliest record that should be synced.
+        anyOf:
+          - type: string
+          - type: integer
+        interpolation_context:
+          - config
+        examples:
+          - 0
+          - "{{ config['start_value'] }}"
+      start_value_option:
+        title: Inject Start Value Into Outgoing HTTP Request
+        description: Optionally configures how the start value will be sent in requests to the source API.
+        "$ref": "#/definitions/RequestOption"
+      $parameters:
+        type: object
+        additionalProperties: true
   DatetimeBasedCursor:
     title: Datetime Based Cursor
     description: Cursor to provide incremental capabilities over datetime.
@@ -844,6 +882,7 @@ definitions:
           * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
           * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
           * **%f**: Microsecond (zero-padded to 6 digits) - `000000`
+          * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`
           * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`
           * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
           * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
@@ -1318,6 +1357,7 @@ definitions:
         anyOf:
           - "$ref": "#/definitions/CustomIncrementalSync"
           - "$ref": "#/definitions/DatetimeBasedCursor"
+          - "$ref": "#/definitions/IncrementingCountCursor"
       name:
         title: Name
         description: The stream name.
@@ -1779,6 +1819,9 @@ definitions:
           - stream_interval
           - stream_partition
           - stream_slice
+          - creation_response
+          - polling_response
+          - download_target
         examples:
           - "/products"
           - "/quotes/{{ stream_partition['id'] }}/quote_line_groups"
@@ -2398,6 +2441,7 @@ definitions:
           * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
           * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
           * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`
+          * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`
           * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`
           * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
           * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
@@ -3223,7 +3267,7 @@ definitions:
       - polling_requester
       - download_requester
       - status_extractor
-      -
+      - download_target_extractor
     properties:
       type:
         type: string
@@ -3240,7 +3284,7 @@ definitions:
         anyOf:
           - "$ref": "#/definitions/CustomRecordExtractor"
           - "$ref": "#/definitions/DpathExtractor"
-
+      download_target_extractor:
         description: Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.
         anyOf:
           - "$ref": "#/definitions/CustomRecordExtractor"
@@ -3261,7 +3305,7 @@ definitions:
         anyOf:
           - "$ref": "#/definitions/CustomRequester"
           - "$ref": "#/definitions/HttpRequester"
-
+      download_target_requester:
        description: Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.
        anyOf:
          - "$ref": "#/definitions/CustomRequester"
@@ -3667,6 +3711,21 @@ interpolation:
       self: https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=
       next: https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=0236d6d2
       count: 82
+  - title: creation_response
+    description: The response received from the creation_requester in the AsyncRetriever component.
+    type: object
+    examples:
+      - id: "1234"
+  - title: polling_response
+    description: The response received from the polling_requester in the AsyncRetriever component.
+    type: object
+    examples:
+      - id: "1234"
+  - title: download_target
+    description: The `URL` received from the polling_requester in the AsyncRetriever with jobStatus as `COMPLETED`.
+    type: string
+    examples:
+      - "https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=0236d6d2&filename=xxx_yyy_zzz.csv"
   - title: stream_interval
     description: The current stream interval being processed. The keys are defined by the incremental sync component. Default keys are `start_time` and `end_time`.
     type: object
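A hypothetical manifest fragment (expressed as a Python dict; the stream name, cursor field, and request option are invented) showing how the new `IncrementingCountCursor` would be declared:

```python
# Sketch only: a declarative stream using the new cursor type defined above.
stream_definition = {
    "type": "DeclarativeStream",
    "name": "events",
    "incremental_sync": {
        "type": "IncrementingCountCursor",
        "cursor_field": "id",
        "start_value": "{{ config['start_value'] }}",
        "start_value_option": {
            "type": "RequestOption",
            "inject_into": "request_parameter",
            "field_name": "since_id",
        },
    },
}
```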
airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py
CHANGED
@@ -5,7 +5,7 @@ import json
 import logging
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from io import BufferedIOBase,
+from io import BufferedIOBase, TextIOWrapper
 from typing import Any, Generator, MutableMapping, Optional
 
 import orjson
@@ -124,8 +124,7 @@ class CsvParser(Parser):
         """
         Parse CSV data from decompressed bytes.
         """
-
-        text_data = TextIOWrapper(bytes_data, encoding=self.encoding)  # type: ignore
+        text_data = TextIOWrapper(data, encoding=self.encoding)  # type: ignore
         reader = csv.DictReader(text_data, delimiter=self._get_delimiter() or ",")
         for row in reader:
             yield row
airbyte_cdk/sources/declarative/models/declarative_component_schema.py
CHANGED
@@ -1508,6 +1508,28 @@ class AuthFlow(BaseModel):
     oauth_config_specification: Optional[OAuthConfigSpecification] = None
 
 
+class IncrementingCountCursor(BaseModel):
+    type: Literal["IncrementingCountCursor"]
+    cursor_field: str = Field(
+        ...,
+        description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.",
+        examples=["created_at", "{{ config['record_cursor'] }}"],
+        title="Cursor Field",
+    )
+    start_value: Optional[Union[str, int]] = Field(
+        None,
+        description="The value that determines the earliest record that should be synced.",
+        examples=[0, "{{ config['start_value'] }}"],
+        title="Start Value",
+    )
+    start_value_option: Optional[RequestOption] = Field(
+        None,
+        description="Optionally configures how the start value will be sent in requests to the source API.",
+        title="Inject Start Value Into Outgoing HTTP Request",
+    )
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
 class DatetimeBasedCursor(BaseModel):
     type: Literal["DatetimeBasedCursor"]
     clamping: Optional[Clamping] = Field(
@@ -1948,7 +1970,9 @@ class DeclarativeStream(BaseModel):
         description="Component used to coordinate how records are extracted across stream slices and request pages.",
         title="Retriever",
     )
-    incremental_sync: Optional[
+    incremental_sync: Optional[
+        Union[CustomIncrementalSync, DatetimeBasedCursor, IncrementingCountCursor]
+    ] = Field(
         None,
         description="Component used to fetch data incrementally based on a time field in the data.",
         title="Incremental Sync",
@@ -2263,7 +2287,7 @@ class AsyncRetriever(BaseModel):
     status_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
         ..., description="Responsible for fetching the actual status of the async job."
     )
-
+    download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
         ...,
         description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.",
     )
@@ -2278,7 +2302,7 @@ class AsyncRetriever(BaseModel):
         ...,
         description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
     )
-
+    download_target_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
         None,
         description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
     )
airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py
CHANGED
@@ -45,7 +45,7 @@ class AirbyteCustomCodeNotPermittedError(Exception):
 def _hash_text(input_text: str, hash_type: str = "md5") -> str:
     """Return the hash of the input text using the specified hash type."""
     if not input_text:
-        raise ValueError("
+        raise ValueError("Hash input text cannot be empty.")
 
     hash_object = CHECKSUM_FUNCTIONS[hash_type]()
     hash_object.update(input_text.encode())
@@ -68,6 +68,10 @@ def validate_python_code(
 
     Currently we fail if no checksums are provided, although this may change in the future.
     """
+    if not code_text:
+        # No code provided, nothing to validate.
+        return
+
     if not checksums:
         raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")
 
@@ -77,8 +81,18 @@ def validate_python_code(
             f"Unsupported checksum type: {checksum_type}. Supported checksum types are: {CHECKSUM_FUNCTIONS.keys()}"
         )
 
-
-
+        calculated_checksum = _hash_text(code_text, checksum_type)
+        if calculated_checksum != checksum:
+            raise AirbyteCodeTamperedError(
+                f"{checksum_type} checksum does not match."
+                + str(
+                    {
+                        "expected_checksum": checksum,
+                        "actual_checksum": calculated_checksum,
+                        "code_text": code_text,
+                    }
+                ),
+            )
 
 
 def get_registered_components_module(
@@ -94,7 +108,7 @@ def get_registered_components_module(
 
     Returns `None` if no components is provided and the `components` module is not found.
     """
-    if config and INJECTED_COMPONENTS_PY
+    if config and config.get(INJECTED_COMPONENTS_PY, None):
         if not custom_code_execution_permitted():
             raise AirbyteCustomCodeNotPermittedError
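A sketch of the checksum handshake the new code enforces, assuming the `validate_python_code(code_text, checksums)` signature visible in this hunk; the snippet of custom code is invented:

```python
# Sketch only: compute an md5 checksum for injected components code and
# validate it. Tampering with code_text afterwards would raise
# AirbyteCodeTamperedError.
import hashlib

from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
    validate_python_code,
)

code_text = "def post_process(record):\n    return record\n"
checksums = {"md5": hashlib.md5(code_text.encode()).hexdigest()}

validate_python_code(code_text=code_text, checksums=checksums)  # passes silently
```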
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
CHANGED
@@ -245,6 +245,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     HttpResponseFilter as HttpResponseFilterModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    IncrementingCountCursor as IncrementingCountCursorModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     InlineSchemaLoader as InlineSchemaLoaderModel,
 )
@@ -496,6 +499,9 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_sta
     CustomFormatConcurrentStreamStateConverter,
     DateTimeStreamStateConverter,
 )
+from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
+    IncrementingCountStreamStateConverter,
+)
 from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
 from airbyte_cdk.sources.types import Config
 from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
@@ -584,6 +590,7 @@ class ModelToComponentFactory:
             FlattenFieldsModel: self.create_flatten_fields,
             DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
             IterableDecoderModel: self.create_iterable_decoder,
+            IncrementingCountCursorModel: self.create_incrementing_count_cursor,
             XmlDecoderModel: self.create_xml_decoder,
             JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
             DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
@@ -1189,6 +1196,70 @@ class ModelToComponentFactory:
             clamping_strategy=clamping_strategy,
         )
 
+    def create_concurrent_cursor_from_incrementing_count_cursor(
+        self,
+        model_type: Type[BaseModel],
+        component_definition: ComponentDefinition,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        config: Config,
+        message_repository: Optional[MessageRepository] = None,
+        **kwargs: Any,
+    ) -> ConcurrentCursor:
+        # Per-partition incremental streams can dynamically create child cursors which will pass their current
+        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
+        # incoming state and connector_state_manager that is initialized when the component factory is created
+        stream_state = (
+            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
+            if "stream_state" not in kwargs
+            else kwargs["stream_state"]
+        )
+
+        component_type = component_definition.get("type")
+        if component_definition.get("type") != model_type.__name__:
+            raise ValueError(
+                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
+            )
+
+        incrementing_count_cursor_model = model_type.parse_obj(component_definition)
+
+        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
+            raise ValueError(
+                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
+            )
+
+        interpolated_start_value = (
+            InterpolatedString.create(
+                incrementing_count_cursor_model.start_value,  # type: ignore
+                parameters=incrementing_count_cursor_model.parameters or {},
+            )
+            if incrementing_count_cursor_model.start_value
+            else 0
+        )
+
+        interpolated_cursor_field = InterpolatedString.create(
+            incrementing_count_cursor_model.cursor_field,
+            parameters=incrementing_count_cursor_model.parameters or {},
+        )
+        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
+
+        connector_state_converter = IncrementingCountStreamStateConverter(
+            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
+        )
+
+        return ConcurrentCursor(
+            stream_name=stream_name,
+            stream_namespace=stream_namespace,
+            stream_state=stream_state,
+            message_repository=message_repository or self._message_repository,
+            connector_state_manager=self._connector_state_manager,
+            connector_state_converter=connector_state_converter,
+            cursor_field=cursor_field,
+            slice_boundary_fields=None,
+            start=interpolated_start_value,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
+            end_provider=connector_state_converter.get_end_provider(),  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
+        )
+
     def _assemble_weekday(self, weekday: str) -> Weekday:
         match weekday:
             case "MONDAY":
@@ -1622,6 +1693,31 @@ class ModelToComponentFactory:
                 config=config,
                 parameters=model.parameters or {},
             )
+        elif model.incremental_sync and isinstance(
+            model.incremental_sync, IncrementingCountCursorModel
+        ):
+            cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore
+
+            start_time_option = (
+                self._create_component_from_model(
+                    cursor_model.start_value_option,  # type: ignore  # mypy still thinks cursor_model of type DatetimeBasedCursor
+                    config,
+                    parameters=cursor_model.parameters or {},
+                )
+                if cursor_model.start_value_option  # type: ignore  # mypy still thinks cursor_model of type DatetimeBasedCursor
+                else None
+            )
+
+            # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
+            # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
+            partition_field_start = "start"
+
+            request_options_provider = DatetimeBasedRequestOptionsProvider(
+                start_time_option=start_time_option,
+                partition_field_start=partition_field_start,
+                config=config,
+                parameters=model.parameters or {},
+            )
         else:
             request_options_provider = None
 
@@ -2111,6 +2207,22 @@ class ModelToComponentFactory:
             stream_response=False if self._emit_connector_builder_messages else True,
         )
 
+    @staticmethod
+    def create_incrementing_count_cursor(
+        model: IncrementingCountCursorModel, config: Config, **kwargs: Any
+    ) -> DatetimeBasedCursor:
+        # This should not actually get used anywhere at runtime, but needed to add this to pass checks since
+        # we still parse models into components. The issue is that there's no runtime implementation of a
+        # IncrementingCountCursor.
+        # A known and expected issue with this stub is running a check with the declared IncrementingCountCursor because it is run without ConcurrentCursor.
+        return DatetimeBasedCursor(
+            cursor_field=model.cursor_field,
+            datetime_format="%Y-%m-%d",
+            start_datetime="2024-12-12",
+            config=config,
+            parameters={},
+        )
+
     @staticmethod
     def create_iterable_decoder(
         model: IterableDecoderModel, config: Config, **kwargs: Any
@@ -2744,32 +2856,32 @@ class ModelToComponentFactory:
             if model.delete_requester
             else None
         )
-
+        download_target_requester = (
             self._create_component_from_model(
-                model=model.
+                model=model.download_target_requester,
                 decoder=decoder,
                 config=config,
                 name=f"job extract_url - {name}",
             )
-            if model.
+            if model.download_target_requester
             else None
         )
         status_extractor = self._create_component_from_model(
             model=model.status_extractor, decoder=decoder, config=config, name=name
        )
-
-        model=model.
+        download_target_extractor = self._create_component_from_model(
+            model=model.download_target_extractor, decoder=decoder, config=config, name=name
        )
        job_repository: AsyncJobRepository = AsyncHttpJobRepository(
            creation_requester=creation_requester,
            polling_requester=polling_requester,
            download_retriever=download_retriever,
-
+            download_target_requester=download_target_requester,
            abort_requester=abort_requester,
            delete_requester=delete_requester,
            status_extractor=status_extractor,
            status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
-
+            download_target_extractor=download_target_extractor,
        )
 
        async_job_partition_router = AsyncJobPartitionRouter(
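A sketch of driving the new factory method directly, assuming the keyword arguments shown in the hunk above; the component definition, stream name, and empty config are invented:

```python
# Sketch only: constructing a ConcurrentCursor from an IncrementingCountCursor
# component definition via the factory.
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    IncrementingCountCursor as IncrementingCountCursorModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()
cursor = factory.create_concurrent_cursor_from_incrementing_count_cursor(
    model_type=IncrementingCountCursorModel,
    component_definition={
        "type": "IncrementingCountCursor",
        "cursor_field": "id",
        "start_value": 0,
    },
    stream_name="events",
    stream_namespace=None,
    config={},
)
print(type(cursor).__name__)  # -> ConcurrentCursor
```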
airbyte_cdk/sources/declarative/requesters/README.md
CHANGED
@@ -1,8 +1,8 @@
 # AsyncHttpJobRepository sequence diagram
 
 - Components marked as optional are not required and can be ignored.
-- if `
-- interpolation_context, e.g. `
+- if `download_target_requester` is not provided, `download_target_extractor` will get urls from the `polling_response`
+- interpolation_context, e.g. `creation_response` or `polling_response` can be obtained from stream_slice
 
 ```mermaid
 ---
@@ -12,7 +12,7 @@ sequenceDiagram
     participant AsyncHttpJobRepository as AsyncOrchestrator
     participant CreationRequester as creation_requester
     participant PollingRequester as polling_requester
-    participant UrlRequester as
+    participant UrlRequester as download_target_requester (Optional)
     participant DownloadRetriever as download_retriever
     participant AbortRequester as abort_requester (Optional)
     participant DeleteRequester as delete_requester (Optional)
@@ -25,14 +25,14 @@ sequenceDiagram
 
     loop Poll for job status
         AsyncHttpJobRepository ->> PollingRequester: Check job status
-        PollingRequester ->> Reporting Server: Status request (interpolation_context: `
+        PollingRequester ->> Reporting Server: Status request (interpolation_context: `creation_response`)
         Reporting Server -->> PollingRequester: Status response
         PollingRequester -->> AsyncHttpJobRepository: Job status
     end
 
     alt Status: Ready
         AsyncHttpJobRepository ->> UrlRequester: Request download URLs (if applicable)
-        UrlRequester ->> Reporting Server: URL request (interpolation_context: `
+        UrlRequester ->> Reporting Server: URL request (interpolation_context: `polling_response`)
         Reporting Server -->> UrlRequester: Download URLs
         UrlRequester -->> AsyncHttpJobRepository: Download URLs
 
airbyte_cdk/sources/declarative/requesters/http_job_repository.py
CHANGED
@@ -43,13 +43,13 @@ class AsyncHttpJobRepository(AsyncJobRepository):
     delete_requester: Optional[Requester]
     status_extractor: DpathExtractor
     status_mapping: Mapping[str, AsyncJobStatus]
-
+    download_target_extractor: DpathExtractor
 
     job_timeout: Optional[timedelta] = None
     record_extractor: RecordExtractor = field(
         init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
     )
-
+    download_target_requester: Optional[Requester] = (
         None  # use it in case polling_requester provides some <id> and extra request is needed to obtain list of urls to download from
     )
 
@@ -211,12 +211,15 @@ class AsyncHttpJobRepository(AsyncJobRepository):
 
         """
 
-        for
+        for target_url in self._get_download_targets(job):
             job_slice = job.job_parameters()
             stream_slice = StreamSlice(
                 partition=job_slice.partition,
                 cursor_slice=job_slice.cursor_slice,
-                extra_fields={
+                extra_fields={
+                    **job_slice.extra_fields,
+                    "download_target": target_url,
+                },
             )
             for message in self.download_retriever.read_records({}, stream_slice):
                 if isinstance(message, Record):
@@ -269,27 +272,29 @@ class AsyncHttpJobRepository(AsyncJobRepository):
             del self._polling_job_response_by_id[job_id]
 
     def _get_create_job_stream_slice(self, job: AsyncJob) -> StreamSlice:
+        creation_response = self._create_job_response_by_id[job.api_job_id()].json()
         stream_slice = StreamSlice(
-            partition={
+            partition={},
             cursor_slice={},
+            extra_fields={"creation_response": creation_response},
         )
         return stream_slice
 
-    def
-        if not self.
+    def _get_download_targets(self, job: AsyncJob) -> Iterable[str]:
+        if not self.download_target_requester:
             url_response = self._polling_job_response_by_id[job.api_job_id()]
         else:
+            polling_response = self._polling_job_response_by_id[job.api_job_id()].json()
             stream_slice: StreamSlice = StreamSlice(
-                partition={
-                    "polling_job_response": self._polling_job_response_by_id[job.api_job_id()]
-                },
+                partition={},
                 cursor_slice={},
+                extra_fields={"polling_response": polling_response},
             )
-            url_response = self.
+            url_response = self.download_target_requester.send_request(stream_slice=stream_slice)  # type: ignore # we expect download_target_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
             if not url_response:
                 raise AirbyteTracedException(
-                    internal_message="Always expect a response or an exception from
+                    internal_message="Always expect a response or an exception from download_target_requester",
                     failure_type=FailureType.system_error,
                 )
 
-        yield from self.
+        yield from self.download_target_extractor.extract_records(url_response)  # type: ignore # we expect download_target_extractor to always return list of strings
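The repository now carries the raw responses in `extra_fields` rather than in `partition`, so they feed interpolation without changing the slice's identity. A minimal sketch (the payload is invented):

```python
# Sketch only: the slice shape AsyncHttpJobRepository now builds for the
# download_target_requester.
from airbyte_cdk.sources.types import StreamSlice

polling_response = {"id": "1234", "url": "https://example.com/report.csv"}  # invented
stream_slice = StreamSlice(
    partition={},
    cursor_slice={},
    extra_fields={"polling_response": polling_response},
)
print(stream_slice.extra_fields["polling_response"]["url"])
```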
airbyte_cdk/sources/declarative/requesters/http_requester.py
CHANGED
@@ -85,7 +85,7 @@ class HttpRequester(Requester):
         self._parameters = parameters
 
         if self.error_handler is not None and hasattr(self.error_handler, "backoff_strategies"):
-            backoff_strategies = self.error_handler.backoff_strategies
+            backoff_strategies = self.error_handler.backoff_strategies  # type: ignore
         else:
             backoff_strategies = None
 
@@ -125,6 +125,12 @@ class HttpRequester(Requester):
         kwargs = {
             "stream_slice": stream_slice,
             "next_page_token": next_page_token,
+            # update the interpolation context with extra fields, if passed.
+            **(
+                stream_slice.extra_fields
+                if stream_slice is not None and hasattr(stream_slice, "extra_fields")
+                else {}
+            ),
         }
         path = str(self._path.eval(self.config, **kwargs))
         return path.lstrip("/")
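With `extra_fields` merged into the evaluation kwargs, a key like `download_target` becomes a top-level interpolation name. A sketch of that merge, mirroring `get_path` above (the URL is invented):

```python
# Sketch only: extra_fields become top-level names in the Jinja context, so a
# path of "{{ download_target }}" resolves to the extracted URL.
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import (
    InterpolatedString,
)
from airbyte_cdk.sources.types import StreamSlice

path = InterpolatedString.create("{{ download_target }}", parameters={})
stream_slice = StreamSlice(
    partition={},
    cursor_slice={},
    extra_fields={"download_target": "https://example.com/report.csv"},  # invented
)
kwargs = {"stream_slice": stream_slice, **stream_slice.extra_fields}
print(path.eval({}, **kwargs))  # -> https://example.com/report.csv
```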
airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py
CHANGED
@@ -6,7 +6,7 @@ import re
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
 
-import
+import anyascii
 
 from airbyte_cdk.sources.declarative.transformations import RecordTransformation
 from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
@@ -48,7 +48,7 @@ class KeysToSnakeCaseTransformation(RecordTransformation):
         return self.tokens_to_snake_case(tokens)
 
     def normalize_key(self, key: str) -> str:
-        return
+        return str(anyascii.anyascii(key))
 
     def tokenize_key(self, key: str) -> List[str]:
         tokens = []
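`anyascii` replaces `Unidecode` here (see the METADATA hunk below) and transliterates arbitrary Unicode into ASCII before keys are snake-cased. A quick illustration:

```python
# anyascii.anyascii() maps any Unicode string to an ASCII approximation.
import anyascii

print(anyascii.anyascii("Häufigkeit"))  # -> Haufigkeit
print(anyascii.anyascii("Ünïcödé"))     # -> Unicode
```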
airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py
CHANGED
@@ -4,7 +4,7 @@
 
 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import TYPE_CHECKING, Any, List, MutableMapping, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Callable, List, MutableMapping, Optional, Tuple
 
 if TYPE_CHECKING:
     from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
@@ -12,6 +12,7 @@ if TYPE_CHECKING:
 
 class ConcurrencyCompatibleStateType(Enum):
     date_range = "date-range"
+    integer = "integer"
 
 
 class AbstractStreamStateConverter(ABC):
airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py
ADDED
@@ -0,0 +1,92 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+from typing import Any, Callable, MutableMapping, Optional, Tuple
+
+from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
+from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
+    AbstractStreamStateConverter,
+    ConcurrencyCompatibleStateType,
+)
+
+
+class IncrementingCountStreamStateConverter(AbstractStreamStateConverter):
+    def _from_state_message(self, value: Any) -> Any:
+        return value
+
+    def _to_state_message(self, value: Any) -> Any:
+        return value
+
+    @classmethod
+    def get_end_provider(cls) -> Callable[[], float]:
+        return lambda: float("inf")
+
+    def convert_from_sequential_state(
+        self,
+        cursor_field: "CursorField",  # to deprecate as it is only needed for sequential state
+        stream_state: MutableMapping[str, Any],
+        start: Optional[Any],
+    ) -> Tuple[Any, MutableMapping[str, Any]]:
+        """
+        Convert the state message to the format required by the ConcurrentCursor.
+
+        e.g.
+        {
+            "state_type": ConcurrencyCompatibleStateType.date_range.value,
+            "metadata": { … },
+            "slices": [
+                {"start": "10", "end": "2021-01-18T21:18:20.000+00:00"},
+            ]
+        }
+        """
+        sync_start = self._get_sync_start(cursor_field, stream_state, start)
+        if self.is_state_message_compatible(stream_state):
+            return sync_start, stream_state
+
+        # Create a slice to represent the records synced during prior syncs.
+        # The start and end are the same to avoid confusion as to whether the records for this slice
+        # were actually synced
+        slices = [
+            {
+                self.START_KEY: start if start is not None else sync_start,
+                self.END_KEY: sync_start,  # this may not be relevant anymore
+                self.MOST_RECENT_RECORD_KEY: sync_start,
+            }
+        ]
+
+        return sync_start, {
+            "state_type": ConcurrencyCompatibleStateType.integer.value,
+            "slices": slices,
+            "legacy": stream_state,
+        }
+
+    def parse_value(self, value: int) -> int:
+        return value
+
+    @property
+    def zero_value(self) -> int:
+        return 0
+
+    def increment(self, value: int) -> int:
+        return value + 1
+
+    def output_format(self, value: int) -> int:
+        return value
+
+    def _get_sync_start(
+        self,
+        cursor_field: CursorField,
+        stream_state: MutableMapping[str, Any],
+        start: Optional[int],
+    ) -> int:
+        sync_start = start if start is not None else self.zero_value
+        prev_sync_low_water_mark: Optional[int] = (
+            stream_state[cursor_field.cursor_field_key]
+            if cursor_field.cursor_field_key in stream_state
+            else None
+        )
+        if prev_sync_low_water_mark and prev_sync_low_water_mark >= sync_start:
+            return prev_sync_low_water_mark
+        else:
+            return sync_start
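A sketch of the sequential-to-concurrent conversion the new converter performs; the legacy state value is invented:

```python
# Sketch only: converting a legacy sequential state into the concurrent format.
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
    IncrementingCountStreamStateConverter,
)

converter = IncrementingCountStreamStateConverter(is_sequential_state=True)
sync_start, concurrent_state = converter.convert_from_sequential_state(
    cursor_field=CursorField("id"),
    stream_state={"id": 42},  # legacy state: highest id seen in a prior sync
    start=0,
)
print(sync_start)                          # -> 42 (the prior low-water mark wins over start)
print(concurrent_state["state_type"])      # -> "integer"
print(concurrent_state["slices"])          # -> [{"start": 0, "end": 42, "most_recent_cursor_value": 42}]
```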
{airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dev0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: airbyte-cdk
-Version: 6.
+Version: 6.38.0.dev0
 Summary: A framework for writing Airbyte Connectors.
 Home-page: https://airbyte.com
 License: MIT
@@ -22,8 +22,8 @@ Provides-Extra: sql
 Provides-Extra: vector-db-based
 Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
 Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
-Requires-Dist: Unidecode (>=1.3,<2.0)
 Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
+Requires-Dist: anyascii (>=0.3.2,<0.4.0)
 Requires-Dist: avro (>=1.11.2,<1.13.0) ; extra == "file-based"
 Requires-Dist: backoff
 Requires-Dist: cachetools
{airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dev0.dist-info}/RECORD
CHANGED
@@ -26,9 +26,9 @@ airbyte_cdk/destinations/vector_db_based/indexer.py,sha256=beiSi2Uu67EoTr7yQSaCJ
 airbyte_cdk/destinations/vector_db_based/test_utils.py,sha256=MkqLiOJ5QyKbV4rNiJhe-BHM7FD-ADHQ4bQGf4c5lRY,1932
 airbyte_cdk/destinations/vector_db_based/utils.py,sha256=FOyEo8Lc-fY8UyhpCivhZtIqBRyxf3cUt6anmK03fUY,1127
 airbyte_cdk/destinations/vector_db_based/writer.py,sha256=nZ00xPiohElJmYktEZZIhr0m5EDETCHGhg0Lb2S7A20,5095
-airbyte_cdk/entrypoint.py,sha256=
+airbyte_cdk/entrypoint.py,sha256=NRJv5BNZRSUEVTmNBa9N7ih6fW5sg4DwL0nkB9kI99Y,18570
 airbyte_cdk/exception_handler.py,sha256=D_doVl3Dt60ASXlJsfviOCswxGyKF2q0RL6rif3fNks,2013
-airbyte_cdk/logger.py,sha256=
+airbyte_cdk/logger.py,sha256=1cURbvawbunCAV178q-XhTHcbAQZTSf07WhU7U9AXWU,3744
 airbyte_cdk/models/__init__.py,sha256=MOTiuML2wShBaMSIwikdjyye2uUWBjo4J1QFSbnoiM4,2075
 airbyte_cdk/models/airbyte_protocol.py,sha256=MCmLir67-hF12YM5OKzeGbWrlxr7ChG_OQSE1xG8EIU,3748
 airbyte_cdk/models/airbyte_protocol_serializers.py,sha256=s6SaFB2CMrG_7jTQGn_fhFbQ1FUxhCxf5kq2RWGHMVI,1749
@@ -67,15 +67,15 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
 airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
 airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
 airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
-airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
+airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=rAp-sgld4n8Tmybz-51m7VcYXqKwzKDpCJVr1elmkRc,26824
 airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
-airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=
+airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=0qs4hhmh_XOy2B4MHCn2qVMM79C6MizIBqnvpZj1aSE,2923
 airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
-airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
+airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=Vsem7b0YL_kaLeTwY_kX-EqHzuBDjik0lBN7e3srXT4,147126
 airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
 airbyte_cdk/sources/declarative/declarative_stream.py,sha256=venZjfpvtqr3oFSuvMBWtn4h9ayLhD4L65ACuXCDZ64,10445
 airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
-airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=
+airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=DJbWaaJ5LHCBpyWz-4bEw8rqtJYqabEYZtxnfRtWFE0,4946
 airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=sl-Gt8lXi7yD2Q-sD8je5QS2PbgrgsYjxRLWsay7DMc,826
 airbyte_cdk/sources/declarative/decoders/json_decoder.py,sha256=BdWpXXPhEGf_zknggJmhojLosmxuw51RBVTS0jvdCPc,2080
 airbyte_cdk/sources/declarative/decoders/noop_decoder.py,sha256=iZh0yKY_JzgBnJWiubEusf5c0o6Khd-8EWFWT-8EgFo,542
@@ -107,19 +107,19 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py,sha
 airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=CQkHqGlfa87G6VYMtBAQWin7ECKpfMdrDcg0JO5_rhc,3212
 airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=9IoeuWam3L6GyN10L6U8xNWXmkt9cnahSDNkez1OmFY,982
 airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=UQeuS4Vpyp4hlOn-R3tRyeBX0e9IoV6jQ6gH-Jz8lY0,7182
-airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=
+airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=uuXBZUWDWM-sPcUKjNSPRN657QhNQCx_hnhTuJj2zOA,5129
 airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=TN6GCgLXaWDONTaJwQ3A5ELqC-sxwKz-UYSraJYB-dI,17078
 airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
 airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
 airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
-airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
+airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=Kd8HvvXqvGWZBey99eQzbK5u2k1ItnRAi2h7C7UNwBQ,103225
 airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
-airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=
+airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=jDw_TttD3_hpfevXOH-0Ws0eRuqt6wvED0BqosGPRjI,5938
 airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
 airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
 airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
-airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
+airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=Mx0KJGbqIZeUWduKy-UvpVH-DRm0pzXDcz203r69oNY,140619
 airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
 airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
 airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -127,7 +127,7 @@ airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha25
 airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
 airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=SKzKjSyfccq4dxGIh-J6ejrgkCHzaiTIazmbmeQiRD4,1942
 airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py,sha256=LlWj-Ofs-xfjlqmDzH8OYpyblP2Pb8bPDdR9g1UZyt0,17693
-airbyte_cdk/sources/declarative/requesters/README.md,sha256=
+airbyte_cdk/sources/declarative/requesters/README.md,sha256=DQll2qsIzzTiiP35kJp16ONpr7cFeUQNgPfhl5krB24,2675
 airbyte_cdk/sources/declarative/requesters/__init__.py,sha256=d7a3OoHbqaJDyyPli3nqqJ2yAW_SLX6XDaBAKOwvpxw,364
 airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py,sha256=SkEDcJxlT1683rNx93K9whoS0OyUukkuOfToGtgpF58,776
 airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py,sha256=1WZdpFmWL6W_Dko0qjflTaKIWeqt8jHT-D6HcujIp3s,884
@@ -142,8 +142,8 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.
 airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py,sha256=q0YkeYUUWO6iErUy0vjqiOkhg8_9d5YcCmtlpXAJJ9E,1314
 airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py,sha256=Tan66odx8VHzfdyyXMQkXz2pJYksllGqvxmpoajgcK4,669
 airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py,sha256=E-fQbt4ShfxZVoqfnmOx69C6FUPWZz8BIqI3DN9Kcjs,7935
-airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=
-airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=
+airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=4wpP0ZNTMLugi-Rc1OFdFaxWfRZSl45nzhHqMFCE8SQ,11924
+airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=Sie8IyntFu66UoJASwpWV0WrRDBr9lpHWSOws7vZfM0,15228
 airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
 airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=ZW4lwWNAzb4zL0jKc-HjowP5-y0Zg9xi0YlK6tkx_XY,12057
 airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py,sha256=j6j9QRPaTbKQ2N661RFVKthhkWiodEp6ut0tKeEd0Ng,2019
@@ -189,7 +189,7 @@ airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py,sha256=1
 airbyte_cdk/sources/declarative/transformations/flatten_fields.py,sha256=yT3owG6rMKaRX-LJ_T-jSTnh1B5NoAHyH4YZN9yOvE8,1758
 airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py,sha256=vbIn6ump-Ut6g20yMub7PFoPBhOKVtrHSAUdcOUdLfw,1999
 airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py,sha256=RTs5KX4V3hM7A6QN1WlGF21YccTIyNH6qQI9IMb__hw,670
-airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py,sha256=
+airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py,sha256=_3ldEbsA7tQK-zzeU_cG86D1_1SY3wAo1vHE0zXrOck,2265
 airbyte_cdk/sources/declarative/transformations/remove_fields.py,sha256=EwUP0SZ2p4GRJ6Q8CUzlz9dcUeEidEFDlI2IBye2tlc,2745
 airbyte_cdk/sources/declarative/transformations/transformation.py,sha256=4sXtx9cNY2EHUPq-xHvDs8GQEBUy3Eo6TkRLKHPXx68,1161
 airbyte_cdk/sources/declarative/types.py,sha256=yqx0xlZv_76tkC7fqJKefmvl4GJJ8mXbeddwVV8XRJU,778
@@ -284,8 +284,9 @@ airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py,sha256=
 airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py,sha256=nbdkkHoN0NFeSs7YUFfzY1Lg5Jrt8fWY_ln3YrhY-Ko,544
 airbyte_cdk/sources/streams/concurrent/partitions/types.py,sha256=frPVvHtY7vLxpGEbMQzNvF1Y52ZVyct9f1DDhGoRjwY,1166
 airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py,sha256=
+airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py,sha256=JCRanZBAC8a0pPuzHCHiJ1irHgFkDd83l2K5jA1xRkU,6853
 airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=x8MLm1pTMfLNHvMF3P1ixYkYt_xjpbaIwnvhY_ofdBo,8076
+airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py,sha256=bC6L82nsErXcFSPlxcdp4SneJ7qFuqCelP3-8svEh5E,3054
 airbyte_cdk/sources/streams/core.py,sha256=jiYW6w8cjNjzXMd8U8Gt-02fYYU7b0ciXSSSnGvFRak,32219
 airbyte_cdk/sources/streams/http/__init__.py,sha256=AGiEZ5B1Joi9ZnFpkJLT7F3QLpCAaBgAeVWy-1znmZw,311
 airbyte_cdk/sources/streams/http/availability_strategy.py,sha256=sovoGFThZr-doMN9vJvTuJBrvkwQVIO0qTQO64pGZPY,2428
@@ -360,9 +361,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.
-airbyte_cdk-6.
-airbyte_cdk-6.
-airbyte_cdk-6.
-airbyte_cdk-6.
-airbyte_cdk-6.
+airbyte_cdk-6.38.0.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.38.0.dev0.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
+airbyte_cdk-6.38.0.dev0.dist-info/METADATA,sha256=1EhdJFpsvmQYoaGXbq_JPTxW9hAy5n81U-fuQsfp24A,6018
+airbyte_cdk-6.38.0.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+airbyte_cdk-6.38.0.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
+airbyte_cdk-6.38.0.dev0.dist-info/RECORD,,
{airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dev0.dist-info}/LICENSE.txt: File without changes
{airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dev0.dist-info}/LICENSE_SHORT: File without changes
{airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dev0.dist-info}/WHEEL: File without changes
{airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dev0.dist-info}/entry_points.txt: File without changes