airbyte-cdk 6.36.5.dev0__py3-none-any.whl → 6.37.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +28 -7
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +39 -0
- airbyte_cdk/sources/declarative/interpolation/macros.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +25 -1
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +113 -13
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +2 -2
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +6 -11
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +2 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- {airbyte_cdk-6.36.5.dev0.dist-info → airbyte_cdk-6.37.0.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.36.5.dev0.dist-info → airbyte_cdk-6.37.0.dist-info}/RECORD +15 -14
- {airbyte_cdk-6.36.5.dev0.dist-info → airbyte_cdk-6.37.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.36.5.dev0.dist-info → airbyte_cdk-6.37.0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.36.5.dev0.dist-info → airbyte_cdk-6.37.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.36.5.dev0.dist-info → airbyte_cdk-6.37.0.dist-info}/entry_points.txt +0 -0
@@ -31,6 +31,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
31
31
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
32
32
|
DatetimeBasedCursor as DatetimeBasedCursorModel,
|
33
33
|
)
|
34
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
35
|
+
IncrementingCountCursor as IncrementingCountCursorModel,
|
36
|
+
)
|
34
37
|
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
35
38
|
ModelToComponentFactory,
|
36
39
|
)
|
@@ -222,7 +225,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
222
225
|
and not incremental_sync_component_definition
|
223
226
|
)
|
224
227
|
|
225
|
-
if self._is_datetime_incremental_without_partition_routing(
|
228
|
+
if self._is_concurrent_cursor_incremental_without_partition_routing(
|
226
229
|
declarative_stream, incremental_sync_component_definition
|
227
230
|
):
|
228
231
|
stream_state = self._connector_state_manager.get_stream_state(
|
@@ -254,15 +257,26 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
254
257
|
stream_slicer=declarative_stream.retriever.stream_slicer,
|
255
258
|
)
|
256
259
|
else:
|
257
|
-
cursor = (
|
258
|
-
self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
|
260
|
+
if (
|
261
|
+
incremental_sync_component_definition
|
262
|
+
and incremental_sync_component_definition.get("type")
|
263
|
+
== IncrementingCountCursorModel.__name__
|
264
|
+
):
|
265
|
+
cursor = self._constructor.create_concurrent_cursor_from_incrementing_count_cursor(
|
266
|
+
model_type=IncrementingCountCursorModel,
|
267
|
+
component_definition=incremental_sync_component_definition, # type: ignore # Not None because of the if condition above
|
268
|
+
stream_name=declarative_stream.name,
|
269
|
+
stream_namespace=declarative_stream.namespace,
|
270
|
+
config=config or {},
|
271
|
+
)
|
272
|
+
else:
|
273
|
+
cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
|
259
274
|
model_type=DatetimeBasedCursorModel,
|
260
275
|
component_definition=incremental_sync_component_definition, # type: ignore # Not None because of the if condition above
|
261
276
|
stream_name=declarative_stream.name,
|
262
277
|
stream_namespace=declarative_stream.namespace,
|
263
278
|
config=config or {},
|
264
279
|
)
|
265
|
-
)
|
266
280
|
partition_generator = StreamSlicerPartitionGenerator(
|
267
281
|
partition_factory=DeclarativePartitionFactory(
|
268
282
|
declarative_stream.name,
|
@@ -389,7 +403,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
389
403
|
|
390
404
|
return concurrent_streams, synchronous_streams
|
391
405
|
|
392
|
-
def _is_datetime_incremental_without_partition_routing(
|
406
|
+
def _is_concurrent_cursor_incremental_without_partition_routing(
|
393
407
|
self,
|
394
408
|
declarative_stream: DeclarativeStream,
|
395
409
|
incremental_sync_component_definition: Mapping[str, Any] | None,
|
@@ -397,11 +411,18 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
397
411
|
return (
|
398
412
|
incremental_sync_component_definition is not None
|
399
413
|
and bool(incremental_sync_component_definition)
|
400
|
-
and incremental_sync_component_definition.get("type", "")
|
401
|
-
== DatetimeBasedCursorModel.__name__
|
414
|
+
and (
|
415
|
+
incremental_sync_component_definition.get("type", "")
|
416
|
+
in (DatetimeBasedCursorModel.__name__, IncrementingCountCursorModel.__name__)
|
417
|
+
)
|
402
418
|
and hasattr(declarative_stream.retriever, "stream_slicer")
|
403
419
|
and (
|
404
420
|
isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
|
421
|
+
# IncrementingCountCursorModel is hardcoded to be of type DatetimeBasedCursor
|
422
|
+
# add isintance check here if we want to create a Declarative IncrementingCountCursor
|
423
|
+
# or isinstance(
|
424
|
+
# declarative_stream.retriever.stream_slicer, IncrementingCountCursor
|
425
|
+
# )
|
405
426
|
or isinstance(declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter)
|
406
427
|
)
|
407
428
|
)
|
@@ -777,6 +777,44 @@ definitions:
|
|
777
777
|
type:
|
778
778
|
type: string
|
779
779
|
enum: [LegacyToPerPartitionStateMigration]
|
780
|
+
IncrementingCountCursor:
|
781
|
+
title: Incrementing Count Cursor
|
782
|
+
description: Cursor that allows for incremental sync according to a continuously increasing integer.
|
783
|
+
type: object
|
784
|
+
required:
|
785
|
+
- type
|
786
|
+
- cursor_field
|
787
|
+
properties:
|
788
|
+
type:
|
789
|
+
type: string
|
790
|
+
enum: [IncrementingCountCursor]
|
791
|
+
cursor_field:
|
792
|
+
title: Cursor Field
|
793
|
+
description: The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.
|
794
|
+
type: string
|
795
|
+
interpolation_context:
|
796
|
+
- config
|
797
|
+
examples:
|
798
|
+
- "created_at"
|
799
|
+
- "{{ config['record_cursor'] }}"
|
800
|
+
start_value:
|
801
|
+
title: Start Value
|
802
|
+
description: The value that determines the earliest record that should be synced.
|
803
|
+
anyOf:
|
804
|
+
- type: string
|
805
|
+
- type: integer
|
806
|
+
interpolation_context:
|
807
|
+
- config
|
808
|
+
examples:
|
809
|
+
- 0
|
810
|
+
- "{{ config['start_value'] }}"
|
811
|
+
start_value_option:
|
812
|
+
title: Inject Start Value Into Outgoing HTTP Request
|
813
|
+
description: Optionally configures how the start value will be sent in requests to the source API.
|
814
|
+
"$ref": "#/definitions/RequestOption"
|
815
|
+
$parameters:
|
816
|
+
type: object
|
817
|
+
additionalProperties: true
|
780
818
|
DatetimeBasedCursor:
|
781
819
|
title: Datetime Based Cursor
|
782
820
|
description: Cursor to provide incremental capabilities over datetime.
|
@@ -1319,6 +1357,7 @@ definitions:
|
|
1319
1357
|
anyOf:
|
1320
1358
|
- "$ref": "#/definitions/CustomIncrementalSync"
|
1321
1359
|
- "$ref": "#/definitions/DatetimeBasedCursor"
|
1360
|
+
- "$ref": "#/definitions/IncrementingCountCursor"
|
1322
1361
|
name:
|
1323
1362
|
title: Name
|
1324
1363
|
description: The stream name.
|
@@ -1508,6 +1508,28 @@ class AuthFlow(BaseModel):
|
|
1508
1508
|
oauth_config_specification: Optional[OAuthConfigSpecification] = None
|
1509
1509
|
|
1510
1510
|
|
1511
|
+
class IncrementingCountCursor(BaseModel):
|
1512
|
+
type: Literal["IncrementingCountCursor"]
|
1513
|
+
cursor_field: str = Field(
|
1514
|
+
...,
|
1515
|
+
description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.",
|
1516
|
+
examples=["created_at", "{{ config['record_cursor'] }}"],
|
1517
|
+
title="Cursor Field",
|
1518
|
+
)
|
1519
|
+
start_value: Optional[Union[str, int]] = Field(
|
1520
|
+
None,
|
1521
|
+
description="The value that determines the earliest record that should be synced.",
|
1522
|
+
examples=[0, "{{ config['start_value'] }}"],
|
1523
|
+
title="Start Value",
|
1524
|
+
)
|
1525
|
+
start_value_option: Optional[RequestOption] = Field(
|
1526
|
+
None,
|
1527
|
+
description="Optionally configures how the start value will be sent in requests to the source API.",
|
1528
|
+
title="Inject Start Value Into Outgoing HTTP Request",
|
1529
|
+
)
|
1530
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1531
|
+
|
1532
|
+
|
1511
1533
|
class DatetimeBasedCursor(BaseModel):
|
1512
1534
|
type: Literal["DatetimeBasedCursor"]
|
1513
1535
|
clamping: Optional[Clamping] = Field(
|
@@ -1948,7 +1970,9 @@ class DeclarativeStream(BaseModel):
|
|
1948
1970
|
description="Component used to coordinate how records are extracted across stream slices and request pages.",
|
1949
1971
|
title="Retriever",
|
1950
1972
|
)
|
1951
|
-
incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = Field(
|
1973
|
+
incremental_sync: Optional[
|
1974
|
+
Union[CustomIncrementalSync, DatetimeBasedCursor, IncrementingCountCursor]
|
1975
|
+
] = Field(
|
1952
1976
|
None,
|
1953
1977
|
description="Component used to fetch data incrementally based on a time field in the data.",
|
1954
1978
|
title="Incremental Sync",
|
@@ -245,6 +245,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
245
245
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
246
246
|
HttpResponseFilter as HttpResponseFilterModel,
|
247
247
|
)
|
248
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
249
|
+
IncrementingCountCursor as IncrementingCountCursorModel,
|
250
|
+
)
|
248
251
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
249
252
|
InlineSchemaLoader as InlineSchemaLoaderModel,
|
250
253
|
)
|
@@ -496,6 +499,9 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_sta
|
|
496
499
|
CustomFormatConcurrentStreamStateConverter,
|
497
500
|
DateTimeStreamStateConverter,
|
498
501
|
)
|
502
|
+
from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
|
503
|
+
IncrementingCountStreamStateConverter,
|
504
|
+
)
|
499
505
|
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
|
500
506
|
from airbyte_cdk.sources.types import Config
|
501
507
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
@@ -584,6 +590,7 @@ class ModelToComponentFactory:
|
|
584
590
|
FlattenFieldsModel: self.create_flatten_fields,
|
585
591
|
DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
|
586
592
|
IterableDecoderModel: self.create_iterable_decoder,
|
593
|
+
IncrementingCountCursorModel: self.create_incrementing_count_cursor,
|
587
594
|
XmlDecoderModel: self.create_xml_decoder,
|
588
595
|
JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
|
589
596
|
DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
|
@@ -1189,6 +1196,70 @@ class ModelToComponentFactory:
|
|
1189
1196
|
clamping_strategy=clamping_strategy,
|
1190
1197
|
)
|
1191
1198
|
|
1199
|
+
def create_concurrent_cursor_from_incrementing_count_cursor(
|
1200
|
+
self,
|
1201
|
+
model_type: Type[BaseModel],
|
1202
|
+
component_definition: ComponentDefinition,
|
1203
|
+
stream_name: str,
|
1204
|
+
stream_namespace: Optional[str],
|
1205
|
+
config: Config,
|
1206
|
+
message_repository: Optional[MessageRepository] = None,
|
1207
|
+
**kwargs: Any,
|
1208
|
+
) -> ConcurrentCursor:
|
1209
|
+
# Per-partition incremental streams can dynamically create child cursors which will pass their current
|
1210
|
+
# state via the stream_state keyword argument. Incremental syncs without parent streams use the
|
1211
|
+
# incoming state and connector_state_manager that is initialized when the component factory is created
|
1212
|
+
stream_state = (
|
1213
|
+
self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
|
1214
|
+
if "stream_state" not in kwargs
|
1215
|
+
else kwargs["stream_state"]
|
1216
|
+
)
|
1217
|
+
|
1218
|
+
component_type = component_definition.get("type")
|
1219
|
+
if component_definition.get("type") != model_type.__name__:
|
1220
|
+
raise ValueError(
|
1221
|
+
f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
|
1222
|
+
)
|
1223
|
+
|
1224
|
+
incrementing_count_cursor_model = model_type.parse_obj(component_definition)
|
1225
|
+
|
1226
|
+
if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
|
1227
|
+
raise ValueError(
|
1228
|
+
f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
|
1229
|
+
)
|
1230
|
+
|
1231
|
+
interpolated_start_value = (
|
1232
|
+
InterpolatedString.create(
|
1233
|
+
incrementing_count_cursor_model.start_value, # type: ignore
|
1234
|
+
parameters=incrementing_count_cursor_model.parameters or {},
|
1235
|
+
)
|
1236
|
+
if incrementing_count_cursor_model.start_value
|
1237
|
+
else 0
|
1238
|
+
)
|
1239
|
+
|
1240
|
+
interpolated_cursor_field = InterpolatedString.create(
|
1241
|
+
incrementing_count_cursor_model.cursor_field,
|
1242
|
+
parameters=incrementing_count_cursor_model.parameters or {},
|
1243
|
+
)
|
1244
|
+
cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
|
1245
|
+
|
1246
|
+
connector_state_converter = IncrementingCountStreamStateConverter(
|
1247
|
+
is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state
|
1248
|
+
)
|
1249
|
+
|
1250
|
+
return ConcurrentCursor(
|
1251
|
+
stream_name=stream_name,
|
1252
|
+
stream_namespace=stream_namespace,
|
1253
|
+
stream_state=stream_state,
|
1254
|
+
message_repository=message_repository or self._message_repository,
|
1255
|
+
connector_state_manager=self._connector_state_manager,
|
1256
|
+
connector_state_converter=connector_state_converter,
|
1257
|
+
cursor_field=cursor_field,
|
1258
|
+
slice_boundary_fields=None,
|
1259
|
+
start=interpolated_start_value, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1260
|
+
end_provider=connector_state_converter.get_end_provider(), # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1261
|
+
)
|
1262
|
+
|
1192
1263
|
def _assemble_weekday(self, weekday: str) -> Weekday:
|
1193
1264
|
match weekday:
|
1194
1265
|
case "MONDAY":
|
@@ -1405,19 +1476,7 @@ class ModelToComponentFactory:
|
|
1405
1476
|
try:
|
1406
1477
|
module_ref = importlib.import_module(module_name_full)
|
1407
1478
|
except ModuleNotFoundError as e:
|
1408
|
-
|
1409
|
-
# During testing, the modules containing the custom components are not moved to source_declarative_manifest. In order to run the test, add the source folder to your PYTHONPATH or add it runtime using sys.path.append
|
1410
|
-
try:
|
1411
|
-
import os
|
1412
|
-
|
1413
|
-
module_name_with_source_declarative_manifest = ".".join(split[1:-1])
|
1414
|
-
module_ref = importlib.import_module(
|
1415
|
-
module_name_with_source_declarative_manifest
|
1416
|
-
)
|
1417
|
-
except ModuleNotFoundError:
|
1418
|
-
raise ValueError(f"Could not load module `{module_name_full}`.") from e
|
1419
|
-
else:
|
1420
|
-
raise ValueError(f"Could not load module `{module_name_full}`.") from e
|
1479
|
+
raise ValueError(f"Could not load module `{module_name_full}`.") from e
|
1421
1480
|
|
1422
1481
|
try:
|
1423
1482
|
return getattr(module_ref, class_name)
|
@@ -1634,6 +1693,31 @@ class ModelToComponentFactory:
|
|
1634
1693
|
config=config,
|
1635
1694
|
parameters=model.parameters or {},
|
1636
1695
|
)
|
1696
|
+
elif model.incremental_sync and isinstance(
|
1697
|
+
model.incremental_sync, IncrementingCountCursorModel
|
1698
|
+
):
|
1699
|
+
cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore
|
1700
|
+
|
1701
|
+
start_time_option = (
|
1702
|
+
self._create_component_from_model(
|
1703
|
+
cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
|
1704
|
+
config,
|
1705
|
+
parameters=cursor_model.parameters or {},
|
1706
|
+
)
|
1707
|
+
if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
|
1708
|
+
else None
|
1709
|
+
)
|
1710
|
+
|
1711
|
+
# The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
|
1712
|
+
# the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
|
1713
|
+
partition_field_start = "start"
|
1714
|
+
|
1715
|
+
request_options_provider = DatetimeBasedRequestOptionsProvider(
|
1716
|
+
start_time_option=start_time_option,
|
1717
|
+
partition_field_start=partition_field_start,
|
1718
|
+
config=config,
|
1719
|
+
parameters=model.parameters or {},
|
1720
|
+
)
|
1637
1721
|
else:
|
1638
1722
|
request_options_provider = None
|
1639
1723
|
|
@@ -2123,6 +2207,22 @@ class ModelToComponentFactory:
|
|
2123
2207
|
stream_response=False if self._emit_connector_builder_messages else True,
|
2124
2208
|
)
|
2125
2209
|
|
2210
|
+
@staticmethod
|
2211
|
+
def create_incrementing_count_cursor(
|
2212
|
+
model: IncrementingCountCursorModel, config: Config, **kwargs: Any
|
2213
|
+
) -> DatetimeBasedCursor:
|
2214
|
+
# This should not actually get used anywhere at runtime, but needed to add this to pass checks since
|
2215
|
+
# we still parse models into components. The issue is that there's no runtime implementation of a
|
2216
|
+
# IncrementingCountCursor.
|
2217
|
+
# A known and expected issue with this stub is running a check with the declared IncrementingCountCursor because it is run without ConcurrentCursor.
|
2218
|
+
return DatetimeBasedCursor(
|
2219
|
+
cursor_field=model.cursor_field,
|
2220
|
+
datetime_format="%Y-%m-%d",
|
2221
|
+
start_datetime="2024-12-12",
|
2222
|
+
config=config,
|
2223
|
+
parameters={},
|
2224
|
+
)
|
2225
|
+
|
2126
2226
|
@staticmethod
|
2127
2227
|
def create_iterable_decoder(
|
2128
2228
|
model: IterableDecoderModel, config: Config, **kwargs: Any
|
@@ -6,7 +6,7 @@ import re
|
|
6
6
|
from dataclasses import dataclass
|
7
7
|
from typing import Any, Dict, List, Optional
|
8
8
|
|
9
|
-
import unidecode
|
9
|
+
import anyascii
|
10
10
|
|
11
11
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
12
12
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
@@ -48,7 +48,7 @@ class KeysToSnakeCaseTransformation(RecordTransformation):
|
|
48
48
|
return self.tokens_to_snake_case(tokens)
|
49
49
|
|
50
50
|
def normalize_key(self, key: str) -> str:
|
51
|
-
return unidecode.unidecode(key)
|
51
|
+
return str(anyascii.anyascii(key))
|
52
52
|
|
53
53
|
def tokenize_key(self, key: str) -> List[str]:
|
54
54
|
tokens = []
|
@@ -39,18 +39,13 @@ class YamlDeclarativeSource(ConcurrentDeclarativeSource[List[AirbyteStateMessage
|
|
39
39
|
)
|
40
40
|
|
41
41
|
def _read_and_parse_yaml_file(self, path_to_yaml_file: str) -> ConnectionDefinition:
|
42
|
-
try:
|
43
|
-
# For testing purposes, we want to allow to just pass a file. However, this
|
44
|
-
with open(path_to_yaml_file, "r") as f:
|
45
|
-
return yaml.safe_load(f) # type: ignore # we assume the yaml represents a ConnectionDefinition
|
46
|
-
except FileNotFoundError:
|
47
|
-
# Running inside the container, the working directory during an operation is not structured the same as the static files
|
48
|
-
package = self.__class__.__module__.split(".")[0]
|
42
|
+
package = self.__class__.__module__.split(".")[0]
|
49
43
|
|
50
|
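-
yaml_config = pkgutil.get_data(package, path_to_yaml_file)
|
51
|
-
if yaml_config:
|
52
|
-
decoded_yaml = yaml_config.decode()
|
53
|
-
return self._parse(decoded_yaml)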
|
44
|
+
yaml_config = pkgutil.get_data(package, path_to_yaml_file)
|
45
|
+
if yaml_config:
|
46
|
+
decoded_yaml = yaml_config.decode()
|
47
|
+
return self._parse(decoded_yaml)
|
48
|
+
else:
|
54
49
|
return {}
|
55
50
|
|
56
51
|
def _emit_manifest_debug_message(self, extra_args: dict[str, Any]) -> None:
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from abc import ABC, abstractmethod
|
6
6
|
from enum import Enum
|
7
|
-
from typing import TYPE_CHECKING, Any, List, MutableMapping, Optional, Tuple
|
7
|
+
from typing import TYPE_CHECKING, Any, Callable, List, MutableMapping, Optional, Tuple
|
8
8
|
|
9
9
|
if TYPE_CHECKING:
|
10
10
|
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
@@ -12,6 +12,7 @@ if TYPE_CHECKING:
|
|
12
12
|
|
13
13
|
class ConcurrencyCompatibleStateType(Enum):
|
14
14
|
date_range = "date-range"
|
15
|
+
integer = "integer"
|
15
16
|
|
16
17
|
|
17
18
|
class AbstractStreamStateConverter(ABC):
|
airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import Any, Callable, MutableMapping, Optional, Tuple
|
6
|
+
|
7
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
8
|
+
from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
|
9
|
+
AbstractStreamStateConverter,
|
10
|
+
ConcurrencyCompatibleStateType,
|
11
|
+
)
|
12
|
+
|
13
|
+
|
14
|
+
class IncrementingCountStreamStateConverter(AbstractStreamStateConverter):
|
15
|
+
def _from_state_message(self, value: Any) -> Any:
|
16
|
+
return value
|
17
|
+
|
18
|
+
def _to_state_message(self, value: Any) -> Any:
|
19
|
+
return value
|
20
|
+
|
21
|
+
@classmethod
|
22
|
+
def get_end_provider(cls) -> Callable[[], float]:
|
23
|
+
return lambda: float("inf")
|
24
|
+
|
25
|
+
def convert_from_sequential_state(
|
26
|
+
self,
|
27
|
+
cursor_field: "CursorField", # to deprecate as it is only needed for sequential state
|
28
|
+
stream_state: MutableMapping[str, Any],
|
29
|
+
start: Optional[Any],
|
30
|
+
) -> Tuple[Any, MutableMapping[str, Any]]:
|
31
|
+
"""
|
32
|
+
Convert the state message to the format required by the ConcurrentCursor.
|
33
|
+
|
34
|
+
e.g.
|
35
|
+
{
|
36
|
+
"state_type": ConcurrencyCompatibleStateType.date_range.value,
|
37
|
+
"metadata": { … },
|
38
|
+
"slices": [
|
39
|
+
{"start": "10", "end": "2021-01-18T21:18:20.000+00:00"},
|
40
|
+
]
|
41
|
+
}
|
42
|
+
"""
|
43
|
+
sync_start = self._get_sync_start(cursor_field, stream_state, start)
|
44
|
+
if self.is_state_message_compatible(stream_state):
|
45
|
+
return sync_start, stream_state
|
46
|
+
|
47
|
+
# Create a slice to represent the records synced during prior syncs.
|
48
|
+
# The start and end are the same to avoid confusion as to whether the records for this slice
|
49
|
+
# were actually synced
|
50
|
+
slices = [
|
51
|
+
{
|
52
|
+
self.START_KEY: start if start is not None else sync_start,
|
53
|
+
self.END_KEY: sync_start, # this may not be relevant anymore
|
54
|
+
self.MOST_RECENT_RECORD_KEY: sync_start,
|
55
|
+
}
|
56
|
+
]
|
57
|
+
|
58
|
+
return sync_start, {
|
59
|
+
"state_type": ConcurrencyCompatibleStateType.integer.value,
|
60
|
+
"slices": slices,
|
61
|
+
"legacy": stream_state,
|
62
|
+
}
|
63
|
+
|
64
|
+
def parse_value(self, value: int) -> int:
|
65
|
+
return value
|
66
|
+
|
67
|
+
@property
|
68
|
+
def zero_value(self) -> int:
|
69
|
+
return 0
|
70
|
+
|
71
|
+
def increment(self, value: int) -> int:
|
72
|
+
return value + 1
|
73
|
+
|
74
|
+
def output_format(self, value: int) -> int:
|
75
|
+
return value
|
76
|
+
|
77
|
+
def _get_sync_start(
|
78
|
+
self,
|
79
|
+
cursor_field: CursorField,
|
80
|
+
stream_state: MutableMapping[str, Any],
|
81
|
+
start: Optional[int],
|
82
|
+
) -> int:
|
83
|
+
sync_start = start if start is not None else self.zero_value
|
84
|
+
prev_sync_low_water_mark: Optional[int] = (
|
85
|
+
stream_state[cursor_field.cursor_field_key]
|
86
|
+
if cursor_field.cursor_field_key in stream_state
|
87
|
+
else None
|
88
|
+
)
|
89
|
+
if prev_sync_low_water_mark and prev_sync_low_water_mark >= sync_start:
|
90
|
+
return prev_sync_low_water_mark
|
91
|
+
else:
|
92
|
+
return sync_start
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: airbyte-cdk
|
3
|
-
Version: 6.36.5.dev0
|
3
|
+
Version: 6.37.0
|
4
4
|
Summary: A framework for writing Airbyte Connectors.
|
5
5
|
Home-page: https://airbyte.com
|
6
6
|
License: MIT
|
@@ -22,8 +22,8 @@ Provides-Extra: sql
|
|
22
22
|
Provides-Extra: vector-db-based
|
23
23
|
Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
|
24
24
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
25
|
-
Requires-Dist: Unidecode (>=1.3,<2.0)
|
26
25
|
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
|
26
|
+
Requires-Dist: anyascii (>=0.3.2,<0.4.0)
|
27
27
|
Requires-Dist: avro (>=1.11.2,<1.13.0) ; extra == "file-based"
|
28
28
|
Requires-Dist: backoff
|
29
29
|
Requires-Dist: cachetools
|
@@ -67,11 +67,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
|
|
67
67
|
airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
|
68
68
|
airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
|
69
69
|
airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
|
70
|
-
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
|
70
|
+
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=rAp-sgld4n8Tmybz-51m7VcYXqKwzKDpCJVr1elmkRc,26824
|
71
71
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
|
72
72
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=0qs4hhmh_XOy2B4MHCn2qVMM79C6MizIBqnvpZj1aSE,2923
|
73
73
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
74
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
74
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=Vsem7b0YL_kaLeTwY_kX-EqHzuBDjik0lBN7e3srXT4,147126
|
75
75
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
76
76
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=venZjfpvtqr3oFSuvMBWtn4h9ayLhD4L65ACuXCDZ64,10445
|
77
77
|
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
|
@@ -107,19 +107,19 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py,sha
|
|
107
107
|
airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=CQkHqGlfa87G6VYMtBAQWin7ECKpfMdrDcg0JO5_rhc,3212
|
108
108
|
airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=9IoeuWam3L6GyN10L6U8xNWXmkt9cnahSDNkez1OmFY,982
|
109
109
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=UQeuS4Vpyp4hlOn-R3tRyeBX0e9IoV6jQ6gH-Jz8lY0,7182
|
110
|
-
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=
|
110
|
+
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=uuXBZUWDWM-sPcUKjNSPRN657QhNQCx_hnhTuJj2zOA,5129
|
111
111
|
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=TN6GCgLXaWDONTaJwQ3A5ELqC-sxwKz-UYSraJYB-dI,17078
|
112
112
|
airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
113
113
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
|
114
114
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
115
115
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
|
116
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
116
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=Kd8HvvXqvGWZBey99eQzbK5u2k1ItnRAi2h7C7UNwBQ,103225
|
117
117
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
118
118
|
airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=958MMX6_ZOJUlDDdNr9Krosgi2bCKGx2Z765M2Woz18,5505
|
119
119
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
120
120
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
|
121
121
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
122
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
122
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=Mx0KJGbqIZeUWduKy-UvpVH-DRm0pzXDcz203r69oNY,140619
|
123
123
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
|
124
124
|
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
|
125
125
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
@@ -189,11 +189,11 @@ airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py,sha256=1
|
|
189
189
|
airbyte_cdk/sources/declarative/transformations/flatten_fields.py,sha256=yT3owG6rMKaRX-LJ_T-jSTnh1B5NoAHyH4YZN9yOvE8,1758
|
190
190
|
airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py,sha256=vbIn6ump-Ut6g20yMub7PFoPBhOKVtrHSAUdcOUdLfw,1999
|
191
191
|
airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py,sha256=RTs5KX4V3hM7A6QN1WlGF21YccTIyNH6qQI9IMb__hw,670
|
192
|
-
airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py,sha256=
|
192
|
+
airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py,sha256=_3ldEbsA7tQK-zzeU_cG86D1_1SY3wAo1vHE0zXrOck,2265
|
193
193
|
airbyte_cdk/sources/declarative/transformations/remove_fields.py,sha256=EwUP0SZ2p4GRJ6Q8CUzlz9dcUeEidEFDlI2IBye2tlc,2745
|
194
194
|
airbyte_cdk/sources/declarative/transformations/transformation.py,sha256=4sXtx9cNY2EHUPq-xHvDs8GQEBUy3Eo6TkRLKHPXx68,1161
|
195
195
|
airbyte_cdk/sources/declarative/types.py,sha256=yqx0xlZv_76tkC7fqJKefmvl4GJJ8mXbeddwVV8XRJU,778
|
196
|
-
airbyte_cdk/sources/declarative/yaml_declarative_source.py,sha256=
|
196
|
+
airbyte_cdk/sources/declarative/yaml_declarative_source.py,sha256=MsKSAqtpwIqJfYOiUX01RbqMeTy7pvBoguvyTWrL7pI,2390
|
197
197
|
airbyte_cdk/sources/embedded/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
198
198
|
airbyte_cdk/sources/embedded/base_integration.py,sha256=0LbtEtWlnVdkYlweA5OJU4BIoyS6d4le5w9FsLn25Zc,2417
|
199
199
|
airbyte_cdk/sources/embedded/catalog.py,sha256=EAnLw9u5fXLNBLfWr_I0itA7OEHMWdqEaM_rWc_tCpI,1653
|
@@ -284,8 +284,9 @@ airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py,sha256=
|
|
284
284
|
airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py,sha256=nbdkkHoN0NFeSs7YUFfzY1Lg5Jrt8fWY_ln3YrhY-Ko,544
|
285
285
|
airbyte_cdk/sources/streams/concurrent/partitions/types.py,sha256=frPVvHtY7vLxpGEbMQzNvF1Y52ZVyct9f1DDhGoRjwY,1166
|
286
286
|
airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
287
|
-
airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py,sha256=
|
287
|
+
airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py,sha256=JCRanZBAC8a0pPuzHCHiJ1irHgFkDd83l2K5jA1xRkU,6853
|
288
288
|
airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=x8MLm1pTMfLNHvMF3P1ixYkYt_xjpbaIwnvhY_ofdBo,8076
|
289
|
+
airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py,sha256=bC6L82nsErXcFSPlxcdp4SneJ7qFuqCelP3-8svEh5E,3054
|
289
290
|
airbyte_cdk/sources/streams/core.py,sha256=jiYW6w8cjNjzXMd8U8Gt-02fYYU7b0ciXSSSnGvFRak,32219
|
290
291
|
airbyte_cdk/sources/streams/http/__init__.py,sha256=AGiEZ5B1Joi9ZnFpkJLT7F3QLpCAaBgAeVWy-1znmZw,311
|
291
292
|
airbyte_cdk/sources/streams/http/availability_strategy.py,sha256=sovoGFThZr-doMN9vJvTuJBrvkwQVIO0qTQO64pGZPY,2428
|
@@ -360,9 +361,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
|
|
360
361
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
361
362
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
362
363
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
363
|
-
airbyte_cdk-6.36.5.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
364
|
-
airbyte_cdk-6.36.5.dev0.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
365
|
-
airbyte_cdk-6.
|
366
|
-
airbyte_cdk-6.36.5.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
367
|
-
airbyte_cdk-6.36.5.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
368
|
-
airbyte_cdk-6.36.5.dev0.dist-info/RECORD,,
|
364
|
+
airbyte_cdk-6.37.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
365
|
+
airbyte_cdk-6.37.0.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
366
|
+
airbyte_cdk-6.37.0.dist-info/METADATA,sha256=g4WHcM7TfV-PzEFv-cACDUYqAf_iFEZfCzi9sVwjSmw,6013
|
367
|
+
airbyte_cdk-6.37.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
368
|
+
airbyte_cdk-6.37.0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
369
|
+
airbyte_cdk-6.37.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|