airbyte-cdk 6.17.1.dev1__py3-none-any.whl → 6.18.0__py3-none-any.whl
This diff shows the contents of two publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +0 -69
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +5 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -15
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +26 -12
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +26 -79
- airbyte_cdk/sources/declarative/requesters/README.md +57 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +33 -4
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/streams/concurrent/cursor.py +21 -30
- airbyte_cdk/sources/types.py +3 -0
- {airbyte_cdk-6.17.1.dev1.dist-info → airbyte_cdk-6.18.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.17.1.dev1.dist-info → airbyte_cdk-6.18.0.dist-info}/RECORD +17 -17
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -340
- {airbyte_cdk-6.17.1.dev1.dist-info → airbyte_cdk-6.18.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.17.1.dev1.dist-info → airbyte_cdk-6.18.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.17.1.dev1.dist-info → airbyte_cdk-6.18.0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/concurrent_declarative_source.py CHANGED

@@ -20,9 +20,6 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
     ClientSideIncrementalRecordFilterDecorator,
 )
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
-from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
-    PerPartitionWithGlobalCursor,
-)
 from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
 from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -307,72 +304,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                             cursor=final_state_cursor,
                         )
                     )
-                elif (
-                    incremental_sync_component_definition
-                    and incremental_sync_component_definition.get("type", "")
-                    == DatetimeBasedCursorModel.__name__
-                    and self._stream_supports_concurrent_partition_processing(
-                        declarative_stream=declarative_stream
-                    )
-                    and hasattr(declarative_stream.retriever, "stream_slicer")
-                    and isinstance(
-                        declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
-                    )
-                ):
-                    stream_state = state_manager.get_stream_state(
-                        stream_name=declarative_stream.name, namespace=declarative_stream.namespace
-                    )
-                    partition_router = declarative_stream.retriever.stream_slicer._partition_router
-
-                    cursor = self._constructor.create_concurrent_cursor_from_perpartition_cursor(
-                        state_manager=state_manager,
-                        model_type=DatetimeBasedCursorModel,
-                        component_definition=incremental_sync_component_definition,
-                        stream_name=declarative_stream.name,
-                        stream_namespace=declarative_stream.namespace,
-                        config=config or {},
-                        stream_state=stream_state,
-                        partition_router=partition_router,
-                    )
-
-                    retriever = declarative_stream.retriever
-
-                    # This is an optimization so that we don't invoke any cursor or state management flows within the
-                    # low-code framework because state management is handled through the ConcurrentCursor.
-                    if declarative_stream and isinstance(retriever, SimpleRetriever):
-                        # Also a temporary hack. In the legacy Stream implementation, as part of the read,
-                        # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
-                        # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
-                        # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
-                        # still rely on a DatetimeBasedCursor that is properly initialized with state.
-                        if retriever.cursor:
-                            retriever.cursor.set_initial_state(stream_state=stream_state)
-                        # We zero it out here, but since this is a cursor reference, the state is still properly
-                        # instantiated for the other components that reference it
-                        retriever.cursor = None
-
-                    partition_generator = StreamSlicerPartitionGenerator(
-                        DeclarativePartitionFactory(
-                            declarative_stream.name,
-                            declarative_stream.get_json_schema(),
-                            retriever,
-                            self.message_repository,
-                        ),
-                        cursor,
-                    )
-
-                    concurrent_streams.append(
-                        DefaultStream(
-                            partition_generator=partition_generator,
-                            name=declarative_stream.name,
-                            json_schema=declarative_stream.get_json_schema(),
-                            availability_strategy=AlwaysAvailableAvailabilityStrategy(),
-                            primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
-                            cursor_field=cursor.cursor_field.cursor_field_key,
-                            logger=self.logger,
-                            cursor=cursor,
-                        )
-                    )
                 else:
                     synchronous_streams.append(declarative_stream)
             else:
airbyte_cdk/sources/declarative/declarative_component_schema.yaml CHANGED

@@ -2977,6 +2977,11 @@ definitions:
       anyOf:
         - "$ref": "#/definitions/CustomRequester"
        - "$ref": "#/definitions/HttpRequester"
+    url_requester:
+      description: Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.
+      anyOf:
+        - "$ref": "#/definitions/CustomRequester"
+        - "$ref": "#/definitions/HttpRequester"
     download_requester:
       description: Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.
       anyOf:
airbyte_cdk/sources/declarative/extractors/record_filter.py CHANGED

@@ -59,11 +59,13 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
 
     def __init__(
         self,
-
+        date_time_based_cursor: DatetimeBasedCursor,
+        substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
         **kwargs: Any,
     ):
         super().__init__(**kwargs)
-        self.
+        self._date_time_based_cursor = date_time_based_cursor
+        self._substream_cursor = substream_cursor
 
     def filter_records(
         self,
@@ -75,7 +77,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
         records = (
             record
             for record in records
-            if self.
+            if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
                 # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
                 # Record stream name is empty cause it is not used durig the filtering
                 Record(data=record, associated_slice=stream_slice, stream_name="")
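The filtering logic above picks whichever cursor is most specific: the substream-aware cursor when one exists, otherwise the plain datetime cursor. A minimal, self-contained sketch of that fallback, with a hypothetical `_DemoCursor` standing in for the CDK cursor classes:

```python
from typing import Any, Iterable, Iterator, Mapping, Optional


class _DemoCursor:
    """Hypothetical stand-in for any cursor exposing should_be_synced()."""

    def __init__(self, threshold: int) -> None:
        self._threshold = threshold

    def should_be_synced(self, record: Mapping[str, Any]) -> bool:
        return record["updated_at"] >= self._threshold


def filter_records(
    records: Iterable[Mapping[str, Any]],
    substream_cursor: Optional[_DemoCursor],
    date_time_based_cursor: _DemoCursor,
) -> Iterator[Mapping[str, Any]]:
    # Mirrors (self._substream_cursor or self._date_time_based_cursor).should_be_synced(...)
    active_cursor = substream_cursor or date_time_based_cursor
    return (r for r in records if active_cursor.should_be_synced(r))


print(list(filter_records([{"updated_at": 5}, {"updated_at": 1}], None, _DemoCursor(3))))
# -> [{'updated_at': 5}]
```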
airbyte_cdk/sources/declarative/incremental/__init__.py CHANGED

@@ -2,10 +2,6 @@
 # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
 #
 
-from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
-    ConcurrentCursorFactory,
-    ConcurrentPerPartitionCursor,
-)
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
 from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
 from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
@@ -25,8 +21,6 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i
 
 __all__ = [
     "CursorFactory",
-    "ConcurrentCursorFactory",
-    "ConcurrentPerPartitionCursor",
     "DatetimeBasedCursor",
     "DeclarativeCursor",
     "GlobalSubstreamCursor",
airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py CHANGED

@@ -303,21 +303,6 @@ class PerPartitionCursor(DeclarativeCursor):
         raise ValueError("A partition needs to be provided in order to get request body json")
 
     def should_be_synced(self, record: Record) -> bool:
-        if (
-            record.associated_slice
-            and self._to_partition_key(record.associated_slice.partition)
-            not in self._cursor_per_partition
-        ):
-            partition_state = (
-                self._state_to_migrate_from
-                if self._state_to_migrate_from
-                else self._NO_CURSOR_STATE
-            )
-            cursor = self._create_cursor(partition_state)
-
-            self._cursor_per_partition[
-                self._to_partition_key(record.associated_slice.partition)
-            ] = cursor
         return self._get_cursor(record).should_be_synced(
             self._convert_record_to_cursor_record(record)
         )
airbyte_cdk/sources/declarative/models/declarative_component_schema.py CHANGED

@@ -737,33 +737,43 @@ class KeysToSnakeCase(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
+class FlattenFields(BaseModel):
+    type: Literal["FlattenFields"]
+    flatten_lists: Optional[bool] = Field(
+        True,
+        description="Whether to flatten lists or leave it as is. Default is True.",
+        title="Flatten Lists",
+    )
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
 class KeysReplace(BaseModel):
     type: Literal["KeysReplace"]
     old: str = Field(
         ...,
         description="Old value to replace.",
-        examples=[
+        examples=[
+            " ",
+            "{{ record.id }}",
+            "{{ config['id'] }}",
+            "{{ stream_slice['id'] }}",
+        ],
         title="Old value",
     )
     new: str = Field(
         ...,
         description="New value to set.",
-        examples=[
+        examples=[
+            "_",
+            "{{ record.id }}",
+            "{{ config['id'] }}",
+            "{{ stream_slice['id'] }}",
+        ],
         title="New value",
     )
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
-class FlattenFields(BaseModel):
-    type: Literal["FlattenFields"]
-    flatten_lists: Optional[bool] = Field(
-        True,
-        description="Whether to flatten lists or leave it as is. Default is True.",
-        title="Flatten Lists",
-    )
-    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
-
-
 class IterableDecoder(BaseModel):
     type: Literal["IterableDecoder"]
 
@@ -2040,6 +2050,10 @@ class AsyncRetriever(BaseModel):
         ...,
         description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
     )
+    url_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
+        None,
+        description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
+    )
     download_requester: Union[CustomRequester, HttpRequester] = Field(
         ...,
         description="Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.",
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py CHANGED

@@ -84,8 +84,6 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
 )
 from airbyte_cdk.sources.declarative.incremental import (
     ChildPartitionResumableFullRefreshCursor,
-    ConcurrentCursorFactory,
-    ConcurrentPerPartitionCursor,
     CursorFactory,
     DatetimeBasedCursor,
     DeclarativeCursor,
@@ -440,7 +438,6 @@ from airbyte_cdk.sources.message import (
     InMemoryMessageRepository,
     LogAppenderMessageRepositoryDecorator,
     MessageRepository,
-    NoopMessageRepository,
 )
 from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
 from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -874,8 +871,6 @@ class ModelToComponentFactory:
         stream_namespace: Optional[str],
         config: Config,
         stream_state: MutableMapping[str, Any],
-        message_repository: Optional[MessageRepository] = None,
-        runtime_lookback_window: Optional[int] = None,
         **kwargs: Any,
     ) -> ConcurrentCursor:
         component_type = component_definition.get("type")
@@ -933,11 +928,6 @@ class ModelToComponentFactory:
         if evaluated_lookback_window:
             lookback_window = parse_duration(evaluated_lookback_window)
 
-        if runtime_lookback_window and lookback_window:
-            lookback_window = max(lookback_window, runtime_lookback_window)
-        elif runtime_lookback_window:
-            lookback_window = runtime_lookback_window
-
         connector_state_converter: DateTimeStreamStateConverter
         connector_state_converter = CustomFormatConcurrentStreamStateConverter(
             datetime_format=datetime_format,
@@ -1016,7 +1006,7 @@ class ModelToComponentFactory:
             stream_name=stream_name,
             stream_namespace=stream_namespace,
             stream_state=stream_state,
-            message_repository=
+            message_repository=self._message_repository,
             connector_state_manager=state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
@@ -1028,63 +1018,6 @@ class ModelToComponentFactory:
             cursor_granularity=cursor_granularity,
         )
 
-    def create_concurrent_cursor_from_perpartition_cursor(
-        self,
-        state_manager: ConnectorStateManager,
-        model_type: Type[BaseModel],
-        component_definition: ComponentDefinition,
-        stream_name: str,
-        stream_namespace: Optional[str],
-        config: Config,
-        stream_state: MutableMapping[str, Any],
-        partition_router,
-        **kwargs: Any,
-    ) -> ConcurrentPerPartitionCursor:
-        component_type = component_definition.get("type")
-        if component_definition.get("type") != model_type.__name__:
-            raise ValueError(
-                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
-            )
-
-        datetime_based_cursor_model = model_type.parse_obj(component_definition)
-
-        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
-            raise ValueError(
-                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
-            )
-
-        interpolated_cursor_field = InterpolatedString.create(
-            datetime_based_cursor_model.cursor_field,
-            parameters=datetime_based_cursor_model.parameters or {},
-        )
-        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
-
-        # Create the cursor factory
-        cursor_factory = ConcurrentCursorFactory(
-            partial(
-                self.create_concurrent_cursor_from_datetime_based_cursor,
-                state_manager=state_manager,
-                model_type=model_type,
-                component_definition=component_definition,
-                stream_name=stream_name,
-                stream_namespace=stream_namespace,
-                config=config,
-                message_repository=NoopMessageRepository(),
-            )
-        )
-
-        # Return the concurrent cursor and state converter
-        return ConcurrentPerPartitionCursor(
-            cursor_factory=cursor_factory,
-            partition_router=partition_router,
-            stream_name=stream_name,
-            stream_namespace=stream_namespace,
-            stream_state=stream_state,
-            message_repository=self._message_repository,  # type: ignore
-            connector_state_manager=state_manager,
-            cursor_field=cursor_field,
-        )
-
     @staticmethod
     def create_constant_backoff_strategy(
         model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
@@ -1367,15 +1300,18 @@ class ModelToComponentFactory:
                 raise ValueError(
                     "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
                 )
-
-
-
-
-
-
-
-
-
+            client_side_incremental_sync = {
+                "date_time_based_cursor": self._create_component_from_model(
+                    model=model.incremental_sync, config=config
+                ),
+                "substream_cursor": (
+                    combined_slicers
+                    if isinstance(
+                        combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
+                    )
+                    else None
+                ),
+            }
 
         if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
             cursor_model = model.incremental_sync
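The dict built in this hunk has exactly the keyword names that `ClientSideIncrementalRecordFilterDecorator.__init__` now declares, so elsewhere in the factory it can be forwarded with `**` unpacking. A hedged, standalone sketch of that wiring (toy classes, not the factory's literal code):

```python
from typing import Any, Dict, Optional


class DateTimeCursor:
    """Toy stand-in for DatetimeBasedCursor."""


class RecordFilterDecorator:
    """Toy stand-in for ClientSideIncrementalRecordFilterDecorator."""

    def __init__(
        self,
        date_time_based_cursor: DateTimeCursor,
        substream_cursor: Optional[Any],
        **kwargs: Any,
    ) -> None:
        self._date_time_based_cursor = date_time_based_cursor
        self._substream_cursor = substream_cursor


# Built like the factory's client_side_incremental_sync dict above, then
# splatted into the decorator so the parameter names stay in lockstep.
client_side_incremental_sync: Dict[str, Any] = {
    "date_time_based_cursor": DateTimeCursor(),
    "substream_cursor": None,  # only set for per-partition / global substream slicers
}
record_filter = RecordFilterDecorator(**client_side_incremental_sync)
```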
@@ -2191,7 +2127,7 @@ class ModelToComponentFactory:
         if (
             not isinstance(stream_slicer, DatetimeBasedCursor)
             or type(stream_slicer) is not DatetimeBasedCursor
-        )
+        ):
             # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
             # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
             # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
@@ -2351,7 +2287,7 @@ class ModelToComponentFactory:
             extractor=download_extractor,
             name=name,
             record_filter=None,
-            transformations=
+            transformations=transformations,
             schema_normalization=TypeTransformer(TransformConfig.NoTransform),
             config=config,
             parameters={},
|
|
2388
2324
|
if model.delete_requester
|
2389
2325
|
else None
|
2390
2326
|
)
|
2327
|
+
url_requester = (
|
2328
|
+
self._create_component_from_model(
|
2329
|
+
model=model.url_requester,
|
2330
|
+
decoder=decoder,
|
2331
|
+
config=config,
|
2332
|
+
name=f"job extract_url - {name}",
|
2333
|
+
)
|
2334
|
+
if model.url_requester
|
2335
|
+
else None
|
2336
|
+
)
|
2391
2337
|
status_extractor = self._create_component_from_model(
|
2392
2338
|
model=model.status_extractor, decoder=decoder, config=config, name=name
|
2393
2339
|
)
|
@@ -2398,6 +2344,7 @@ class ModelToComponentFactory:
             creation_requester=creation_requester,
             polling_requester=polling_requester,
             download_retriever=download_retriever,
+            url_requester=url_requester,
             abort_requester=abort_requester,
             delete_requester=delete_requester,
             status_extractor=status_extractor,
airbyte_cdk/sources/declarative/requesters/README.md ADDED

@@ -0,0 +1,57 @@
+# AsyncHttpJobRepository sequence diagram
+
+- Components marked as optional are not required and can be ignored.
+- if `url_requester` is not provided, `urls_extractor` will get urls from the `polling_job_response`
+- interpolation_context, e.g. `create_job_response` or `polling_job_response` can be obtained from stream_slice
+
+
+```mermaid
+---
+title: AsyncHttpJobRepository Sequence Diagram
+---
+sequenceDiagram
+    participant AsyncHttpJobRepository as AsyncOrchestrator
+    participant CreationRequester as creation_requester
+    participant PollingRequester as polling_requester
+    participant UrlRequester as url_requester (Optional)
+    participant DownloadRetriever as download_retriever
+    participant AbortRequester as abort_requester (Optional)
+    participant DeleteRequester as delete_requester (Optional)
+    participant Reporting Server as Async Reporting Server
+
+    AsyncHttpJobRepository ->> CreationRequester: Initiate job creation
+    CreationRequester ->> Reporting Server: Create job request
+    Reporting Server -->> CreationRequester: Job ID response
+    CreationRequester -->> AsyncHttpJobRepository: Job ID
+
+    loop Poll for job status
+        AsyncHttpJobRepository ->> PollingRequester: Check job status
+        PollingRequester ->> Reporting Server: Status request (interpolation_context: `create_job_response`)
+        Reporting Server -->> PollingRequester: Status response
+        PollingRequester -->> AsyncHttpJobRepository: Job status
+    end
+
+    alt Status: Ready
+        AsyncHttpJobRepository ->> UrlRequester: Request download URLs (if applicable)
+        UrlRequester ->> Reporting Server: URL request (interpolation_context: `polling_job_response`)
+        Reporting Server -->> UrlRequester: Download URLs
+        UrlRequester -->> AsyncHttpJobRepository: Download URLs
+
+        AsyncHttpJobRepository ->> DownloadRetriever: Download reports
+        DownloadRetriever ->> Reporting Server: Retrieve report data (interpolation_context: `url`)
+        Reporting Server -->> DownloadRetriever: Report data
+        DownloadRetriever -->> AsyncHttpJobRepository: Report data
+    else Status: Failed
+        AsyncHttpJobRepository ->> AbortRequester: Send abort request
+        AbortRequester ->> Reporting Server: Abort job
+        Reporting Server -->> AbortRequester: Abort confirmation
+        AbortRequester -->> AsyncHttpJobRepository: Confirmation
+    end
+
+    AsyncHttpJobRepository ->> DeleteRequester: Send delete job request
+    DeleteRequester ->> Reporting Server: Delete job
+    Reporting Server -->> DeleteRequester: Deletion confirmation
+    DeleteRequester -->> AsyncHttpJobRepository: Confirmation
+
+
+```
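The new README's diagram compresses the whole job lifecycle; a minimal, self-contained Python sketch of the same orchestration, with hypothetical callables standing in for the requesters, is:

```python
import time
from typing import Callable, Iterator, List, Optional


def run_async_job(
    create: Callable[[], str],              # creation_requester: returns a job id
    poll: Callable[[str], str],             # polling_requester: "running" / "ready" / "failed"
    download: Callable[[str], List[dict]],  # download_retriever: url -> records
    get_urls: Optional[Callable[[str], List[str]]] = None,  # url_requester (optional)
    abort: Optional[Callable[[str], None]] = None,          # abort_requester (optional)
    delete: Optional[Callable[[str], None]] = None,         # delete_requester (optional)
) -> Iterator[dict]:
    job_id = create()
    while (status := poll(job_id)) == "running":
        time.sleep(1)  # the real repository delegates backoff to the polling requester
    try:
        if status == "ready":
            # Without a url_requester, the urls come from the polling response itself.
            urls = get_urls(job_id) if get_urls else [f"jobs/{job_id}/result"]
            for url in urls:
                yield from download(url)
        elif abort is not None:
            abort(job_id)
    finally:
        if delete is not None:
            delete(job_id)
```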
airbyte_cdk/sources/declarative/requesters/http_job_repository.py CHANGED

@@ -31,6 +31,10 @@ LOGGER = logging.getLogger("airbyte")
 
 @dataclass
 class AsyncHttpJobRepository(AsyncJobRepository):
+    """
+    See Readme file for more details about flow.
+    """
+
     creation_requester: Requester
     polling_requester: Requester
     download_retriever: SimpleRetriever
@@ -44,6 +48,9 @@ class AsyncHttpJobRepository(AsyncJobRepository):
     record_extractor: RecordExtractor = field(
         init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
     )
+    url_requester: Optional[Requester] = (
+        None  # use it in case polling_requester provides some <id> and extra request is needed to obtain list of urls to download from
+    )
 
     def __post_init__(self) -> None:
         self._create_job_response_by_id: Dict[str, Response] = {}
@@ -186,10 +193,13 @@ class AsyncHttpJobRepository(AsyncJobRepository):
 
         """
 
-        for url in self.
-
-
-
+        for url in self._get_download_url(job):
+            job_slice = job.job_parameters()
+            stream_slice = StreamSlice(
+                partition=job_slice.partition,
+                cursor_slice=job_slice.cursor_slice,
+                extra_fields={**job_slice.extra_fields, "url": url},
+            )
             for message in self.download_retriever.read_records({}, stream_slice):
                 if isinstance(message, Record):
                     yield message.data
@@ -226,3 +236,22 @@ class AsyncHttpJobRepository(AsyncJobRepository):
             cursor_slice={},
         )
         return stream_slice
+
+    def _get_download_url(self, job: AsyncJob) -> Iterable[str]:
+        if not self.url_requester:
+            url_response = self._polling_job_response_by_id[job.api_job_id()]
+        else:
+            stream_slice: StreamSlice = StreamSlice(
+                partition={
+                    "polling_job_response": self._polling_job_response_by_id[job.api_job_id()]
+                },
+                cursor_slice={},
+            )
+            url_response = self.url_requester.send_request(stream_slice=stream_slice)  # type: ignore # we expect url_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
+            if not url_response:
+                raise AirbyteTracedException(
+                    internal_message="Always expect a response or an exception from url_requester",
+                    failure_type=FailureType.system_error,
+                )
+
+        yield from self.urls_extractor.extract_records(url_response)  # type: ignore # we expect urls_extractor to always return list of strings
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py CHANGED

@@ -160,7 +160,7 @@ class SimpleRetriever(Retriever):
             stream_slice,
             next_page_token,
             self._paginator.get_request_headers,
-            self.
+            self.stream_slicer.get_request_headers,
         )
         if isinstance(headers, str):
             raise ValueError("Request headers cannot be a string")
airbyte_cdk/sources/streams/concurrent/cursor.py CHANGED

@@ -196,9 +196,7 @@ class ConcurrentCursor(Cursor):
 
     @property
     def state(self) -> MutableMapping[str, Any]:
-        return self.
-            self.cursor_field, self._concurrent_state
-        )
+        return self._concurrent_state
 
     @property
     def cursor_field(self) -> CursorField:
@@ -243,10 +241,10 @@ class ConcurrentCursor(Cursor):
         return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
 
     def close_partition(self, partition: Partition) -> None:
-        slice_count_before = len(self.
+        slice_count_before = len(self.state.get("slices", []))
         self._add_slice_to_state(partition)
         if slice_count_before < len(
-            self.
+            self.state["slices"]
         ):  # only emit if at least one slice has been processed
             self._merge_partitions()
             self._emit_state_message()
@@ -258,11 +256,11 @@ class ConcurrentCursor(Cursor):
         )
 
         if self._slice_boundary_fields:
-            if "slices" not in self.
+            if "slices" not in self.state:
                 raise RuntimeError(
                     f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
                 )
-            self.
+            self.state["slices"].append(
                 {
                     self._connector_state_converter.START_KEY: self._extract_from_slice(
                         partition, self._slice_boundary_fields[self._START_BOUNDARY]
@@ -290,7 +288,7 @@ class ConcurrentCursor(Cursor):
                 "expected. Please contact the Airbyte team."
             )
 
-        self.
+        self.state["slices"].append(
             {
                 self._connector_state_converter.START_KEY: self.start,
                 self._connector_state_converter.END_KEY: most_recent_cursor_value,
@@ -302,7 +300,9 @@ class ConcurrentCursor(Cursor):
         self._connector_state_manager.update_state_for_stream(
             self._stream_name,
             self._stream_namespace,
-            self.
+            self._connector_state_converter.convert_to_state_message(
+                self._cursor_field, self.state
+            ),
         )
         state_message = self._connector_state_manager.create_state_message(
             self._stream_name, self._stream_namespace
@@ -310,9 +310,7 @@ class ConcurrentCursor(Cursor):
         self._message_repository.emit_message(state_message)
 
     def _merge_partitions(self) -> None:
-        self.
-            self._concurrent_state["slices"]
-        )
+        self.state["slices"] = self._connector_state_converter.merge_intervals(self.state["slices"])
 
     def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType:
         try:
@@ -349,42 +347,36 @@ class ConcurrentCursor(Cursor):
         if self._start is not None and self._is_start_before_first_slice():
             yield from self._split_per_slice_range(
                 self._start,
-                self.
+                self.state["slices"][0][self._connector_state_converter.START_KEY],
                 False,
             )
 
-        if len(self.
+        if len(self.state["slices"]) == 1:
             yield from self._split_per_slice_range(
                 self._calculate_lower_boundary_of_last_slice(
-                    self.
+                    self.state["slices"][0][self._connector_state_converter.END_KEY]
                 ),
                 self._end_provider(),
                 True,
             )
-        elif len(self.
-            for i in range(len(self.
+        elif len(self.state["slices"]) > 1:
+            for i in range(len(self.state["slices"]) - 1):
                 if self._cursor_granularity:
                     yield from self._split_per_slice_range(
-                        self.
+                        self.state["slices"][i][self._connector_state_converter.END_KEY]
                         + self._cursor_granularity,
-                        self.
-                            self._connector_state_converter.START_KEY
-                        ],
+                        self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
                         False,
                     )
                 else:
                     yield from self._split_per_slice_range(
-                        self.
-
-                        ],
-                        self._concurrent_state["slices"][i + 1][
-                            self._connector_state_converter.START_KEY
-                        ],
+                        self.state["slices"][i][self._connector_state_converter.END_KEY],
+                        self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
                         False,
                     )
             yield from self._split_per_slice_range(
                 self._calculate_lower_boundary_of_last_slice(
-                    self.
+                    self.state["slices"][-1][self._connector_state_converter.END_KEY]
                 ),
                 self._end_provider(),
                 True,
@@ -395,8 +387,7 @@ class ConcurrentCursor(Cursor):
     def _is_start_before_first_slice(self) -> bool:
         return (
             self._start is not None
-            and self._start
-            < self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY]
+            and self._start < self.state["slices"][0][self._connector_state_converter.START_KEY]
         )
 
     def _calculate_lower_boundary_of_last_slice(
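With every access now funneled through `self.state`, `_merge_partitions` (shown above) asks the state converter to coalesce overlapping slice intervals into the fewest possible entries. A generic sketch of that idea (not the CDK's exact `merge_intervals`, which also accounts for cursor granularity):

```python
from typing import Any, Dict, List

START, END = "start", "end"  # stand-ins for the converter's START_KEY / END_KEY


def merge_intervals(slices: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Coalesce overlapping closed intervals so the emitted state stays compact."""
    if not slices:
        return []
    ordered = sorted(slices, key=lambda s: s[START])
    merged = [dict(ordered[0])]
    for current in ordered[1:]:
        last = merged[-1]
        if current[START] <= last[END]:  # overlap: extend the previous interval
            last[END] = max(last[END], current[END])
        else:  # gap: start a new interval
            merged.append(dict(current))
    return merged


print(merge_intervals([{"start": 0, "end": 5}, {"start": 3, "end": 9}, {"start": 20, "end": 21}]))
# -> [{'start': 0, 'end': 9}, {'start': 20, 'end': 21}]
```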
airbyte_cdk/sources/types.py CHANGED
{airbyte_cdk-6.17.1.dev1.dist-info → airbyte_cdk-6.18.0.dist-info}/RECORD RENAMED

@@ -62,11 +62,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
 airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
 airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
 airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
-airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
+airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=tSTCSmyMCu1qoGsne1Ooz3c1da-8EDZk6Suiy2gIq9Q,22475
 airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
 airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
 airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
-airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
+airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=2t3_QVXWOImPcH-apR_Xd8qNl6K_URFwBbQ47YHcjXg,133490
 airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
 airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
 airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=edGj4fGxznBk4xzRQyCA1rGfbpqe7z-RE0K3kQQWbgA,858
@@ -81,16 +81,15 @@ airbyte_cdk/sources/declarative/extractors/__init__.py,sha256=RmV-IkO1YLj0PSOrrq
 airbyte_cdk/sources/declarative/extractors/dpath_extractor.py,sha256=wR4Ol4MG2lt5UlqXF5EU_k7qa5cN4_-luu3PJ1PlO3A,3131
 airbyte_cdk/sources/declarative/extractors/http_selector.py,sha256=2zWZ4ewTqQC8VwkjS0xD_u350Km3SiYP7hpOOgiLg5o,1169
 airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=XJELMjahAsaomlvQgN2zrNO0DJX0G0fr9r682gUz7Pg,691
-airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=
+airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=OJ9xmhNWNwwzxYOeIrDy1GINb1zH9MBy6suC5tm2LSk,3545
 airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=tjNwcURmlyD-TGCScXvW95ThNKyPGcx2SiWbG1-H-sc,6552
 airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
 airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
-airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=
-airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=vU6bcVgjDFou7szl5UKxv2-theKSsV78oSME84-C78A,15043
+airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=huRz3KQJSUFmJCg5GPE9TckEBsB5TMsCa_THhJAhPVI,1037
 airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=_UzUnSIUsDbRgbFTXgSyZEFb4ws-KdhdQPWO8mFbV7U,22028
 airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
 airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=3_EEZop94bMitZaJd2PF5Q2Xt9v94tYg7p7YJz8tAFc,15869
-airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=
+airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=hElcYijbOHjdLKOMA7W7aizEbf22r7OSApXALP875uI,15749
 airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py,sha256=2YBOA2NnwAeIKlIhSwUB_W-FaGnPcmrG_liY7b4mV2Y,8365
 airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py,sha256=10LFv1QPM-agVKl6eaANmEBOfd7gZgBrkoTcMggsieQ,4809
 airbyte_cdk/sources/declarative/interpolation/__init__.py,sha256=tjUJkn3B-iZ-p7RP2c3dVZejrGiQeooGmS5ibWTuUL4,437
@@ -107,12 +106,12 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
 airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
 airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
 airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
-airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
+airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=3xWpeDNDGOw_I2pQ1LDiUhNBEWEvNAtd-HCi_1aklSQ,93666
 airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
 airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
 airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
 airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
-airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
+airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=dpRWxZaPghPcE5vGkI4swKDaXyLWLMAbvDoazuNSobU,109709
 airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
 airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
 airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -120,6 +119,7 @@ airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha25
 airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
 airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=SKzKjSyfccq4dxGIh-J6ejrgkCHzaiTIazmbmeQiRD4,1942
 airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py,sha256=5bgXoJfBg_6i53krQMptAGb50XB5XoVfqQxKQhlLtBA,15383
+airbyte_cdk/sources/declarative/requesters/README.md,sha256=WabtHlwHg_J34aL1Kwm8vboYqBaSgsFjq10qR-P2sx8,2658
 airbyte_cdk/sources/declarative/requesters/__init__.py,sha256=d7a3OoHbqaJDyyPli3nqqJ2yAW_SLX6XDaBAKOwvpxw,364
 airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py,sha256=SkEDcJxlT1683rNx93K9whoS0OyUukkuOfToGtgpF58,776
 airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py,sha256=1WZdpFmWL6W_Dko0qjflTaKIWeqt8jHT-D6HcujIp3s,884
@@ -134,7 +134,7 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.
 airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py,sha256=q0YkeYUUWO6iErUy0vjqiOkhg8_9d5YcCmtlpXAJJ9E,1314
 airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py,sha256=Tan66odx8VHzfdyyXMQkXz2pJYksllGqvxmpoajgcK4,669
 airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py,sha256=vhWsEKNTYEzZ4gerhHqnDNKu4wGIP485NAzpSQ5DRZg,7941
-airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=
+airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=3GtOefPH08evlSUxaILkiKLTHbIspFY4qd5B3ZqNE60,10063
 airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=RqYPkgJFAWfcZBTc-JBcGHPm4JL1ZQOhs9GKU4MP2eE,14723
 airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
 airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=FnSl3qPvv5wD6ieAI2Ic5c4dqBk-3fRe4tCaWzq3YwM,11840
@@ -163,7 +163,7 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
 airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC5RK_ZZM7ojTLBQ4IkWQTfmfCk,456
 airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=kX9ltelK2xLIBWDJBK2ucrvVe5tc5xmhdbVbgsjvlxY,3696
 airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
-airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=
+airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=jxQ_9xcVD07r9PKhofitAqMkdX1k8ZNyy50qz5NwkFs,24540
 airbyte_cdk/sources/declarative/schema/__init__.py,sha256=HztgVVaZdil5UfgUZcv_Hyy84r89_EKRwyO2hoewNVg,749
 airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
 airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=H6A3NQ6kPPM-cUNPmdvDPc9xNzR1rQNrK95GbgCW334,8822
@@ -257,7 +257,7 @@ airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=3OB5VsvOkJmCxIM
 airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCBUwJDw1QSCEvz23s7zIEx_7QMxkPq9j-oPIQ,1358
 airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=QP_64kQo-b3sRNHZA5aqrgCJqAhIVegRM3vJ8jGyuSY,15213
 airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=4La5v2UffSjGnhmF4kwNIKt_g3RXk2ux1mSHA1ejgYM,2898
-airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=
+airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=Hke6CpD8Sq1FS4g1Xuht39UN7hKkGy1mvOxvQrm1lLM,20810
 airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=K3rLMpYhS7nnmvwQ52lqBy7DQdFMJpvvT7sgBg_ckA8,3207
 airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
 airbyte_cdk/sources/streams/concurrent/helpers.py,sha256=S6AW8TgIASCZ2UuUcQLE8OzgYUHWt2-KPOvNPwnQf-Q,1596
@@ -293,7 +293,7 @@ airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py,sha256=Y
 airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py,sha256=ka-bBRWvIv09LmZNYl49p2lK9nd_Tvi2g0lIp3OkU40,14872
 airbyte_cdk/sources/streams/http/requests_native_auth/token.py,sha256=h5PTzcdH-RQLeCg7xZ45w_484OPUDSwNWl_iMJQmZoI,2526
 airbyte_cdk/sources/streams/utils/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
-airbyte_cdk/sources/types.py,sha256=
+airbyte_cdk/sources/types.py,sha256=nLPkTpyfGV4E6e99qcBWX4r8C3fE4I8Fvgx2EjvT9ic,5005
 airbyte_cdk/sources/utils/__init__.py,sha256=TTN6VUxVy6Is8BhYQZR5pxJGQh8yH4duXh4O1TiMiEY,118
 airbyte_cdk/sources/utils/casing.py,sha256=QC-gV1O4e8DR4-bhdXieUPKm_JamzslVyfABLYYRSXA,256
 airbyte_cdk/sources/utils/record_helper.py,sha256=jeB0mucudzna7Zvj-pCBbwFrbLJ36SlAWZTh5O4Fb9Y,2168
@@ -343,8 +343,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.
-airbyte_cdk-6.
-airbyte_cdk-6.
-airbyte_cdk-6.
-airbyte_cdk-6.
+airbyte_cdk-6.18.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.18.0.dist-info/METADATA,sha256=RvVkgbg-LBbS5eGTntO-mp34yRIDMuPYZ26VRmSkhCA,6000
+airbyte_cdk-6.18.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+airbyte_cdk-6.18.0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
+airbyte_cdk-6.18.0.dist-info/RECORD,,
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py DELETED

@@ -1,340 +0,0 @@
-import copy
-import logging
-
-#
-# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-#
-import threading
-from collections import OrderedDict
-from copy import deepcopy
-from datetime import timedelta
-from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
-
-from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
-from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
-from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
-    Timer,
-    iterate_with_last_flag_and_state,
-)
-from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
-from airbyte_cdk.sources.message import MessageRepository
-from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
-    PerPartitionKeySerializer,
-)
-from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, CursorField
-from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
-from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
-
-logger = logging.getLogger("airbyte")
-
-
-class ConcurrentCursorFactory:
-    def __init__(self, create_function: Callable[..., Cursor]):
-        self._create_function = create_function
-
-    def create(self, stream_state: Mapping[str, Any], runtime_lookback_window: Any) -> Cursor:
-        return self._create_function(
-            stream_state=stream_state, runtime_lookback_window=runtime_lookback_window
-        )
-
-
-class ConcurrentPerPartitionCursor(Cursor):
-    """
-    Manages state per partition when a stream has many partitions, to prevent data loss or duplication.
-
-    **Partition Limitation and Limit Reached Logic**
-
-    - **DEFAULT_MAX_PARTITIONS_NUMBER**: The maximum number of partitions to keep in memory (default is 10,000).
-    - **_cursor_per_partition**: An ordered dictionary that stores cursors for each partition.
-    - **_over_limit**: A counter that increments each time an oldest partition is removed when the limit is exceeded.
-
-    The class ensures that the number of partitions tracked does not exceed the `DEFAULT_MAX_PARTITIONS_NUMBER` to prevent excessive memory usage.
-
-    - When the number of partitions exceeds the limit, the oldest partitions are removed from `_cursor_per_partition`, and `_over_limit` is incremented accordingly.
-    - The `limit_reached` method returns `True` when `_over_limit` exceeds `DEFAULT_MAX_PARTITIONS_NUMBER`, indicating that the global cursor should be used instead of per-partition cursors.
-
-    This approach avoids unnecessary switching to a global cursor due to temporary spikes in partition counts, ensuring that switching is only done when a sustained high number of partitions is observed.
-    """
-
-    DEFAULT_MAX_PARTITIONS_NUMBER = 10000
-    _NO_STATE: Mapping[str, Any] = {}
-    _NO_CURSOR_STATE: Mapping[str, Any] = {}
-    _KEY = 0
-    _VALUE = 1
-
-    def __init__(
-        self,
-        cursor_factory: ConcurrentCursorFactory,
-        partition_router: PartitionRouter,
-        stream_name: str,
-        stream_namespace: Optional[str],
-        stream_state: Any,
-        message_repository: MessageRepository,
-        connector_state_manager: ConnectorStateManager,
-        cursor_field: CursorField,
-    ) -> None:
-        self._global_cursor: Mapping[str, Any] = {}
-        self._stream_name = stream_name
-        self._stream_namespace = stream_namespace
-        self._message_repository = message_repository
-        self._connector_state_manager = connector_state_manager
-        self._cursor_field = cursor_field
-
-        self._cursor_factory = cursor_factory
-        self._partition_router = partition_router
-
-        # The dict is ordered to ensure that once the maximum number of partitions is reached,
-        # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
-        self._cursor_per_partition: OrderedDict[str, Cursor] = OrderedDict()
-        self._state = {"states": []}
-        self._semaphore_per_partition = OrderedDict()
-        self._finished_partitions = set()
-        self._lock = threading.Lock()
-        self._timer = Timer()
-        self._new_global_cursor = None
-        self._lookback_window = 0
-        self._parent_state = None
-        self._over_limit = 0
-        self._partition_serializer = PerPartitionKeySerializer()
-
-        self._set_initial_state(stream_state)
-
-    @property
-    def cursor_field(self) -> CursorField:
-        return self._cursor_field
-
-    @property
-    def state(self) -> MutableMapping[str, Any]:
-        states = []
-        for partition_tuple, cursor in self._cursor_per_partition.items():
-            if cursor.state:
-                states.append(
-                    {
-                        "partition": self._to_dict(partition_tuple),
-                        "cursor": copy.deepcopy(cursor.state),
-                    }
-                )
-        state: dict[str, Any] = {"states": states}
-
-        if self._global_cursor:
-            state["state"] = self._global_cursor
-        if self._lookback_window is not None:
-            state["lookback_window"] = self._lookback_window
-        if self._parent_state is not None:
-            state["parent_state"] = self._parent_state
-        return state
-
-    def close_partition(self, partition: Partition) -> None:
-        self._cursor_per_partition[
-            self._to_partition_key(partition._stream_slice.partition)
-        ].close_partition(partition=partition)
-        with self._lock:
-            self._semaphore_per_partition[
-                self._to_partition_key(partition._stream_slice.partition)
-            ].acquire()
-            cursor = self._cursor_per_partition[
-                self._to_partition_key(partition._stream_slice.partition)
-            ]
-            if (
-                self._to_partition_key(partition._stream_slice.partition)
-                in self._finished_partitions
-                and self._semaphore_per_partition[
-                    self._to_partition_key(partition._stream_slice.partition)
-                ]._value
-                == 0
-            ):
-                if (
-                    self._new_global_cursor is None
-                    or self._new_global_cursor[self.cursor_field.cursor_field_key]
-                    < cursor.state[self.cursor_field.cursor_field_key]
-                ):
-                    self._new_global_cursor = copy.deepcopy(cursor.state)
-
-    def ensure_at_least_one_state_emitted(self) -> None:
-        """
-        The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
-        called.
-        """
-        if not any(
-            semaphore_item[1]._value for semaphore_item in self._semaphore_per_partition.items()
-        ):
-            self._global_cursor = self._new_global_cursor
-            self._lookback_window = self._timer.finish()
-            self._parent_state = self._partition_router.get_stream_state()
-        self._emit_state_message()
-
-    def _emit_state_message(self) -> None:
-        self._connector_state_manager.update_state_for_stream(
-            self._stream_name,
-            self._stream_namespace,
-            self.state,
-        )
-        state_message = self._connector_state_manager.create_state_message(
-            self._stream_name, self._stream_namespace
-        )
-        self._message_repository.emit_message(state_message)
-
-    def stream_slices(self) -> Iterable[StreamSlice]:
-        slices = self._partition_router.stream_slices()
-        self._timer.start()
-        for partition in slices:
-            yield from self.generate_slices_from_partition(partition)
-
-    def generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
-        # Ensure the maximum number of partitions is not exceeded
-        self._ensure_partition_limit()
-
-        cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
-        if not cursor:
-            partition_state = self._global_cursor if self._global_cursor else self._NO_CURSOR_STATE
-            cursor = self._create_cursor(partition_state)
-            self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
-            self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
-                threading.Semaphore(0)
-            )
-
-        for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
-            cursor.stream_slices(),
-            lambda: None,
-        ):
-            self._semaphore_per_partition[self._to_partition_key(partition.partition)].release()
-            if is_last_slice:
-                self._finished_partitions.add(self._to_partition_key(partition.partition))
-            yield StreamSlice(
-                partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
-            )
-
-    def _ensure_partition_limit(self) -> None:
-        """
-        Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
-        """
-        while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
-            self._over_limit += 1
-            oldest_partition = self._cursor_per_partition.popitem(last=False)[
-                0
-            ]  # Remove the oldest partition
-            logger.warning(
-                f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
-            )
-
-    def limit_reached(self) -> bool:
-        return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
-
-    def _set_initial_state(self, stream_state: StreamState) -> None:
-        """
-        Set the initial state for the cursors.
-
-        This method initializes the state for each partition cursor using the provided stream state.
-        If a partition state is provided in the stream state, it will update the corresponding partition cursor with this state.
-
-        Additionally, it sets the parent state for partition routers that are based on parent streams. If a partition router
-        does not have parent streams, this step will be skipped due to the default PartitionRouter implementation.
-
-        Args:
-            stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
-                {
-                    "states": [
-                        {
-                            "partition": {
-                                "partition_key": "value"
-                            },
-                            "cursor": {
-                                "last_updated": "2023-05-27T00:00:00Z"
-                            }
-                        }
-                    ],
-                    "parent_state": {
-                        "parent_stream_name": {
-                            "last_updated": "2023-05-27T00:00:00Z"
-                        }
-                    }
-                }
-        """
-        if not stream_state:
-            return
-
-        if "states" not in stream_state:
-            # We assume that `stream_state` is in a global format that can be applied to all partitions.
-            # Example: {"global_state_format_key": "global_state_format_value"}
-            self._global_cursor = deepcopy(stream_state)
-            self._new_global_cursor = deepcopy(stream_state)
-
-        else:
-            self._lookback_window = stream_state.get("lookback_window")
-
-            for state in stream_state["states"]:
-                self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
-                    self._create_cursor(
-                        state["cursor"], runtime_lookback_window=self._lookback_window
-                    )
-                )
-                self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
-                    threading.Semaphore(0)
-                )
-
-            # set default state for missing partitions if it is per partition with fallback to global
-            if "state" in stream_state:
-                self._global_cursor = deepcopy(stream_state["state"])
-                self._new_global_cursor = deepcopy(stream_state["state"])
-
-        # Set parent state for partition routers based on parent streams
-        self._partition_router.set_initial_state(stream_state)
-
-    def observe(self, record: Record) -> None:
-        self._cursor_per_partition[
-            self._to_partition_key(record.associated_slice.partition)
-        ].observe(record)
-
-    def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
-        return self._partition_serializer.to_partition_key(partition)
-
-    def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
-        return self._partition_serializer.to_partition(partition_key)
-
-    def _create_cursor(self, cursor_state: Any, runtime_lookback_window: Any = None) -> Cursor:
-        if runtime_lookback_window:
-            runtime_lookback_window = timedelta(seconds=runtime_lookback_window)
-        cursor = self._cursor_factory.create(
-            stream_state=deepcopy(cursor_state), runtime_lookback_window=runtime_lookback_window
-        )
-        return cursor
-
-    def should_be_synced(self, record: Record) -> bool:
-        return self._get_cursor(record).should_be_synced(record)
-
-    def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
-        if not first.associated_slice or not second.associated_slice:
-            raise ValueError(
-                f"Both records should have an associated slice but got {first.associated_slice} and {second.associated_slice}"
-            )
-        if first.associated_slice.partition != second.associated_slice.partition:
-            raise ValueError(
-                f"To compare records, partition should be the same but got {first.associated_slice.partition} and {second.associated_slice.partition}"
-            )
-
-        return self._get_cursor(first).is_greater_than_or_equal(
-            self._convert_record_to_cursor_record(first),
-            self._convert_record_to_cursor_record(second),
-        )
-
-    @staticmethod
-    def _convert_record_to_cursor_record(record: Record) -> Record:
-        return Record(
-            record.data,
-            StreamSlice(partition={}, cursor_slice=record.associated_slice.cursor_slice)
-            if record.associated_slice
-            else None,
-        )
-
-    def _get_cursor(self, record: Record) -> Cursor:
-        if not record.associated_slice:
-            raise ValueError(
-                "Invalid state as stream slices that are emitted should refer to an existing cursor"
-            )
-        partition_key = self._to_partition_key(record.associated_slice.partition)
-        if partition_key not in self._cursor_per_partition:
-            raise ValueError(
-                "Invalid state as stream slices that are emitted should refer to an existing cursor"
-            )
-        cursor = self._cursor_per_partition[partition_key]
-        return cursor
{airbyte_cdk-6.17.1.dev1.dist-info → airbyte_cdk-6.18.0.dist-info}/LICENSE.txt RENAMED (file without changes)
{airbyte_cdk-6.17.1.dev1.dist-info → airbyte_cdk-6.18.0.dist-info}/WHEEL RENAMED (file without changes)
{airbyte_cdk-6.17.1.dev1.dist-info → airbyte_cdk-6.18.0.dist-info}/entry_points.txt RENAMED (file without changes)