airbyte-cdk 6.60.0.post35.dev16509779638__py3-none-any.whl → 6.60.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +35 -30
- airbyte_cdk/config_observation.py +2 -2
- airbyte_cdk/connector.py +2 -1
- airbyte_cdk/connector_builder/connector_builder_handler.py +6 -1
- airbyte_cdk/connector_builder/main.py +11 -18
- airbyte_cdk/connector_builder/test_reader/helpers.py +31 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +5 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +8 -3
- airbyte_cdk/destinations/destination.py +7 -7
- airbyte_cdk/entrypoint.py +23 -8
- airbyte_cdk/logger.py +2 -2
- airbyte_cdk/models/__init__.py +6 -7
- airbyte_cdk/models/airbyte_protocol.py +81 -2
- airbyte_cdk/models/airbyte_protocol_serializers.py +26 -152
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +2 -4
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +0 -11
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -6
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -15
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +0 -3
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +0 -6
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +15 -7
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +7 -4
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +2 -25
- airbyte_cdk/sources/declarative/spec/spec.py +2 -2
- airbyte_cdk/sources/file_based/file_based_source.py +3 -3
- airbyte_cdk/sources/source.py +4 -2
- airbyte_cdk/sources/streams/checkpoint/cursor.py +0 -6
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +0 -6
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +0 -6
- airbyte_cdk/sources/streams/http/http_client.py +7 -5
- airbyte_cdk/sources/streams/permissions/identities_stream.py +1 -1
- airbyte_cdk/sql/shared/sql_processor.py +1 -1
- airbyte_cdk/test/catalog_builder.py +2 -1
- airbyte_cdk/test/entrypoint_wrapper.py +16 -25
- airbyte_cdk/utils/datetime_helpers.py +5 -14
- airbyte_cdk/utils/traced_exception.py +2 -2
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.2.dist-info}/METADATA +11 -10
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.2.dist-info}/RECORD +43 -43
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.2.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.2.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.2.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.2.dist-info}/entry_points.txt +0 -0
@@ -1,13 +1,7 @@
|
|
1
1
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
-
import
|
3
|
-
import logging
|
4
|
-
import sys
|
5
|
-
from enum import Enum
|
6
|
-
from typing import Any, Callable, Dict, Type, TypeVar, cast
|
2
|
+
from typing import Any, Dict
|
7
3
|
|
8
|
-
import
|
9
|
-
import orjson
|
10
|
-
from pydantic import ValidationError
|
4
|
+
from serpyco_rs import CustomType, Serializer
|
11
5
|
|
12
6
|
from .airbyte_protocol import ( # type: ignore[attr-defined] # all classes are imported to airbyte_protocol via *
|
13
7
|
AirbyteCatalog,
|
@@ -21,154 +15,34 @@ from .airbyte_protocol import ( # type: ignore[attr-defined] # all classes are
|
|
21
15
|
ConnectorSpecification,
|
22
16
|
)
|
23
17
|
|
24
|
-
USE_RUST_BACKEND = sys.platform != "emscripten"
|
25
|
-
"""When run in WASM, use the pure Python backend for serpyco."""
|
26
18
|
|
27
|
-
|
28
|
-
|
19
|
+
class AirbyteStateBlobType(CustomType[AirbyteStateBlob, Dict[str, Any]]):
|
20
|
+
def serialize(self, value: AirbyteStateBlob) -> Dict[str, Any]:
|
21
|
+
# cant use orjson.dumps() directly because private attributes are excluded, e.g. "__ab_full_refresh_sync_complete"
|
22
|
+
return {k: v for k, v in value.__dict__.items()}
|
29
23
|
|
30
|
-
|
24
|
+
def deserialize(self, value: Dict[str, Any]) -> AirbyteStateBlob:
|
25
|
+
return AirbyteStateBlob(value)
|
31
26
|
|
32
|
-
|
27
|
+
def get_json_schema(self) -> Dict[str, Any]:
|
28
|
+
return {"type": "object"}
|
33
29
|
|
34
|
-
# Making this a no-op for now:
|
35
30
|
|
31
|
+
def custom_type_resolver(t: type) -> CustomType[AirbyteStateBlob, Dict[str, Any]] | None:
|
32
|
+
return AirbyteStateBlobType() if t is AirbyteStateBlob else None
|
36
33
|
|
37
|
-
def ab_message_to_string(
|
38
|
-
message: AirbyteMessage,
|
39
|
-
) -> str:
|
40
|
-
"""
|
41
|
-
Convert an AirbyteMessage to a JSON string.
|
42
34
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
Args:
|
59
|
-
message_str (str): The JSON string to convert.
|
60
|
-
|
61
|
-
Returns:
|
62
|
-
AirbyteMessage: The deserialized AirbyteMessage.
|
63
|
-
"""
|
64
|
-
try:
|
65
|
-
return AirbyteMessage.model_validate_json(message_json)
|
66
|
-
except ValidationError as e:
|
67
|
-
raise ValueError(f"Invalid AirbyteMessage format: {e}") from e
|
68
|
-
except orjson.JSONDecodeError as e:
|
69
|
-
raise ValueError(f"Failed to decode JSON: {e}") from e
|
70
|
-
|
71
|
-
|
72
|
-
def ab_connector_spec_from_string(
|
73
|
-
spec_json: str,
|
74
|
-
) -> ConnectorSpecification:
|
75
|
-
"""
|
76
|
-
Convert a JSON string to a ConnectorSpecification.
|
77
|
-
|
78
|
-
Args:
|
79
|
-
spec_str (str): The JSON string to convert.
|
80
|
-
|
81
|
-
Returns:
|
82
|
-
ConnectorSpecification: The deserialized ConnectorSpecification.
|
83
|
-
"""
|
84
|
-
try:
|
85
|
-
return ConnectorSpecification.model_validate_json(spec_json)
|
86
|
-
except ValidationError as e:
|
87
|
-
raise ValueError(f"Invalid ConnectorSpecification format: {e}") from e
|
88
|
-
except orjson.JSONDecodeError as e:
|
89
|
-
raise ValueError(f"Failed to decode JSON: {e}") from e
|
90
|
-
|
91
|
-
|
92
|
-
def ab_connector_spec_to_string(
|
93
|
-
spec: ConnectorSpecification,
|
94
|
-
) -> str:
|
95
|
-
"""
|
96
|
-
Convert a ConnectorSpecification to a JSON string.
|
97
|
-
|
98
|
-
Args:
|
99
|
-
spec (ConnectorSpecification): The ConnectorSpecification to convert.
|
100
|
-
|
101
|
-
Returns:
|
102
|
-
str: JSON string representation of the ConnectorSpecification.
|
103
|
-
"""
|
104
|
-
return spec.model_dump_json()
|
105
|
-
|
106
|
-
|
107
|
-
def ab_configured_catalog_to_string(
|
108
|
-
catalog: ConfiguredAirbyteCatalog,
|
109
|
-
) -> str:
|
110
|
-
"""
|
111
|
-
Convert a ConfiguredAirbyteCatalog to a JSON string.
|
112
|
-
|
113
|
-
Args:
|
114
|
-
catalog (ConfiguredAirbyteCatalog): The ConfiguredAirbyteCatalog to convert.
|
115
|
-
|
116
|
-
Returns:
|
117
|
-
str: JSON string representation of the ConfiguredAirbyteCatalog.
|
118
|
-
"""
|
119
|
-
return catalog.model_dump_json()
|
120
|
-
|
121
|
-
|
122
|
-
def ab_configured_catalog_from_string(
|
123
|
-
catalog_json: str,
|
124
|
-
) -> ConfiguredAirbyteCatalog:
|
125
|
-
"""
|
126
|
-
Convert a JSON string to a ConfiguredAirbyteCatalog.
|
127
|
-
|
128
|
-
Args:
|
129
|
-
catalog_json (str): The JSON string to convert.
|
130
|
-
|
131
|
-
Returns:
|
132
|
-
ConfiguredAirbyteCatalog: The deserialized ConfiguredAirbyteCatalog.
|
133
|
-
"""
|
134
|
-
try:
|
135
|
-
return ConfiguredAirbyteCatalog.model_validate_json(catalog_json)
|
136
|
-
except ValidationError as e:
|
137
|
-
raise ValueError(f"Invalid ConfiguredAirbyteCatalog format: {e}") from e
|
138
|
-
except orjson.JSONDecodeError as e:
|
139
|
-
raise ValueError(f"Failed to decode JSON: {e}") from e
|
140
|
-
|
141
|
-
|
142
|
-
def ab_state_message_from_string(
|
143
|
-
state_json: str,
|
144
|
-
) -> AirbyteStateMessage:
|
145
|
-
"""
|
146
|
-
Convert a JSON string to an AirbyteStateMessage.
|
147
|
-
|
148
|
-
Args:
|
149
|
-
state_json (str): The JSON string to convert.
|
150
|
-
|
151
|
-
Returns:
|
152
|
-
AirbyteStateMessage: The deserialized AirbyteStateMessage.
|
153
|
-
"""
|
154
|
-
try:
|
155
|
-
return AirbyteStateMessage.model_validate_json(state_json)
|
156
|
-
except ValidationError as e:
|
157
|
-
raise ValueError(f"Invalid AirbyteStateMessage format: {e}") from e
|
158
|
-
except orjson.JSONDecodeError as e:
|
159
|
-
raise ValueError(f"Failed to decode JSON: {e}") from e
|
160
|
-
|
161
|
-
|
162
|
-
def ab_state_message_to_string(
|
163
|
-
state: AirbyteStateMessage,
|
164
|
-
) -> str:
|
165
|
-
"""
|
166
|
-
Convert an AirbyteStateMessage to a JSON string.
|
167
|
-
|
168
|
-
Args:
|
169
|
-
state (AirbyteStateMessage): The AirbyteStateMessage to convert.
|
170
|
-
|
171
|
-
Returns:
|
172
|
-
str: JSON string representation of the AirbyteStateMessage.
|
173
|
-
"""
|
174
|
-
return state.model_dump_json()
|
35
|
+
AirbyteCatalogSerializer = Serializer(AirbyteCatalog, omit_none=True)
|
36
|
+
AirbyteStreamSerializer = Serializer(AirbyteStream, omit_none=True)
|
37
|
+
AirbyteStreamStateSerializer = Serializer(
|
38
|
+
AirbyteStreamState, omit_none=True, custom_type_resolver=custom_type_resolver
|
39
|
+
)
|
40
|
+
AirbyteStateMessageSerializer = Serializer(
|
41
|
+
AirbyteStateMessage, omit_none=True, custom_type_resolver=custom_type_resolver
|
42
|
+
)
|
43
|
+
AirbyteMessageSerializer = Serializer(
|
44
|
+
AirbyteMessage, omit_none=True, custom_type_resolver=custom_type_resolver
|
45
|
+
)
|
46
|
+
ConfiguredAirbyteCatalogSerializer = Serializer(ConfiguredAirbyteCatalog, omit_none=True)
|
47
|
+
ConfiguredAirbyteStreamSerializer = Serializer(ConfiguredAirbyteStream, omit_none=True)
|
48
|
+
ConnectorSpecificationSerializer = Serializer(ConnectorSpecification, omit_none=True)
|
@@ -63,7 +63,7 @@ from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_
|
|
63
63
|
class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
64
64
|
# By default, we defer to a value of 2. A value lower than than could cause a PartitionEnqueuer to be stuck in a state of deadlock
|
65
65
|
# because it has hit the limit of futures but not partition reader is consuming them.
|
66
|
-
_LOWEST_SAFE_CONCURRENCY_LEVEL =
|
66
|
+
_LOWEST_SAFE_CONCURRENCY_LEVEL = 2
|
67
67
|
|
68
68
|
def __init__(
|
69
69
|
self,
|
@@ -119,9 +119,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
119
119
|
) # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a dead lock during start up
|
120
120
|
else:
|
121
121
|
concurrency_level = self._LOWEST_SAFE_CONCURRENCY_LEVEL
|
122
|
-
initial_number_of_partitions_to_generate =
|
123
|
-
self._LOWEST_SAFE_CONCURRENCY_LEVEL // 2, 1
|
124
|
-
)
|
122
|
+
initial_number_of_partitions_to_generate = self._LOWEST_SAFE_CONCURRENCY_LEVEL // 2
|
125
123
|
|
126
124
|
self._concurrent_source = ConcurrentSource.create(
|
127
125
|
num_workers=concurrency_level,
|
@@ -429,17 +429,6 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
429
429
|
)
|
430
430
|
)
|
431
431
|
|
432
|
-
def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
|
433
|
-
cursor_field = self.cursor_field.eval(self.config) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
|
434
|
-
first_cursor_value = first.get(cursor_field)
|
435
|
-
second_cursor_value = second.get(cursor_field)
|
436
|
-
if first_cursor_value and second_cursor_value:
|
437
|
-
return self.parse_date(first_cursor_value) >= self.parse_date(second_cursor_value)
|
438
|
-
elif first_cursor_value:
|
439
|
-
return True
|
440
|
-
else:
|
441
|
-
return False
|
442
|
-
|
443
432
|
def set_runtime_lookback_window(self, lookback_window_in_seconds: int) -> None:
|
444
433
|
"""
|
445
434
|
Updates the lookback window based on a given number of seconds if the new duration
|
@@ -338,12 +338,6 @@ class GlobalSubstreamCursor(DeclarativeCursor):
|
|
338
338
|
def should_be_synced(self, record: Record) -> bool:
|
339
339
|
return self._stream_cursor.should_be_synced(self._convert_record_to_cursor_record(record))
|
340
340
|
|
341
|
-
def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
|
342
|
-
return self._stream_cursor.is_greater_than_or_equal(
|
343
|
-
self._convert_record_to_cursor_record(first),
|
344
|
-
self._convert_record_to_cursor_record(second),
|
345
|
-
)
|
346
|
-
|
347
341
|
@staticmethod
|
348
342
|
def _convert_record_to_cursor_record(record: Record) -> Record:
|
349
343
|
return Record(
|
@@ -315,21 +315,6 @@ class PerPartitionCursor(DeclarativeCursor):
|
|
315
315
|
self._convert_record_to_cursor_record(record)
|
316
316
|
)
|
317
317
|
|
318
|
-
def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
|
319
|
-
if not first.associated_slice or not second.associated_slice:
|
320
|
-
raise ValueError(
|
321
|
-
f"Both records should have an associated slice but got {first.associated_slice} and {second.associated_slice}"
|
322
|
-
)
|
323
|
-
if first.associated_slice.partition != second.associated_slice.partition:
|
324
|
-
raise ValueError(
|
325
|
-
f"To compare records, partition should be the same but got {first.associated_slice.partition} and {second.associated_slice.partition}"
|
326
|
-
)
|
327
|
-
|
328
|
-
return self._get_cursor(first).is_greater_than_or_equal(
|
329
|
-
self._convert_record_to_cursor_record(first),
|
330
|
-
self._convert_record_to_cursor_record(second),
|
331
|
-
)
|
332
|
-
|
333
318
|
@staticmethod
|
334
319
|
def _convert_record_to_cursor_record(record: Record) -> Record:
|
335
320
|
return Record(
|
@@ -195,6 +195,3 @@ class PerPartitionWithGlobalCursor(DeclarativeCursor):
|
|
195
195
|
|
196
196
|
def should_be_synced(self, record: Record) -> bool:
|
197
197
|
return self._get_active_cursor().should_be_synced(record)
|
198
|
-
|
199
|
-
def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
|
200
|
-
return self._global_cursor.is_greater_than_or_equal(first, second)
|
@@ -42,12 +42,6 @@ class ResumableFullRefreshCursor(DeclarativeCursor):
|
|
42
42
|
"""
|
43
43
|
return True
|
44
44
|
|
45
|
-
def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
|
46
|
-
"""
|
47
|
-
RFR record don't have ordering to be compared between one another.
|
48
|
-
"""
|
49
|
-
return False
|
50
|
-
|
51
45
|
def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
|
52
46
|
# A top-level RFR cursor only manages the state of a single partition
|
53
47
|
return self._cursor
|
@@ -31,7 +31,7 @@ from airbyte_cdk.models import (
|
|
31
31
|
ConnectorSpecification,
|
32
32
|
FailureType,
|
33
33
|
)
|
34
|
-
from airbyte_cdk.models.airbyte_protocol_serializers import
|
34
|
+
from airbyte_cdk.models.airbyte_protocol_serializers import AirbyteMessageSerializer
|
35
35
|
from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
|
36
36
|
from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
|
37
37
|
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
@@ -234,7 +234,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
234
234
|
)
|
235
235
|
# We have no mechanism for consuming the queue, so we print the messages to stdout
|
236
236
|
for message in self.message_repository.consume_queue():
|
237
|
-
print(
|
237
|
+
print(orjson.dumps(AirbyteMessageSerializer.dump(message)).decode())
|
238
238
|
self._spec_component.transform_config(mutable_config)
|
239
239
|
return mutable_config
|
240
240
|
|
@@ -542,11 +542,19 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
542
542
|
components_resolver_config["retriever"]["requester"]["use_cache"] = True
|
543
543
|
|
544
544
|
# Create a resolver for dynamic components based on type
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
545
|
+
if resolver_type == "HttpComponentsResolver":
|
546
|
+
components_resolver = self._constructor.create_component(
|
547
|
+
model_type=COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
|
548
|
+
component_definition=components_resolver_config,
|
549
|
+
config=config,
|
550
|
+
stream_name=dynamic_definition.get("name"),
|
551
|
+
)
|
552
|
+
else:
|
553
|
+
components_resolver = self._constructor.create_component(
|
554
|
+
model_type=COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
|
555
|
+
component_definition=components_resolver_config,
|
556
|
+
config=config,
|
557
|
+
)
|
550
558
|
|
551
559
|
stream_template_config = dynamic_definition["stream_template"]
|
552
560
|
|
@@ -3493,10 +3493,11 @@ class ModelToComponentFactory:
|
|
3493
3493
|
requester=download_requester,
|
3494
3494
|
record_selector=record_selector,
|
3495
3495
|
primary_key=None,
|
3496
|
-
name=
|
3496
|
+
name=name,
|
3497
3497
|
paginator=paginator,
|
3498
3498
|
config=config,
|
3499
3499
|
parameters={},
|
3500
|
+
log_formatter=self._get_log_formatter(None, name),
|
3500
3501
|
)
|
3501
3502
|
|
3502
3503
|
def _get_job_timeout() -> datetime.timedelta:
|
@@ -3805,7 +3806,7 @@ class ModelToComponentFactory:
|
|
3805
3806
|
)
|
3806
3807
|
|
3807
3808
|
def create_http_components_resolver(
|
3808
|
-
self, model: HttpComponentsResolverModel, config: Config
|
3809
|
+
self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None
|
3809
3810
|
) -> Any:
|
3810
3811
|
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
3811
3812
|
combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
|
@@ -3813,7 +3814,7 @@ class ModelToComponentFactory:
|
|
3813
3814
|
retriever = self._create_component_from_model(
|
3814
3815
|
model=model.retriever,
|
3815
3816
|
config=config,
|
3816
|
-
name="",
|
3817
|
+
name=f"{stream_name if stream_name else '__http_components_resolver'}",
|
3817
3818
|
primary_key=None,
|
3818
3819
|
stream_slicer=stream_slicer if stream_slicer else combined_slicers,
|
3819
3820
|
transformations=[],
|
@@ -3890,7 +3891,9 @@ class ModelToComponentFactory:
|
|
3890
3891
|
)
|
3891
3892
|
|
3892
3893
|
def create_parametrized_components_resolver(
|
3893
|
-
self,
|
3894
|
+
self,
|
3895
|
+
model: ParametrizedComponentsResolverModel,
|
3896
|
+
config: Config,
|
3894
3897
|
) -> ParametrizedComponentsResolver:
|
3895
3898
|
stream_parameters = StreamParametersDefinition(
|
3896
3899
|
list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream
|
@@ -41,7 +41,6 @@ from airbyte_cdk.sources.declarative.requesters.request_options import (
|
|
41
41
|
from airbyte_cdk.sources.declarative.requesters.requester import Requester
|
42
42
|
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
43
43
|
from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer
|
44
|
-
from airbyte_cdk.sources.http_logger import format_http_message
|
45
44
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
46
45
|
from airbyte_cdk.sources.streams.core import StreamData
|
47
46
|
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
@@ -528,35 +527,13 @@ class SimpleRetriever(Retriever):
|
|
528
527
|
if self.cursor and current_record:
|
529
528
|
self.cursor.observe(_slice, current_record)
|
530
529
|
|
531
|
-
# Latest record read, not necessarily within slice boundaries.
|
532
|
-
# TODO Remove once all custom components implement `observe` method.
|
533
|
-
# https://github.com/airbytehq/airbyte-internal-issues/issues/6955
|
534
|
-
most_recent_record_from_slice = self._get_most_recent_record(
|
535
|
-
most_recent_record_from_slice, current_record, _slice
|
536
|
-
)
|
537
530
|
yield stream_data
|
538
531
|
|
539
532
|
if self.cursor:
|
540
|
-
self.cursor.close_slice(_slice
|
533
|
+
self.cursor.close_slice(_slice)
|
541
534
|
return
|
542
535
|
|
543
|
-
|
544
|
-
self,
|
545
|
-
current_most_recent: Optional[Record],
|
546
|
-
current_record: Optional[Record],
|
547
|
-
stream_slice: StreamSlice,
|
548
|
-
) -> Optional[Record]:
|
549
|
-
if self.cursor and current_record:
|
550
|
-
if not current_most_recent:
|
551
|
-
return current_record
|
552
|
-
else:
|
553
|
-
return (
|
554
|
-
current_most_recent
|
555
|
-
if self.cursor.is_greater_than_or_equal(current_most_recent, current_record)
|
556
|
-
else current_record
|
557
|
-
)
|
558
|
-
else:
|
559
|
-
return None
|
536
|
+
# FIXME based on the comment above in SimpleRetriever.read_records, it seems like we can tackle https://github.com/airbytehq/airbyte-internal-issues/issues/6955 and remove this
|
560
537
|
|
561
538
|
def _extract_record(
|
562
539
|
self, stream_data: StreamData, stream_slice: StreamSlice
|
@@ -8,8 +8,8 @@ from typing import Any, List, Mapping, MutableMapping, Optional
|
|
8
8
|
from airbyte_cdk.models import (
|
9
9
|
AdvancedAuth,
|
10
10
|
ConnectorSpecification,
|
11
|
+
ConnectorSpecificationSerializer,
|
11
12
|
)
|
12
|
-
from airbyte_cdk.models.airbyte_protocol_serializers import ab_connector_spec_from_string
|
13
13
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import AuthFlow
|
14
14
|
from airbyte_cdk.sources.declarative.transformations.config_transformations.config_transformation import (
|
15
15
|
ConfigTransformation,
|
@@ -59,7 +59,7 @@ class Spec:
|
|
59
59
|
obj["advanced_auth"] = self.advanced_auth.dict()
|
60
60
|
|
61
61
|
# We remap these keys to camel case because that's the existing format expected by the rest of the platform
|
62
|
-
return
|
62
|
+
return ConnectorSpecificationSerializer.load(obj)
|
63
63
|
|
64
64
|
def migrate_config(self, config: MutableMapping[str, Any]) -> None:
|
65
65
|
"""
|
@@ -77,9 +77,9 @@ from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
|
77
77
|
from airbyte_cdk.utils.analytics_message import create_analytics_message
|
78
78
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
79
79
|
|
80
|
-
DEFAULT_CONCURRENCY =
|
81
|
-
MAX_CONCURRENCY =
|
82
|
-
INITIAL_N_PARTITIONS =
|
80
|
+
DEFAULT_CONCURRENCY = 100
|
81
|
+
MAX_CONCURRENCY = 100
|
82
|
+
INITIAL_N_PARTITIONS = MAX_CONCURRENCY // 2
|
83
83
|
IDENTITIES_STREAM = "identities"
|
84
84
|
|
85
85
|
|
airbyte_cdk/sources/source.py
CHANGED
@@ -12,7 +12,9 @@ from airbyte_cdk.models import (
|
|
12
12
|
AirbyteCatalog,
|
13
13
|
AirbyteMessage,
|
14
14
|
AirbyteStateMessage,
|
15
|
+
AirbyteStateMessageSerializer,
|
15
16
|
ConfiguredAirbyteCatalog,
|
17
|
+
ConfiguredAirbyteCatalogSerializer,
|
16
18
|
)
|
17
19
|
|
18
20
|
TState = TypeVar("TState")
|
@@ -70,7 +72,7 @@ class Source(
|
|
70
72
|
state_obj = BaseConnector._read_json_file(state_path)
|
71
73
|
if state_obj:
|
72
74
|
for state in state_obj: # type: ignore # `isinstance(state_obj, List)` ensures that this is a list
|
73
|
-
parsed_message =
|
75
|
+
parsed_message = AirbyteStateMessageSerializer.load(state)
|
74
76
|
if (
|
75
77
|
not parsed_message.stream
|
76
78
|
and not parsed_message.data
|
@@ -85,7 +87,7 @@ class Source(
|
|
85
87
|
# can be overridden to change an input catalog
|
86
88
|
@classmethod
|
87
89
|
def read_catalog(cls, catalog_path: str) -> ConfiguredAirbyteCatalog:
|
88
|
-
return
|
90
|
+
return ConfiguredAirbyteCatalogSerializer.load(cls._read_json_file(catalog_path))
|
89
91
|
|
90
92
|
@property
|
91
93
|
def name(self) -> str:
|
@@ -62,12 +62,6 @@ class Cursor(ABC):
|
|
62
62
|
Evaluating if a record should be synced allows for filtering and stop condition on pagination
|
63
63
|
"""
|
64
64
|
|
65
|
-
@abstractmethod
|
66
|
-
def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
|
67
|
-
"""
|
68
|
-
Evaluating which record is greater in terms of cursor. This is used to avoid having to capture all the records to close a slice
|
69
|
-
"""
|
70
|
-
|
71
65
|
@abstractmethod
|
72
66
|
def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
|
73
67
|
"""
|
@@ -40,12 +40,6 @@ class ResumableFullRefreshCursor(Cursor):
|
|
40
40
|
"""
|
41
41
|
return True
|
42
42
|
|
43
|
-
def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
|
44
|
-
"""
|
45
|
-
RFR record don't have ordering to be compared between one another.
|
46
|
-
"""
|
47
|
-
return False
|
48
|
-
|
49
43
|
def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
|
50
44
|
# A top-level RFR cursor only manages the state of a single partition
|
51
45
|
return self._cursor
|
@@ -89,12 +89,6 @@ class SubstreamResumableFullRefreshCursor(Cursor):
|
|
89
89
|
"""
|
90
90
|
return True
|
91
91
|
|
92
|
-
def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
|
93
|
-
"""
|
94
|
-
RFR record don't have ordering to be compared between one another.
|
95
|
-
"""
|
96
|
-
return False
|
97
|
-
|
98
92
|
def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
|
99
93
|
if not stream_slice:
|
100
94
|
raise ValueError("A partition needs to be provided in order to extract a state")
|
@@ -14,12 +14,12 @@ import requests_cache
|
|
14
14
|
from requests.auth import AuthBase
|
15
15
|
|
16
16
|
from airbyte_cdk.models import (
|
17
|
+
AirbyteMessageSerializer,
|
17
18
|
AirbyteStreamStatus,
|
18
19
|
AirbyteStreamStatusReason,
|
19
20
|
AirbyteStreamStatusReasonType,
|
20
21
|
Level,
|
21
22
|
StreamDescriptor,
|
22
|
-
ab_message_to_string,
|
23
23
|
)
|
24
24
|
from airbyte_cdk.sources.http_config import MAX_CONNECTION_POOL_SIZE
|
25
25
|
from airbyte_cdk.sources.message import MessageRepository
|
@@ -396,11 +396,13 @@ class HttpClient:
|
|
396
396
|
if error_resolution.response_action == ResponseAction.RATE_LIMITED:
|
397
397
|
# TODO: Update to handle with message repository when concurrent message repository is ready
|
398
398
|
reasons = [AirbyteStreamStatusReason(type=AirbyteStreamStatusReasonType.RATE_LIMITED)]
|
399
|
-
message =
|
400
|
-
|
401
|
-
|
399
|
+
message = orjson.dumps(
|
400
|
+
AirbyteMessageSerializer.dump(
|
401
|
+
stream_status_as_airbyte_message(
|
402
|
+
StreamDescriptor(name=self._name), AirbyteStreamStatus.RUNNING, reasons
|
403
|
+
)
|
402
404
|
)
|
403
|
-
)
|
405
|
+
).decode()
|
404
406
|
|
405
407
|
# Simply printing the stream status is a temporary solution and can cause future issues. Currently, the _send method is
|
406
408
|
# wrapped with backoff decorators, and we can only emit messages by iterating record_iterator in the abstract source at the
|
@@ -6,7 +6,7 @@ import traceback
|
|
6
6
|
from abc import ABC, abstractmethod
|
7
7
|
from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional
|
8
8
|
|
9
|
-
from
|
9
|
+
from airbyte_protocol_dataclasses.models import SyncMode
|
10
10
|
|
11
11
|
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
|
12
12
|
from airbyte_cdk.models import Type as MessageType
|
@@ -13,7 +13,7 @@ from typing import TYPE_CHECKING, Any, final
|
|
13
13
|
import pandas as pd
|
14
14
|
import sqlalchemy
|
15
15
|
import ulid
|
16
|
-
from
|
16
|
+
from airbyte_protocol_dataclasses.models import AirbyteStateMessage
|
17
17
|
from pandas import Index
|
18
18
|
from pydantic import BaseModel, Field
|
19
19
|
from sqlalchemy import Column, Table, and_, create_engine, insert, null, select, text, update
|
@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Union, overload
|
|
5
5
|
from airbyte_cdk.models import (
|
6
6
|
ConfiguredAirbyteCatalog,
|
7
7
|
ConfiguredAirbyteStream,
|
8
|
+
ConfiguredAirbyteStreamSerializer,
|
8
9
|
SyncMode,
|
9
10
|
)
|
10
11
|
|
@@ -41,7 +42,7 @@ class ConfiguredAirbyteStreamBuilder:
|
|
41
42
|
return self
|
42
43
|
|
43
44
|
def build(self) -> ConfiguredAirbyteStream:
|
44
|
-
return
|
45
|
+
return ConfiguredAirbyteStreamSerializer.load(self._stream)
|
45
46
|
|
46
47
|
|
47
48
|
class CatalogBuilder:
|