airbyte-cdk 6.60.0.post35.dev16509779638__py3-none-any.whl → 6.60.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +35 -30
- airbyte_cdk/config_observation.py +2 -2
- airbyte_cdk/connector.py +2 -1
- airbyte_cdk/connector_builder/connector_builder_handler.py +6 -1
- airbyte_cdk/connector_builder/main.py +11 -18
- airbyte_cdk/connector_builder/test_reader/helpers.py +31 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +5 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +8 -3
- airbyte_cdk/destinations/destination.py +7 -7
- airbyte_cdk/entrypoint.py +23 -8
- airbyte_cdk/logger.py +2 -2
- airbyte_cdk/models/__init__.py +6 -7
- airbyte_cdk/models/airbyte_protocol.py +81 -2
- airbyte_cdk/models/airbyte_protocol_serializers.py +26 -152
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +2 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +15 -7
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +7 -4
- airbyte_cdk/sources/declarative/spec/spec.py +2 -2
- airbyte_cdk/sources/file_based/file_based_source.py +3 -3
- airbyte_cdk/sources/source.py +4 -2
- airbyte_cdk/sources/streams/http/http_client.py +7 -5
- airbyte_cdk/sources/streams/permissions/identities_stream.py +1 -1
- airbyte_cdk/sql/shared/sql_processor.py +1 -1
- airbyte_cdk/test/catalog_builder.py +2 -1
- airbyte_cdk/test/entrypoint_wrapper.py +16 -25
- airbyte_cdk/utils/datetime_helpers.py +5 -14
- airbyte_cdk/utils/traced_exception.py +2 -2
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/METADATA +11 -10
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/RECORD +34 -34
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/entry_points.txt +0 -0
@@ -1,13 +1,7 @@
|
|
1
1
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
-
import
|
3
|
-
import logging
|
4
|
-
import sys
|
5
|
-
from enum import Enum
|
6
|
-
from typing import Any, Callable, Dict, Type, TypeVar, cast
|
2
|
+
from typing import Any, Dict
|
7
3
|
|
8
|
-
import
|
9
|
-
import orjson
|
10
|
-
from pydantic import ValidationError
|
4
|
+
from serpyco_rs import CustomType, Serializer
|
11
5
|
|
12
6
|
from .airbyte_protocol import ( # type: ignore[attr-defined] # all classes are imported to airbyte_protocol via *
|
13
7
|
AirbyteCatalog,
|
@@ -21,154 +15,34 @@ from .airbyte_protocol import ( # type: ignore[attr-defined] # all classes are
|
|
21
15
|
ConnectorSpecification,
|
22
16
|
)
|
23
17
|
|
24
|
-
USE_RUST_BACKEND = sys.platform != "emscripten"
|
25
|
-
"""When run in WASM, use the pure Python backend for serpyco."""
|
26
18
|
|
27
|
-
|
28
|
-
|
19
|
+
class AirbyteStateBlobType(CustomType[AirbyteStateBlob, Dict[str, Any]]):
|
20
|
+
def serialize(self, value: AirbyteStateBlob) -> Dict[str, Any]:
|
21
|
+
# cant use orjson.dumps() directly because private attributes are excluded, e.g. "__ab_full_refresh_sync_complete"
|
22
|
+
return {k: v for k, v in value.__dict__.items()}
|
29
23
|
|
30
|
-
|
24
|
+
def deserialize(self, value: Dict[str, Any]) -> AirbyteStateBlob:
|
25
|
+
return AirbyteStateBlob(value)
|
31
26
|
|
32
|
-
|
27
|
+
def get_json_schema(self) -> Dict[str, Any]:
|
28
|
+
return {"type": "object"}
|
33
29
|
|
34
|
-
# Making this a no-op for now:
|
35
30
|
|
31
|
+
def custom_type_resolver(t: type) -> CustomType[AirbyteStateBlob, Dict[str, Any]] | None:
|
32
|
+
return AirbyteStateBlobType() if t is AirbyteStateBlob else None
|
36
33
|
|
37
|
-
def ab_message_to_string(
|
38
|
-
message: AirbyteMessage,
|
39
|
-
) -> str:
|
40
|
-
"""
|
41
|
-
Convert an AirbyteMessage to a JSON string.
|
42
34
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
Args:
|
59
|
-
message_str (str): The JSON string to convert.
|
60
|
-
|
61
|
-
Returns:
|
62
|
-
AirbyteMessage: The deserialized AirbyteMessage.
|
63
|
-
"""
|
64
|
-
try:
|
65
|
-
return AirbyteMessage.model_validate_json(message_json)
|
66
|
-
except ValidationError as e:
|
67
|
-
raise ValueError(f"Invalid AirbyteMessage format: {e}") from e
|
68
|
-
except orjson.JSONDecodeError as e:
|
69
|
-
raise ValueError(f"Failed to decode JSON: {e}") from e
|
70
|
-
|
71
|
-
|
72
|
-
def ab_connector_spec_from_string(
|
73
|
-
spec_json: str,
|
74
|
-
) -> ConnectorSpecification:
|
75
|
-
"""
|
76
|
-
Convert a JSON string to a ConnectorSpecification.
|
77
|
-
|
78
|
-
Args:
|
79
|
-
spec_str (str): The JSON string to convert.
|
80
|
-
|
81
|
-
Returns:
|
82
|
-
ConnectorSpecification: The deserialized ConnectorSpecification.
|
83
|
-
"""
|
84
|
-
try:
|
85
|
-
return ConnectorSpecification.model_validate_json(spec_json)
|
86
|
-
except ValidationError as e:
|
87
|
-
raise ValueError(f"Invalid ConnectorSpecification format: {e}") from e
|
88
|
-
except orjson.JSONDecodeError as e:
|
89
|
-
raise ValueError(f"Failed to decode JSON: {e}") from e
|
90
|
-
|
91
|
-
|
92
|
-
def ab_connector_spec_to_string(
|
93
|
-
spec: ConnectorSpecification,
|
94
|
-
) -> str:
|
95
|
-
"""
|
96
|
-
Convert a ConnectorSpecification to a JSON string.
|
97
|
-
|
98
|
-
Args:
|
99
|
-
spec (ConnectorSpecification): The ConnectorSpecification to convert.
|
100
|
-
|
101
|
-
Returns:
|
102
|
-
str: JSON string representation of the ConnectorSpecification.
|
103
|
-
"""
|
104
|
-
return spec.model_dump_json()
|
105
|
-
|
106
|
-
|
107
|
-
def ab_configured_catalog_to_string(
|
108
|
-
catalog: ConfiguredAirbyteCatalog,
|
109
|
-
) -> str:
|
110
|
-
"""
|
111
|
-
Convert a ConfiguredAirbyteCatalog to a JSON string.
|
112
|
-
|
113
|
-
Args:
|
114
|
-
catalog (ConfiguredAirbyteCatalog): The ConfiguredAirbyteCatalog to convert.
|
115
|
-
|
116
|
-
Returns:
|
117
|
-
str: JSON string representation of the ConfiguredAirbyteCatalog.
|
118
|
-
"""
|
119
|
-
return catalog.model_dump_json()
|
120
|
-
|
121
|
-
|
122
|
-
def ab_configured_catalog_from_string(
|
123
|
-
catalog_json: str,
|
124
|
-
) -> ConfiguredAirbyteCatalog:
|
125
|
-
"""
|
126
|
-
Convert a JSON string to a ConfiguredAirbyteCatalog.
|
127
|
-
|
128
|
-
Args:
|
129
|
-
catalog_json (str): The JSON string to convert.
|
130
|
-
|
131
|
-
Returns:
|
132
|
-
ConfiguredAirbyteCatalog: The deserialized ConfiguredAirbyteCatalog.
|
133
|
-
"""
|
134
|
-
try:
|
135
|
-
return ConfiguredAirbyteCatalog.model_validate_json(catalog_json)
|
136
|
-
except ValidationError as e:
|
137
|
-
raise ValueError(f"Invalid ConfiguredAirbyteCatalog format: {e}") from e
|
138
|
-
except orjson.JSONDecodeError as e:
|
139
|
-
raise ValueError(f"Failed to decode JSON: {e}") from e
|
140
|
-
|
141
|
-
|
142
|
-
def ab_state_message_from_string(
|
143
|
-
state_json: str,
|
144
|
-
) -> AirbyteStateMessage:
|
145
|
-
"""
|
146
|
-
Convert a JSON string to an AirbyteStateMessage.
|
147
|
-
|
148
|
-
Args:
|
149
|
-
state_json (str): The JSON string to convert.
|
150
|
-
|
151
|
-
Returns:
|
152
|
-
AirbyteStateMessage: The deserialized AirbyteStateMessage.
|
153
|
-
"""
|
154
|
-
try:
|
155
|
-
return AirbyteStateMessage.model_validate_json(state_json)
|
156
|
-
except ValidationError as e:
|
157
|
-
raise ValueError(f"Invalid AirbyteStateMessage format: {e}") from e
|
158
|
-
except orjson.JSONDecodeError as e:
|
159
|
-
raise ValueError(f"Failed to decode JSON: {e}") from e
|
160
|
-
|
161
|
-
|
162
|
-
def ab_state_message_to_string(
|
163
|
-
state: AirbyteStateMessage,
|
164
|
-
) -> str:
|
165
|
-
"""
|
166
|
-
Convert an AirbyteStateMessage to a JSON string.
|
167
|
-
|
168
|
-
Args:
|
169
|
-
state (AirbyteStateMessage): The AirbyteStateMessage to convert.
|
170
|
-
|
171
|
-
Returns:
|
172
|
-
str: JSON string representation of the AirbyteStateMessage.
|
173
|
-
"""
|
174
|
-
return state.model_dump_json()
|
35
|
+
AirbyteCatalogSerializer = Serializer(AirbyteCatalog, omit_none=True)
|
36
|
+
AirbyteStreamSerializer = Serializer(AirbyteStream, omit_none=True)
|
37
|
+
AirbyteStreamStateSerializer = Serializer(
|
38
|
+
AirbyteStreamState, omit_none=True, custom_type_resolver=custom_type_resolver
|
39
|
+
)
|
40
|
+
AirbyteStateMessageSerializer = Serializer(
|
41
|
+
AirbyteStateMessage, omit_none=True, custom_type_resolver=custom_type_resolver
|
42
|
+
)
|
43
|
+
AirbyteMessageSerializer = Serializer(
|
44
|
+
AirbyteMessage, omit_none=True, custom_type_resolver=custom_type_resolver
|
45
|
+
)
|
46
|
+
ConfiguredAirbyteCatalogSerializer = Serializer(ConfiguredAirbyteCatalog, omit_none=True)
|
47
|
+
ConfiguredAirbyteStreamSerializer = Serializer(ConfiguredAirbyteStream, omit_none=True)
|
48
|
+
ConnectorSpecificationSerializer = Serializer(ConnectorSpecification, omit_none=True)
|
@@ -63,7 +63,7 @@ from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_
|
|
63
63
|
class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
64
64
|
# By default, we defer to a value of 2. A value lower than than could cause a PartitionEnqueuer to be stuck in a state of deadlock
|
65
65
|
# because it has hit the limit of futures but not partition reader is consuming them.
|
66
|
-
_LOWEST_SAFE_CONCURRENCY_LEVEL =
|
66
|
+
_LOWEST_SAFE_CONCURRENCY_LEVEL = 2
|
67
67
|
|
68
68
|
def __init__(
|
69
69
|
self,
|
@@ -119,9 +119,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
119
119
|
) # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a dead lock during start up
|
120
120
|
else:
|
121
121
|
concurrency_level = self._LOWEST_SAFE_CONCURRENCY_LEVEL
|
122
|
-
initial_number_of_partitions_to_generate =
|
123
|
-
self._LOWEST_SAFE_CONCURRENCY_LEVEL // 2, 1
|
124
|
-
)
|
122
|
+
initial_number_of_partitions_to_generate = self._LOWEST_SAFE_CONCURRENCY_LEVEL // 2
|
125
123
|
|
126
124
|
self._concurrent_source = ConcurrentSource.create(
|
127
125
|
num_workers=concurrency_level,
|
@@ -31,7 +31,7 @@ from airbyte_cdk.models import (
|
|
31
31
|
ConnectorSpecification,
|
32
32
|
FailureType,
|
33
33
|
)
|
34
|
-
from airbyte_cdk.models.airbyte_protocol_serializers import
|
34
|
+
from airbyte_cdk.models.airbyte_protocol_serializers import AirbyteMessageSerializer
|
35
35
|
from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
|
36
36
|
from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
|
37
37
|
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
@@ -234,7 +234,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
234
234
|
)
|
235
235
|
# We have no mechanism for consuming the queue, so we print the messages to stdout
|
236
236
|
for message in self.message_repository.consume_queue():
|
237
|
-
print(
|
237
|
+
print(orjson.dumps(AirbyteMessageSerializer.dump(message)).decode())
|
238
238
|
self._spec_component.transform_config(mutable_config)
|
239
239
|
return mutable_config
|
240
240
|
|
@@ -542,11 +542,19 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
542
542
|
components_resolver_config["retriever"]["requester"]["use_cache"] = True
|
543
543
|
|
544
544
|
# Create a resolver for dynamic components based on type
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
545
|
+
if resolver_type == "HttpComponentsResolver":
|
546
|
+
components_resolver = self._constructor.create_component(
|
547
|
+
model_type=COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
|
548
|
+
component_definition=components_resolver_config,
|
549
|
+
config=config,
|
550
|
+
stream_name=dynamic_definition.get("name"),
|
551
|
+
)
|
552
|
+
else:
|
553
|
+
components_resolver = self._constructor.create_component(
|
554
|
+
model_type=COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
|
555
|
+
component_definition=components_resolver_config,
|
556
|
+
config=config,
|
557
|
+
)
|
550
558
|
|
551
559
|
stream_template_config = dynamic_definition["stream_template"]
|
552
560
|
|
@@ -3493,10 +3493,11 @@ class ModelToComponentFactory:
|
|
3493
3493
|
requester=download_requester,
|
3494
3494
|
record_selector=record_selector,
|
3495
3495
|
primary_key=None,
|
3496
|
-
name=
|
3496
|
+
name=name,
|
3497
3497
|
paginator=paginator,
|
3498
3498
|
config=config,
|
3499
3499
|
parameters={},
|
3500
|
+
log_formatter=self._get_log_formatter(None, name),
|
3500
3501
|
)
|
3501
3502
|
|
3502
3503
|
def _get_job_timeout() -> datetime.timedelta:
|
@@ -3805,7 +3806,7 @@ class ModelToComponentFactory:
|
|
3805
3806
|
)
|
3806
3807
|
|
3807
3808
|
def create_http_components_resolver(
|
3808
|
-
self, model: HttpComponentsResolverModel, config: Config
|
3809
|
+
self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None
|
3809
3810
|
) -> Any:
|
3810
3811
|
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
3811
3812
|
combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
|
@@ -3813,7 +3814,7 @@ class ModelToComponentFactory:
|
|
3813
3814
|
retriever = self._create_component_from_model(
|
3814
3815
|
model=model.retriever,
|
3815
3816
|
config=config,
|
3816
|
-
name="",
|
3817
|
+
name=f"{stream_name if stream_name else '__http_components_resolver'}",
|
3817
3818
|
primary_key=None,
|
3818
3819
|
stream_slicer=stream_slicer if stream_slicer else combined_slicers,
|
3819
3820
|
transformations=[],
|
@@ -3890,7 +3891,9 @@ class ModelToComponentFactory:
|
|
3890
3891
|
)
|
3891
3892
|
|
3892
3893
|
def create_parametrized_components_resolver(
|
3893
|
-
self,
|
3894
|
+
self,
|
3895
|
+
model: ParametrizedComponentsResolverModel,
|
3896
|
+
config: Config,
|
3894
3897
|
) -> ParametrizedComponentsResolver:
|
3895
3898
|
stream_parameters = StreamParametersDefinition(
|
3896
3899
|
list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream
|
@@ -8,8 +8,8 @@ from typing import Any, List, Mapping, MutableMapping, Optional
|
|
8
8
|
from airbyte_cdk.models import (
|
9
9
|
AdvancedAuth,
|
10
10
|
ConnectorSpecification,
|
11
|
+
ConnectorSpecificationSerializer,
|
11
12
|
)
|
12
|
-
from airbyte_cdk.models.airbyte_protocol_serializers import ab_connector_spec_from_string
|
13
13
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import AuthFlow
|
14
14
|
from airbyte_cdk.sources.declarative.transformations.config_transformations.config_transformation import (
|
15
15
|
ConfigTransformation,
|
@@ -59,7 +59,7 @@ class Spec:
|
|
59
59
|
obj["advanced_auth"] = self.advanced_auth.dict()
|
60
60
|
|
61
61
|
# We remap these keys to camel case because that's the existing format expected by the rest of the platform
|
62
|
-
return
|
62
|
+
return ConnectorSpecificationSerializer.load(obj)
|
63
63
|
|
64
64
|
def migrate_config(self, config: MutableMapping[str, Any]) -> None:
|
65
65
|
"""
|
@@ -77,9 +77,9 @@ from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
|
77
77
|
from airbyte_cdk.utils.analytics_message import create_analytics_message
|
78
78
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
79
79
|
|
80
|
-
DEFAULT_CONCURRENCY =
|
81
|
-
MAX_CONCURRENCY =
|
82
|
-
INITIAL_N_PARTITIONS =
|
80
|
+
DEFAULT_CONCURRENCY = 100
|
81
|
+
MAX_CONCURRENCY = 100
|
82
|
+
INITIAL_N_PARTITIONS = MAX_CONCURRENCY // 2
|
83
83
|
IDENTITIES_STREAM = "identities"
|
84
84
|
|
85
85
|
|
airbyte_cdk/sources/source.py
CHANGED
@@ -12,7 +12,9 @@ from airbyte_cdk.models import (
|
|
12
12
|
AirbyteCatalog,
|
13
13
|
AirbyteMessage,
|
14
14
|
AirbyteStateMessage,
|
15
|
+
AirbyteStateMessageSerializer,
|
15
16
|
ConfiguredAirbyteCatalog,
|
17
|
+
ConfiguredAirbyteCatalogSerializer,
|
16
18
|
)
|
17
19
|
|
18
20
|
TState = TypeVar("TState")
|
@@ -70,7 +72,7 @@ class Source(
|
|
70
72
|
state_obj = BaseConnector._read_json_file(state_path)
|
71
73
|
if state_obj:
|
72
74
|
for state in state_obj: # type: ignore # `isinstance(state_obj, List)` ensures that this is a list
|
73
|
-
parsed_message =
|
75
|
+
parsed_message = AirbyteStateMessageSerializer.load(state)
|
74
76
|
if (
|
75
77
|
not parsed_message.stream
|
76
78
|
and not parsed_message.data
|
@@ -85,7 +87,7 @@ class Source(
|
|
85
87
|
# can be overridden to change an input catalog
|
86
88
|
@classmethod
|
87
89
|
def read_catalog(cls, catalog_path: str) -> ConfiguredAirbyteCatalog:
|
88
|
-
return
|
90
|
+
return ConfiguredAirbyteCatalogSerializer.load(cls._read_json_file(catalog_path))
|
89
91
|
|
90
92
|
@property
|
91
93
|
def name(self) -> str:
|
@@ -14,12 +14,12 @@ import requests_cache
|
|
14
14
|
from requests.auth import AuthBase
|
15
15
|
|
16
16
|
from airbyte_cdk.models import (
|
17
|
+
AirbyteMessageSerializer,
|
17
18
|
AirbyteStreamStatus,
|
18
19
|
AirbyteStreamStatusReason,
|
19
20
|
AirbyteStreamStatusReasonType,
|
20
21
|
Level,
|
21
22
|
StreamDescriptor,
|
22
|
-
ab_message_to_string,
|
23
23
|
)
|
24
24
|
from airbyte_cdk.sources.http_config import MAX_CONNECTION_POOL_SIZE
|
25
25
|
from airbyte_cdk.sources.message import MessageRepository
|
@@ -396,11 +396,13 @@ class HttpClient:
|
|
396
396
|
if error_resolution.response_action == ResponseAction.RATE_LIMITED:
|
397
397
|
# TODO: Update to handle with message repository when concurrent message repository is ready
|
398
398
|
reasons = [AirbyteStreamStatusReason(type=AirbyteStreamStatusReasonType.RATE_LIMITED)]
|
399
|
-
message =
|
400
|
-
|
401
|
-
|
399
|
+
message = orjson.dumps(
|
400
|
+
AirbyteMessageSerializer.dump(
|
401
|
+
stream_status_as_airbyte_message(
|
402
|
+
StreamDescriptor(name=self._name), AirbyteStreamStatus.RUNNING, reasons
|
403
|
+
)
|
402
404
|
)
|
403
|
-
)
|
405
|
+
).decode()
|
404
406
|
|
405
407
|
# Simply printing the stream status is a temporary solution and can cause future issues. Currently, the _send method is
|
406
408
|
# wrapped with backoff decorators, and we can only emit messages by iterating record_iterator in the abstract source at the
|
@@ -6,7 +6,7 @@ import traceback
|
|
6
6
|
from abc import ABC, abstractmethod
|
7
7
|
from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional
|
8
8
|
|
9
|
-
from
|
9
|
+
from airbyte_protocol_dataclasses.models import SyncMode
|
10
10
|
|
11
11
|
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
|
12
12
|
from airbyte_cdk.models import Type as MessageType
|
@@ -13,7 +13,7 @@ from typing import TYPE_CHECKING, Any, final
|
|
13
13
|
import pandas as pd
|
14
14
|
import sqlalchemy
|
15
15
|
import ulid
|
16
|
-
from
|
16
|
+
from airbyte_protocol_dataclasses.models import AirbyteStateMessage
|
17
17
|
from pandas import Index
|
18
18
|
from pydantic import BaseModel, Field
|
19
19
|
from sqlalchemy import Column, Table, and_, create_engine, insert, null, select, text, update
|
@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Union, overload
|
|
5
5
|
from airbyte_cdk.models import (
|
6
6
|
ConfiguredAirbyteCatalog,
|
7
7
|
ConfiguredAirbyteStream,
|
8
|
+
ConfiguredAirbyteStreamSerializer,
|
8
9
|
SyncMode,
|
9
10
|
)
|
10
11
|
|
@@ -41,7 +42,7 @@ class ConfiguredAirbyteStreamBuilder:
|
|
41
42
|
return self
|
42
43
|
|
43
44
|
def build(self) -> ConfiguredAirbyteStream:
|
44
|
-
return
|
45
|
+
return ConfiguredAirbyteStreamSerializer.load(self._stream)
|
45
46
|
|
46
47
|
|
47
48
|
class CatalogBuilder:
|
@@ -17,7 +17,6 @@ than that, there are integrations point that are annoying to integrate with usin
|
|
17
17
|
import json
|
18
18
|
import logging
|
19
19
|
import re
|
20
|
-
import sys
|
21
20
|
import tempfile
|
22
21
|
import traceback
|
23
22
|
from collections import deque
|
@@ -29,6 +28,7 @@ from typing import Any, List, Literal, Optional, Union, final, overload
|
|
29
28
|
|
30
29
|
import orjson
|
31
30
|
from pydantic import ValidationError as V2ValidationError
|
31
|
+
from serpyco_rs import SchemaValidationError
|
32
32
|
|
33
33
|
from airbyte_cdk.entrypoint import AirbyteEntrypoint
|
34
34
|
from airbyte_cdk.exception_handler import assemble_uncaught_exception
|
@@ -36,31 +36,20 @@ from airbyte_cdk.logger import AirbyteLogFormatter
|
|
36
36
|
from airbyte_cdk.models import (
|
37
37
|
AirbyteLogMessage,
|
38
38
|
AirbyteMessage,
|
39
|
+
AirbyteMessageSerializer,
|
39
40
|
AirbyteStateMessage,
|
41
|
+
AirbyteStateMessageSerializer,
|
40
42
|
AirbyteStreamState,
|
41
43
|
AirbyteStreamStatus,
|
42
44
|
ConfiguredAirbyteCatalog,
|
45
|
+
ConfiguredAirbyteCatalogSerializer,
|
43
46
|
Level,
|
44
47
|
TraceType,
|
45
48
|
Type,
|
46
|
-
ab_configured_catalog_from_string,
|
47
|
-
ab_configured_catalog_to_string,
|
48
|
-
ab_connector_spec_from_string,
|
49
|
-
ab_connector_spec_to_string,
|
50
|
-
ab_message_from_string,
|
51
|
-
ab_message_to_string,
|
52
|
-
ab_state_message_to_string,
|
53
49
|
)
|
54
50
|
from airbyte_cdk.sources import Source
|
55
51
|
from airbyte_cdk.test.models.scenario import ExpectedOutcome
|
56
52
|
|
57
|
-
JsonValidationErrors: tuple[type[Exception], ...] = (orjson.JSONDecodeError,)
|
58
|
-
# Conditionally import and create a union type for exception handling
|
59
|
-
if sys.platform != "emscripten":
|
60
|
-
from serpyco_rs import SchemaValidationError
|
61
|
-
|
62
|
-
JsonValidationErrors = (orjson.JSONDecodeError, SchemaValidationError)
|
63
|
-
|
64
53
|
|
65
54
|
class AirbyteEntrypointException(Exception):
|
66
55
|
"""Exception raised for errors in the AirbyteEntrypoint execution.
|
@@ -128,8 +117,8 @@ class EntrypointOutput:
|
|
128
117
|
@staticmethod
|
129
118
|
def _parse_message(message: str) -> AirbyteMessage:
|
130
119
|
try:
|
131
|
-
return
|
132
|
-
except
|
120
|
+
return AirbyteMessageSerializer.load(orjson.loads(message))
|
121
|
+
except (orjson.JSONDecodeError, SchemaValidationError):
|
133
122
|
# The platform assumes that logs that are not of AirbyteMessage format are log messages
|
134
123
|
return AirbyteMessage(
|
135
124
|
type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message=message)
|
@@ -452,7 +441,7 @@ def read(
|
|
452
441
|
config_file = make_file(tmp_directory_path / "config.json", config)
|
453
442
|
catalog_file = make_file(
|
454
443
|
tmp_directory_path / "catalog.json",
|
455
|
-
|
444
|
+
orjson.dumps(ConfiguredAirbyteCatalogSerializer.dump(catalog)).decode(),
|
456
445
|
)
|
457
446
|
args = [
|
458
447
|
"read",
|
@@ -464,13 +453,15 @@ def read(
|
|
464
453
|
if debug:
|
465
454
|
args.append("--debug")
|
466
455
|
if state is not None:
|
467
|
-
args.extend(
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
456
|
+
args.extend(
|
457
|
+
[
|
458
|
+
"--state",
|
459
|
+
make_file(
|
460
|
+
tmp_directory_path / "state.json",
|
461
|
+
f"[{','.join([orjson.dumps(AirbyteStateMessageSerializer.dump(stream_state)).decode() for stream_state in state])}]",
|
462
|
+
),
|
463
|
+
]
|
464
|
+
)
|
474
465
|
|
475
466
|
return _run_command(
|
476
467
|
source,
|
@@ -86,6 +86,7 @@ from typing import Any, Optional, Union, overload
|
|
86
86
|
|
87
87
|
from dateutil import parser
|
88
88
|
from typing_extensions import Never
|
89
|
+
from whenever import Instant, LocalDateTime, ZonedDateTime
|
89
90
|
|
90
91
|
|
91
92
|
class AirbyteDateTime(datetime):
|
@@ -137,18 +138,6 @@ class AirbyteDateTime(datetime):
|
|
137
138
|
dt.tzinfo or timezone.utc,
|
138
139
|
)
|
139
140
|
|
140
|
-
@classmethod
|
141
|
-
def from_timestamp(cls, timestamp: float) -> "AirbyteDateTime":
|
142
|
-
"""Creates an AirbyteDateTime from a Unix timestamp in seconds.
|
143
|
-
|
144
|
-
Args:
|
145
|
-
timestamp: A Unix timestamp in seconds (float).
|
146
|
-
|
147
|
-
Returns:
|
148
|
-
AirbyteDateTime: A new timezone-aware datetime instance (UTC).
|
149
|
-
"""
|
150
|
-
return AirbyteDateTime.from_datetime(datetime.fromtimestamp(timestamp, tz=timezone.utc))
|
151
|
-
|
152
141
|
def to_datetime(self) -> datetime:
|
153
142
|
"""Converts this AirbyteDateTime to a standard datetime object.
|
154
143
|
|
@@ -411,7 +400,8 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
|
|
411
400
|
raise ValueError("Timestamp cannot be negative")
|
412
401
|
if len(str(abs(timestamp))) > 10:
|
413
402
|
raise ValueError("Timestamp value too large")
|
414
|
-
|
403
|
+
instant = Instant.from_timestamp(timestamp)
|
404
|
+
return AirbyteDateTime.from_datetime(instant.py_datetime())
|
415
405
|
|
416
406
|
if not isinstance(dt_str, str):
|
417
407
|
raise ValueError(
|
@@ -424,7 +414,8 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
|
|
424
414
|
year, month, day = map(int, dt_str.split("-"))
|
425
415
|
if not (1 <= month <= 12 and 1 <= day <= 31):
|
426
416
|
raise ValueError(f"Invalid date format: {dt_str}")
|
427
|
-
|
417
|
+
instant = Instant.from_utc(year, month, day, 0, 0, 0)
|
418
|
+
return AirbyteDateTime.from_datetime(instant.py_datetime())
|
428
419
|
except (ValueError, TypeError):
|
429
420
|
raise ValueError(f"Invalid date format: {dt_str}")
|
430
421
|
|
@@ -11,12 +11,12 @@ from airbyte_cdk.models import (
|
|
11
11
|
AirbyteConnectionStatus,
|
12
12
|
AirbyteErrorTraceMessage,
|
13
13
|
AirbyteMessage,
|
14
|
+
AirbyteMessageSerializer,
|
14
15
|
AirbyteTraceMessage,
|
15
16
|
FailureType,
|
16
17
|
Status,
|
17
18
|
StreamDescriptor,
|
18
19
|
TraceType,
|
19
|
-
ab_message_to_string,
|
20
20
|
)
|
21
21
|
from airbyte_cdk.models import Type as MessageType
|
22
22
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
@@ -95,7 +95,7 @@ class AirbyteTracedException(Exception):
|
|
95
95
|
Prints the exception as an AirbyteTraceMessage.
|
96
96
|
Note that this will be called automatically on uncaught exceptions when using the airbyte_cdk entrypoint.
|
97
97
|
"""
|
98
|
-
message =
|
98
|
+
message = orjson.dumps(AirbyteMessageSerializer.dump(self.as_airbyte_message())).decode()
|
99
99
|
filtered_message = filter_secrets(message)
|
100
100
|
print(filtered_message)
|
101
101
|
|