airbyte-cdk 6.60.0.post35.dev16509779638__py3-none-any.whl → 6.60.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +35 -30
  2. airbyte_cdk/config_observation.py +2 -2
  3. airbyte_cdk/connector.py +2 -1
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +6 -1
  5. airbyte_cdk/connector_builder/main.py +11 -18
  6. airbyte_cdk/connector_builder/test_reader/helpers.py +31 -0
  7. airbyte_cdk/connector_builder/test_reader/message_grouper.py +5 -0
  8. airbyte_cdk/connector_builder/test_reader/reader.py +8 -3
  9. airbyte_cdk/destinations/destination.py +7 -7
  10. airbyte_cdk/entrypoint.py +23 -8
  11. airbyte_cdk/logger.py +2 -2
  12. airbyte_cdk/models/__init__.py +6 -7
  13. airbyte_cdk/models/airbyte_protocol.py +81 -2
  14. airbyte_cdk/models/airbyte_protocol_serializers.py +26 -152
  15. airbyte_cdk/models/well_known_types.py +1 -1
  16. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +2 -4
  17. airbyte_cdk/sources/declarative/manifest_declarative_source.py +15 -7
  18. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +7 -4
  19. airbyte_cdk/sources/declarative/spec/spec.py +2 -2
  20. airbyte_cdk/sources/file_based/file_based_source.py +3 -3
  21. airbyte_cdk/sources/source.py +4 -2
  22. airbyte_cdk/sources/streams/http/http_client.py +7 -5
  23. airbyte_cdk/sources/streams/permissions/identities_stream.py +1 -1
  24. airbyte_cdk/sql/shared/sql_processor.py +1 -1
  25. airbyte_cdk/test/catalog_builder.py +2 -1
  26. airbyte_cdk/test/entrypoint_wrapper.py +16 -25
  27. airbyte_cdk/utils/datetime_helpers.py +5 -14
  28. airbyte_cdk/utils/traced_exception.py +2 -2
  29. {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/METADATA +11 -10
  30. {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/RECORD +34 -34
  31. {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/LICENSE.txt +0 -0
  32. {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/LICENSE_SHORT +0 -0
  33. {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/WHEEL +0 -0
  34. {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/entry_points.txt +0 -0
@@ -1,13 +1,7 @@
1
1
  # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
- import json
3
- import logging
4
- import sys
5
- from enum import Enum
6
- from typing import Any, Callable, Dict, Type, TypeVar, cast
2
+ from typing import Any, Dict
7
3
 
8
- import dacite
9
- import orjson
10
- from pydantic import ValidationError
4
+ from serpyco_rs import CustomType, Serializer
11
5
 
12
6
  from .airbyte_protocol import ( # type: ignore[attr-defined] # all classes are imported to airbyte_protocol via *
13
7
  AirbyteCatalog,
@@ -21,154 +15,34 @@ from .airbyte_protocol import ( # type: ignore[attr-defined] # all classes are
21
15
  ConnectorSpecification,
22
16
  )
23
17
 
24
- USE_RUST_BACKEND = sys.platform != "emscripten"
25
- """When run in WASM, use the pure Python backend for serpyco."""
26
18
 
27
- _HAS_LOGGED_FOR_SERIALIZATION_ERROR = False
28
- """Track if we have logged an error for serialization issues."""
19
+ class AirbyteStateBlobType(CustomType[AirbyteStateBlob, Dict[str, Any]]):
20
+ def serialize(self, value: AirbyteStateBlob) -> Dict[str, Any]:
21
+ # cant use orjson.dumps() directly because private attributes are excluded, e.g. "__ab_full_refresh_sync_complete"
22
+ return {k: v for k, v in value.__dict__.items()}
29
23
 
30
- T = TypeVar("T")
24
+ def deserialize(self, value: Dict[str, Any]) -> AirbyteStateBlob:
25
+ return AirbyteStateBlob(value)
31
26
 
32
- logger = logging.getLogger("airbyte")
27
+ def get_json_schema(self) -> Dict[str, Any]:
28
+ return {"type": "object"}
33
29
 
34
- # Making this a no-op for now:
35
30
 
31
+ def custom_type_resolver(t: type) -> CustomType[AirbyteStateBlob, Dict[str, Any]] | None:
32
+ return AirbyteStateBlobType() if t is AirbyteStateBlob else None
36
33
 
37
- def ab_message_to_string(
38
- message: AirbyteMessage,
39
- ) -> str:
40
- """
41
- Convert an AirbyteMessage to a JSON string.
42
34
 
43
- Args:
44
- message (AirbyteMessage): The Airbyte message to convert.
45
-
46
- Returns:
47
- str: JSON string representation of the AirbyteMessage.
48
- """
49
- return message.model_dump_json()
50
-
51
-
52
- def ab_message_from_string(
53
- message_json: str,
54
- ) -> AirbyteMessage:
55
- """
56
- Convert a JSON string to an AirbyteMessage.
57
-
58
- Args:
59
- message_str (str): The JSON string to convert.
60
-
61
- Returns:
62
- AirbyteMessage: The deserialized AirbyteMessage.
63
- """
64
- try:
65
- return AirbyteMessage.model_validate_json(message_json)
66
- except ValidationError as e:
67
- raise ValueError(f"Invalid AirbyteMessage format: {e}") from e
68
- except orjson.JSONDecodeError as e:
69
- raise ValueError(f"Failed to decode JSON: {e}") from e
70
-
71
-
72
- def ab_connector_spec_from_string(
73
- spec_json: str,
74
- ) -> ConnectorSpecification:
75
- """
76
- Convert a JSON string to a ConnectorSpecification.
77
-
78
- Args:
79
- spec_str (str): The JSON string to convert.
80
-
81
- Returns:
82
- ConnectorSpecification: The deserialized ConnectorSpecification.
83
- """
84
- try:
85
- return ConnectorSpecification.model_validate_json(spec_json)
86
- except ValidationError as e:
87
- raise ValueError(f"Invalid ConnectorSpecification format: {e}") from e
88
- except orjson.JSONDecodeError as e:
89
- raise ValueError(f"Failed to decode JSON: {e}") from e
90
-
91
-
92
- def ab_connector_spec_to_string(
93
- spec: ConnectorSpecification,
94
- ) -> str:
95
- """
96
- Convert a ConnectorSpecification to a JSON string.
97
-
98
- Args:
99
- spec (ConnectorSpecification): The ConnectorSpecification to convert.
100
-
101
- Returns:
102
- str: JSON string representation of the ConnectorSpecification.
103
- """
104
- return spec.model_dump_json()
105
-
106
-
107
- def ab_configured_catalog_to_string(
108
- catalog: ConfiguredAirbyteCatalog,
109
- ) -> str:
110
- """
111
- Convert a ConfiguredAirbyteCatalog to a JSON string.
112
-
113
- Args:
114
- catalog (ConfiguredAirbyteCatalog): The ConfiguredAirbyteCatalog to convert.
115
-
116
- Returns:
117
- str: JSON string representation of the ConfiguredAirbyteCatalog.
118
- """
119
- return catalog.model_dump_json()
120
-
121
-
122
- def ab_configured_catalog_from_string(
123
- catalog_json: str,
124
- ) -> ConfiguredAirbyteCatalog:
125
- """
126
- Convert a JSON string to a ConfiguredAirbyteCatalog.
127
-
128
- Args:
129
- catalog_json (str): The JSON string to convert.
130
-
131
- Returns:
132
- ConfiguredAirbyteCatalog: The deserialized ConfiguredAirbyteCatalog.
133
- """
134
- try:
135
- return ConfiguredAirbyteCatalog.model_validate_json(catalog_json)
136
- except ValidationError as e:
137
- raise ValueError(f"Invalid ConfiguredAirbyteCatalog format: {e}") from e
138
- except orjson.JSONDecodeError as e:
139
- raise ValueError(f"Failed to decode JSON: {e}") from e
140
-
141
-
142
- def ab_state_message_from_string(
143
- state_json: str,
144
- ) -> AirbyteStateMessage:
145
- """
146
- Convert a JSON string to an AirbyteStateMessage.
147
-
148
- Args:
149
- state_json (str): The JSON string to convert.
150
-
151
- Returns:
152
- AirbyteStateMessage: The deserialized AirbyteStateMessage.
153
- """
154
- try:
155
- return AirbyteStateMessage.model_validate_json(state_json)
156
- except ValidationError as e:
157
- raise ValueError(f"Invalid AirbyteStateMessage format: {e}") from e
158
- except orjson.JSONDecodeError as e:
159
- raise ValueError(f"Failed to decode JSON: {e}") from e
160
-
161
-
162
- def ab_state_message_to_string(
163
- state: AirbyteStateMessage,
164
- ) -> str:
165
- """
166
- Convert an AirbyteStateMessage to a JSON string.
167
-
168
- Args:
169
- state (AirbyteStateMessage): The AirbyteStateMessage to convert.
170
-
171
- Returns:
172
- str: JSON string representation of the AirbyteStateMessage.
173
- """
174
- return state.model_dump_json()
35
+ AirbyteCatalogSerializer = Serializer(AirbyteCatalog, omit_none=True)
36
+ AirbyteStreamSerializer = Serializer(AirbyteStream, omit_none=True)
37
+ AirbyteStreamStateSerializer = Serializer(
38
+ AirbyteStreamState, omit_none=True, custom_type_resolver=custom_type_resolver
39
+ )
40
+ AirbyteStateMessageSerializer = Serializer(
41
+ AirbyteStateMessage, omit_none=True, custom_type_resolver=custom_type_resolver
42
+ )
43
+ AirbyteMessageSerializer = Serializer(
44
+ AirbyteMessage, omit_none=True, custom_type_resolver=custom_type_resolver
45
+ )
46
+ ConfiguredAirbyteCatalogSerializer = Serializer(ConfiguredAirbyteCatalog, omit_none=True)
47
+ ConfiguredAirbyteStreamSerializer = Serializer(ConfiguredAirbyteStream, omit_none=True)
48
+ ConnectorSpecificationSerializer = Serializer(ConnectorSpecification, omit_none=True)
@@ -2,4 +2,4 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_protocol.models.well_known_types import * # noqa: F403 # Allow '*'
5
+ from airbyte_protocol_dataclasses.models.well_known_types import * # noqa: F403 # Allow '*'
@@ -63,7 +63,7 @@ from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_
63
63
  class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
64
64
  # By default, we defer to a value of 2. A value lower than than could cause a PartitionEnqueuer to be stuck in a state of deadlock
65
65
  # because it has hit the limit of futures but not partition reader is consuming them.
66
- _LOWEST_SAFE_CONCURRENCY_LEVEL = 1 # TODO: revert-me: 2
66
+ _LOWEST_SAFE_CONCURRENCY_LEVEL = 2
67
67
 
68
68
  def __init__(
69
69
  self,
@@ -119,9 +119,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
119
119
  ) # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a dead lock during start up
120
120
  else:
121
121
  concurrency_level = self._LOWEST_SAFE_CONCURRENCY_LEVEL
122
- initial_number_of_partitions_to_generate = max(
123
- self._LOWEST_SAFE_CONCURRENCY_LEVEL // 2, 1
124
- )
122
+ initial_number_of_partitions_to_generate = self._LOWEST_SAFE_CONCURRENCY_LEVEL // 2
125
123
 
126
124
  self._concurrent_source = ConcurrentSource.create(
127
125
  num_workers=concurrency_level,
@@ -31,7 +31,7 @@ from airbyte_cdk.models import (
31
31
  ConnectorSpecification,
32
32
  FailureType,
33
33
  )
34
- from airbyte_cdk.models.airbyte_protocol_serializers import ab_message_to_string
34
+ from airbyte_cdk.models.airbyte_protocol_serializers import AirbyteMessageSerializer
35
35
  from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
36
36
  from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
37
37
  from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
@@ -234,7 +234,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
234
234
  )
235
235
  # We have no mechanism for consuming the queue, so we print the messages to stdout
236
236
  for message in self.message_repository.consume_queue():
237
- print(ab_message_to_string(message))
237
+ print(orjson.dumps(AirbyteMessageSerializer.dump(message)).decode())
238
238
  self._spec_component.transform_config(mutable_config)
239
239
  return mutable_config
240
240
 
@@ -542,11 +542,19 @@ class ManifestDeclarativeSource(DeclarativeSource):
542
542
  components_resolver_config["retriever"]["requester"]["use_cache"] = True
543
543
 
544
544
  # Create a resolver for dynamic components based on type
545
- components_resolver = self._constructor.create_component(
546
- COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
547
- components_resolver_config,
548
- config,
549
- )
545
+ if resolver_type == "HttpComponentsResolver":
546
+ components_resolver = self._constructor.create_component(
547
+ model_type=COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
548
+ component_definition=components_resolver_config,
549
+ config=config,
550
+ stream_name=dynamic_definition.get("name"),
551
+ )
552
+ else:
553
+ components_resolver = self._constructor.create_component(
554
+ model_type=COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
555
+ component_definition=components_resolver_config,
556
+ config=config,
557
+ )
550
558
 
551
559
  stream_template_config = dynamic_definition["stream_template"]
552
560
 
@@ -3493,10 +3493,11 @@ class ModelToComponentFactory:
3493
3493
  requester=download_requester,
3494
3494
  record_selector=record_selector,
3495
3495
  primary_key=None,
3496
- name=job_download_components_name,
3496
+ name=name,
3497
3497
  paginator=paginator,
3498
3498
  config=config,
3499
3499
  parameters={},
3500
+ log_formatter=self._get_log_formatter(None, name),
3500
3501
  )
3501
3502
 
3502
3503
  def _get_job_timeout() -> datetime.timedelta:
@@ -3805,7 +3806,7 @@ class ModelToComponentFactory:
3805
3806
  )
3806
3807
 
3807
3808
  def create_http_components_resolver(
3808
- self, model: HttpComponentsResolverModel, config: Config
3809
+ self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None
3809
3810
  ) -> Any:
3810
3811
  stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
3811
3812
  combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
@@ -3813,7 +3814,7 @@ class ModelToComponentFactory:
3813
3814
  retriever = self._create_component_from_model(
3814
3815
  model=model.retriever,
3815
3816
  config=config,
3816
- name="",
3817
+ name=f"{stream_name if stream_name else '__http_components_resolver'}",
3817
3818
  primary_key=None,
3818
3819
  stream_slicer=stream_slicer if stream_slicer else combined_slicers,
3819
3820
  transformations=[],
@@ -3890,7 +3891,9 @@ class ModelToComponentFactory:
3890
3891
  )
3891
3892
 
3892
3893
  def create_parametrized_components_resolver(
3893
- self, model: ParametrizedComponentsResolverModel, config: Config
3894
+ self,
3895
+ model: ParametrizedComponentsResolverModel,
3896
+ config: Config,
3894
3897
  ) -> ParametrizedComponentsResolver:
3895
3898
  stream_parameters = StreamParametersDefinition(
3896
3899
  list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream
@@ -8,8 +8,8 @@ from typing import Any, List, Mapping, MutableMapping, Optional
8
8
  from airbyte_cdk.models import (
9
9
  AdvancedAuth,
10
10
  ConnectorSpecification,
11
+ ConnectorSpecificationSerializer,
11
12
  )
12
- from airbyte_cdk.models.airbyte_protocol_serializers import ab_connector_spec_from_string
13
13
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import AuthFlow
14
14
  from airbyte_cdk.sources.declarative.transformations.config_transformations.config_transformation import (
15
15
  ConfigTransformation,
@@ -59,7 +59,7 @@ class Spec:
59
59
  obj["advanced_auth"] = self.advanced_auth.dict()
60
60
 
61
61
  # We remap these keys to camel case because that's the existing format expected by the rest of the platform
62
- return ConnectorSpecification.model_validate(obj)
62
+ return ConnectorSpecificationSerializer.load(obj)
63
63
 
64
64
  def migrate_config(self, config: MutableMapping[str, Any]) -> None:
65
65
  """
@@ -77,9 +77,9 @@ from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
77
77
  from airbyte_cdk.utils.analytics_message import create_analytics_message
78
78
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
79
79
 
80
- DEFAULT_CONCURRENCY = 1.0 # TODO: Revert me: 100
81
- MAX_CONCURRENCY = 1 # TODO: Revert me: 100
82
- INITIAL_N_PARTITIONS = max(MAX_CONCURRENCY // 2, 1)
80
+ DEFAULT_CONCURRENCY = 100
81
+ MAX_CONCURRENCY = 100
82
+ INITIAL_N_PARTITIONS = MAX_CONCURRENCY // 2
83
83
  IDENTITIES_STREAM = "identities"
84
84
 
85
85
 
@@ -12,7 +12,9 @@ from airbyte_cdk.models import (
12
12
  AirbyteCatalog,
13
13
  AirbyteMessage,
14
14
  AirbyteStateMessage,
15
+ AirbyteStateMessageSerializer,
15
16
  ConfiguredAirbyteCatalog,
17
+ ConfiguredAirbyteCatalogSerializer,
16
18
  )
17
19
 
18
20
  TState = TypeVar("TState")
@@ -70,7 +72,7 @@ class Source(
70
72
  state_obj = BaseConnector._read_json_file(state_path)
71
73
  if state_obj:
72
74
  for state in state_obj: # type: ignore # `isinstance(state_obj, List)` ensures that this is a list
73
- parsed_message = AirbyteStateMessage.model_validate(state)
75
+ parsed_message = AirbyteStateMessageSerializer.load(state)
74
76
  if (
75
77
  not parsed_message.stream
76
78
  and not parsed_message.data
@@ -85,7 +87,7 @@ class Source(
85
87
  # can be overridden to change an input catalog
86
88
  @classmethod
87
89
  def read_catalog(cls, catalog_path: str) -> ConfiguredAirbyteCatalog:
88
- return ConfiguredAirbyteCatalog.model_validate(cls._read_json_file(catalog_path))
90
+ return ConfiguredAirbyteCatalogSerializer.load(cls._read_json_file(catalog_path))
89
91
 
90
92
  @property
91
93
  def name(self) -> str:
@@ -14,12 +14,12 @@ import requests_cache
14
14
  from requests.auth import AuthBase
15
15
 
16
16
  from airbyte_cdk.models import (
17
+ AirbyteMessageSerializer,
17
18
  AirbyteStreamStatus,
18
19
  AirbyteStreamStatusReason,
19
20
  AirbyteStreamStatusReasonType,
20
21
  Level,
21
22
  StreamDescriptor,
22
- ab_message_to_string,
23
23
  )
24
24
  from airbyte_cdk.sources.http_config import MAX_CONNECTION_POOL_SIZE
25
25
  from airbyte_cdk.sources.message import MessageRepository
@@ -396,11 +396,13 @@ class HttpClient:
396
396
  if error_resolution.response_action == ResponseAction.RATE_LIMITED:
397
397
  # TODO: Update to handle with message repository when concurrent message repository is ready
398
398
  reasons = [AirbyteStreamStatusReason(type=AirbyteStreamStatusReasonType.RATE_LIMITED)]
399
- message = ab_message_to_string(
400
- stream_status_as_airbyte_message(
401
- StreamDescriptor(name=self._name), AirbyteStreamStatus.RUNNING, reasons
399
+ message = orjson.dumps(
400
+ AirbyteMessageSerializer.dump(
401
+ stream_status_as_airbyte_message(
402
+ StreamDescriptor(name=self._name), AirbyteStreamStatus.RUNNING, reasons
403
+ )
402
404
  )
403
- )
405
+ ).decode()
404
406
 
405
407
  # Simply printing the stream status is a temporary solution and can cause future issues. Currently, the _send method is
406
408
  # wrapped with backoff decorators, and we can only emit messages by iterating record_iterator in the abstract source at the
@@ -6,7 +6,7 @@ import traceback
6
6
  from abc import ABC, abstractmethod
7
7
  from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional
8
8
 
9
- from airbyte_protocol.models import SyncMode
9
+ from airbyte_protocol_dataclasses.models import SyncMode
10
10
 
11
11
  from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
12
12
  from airbyte_cdk.models import Type as MessageType
@@ -13,7 +13,7 @@ from typing import TYPE_CHECKING, Any, final
13
13
  import pandas as pd
14
14
  import sqlalchemy
15
15
  import ulid
16
- from airbyte_protocol.models import AirbyteStateMessage
16
+ from airbyte_protocol_dataclasses.models import AirbyteStateMessage
17
17
  from pandas import Index
18
18
  from pydantic import BaseModel, Field
19
19
  from sqlalchemy import Column, Table, and_, create_engine, insert, null, select, text, update
@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Union, overload
5
5
  from airbyte_cdk.models import (
6
6
  ConfiguredAirbyteCatalog,
7
7
  ConfiguredAirbyteStream,
8
+ ConfiguredAirbyteStreamSerializer,
8
9
  SyncMode,
9
10
  )
10
11
 
@@ -41,7 +42,7 @@ class ConfiguredAirbyteStreamBuilder:
41
42
  return self
42
43
 
43
44
  def build(self) -> ConfiguredAirbyteStream:
44
- return ConfiguredAirbyteStream(**self._stream)
45
+ return ConfiguredAirbyteStreamSerializer.load(self._stream)
45
46
 
46
47
 
47
48
  class CatalogBuilder:
@@ -17,7 +17,6 @@ than that, there are integrations point that are annoying to integrate with usin
17
17
  import json
18
18
  import logging
19
19
  import re
20
- import sys
21
20
  import tempfile
22
21
  import traceback
23
22
  from collections import deque
@@ -29,6 +28,7 @@ from typing import Any, List, Literal, Optional, Union, final, overload
29
28
 
30
29
  import orjson
31
30
  from pydantic import ValidationError as V2ValidationError
31
+ from serpyco_rs import SchemaValidationError
32
32
 
33
33
  from airbyte_cdk.entrypoint import AirbyteEntrypoint
34
34
  from airbyte_cdk.exception_handler import assemble_uncaught_exception
@@ -36,31 +36,20 @@ from airbyte_cdk.logger import AirbyteLogFormatter
36
36
  from airbyte_cdk.models import (
37
37
  AirbyteLogMessage,
38
38
  AirbyteMessage,
39
+ AirbyteMessageSerializer,
39
40
  AirbyteStateMessage,
41
+ AirbyteStateMessageSerializer,
40
42
  AirbyteStreamState,
41
43
  AirbyteStreamStatus,
42
44
  ConfiguredAirbyteCatalog,
45
+ ConfiguredAirbyteCatalogSerializer,
43
46
  Level,
44
47
  TraceType,
45
48
  Type,
46
- ab_configured_catalog_from_string,
47
- ab_configured_catalog_to_string,
48
- ab_connector_spec_from_string,
49
- ab_connector_spec_to_string,
50
- ab_message_from_string,
51
- ab_message_to_string,
52
- ab_state_message_to_string,
53
49
  )
54
50
  from airbyte_cdk.sources import Source
55
51
  from airbyte_cdk.test.models.scenario import ExpectedOutcome
56
52
 
57
- JsonValidationErrors: tuple[type[Exception], ...] = (orjson.JSONDecodeError,)
58
- # Conditionally import and create a union type for exception handling
59
- if sys.platform != "emscripten":
60
- from serpyco_rs import SchemaValidationError
61
-
62
- JsonValidationErrors = (orjson.JSONDecodeError, SchemaValidationError)
63
-
64
53
 
65
54
  class AirbyteEntrypointException(Exception):
66
55
  """Exception raised for errors in the AirbyteEntrypoint execution.
@@ -128,8 +117,8 @@ class EntrypointOutput:
128
117
  @staticmethod
129
118
  def _parse_message(message: str) -> AirbyteMessage:
130
119
  try:
131
- return ab_message_from_string(message)
132
- except JsonValidationErrors:
120
+ return AirbyteMessageSerializer.load(orjson.loads(message))
121
+ except (orjson.JSONDecodeError, SchemaValidationError):
133
122
  # The platform assumes that logs that are not of AirbyteMessage format are log messages
134
123
  return AirbyteMessage(
135
124
  type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message=message)
@@ -452,7 +441,7 @@ def read(
452
441
  config_file = make_file(tmp_directory_path / "config.json", config)
453
442
  catalog_file = make_file(
454
443
  tmp_directory_path / "catalog.json",
455
- ab_configured_catalog_to_string(catalog),
444
+ orjson.dumps(ConfiguredAirbyteCatalogSerializer.dump(catalog)).decode(),
456
445
  )
457
446
  args = [
458
447
  "read",
@@ -464,13 +453,15 @@ def read(
464
453
  if debug:
465
454
  args.append("--debug")
466
455
  if state is not None:
467
- args.extend([
468
- "--state",
469
- make_file(
470
- tmp_directory_path / "state.json",
471
- f"[{','.join([ab_state_message_to_string(stream_state) for stream_state in state])}]",
472
- ),
473
- ])
456
+ args.extend(
457
+ [
458
+ "--state",
459
+ make_file(
460
+ tmp_directory_path / "state.json",
461
+ f"[{','.join([orjson.dumps(AirbyteStateMessageSerializer.dump(stream_state)).decode() for stream_state in state])}]",
462
+ ),
463
+ ]
464
+ )
474
465
 
475
466
  return _run_command(
476
467
  source,
@@ -86,6 +86,7 @@ from typing import Any, Optional, Union, overload
86
86
 
87
87
  from dateutil import parser
88
88
  from typing_extensions import Never
89
+ from whenever import Instant, LocalDateTime, ZonedDateTime
89
90
 
90
91
 
91
92
  class AirbyteDateTime(datetime):
@@ -137,18 +138,6 @@ class AirbyteDateTime(datetime):
137
138
  dt.tzinfo or timezone.utc,
138
139
  )
139
140
 
140
- @classmethod
141
- def from_timestamp(cls, timestamp: float) -> "AirbyteDateTime":
142
- """Creates an AirbyteDateTime from a Unix timestamp in seconds.
143
-
144
- Args:
145
- timestamp: A Unix timestamp in seconds (float).
146
-
147
- Returns:
148
- AirbyteDateTime: A new timezone-aware datetime instance (UTC).
149
- """
150
- return AirbyteDateTime.from_datetime(datetime.fromtimestamp(timestamp, tz=timezone.utc))
151
-
152
141
  def to_datetime(self) -> datetime:
153
142
  """Converts this AirbyteDateTime to a standard datetime object.
154
143
 
@@ -411,7 +400,8 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
411
400
  raise ValueError("Timestamp cannot be negative")
412
401
  if len(str(abs(timestamp))) > 10:
413
402
  raise ValueError("Timestamp value too large")
414
- return AirbyteDateTime.from_timestamp(timestamp)
403
+ instant = Instant.from_timestamp(timestamp)
404
+ return AirbyteDateTime.from_datetime(instant.py_datetime())
415
405
 
416
406
  if not isinstance(dt_str, str):
417
407
  raise ValueError(
@@ -424,7 +414,8 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
424
414
  year, month, day = map(int, dt_str.split("-"))
425
415
  if not (1 <= month <= 12 and 1 <= day <= 31):
426
416
  raise ValueError(f"Invalid date format: {dt_str}")
427
- return AirbyteDateTime(year, month, day, 0, 0, 0)
417
+ instant = Instant.from_utc(year, month, day, 0, 0, 0)
418
+ return AirbyteDateTime.from_datetime(instant.py_datetime())
428
419
  except (ValueError, TypeError):
429
420
  raise ValueError(f"Invalid date format: {dt_str}")
430
421
 
@@ -11,12 +11,12 @@ from airbyte_cdk.models import (
11
11
  AirbyteConnectionStatus,
12
12
  AirbyteErrorTraceMessage,
13
13
  AirbyteMessage,
14
+ AirbyteMessageSerializer,
14
15
  AirbyteTraceMessage,
15
16
  FailureType,
16
17
  Status,
17
18
  StreamDescriptor,
18
19
  TraceType,
19
- ab_message_to_string,
20
20
  )
21
21
  from airbyte_cdk.models import Type as MessageType
22
22
  from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
@@ -95,7 +95,7 @@ class AirbyteTracedException(Exception):
95
95
  Prints the exception as an AirbyteTraceMessage.
96
96
  Note that this will be called automatically on uncaught exceptions when using the airbyte_cdk entrypoint.
97
97
  """
98
- message = ab_message_to_string(self.as_airbyte_message())
98
+ message = orjson.dumps(AirbyteMessageSerializer.dump(self.as_airbyte_message())).decode()
99
99
  filtered_message = filter_secrets(message)
100
100
  print(filtered_message)
101
101