airbyte-cdk 6.60.0.post35.dev16509779638__py3-none-any.whl → 6.60.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +35 -30
  2. airbyte_cdk/config_observation.py +2 -2
  3. airbyte_cdk/connector.py +2 -1
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +6 -1
  5. airbyte_cdk/connector_builder/main.py +11 -18
  6. airbyte_cdk/connector_builder/test_reader/helpers.py +31 -0
  7. airbyte_cdk/connector_builder/test_reader/message_grouper.py +5 -0
  8. airbyte_cdk/connector_builder/test_reader/reader.py +8 -3
  9. airbyte_cdk/destinations/destination.py +7 -7
  10. airbyte_cdk/entrypoint.py +23 -8
  11. airbyte_cdk/logger.py +2 -2
  12. airbyte_cdk/models/__init__.py +6 -7
  13. airbyte_cdk/models/airbyte_protocol.py +81 -2
  14. airbyte_cdk/models/airbyte_protocol_serializers.py +26 -152
  15. airbyte_cdk/models/well_known_types.py +1 -1
  16. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +2 -4
  17. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +0 -11
  18. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -6
  19. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -15
  20. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +0 -3
  21. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +0 -6
  22. airbyte_cdk/sources/declarative/manifest_declarative_source.py +15 -7
  23. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +7 -4
  24. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +2 -25
  25. airbyte_cdk/sources/declarative/spec/spec.py +2 -2
  26. airbyte_cdk/sources/file_based/file_based_source.py +3 -3
  27. airbyte_cdk/sources/source.py +4 -2
  28. airbyte_cdk/sources/streams/checkpoint/cursor.py +0 -6
  29. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +0 -6
  30. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +0 -6
  31. airbyte_cdk/sources/streams/http/http_client.py +7 -5
  32. airbyte_cdk/sources/streams/permissions/identities_stream.py +1 -1
  33. airbyte_cdk/sql/shared/sql_processor.py +1 -1
  34. airbyte_cdk/test/catalog_builder.py +2 -1
  35. airbyte_cdk/test/entrypoint_wrapper.py +16 -25
  36. airbyte_cdk/utils/datetime_helpers.py +5 -14
  37. airbyte_cdk/utils/traced_exception.py +2 -2
  38. {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.2.dist-info}/METADATA +11 -10
  39. {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.2.dist-info}/RECORD +43 -43
  40. {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.2.dist-info}/LICENSE.txt +0 -0
  41. {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.2.dist-info}/LICENSE_SHORT +0 -0
  42. {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.2.dist-info}/WHEEL +0 -0
  43. {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.2.dist-info}/entry_points.txt +0 -0
@@ -1,13 +1,7 @@
1
1
  # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
- import json
3
- import logging
4
- import sys
5
- from enum import Enum
6
- from typing import Any, Callable, Dict, Type, TypeVar, cast
2
+ from typing import Any, Dict
7
3
 
8
- import dacite
9
- import orjson
10
- from pydantic import ValidationError
4
+ from serpyco_rs import CustomType, Serializer
11
5
 
12
6
  from .airbyte_protocol import ( # type: ignore[attr-defined] # all classes are imported to airbyte_protocol via *
13
7
  AirbyteCatalog,
@@ -21,154 +15,34 @@ from .airbyte_protocol import ( # type: ignore[attr-defined] # all classes are
21
15
  ConnectorSpecification,
22
16
  )
23
17
 
24
- USE_RUST_BACKEND = sys.platform != "emscripten"
25
- """When run in WASM, use the pure Python backend for serpyco."""
26
18
 
27
- _HAS_LOGGED_FOR_SERIALIZATION_ERROR = False
28
- """Track if we have logged an error for serialization issues."""
19
+ class AirbyteStateBlobType(CustomType[AirbyteStateBlob, Dict[str, Any]]):
20
+ def serialize(self, value: AirbyteStateBlob) -> Dict[str, Any]:
21
+ # can't use orjson.dumps() directly because private attributes are excluded, e.g. "__ab_full_refresh_sync_complete"
22
+ return {k: v for k, v in value.__dict__.items()}
29
23
 
30
- T = TypeVar("T")
24
+ def deserialize(self, value: Dict[str, Any]) -> AirbyteStateBlob:
25
+ return AirbyteStateBlob(value)
31
26
 
32
- logger = logging.getLogger("airbyte")
27
+ def get_json_schema(self) -> Dict[str, Any]:
28
+ return {"type": "object"}
33
29
 
34
- # Making this a no-op for now:
35
30
 
31
+ def custom_type_resolver(t: type) -> CustomType[AirbyteStateBlob, Dict[str, Any]] | None:
32
+ return AirbyteStateBlobType() if t is AirbyteStateBlob else None
36
33
 
37
- def ab_message_to_string(
38
- message: AirbyteMessage,
39
- ) -> str:
40
- """
41
- Convert an AirbyteMessage to a JSON string.
42
34
 
43
- Args:
44
- message (AirbyteMessage): The Airbyte message to convert.
45
-
46
- Returns:
47
- str: JSON string representation of the AirbyteMessage.
48
- """
49
- return message.model_dump_json()
50
-
51
-
52
- def ab_message_from_string(
53
- message_json: str,
54
- ) -> AirbyteMessage:
55
- """
56
- Convert a JSON string to an AirbyteMessage.
57
-
58
- Args:
59
- message_str (str): The JSON string to convert.
60
-
61
- Returns:
62
- AirbyteMessage: The deserialized AirbyteMessage.
63
- """
64
- try:
65
- return AirbyteMessage.model_validate_json(message_json)
66
- except ValidationError as e:
67
- raise ValueError(f"Invalid AirbyteMessage format: {e}") from e
68
- except orjson.JSONDecodeError as e:
69
- raise ValueError(f"Failed to decode JSON: {e}") from e
70
-
71
-
72
- def ab_connector_spec_from_string(
73
- spec_json: str,
74
- ) -> ConnectorSpecification:
75
- """
76
- Convert a JSON string to a ConnectorSpecification.
77
-
78
- Args:
79
- spec_str (str): The JSON string to convert.
80
-
81
- Returns:
82
- ConnectorSpecification: The deserialized ConnectorSpecification.
83
- """
84
- try:
85
- return ConnectorSpecification.model_validate_json(spec_json)
86
- except ValidationError as e:
87
- raise ValueError(f"Invalid ConnectorSpecification format: {e}") from e
88
- except orjson.JSONDecodeError as e:
89
- raise ValueError(f"Failed to decode JSON: {e}") from e
90
-
91
-
92
- def ab_connector_spec_to_string(
93
- spec: ConnectorSpecification,
94
- ) -> str:
95
- """
96
- Convert a ConnectorSpecification to a JSON string.
97
-
98
- Args:
99
- spec (ConnectorSpecification): The ConnectorSpecification to convert.
100
-
101
- Returns:
102
- str: JSON string representation of the ConnectorSpecification.
103
- """
104
- return spec.model_dump_json()
105
-
106
-
107
- def ab_configured_catalog_to_string(
108
- catalog: ConfiguredAirbyteCatalog,
109
- ) -> str:
110
- """
111
- Convert a ConfiguredAirbyteCatalog to a JSON string.
112
-
113
- Args:
114
- catalog (ConfiguredAirbyteCatalog): The ConfiguredAirbyteCatalog to convert.
115
-
116
- Returns:
117
- str: JSON string representation of the ConfiguredAirbyteCatalog.
118
- """
119
- return catalog.model_dump_json()
120
-
121
-
122
- def ab_configured_catalog_from_string(
123
- catalog_json: str,
124
- ) -> ConfiguredAirbyteCatalog:
125
- """
126
- Convert a JSON string to a ConfiguredAirbyteCatalog.
127
-
128
- Args:
129
- catalog_json (str): The JSON string to convert.
130
-
131
- Returns:
132
- ConfiguredAirbyteCatalog: The deserialized ConfiguredAirbyteCatalog.
133
- """
134
- try:
135
- return ConfiguredAirbyteCatalog.model_validate_json(catalog_json)
136
- except ValidationError as e:
137
- raise ValueError(f"Invalid ConfiguredAirbyteCatalog format: {e}") from e
138
- except orjson.JSONDecodeError as e:
139
- raise ValueError(f"Failed to decode JSON: {e}") from e
140
-
141
-
142
- def ab_state_message_from_string(
143
- state_json: str,
144
- ) -> AirbyteStateMessage:
145
- """
146
- Convert a JSON string to an AirbyteStateMessage.
147
-
148
- Args:
149
- state_json (str): The JSON string to convert.
150
-
151
- Returns:
152
- AirbyteStateMessage: The deserialized AirbyteStateMessage.
153
- """
154
- try:
155
- return AirbyteStateMessage.model_validate_json(state_json)
156
- except ValidationError as e:
157
- raise ValueError(f"Invalid AirbyteStateMessage format: {e}") from e
158
- except orjson.JSONDecodeError as e:
159
- raise ValueError(f"Failed to decode JSON: {e}") from e
160
-
161
-
162
- def ab_state_message_to_string(
163
- state: AirbyteStateMessage,
164
- ) -> str:
165
- """
166
- Convert an AirbyteStateMessage to a JSON string.
167
-
168
- Args:
169
- state (AirbyteStateMessage): The AirbyteStateMessage to convert.
170
-
171
- Returns:
172
- str: JSON string representation of the AirbyteStateMessage.
173
- """
174
- return state.model_dump_json()
35
+ AirbyteCatalogSerializer = Serializer(AirbyteCatalog, omit_none=True)
36
+ AirbyteStreamSerializer = Serializer(AirbyteStream, omit_none=True)
37
+ AirbyteStreamStateSerializer = Serializer(
38
+ AirbyteStreamState, omit_none=True, custom_type_resolver=custom_type_resolver
39
+ )
40
+ AirbyteStateMessageSerializer = Serializer(
41
+ AirbyteStateMessage, omit_none=True, custom_type_resolver=custom_type_resolver
42
+ )
43
+ AirbyteMessageSerializer = Serializer(
44
+ AirbyteMessage, omit_none=True, custom_type_resolver=custom_type_resolver
45
+ )
46
+ ConfiguredAirbyteCatalogSerializer = Serializer(ConfiguredAirbyteCatalog, omit_none=True)
47
+ ConfiguredAirbyteStreamSerializer = Serializer(ConfiguredAirbyteStream, omit_none=True)
48
+ ConnectorSpecificationSerializer = Serializer(ConnectorSpecification, omit_none=True)
@@ -2,4 +2,4 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_protocol.models.well_known_types import * # noqa: F403 # Allow '*'
5
+ from airbyte_protocol_dataclasses.models.well_known_types import * # noqa: F403 # Allow '*'
@@ -63,7 +63,7 @@ from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_
63
63
  class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
64
64
  # By default, we defer to a value of 2. A value lower than that could cause a PartitionEnqueuer to be stuck in a state of deadlock
65
65
  # because it has hit the limit of futures but no partition reader is consuming them.
66
- _LOWEST_SAFE_CONCURRENCY_LEVEL = 1 # TODO: revert-me: 2
66
+ _LOWEST_SAFE_CONCURRENCY_LEVEL = 2
67
67
 
68
68
  def __init__(
69
69
  self,
@@ -119,9 +119,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
119
119
  ) # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a dead lock during start up
120
120
  else:
121
121
  concurrency_level = self._LOWEST_SAFE_CONCURRENCY_LEVEL
122
- initial_number_of_partitions_to_generate = max(
123
- self._LOWEST_SAFE_CONCURRENCY_LEVEL // 2, 1
124
- )
122
+ initial_number_of_partitions_to_generate = self._LOWEST_SAFE_CONCURRENCY_LEVEL // 2
125
123
 
126
124
  self._concurrent_source = ConcurrentSource.create(
127
125
  num_workers=concurrency_level,
@@ -429,17 +429,6 @@ class DatetimeBasedCursor(DeclarativeCursor):
429
429
  )
430
430
  )
431
431
 
432
- def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
433
- cursor_field = self.cursor_field.eval(self.config) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
434
- first_cursor_value = first.get(cursor_field)
435
- second_cursor_value = second.get(cursor_field)
436
- if first_cursor_value and second_cursor_value:
437
- return self.parse_date(first_cursor_value) >= self.parse_date(second_cursor_value)
438
- elif first_cursor_value:
439
- return True
440
- else:
441
- return False
442
-
443
432
  def set_runtime_lookback_window(self, lookback_window_in_seconds: int) -> None:
444
433
  """
445
434
  Updates the lookback window based on a given number of seconds if the new duration
@@ -338,12 +338,6 @@ class GlobalSubstreamCursor(DeclarativeCursor):
338
338
  def should_be_synced(self, record: Record) -> bool:
339
339
  return self._stream_cursor.should_be_synced(self._convert_record_to_cursor_record(record))
340
340
 
341
- def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
342
- return self._stream_cursor.is_greater_than_or_equal(
343
- self._convert_record_to_cursor_record(first),
344
- self._convert_record_to_cursor_record(second),
345
- )
346
-
347
341
  @staticmethod
348
342
  def _convert_record_to_cursor_record(record: Record) -> Record:
349
343
  return Record(
@@ -315,21 +315,6 @@ class PerPartitionCursor(DeclarativeCursor):
315
315
  self._convert_record_to_cursor_record(record)
316
316
  )
317
317
 
318
- def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
319
- if not first.associated_slice or not second.associated_slice:
320
- raise ValueError(
321
- f"Both records should have an associated slice but got {first.associated_slice} and {second.associated_slice}"
322
- )
323
- if first.associated_slice.partition != second.associated_slice.partition:
324
- raise ValueError(
325
- f"To compare records, partition should be the same but got {first.associated_slice.partition} and {second.associated_slice.partition}"
326
- )
327
-
328
- return self._get_cursor(first).is_greater_than_or_equal(
329
- self._convert_record_to_cursor_record(first),
330
- self._convert_record_to_cursor_record(second),
331
- )
332
-
333
318
  @staticmethod
334
319
  def _convert_record_to_cursor_record(record: Record) -> Record:
335
320
  return Record(
@@ -195,6 +195,3 @@ class PerPartitionWithGlobalCursor(DeclarativeCursor):
195
195
 
196
196
  def should_be_synced(self, record: Record) -> bool:
197
197
  return self._get_active_cursor().should_be_synced(record)
198
-
199
- def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
200
- return self._global_cursor.is_greater_than_or_equal(first, second)
@@ -42,12 +42,6 @@ class ResumableFullRefreshCursor(DeclarativeCursor):
42
42
  """
43
43
  return True
44
44
 
45
- def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
46
- """
47
- RFR record don't have ordering to be compared between one another.
48
- """
49
- return False
50
-
51
45
  def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
52
46
  # A top-level RFR cursor only manages the state of a single partition
53
47
  return self._cursor
@@ -31,7 +31,7 @@ from airbyte_cdk.models import (
31
31
  ConnectorSpecification,
32
32
  FailureType,
33
33
  )
34
- from airbyte_cdk.models.airbyte_protocol_serializers import ab_message_to_string
34
+ from airbyte_cdk.models.airbyte_protocol_serializers import AirbyteMessageSerializer
35
35
  from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
36
36
  from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
37
37
  from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
@@ -234,7 +234,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
234
234
  )
235
235
  # We have no mechanism for consuming the queue, so we print the messages to stdout
236
236
  for message in self.message_repository.consume_queue():
237
- print(ab_message_to_string(message))
237
+ print(orjson.dumps(AirbyteMessageSerializer.dump(message)).decode())
238
238
  self._spec_component.transform_config(mutable_config)
239
239
  return mutable_config
240
240
 
@@ -542,11 +542,19 @@ class ManifestDeclarativeSource(DeclarativeSource):
542
542
  components_resolver_config["retriever"]["requester"]["use_cache"] = True
543
543
 
544
544
  # Create a resolver for dynamic components based on type
545
- components_resolver = self._constructor.create_component(
546
- COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
547
- components_resolver_config,
548
- config,
549
- )
545
+ if resolver_type == "HttpComponentsResolver":
546
+ components_resolver = self._constructor.create_component(
547
+ model_type=COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
548
+ component_definition=components_resolver_config,
549
+ config=config,
550
+ stream_name=dynamic_definition.get("name"),
551
+ )
552
+ else:
553
+ components_resolver = self._constructor.create_component(
554
+ model_type=COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
555
+ component_definition=components_resolver_config,
556
+ config=config,
557
+ )
550
558
 
551
559
  stream_template_config = dynamic_definition["stream_template"]
552
560
 
@@ -3493,10 +3493,11 @@ class ModelToComponentFactory:
3493
3493
  requester=download_requester,
3494
3494
  record_selector=record_selector,
3495
3495
  primary_key=None,
3496
- name=job_download_components_name,
3496
+ name=name,
3497
3497
  paginator=paginator,
3498
3498
  config=config,
3499
3499
  parameters={},
3500
+ log_formatter=self._get_log_formatter(None, name),
3500
3501
  )
3501
3502
 
3502
3503
  def _get_job_timeout() -> datetime.timedelta:
@@ -3805,7 +3806,7 @@ class ModelToComponentFactory:
3805
3806
  )
3806
3807
 
3807
3808
  def create_http_components_resolver(
3808
- self, model: HttpComponentsResolverModel, config: Config
3809
+ self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None
3809
3810
  ) -> Any:
3810
3811
  stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
3811
3812
  combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
@@ -3813,7 +3814,7 @@ class ModelToComponentFactory:
3813
3814
  retriever = self._create_component_from_model(
3814
3815
  model=model.retriever,
3815
3816
  config=config,
3816
- name="",
3817
+ name=f"{stream_name if stream_name else '__http_components_resolver'}",
3817
3818
  primary_key=None,
3818
3819
  stream_slicer=stream_slicer if stream_slicer else combined_slicers,
3819
3820
  transformations=[],
@@ -3890,7 +3891,9 @@ class ModelToComponentFactory:
3890
3891
  )
3891
3892
 
3892
3893
  def create_parametrized_components_resolver(
3893
- self, model: ParametrizedComponentsResolverModel, config: Config
3894
+ self,
3895
+ model: ParametrizedComponentsResolverModel,
3896
+ config: Config,
3894
3897
  ) -> ParametrizedComponentsResolver:
3895
3898
  stream_parameters = StreamParametersDefinition(
3896
3899
  list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream
@@ -41,7 +41,6 @@ from airbyte_cdk.sources.declarative.requesters.request_options import (
41
41
  from airbyte_cdk.sources.declarative.requesters.requester import Requester
42
42
  from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
43
43
  from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer
44
- from airbyte_cdk.sources.http_logger import format_http_message
45
44
  from airbyte_cdk.sources.source import ExperimentalClassWarning
46
45
  from airbyte_cdk.sources.streams.core import StreamData
47
46
  from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
@@ -528,35 +527,13 @@ class SimpleRetriever(Retriever):
528
527
  if self.cursor and current_record:
529
528
  self.cursor.observe(_slice, current_record)
530
529
 
531
- # Latest record read, not necessarily within slice boundaries.
532
- # TODO Remove once all custom components implement `observe` method.
533
- # https://github.com/airbytehq/airbyte-internal-issues/issues/6955
534
- most_recent_record_from_slice = self._get_most_recent_record(
535
- most_recent_record_from_slice, current_record, _slice
536
- )
537
530
  yield stream_data
538
531
 
539
532
  if self.cursor:
540
- self.cursor.close_slice(_slice, most_recent_record_from_slice)
533
+ self.cursor.close_slice(_slice)
541
534
  return
542
535
 
543
- def _get_most_recent_record(
544
- self,
545
- current_most_recent: Optional[Record],
546
- current_record: Optional[Record],
547
- stream_slice: StreamSlice,
548
- ) -> Optional[Record]:
549
- if self.cursor and current_record:
550
- if not current_most_recent:
551
- return current_record
552
- else:
553
- return (
554
- current_most_recent
555
- if self.cursor.is_greater_than_or_equal(current_most_recent, current_record)
556
- else current_record
557
- )
558
- else:
559
- return None
536
+ # FIXME based on the comment above in SimpleRetriever.read_records, it seems like we can tackle https://github.com/airbytehq/airbyte-internal-issues/issues/6955 and remove this
560
537
 
561
538
  def _extract_record(
562
539
  self, stream_data: StreamData, stream_slice: StreamSlice
@@ -8,8 +8,8 @@ from typing import Any, List, Mapping, MutableMapping, Optional
8
8
  from airbyte_cdk.models import (
9
9
  AdvancedAuth,
10
10
  ConnectorSpecification,
11
+ ConnectorSpecificationSerializer,
11
12
  )
12
- from airbyte_cdk.models.airbyte_protocol_serializers import ab_connector_spec_from_string
13
13
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import AuthFlow
14
14
  from airbyte_cdk.sources.declarative.transformations.config_transformations.config_transformation import (
15
15
  ConfigTransformation,
@@ -59,7 +59,7 @@ class Spec:
59
59
  obj["advanced_auth"] = self.advanced_auth.dict()
60
60
 
61
61
  # We remap these keys to camel case because that's the existing format expected by the rest of the platform
62
- return ConnectorSpecification.model_validate(obj)
62
+ return ConnectorSpecificationSerializer.load(obj)
63
63
 
64
64
  def migrate_config(self, config: MutableMapping[str, Any]) -> None:
65
65
  """
@@ -77,9 +77,9 @@ from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
77
77
  from airbyte_cdk.utils.analytics_message import create_analytics_message
78
78
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
79
79
 
80
- DEFAULT_CONCURRENCY = 1.0 # TODO: Revert me: 100
81
- MAX_CONCURRENCY = 1 # TODO: Revert me: 100
82
- INITIAL_N_PARTITIONS = max(MAX_CONCURRENCY // 2, 1)
80
+ DEFAULT_CONCURRENCY = 100
81
+ MAX_CONCURRENCY = 100
82
+ INITIAL_N_PARTITIONS = MAX_CONCURRENCY // 2
83
83
  IDENTITIES_STREAM = "identities"
84
84
 
85
85
 
@@ -12,7 +12,9 @@ from airbyte_cdk.models import (
12
12
  AirbyteCatalog,
13
13
  AirbyteMessage,
14
14
  AirbyteStateMessage,
15
+ AirbyteStateMessageSerializer,
15
16
  ConfiguredAirbyteCatalog,
17
+ ConfiguredAirbyteCatalogSerializer,
16
18
  )
17
19
 
18
20
  TState = TypeVar("TState")
@@ -70,7 +72,7 @@ class Source(
70
72
  state_obj = BaseConnector._read_json_file(state_path)
71
73
  if state_obj:
72
74
  for state in state_obj: # type: ignore # `isinstance(state_obj, List)` ensures that this is a list
73
- parsed_message = AirbyteStateMessage.model_validate(state)
75
+ parsed_message = AirbyteStateMessageSerializer.load(state)
74
76
  if (
75
77
  not parsed_message.stream
76
78
  and not parsed_message.data
@@ -85,7 +87,7 @@ class Source(
85
87
  # can be overridden to change an input catalog
86
88
  @classmethod
87
89
  def read_catalog(cls, catalog_path: str) -> ConfiguredAirbyteCatalog:
88
- return ConfiguredAirbyteCatalog.model_validate(cls._read_json_file(catalog_path))
90
+ return ConfiguredAirbyteCatalogSerializer.load(cls._read_json_file(catalog_path))
89
91
 
90
92
  @property
91
93
  def name(self) -> str:
@@ -62,12 +62,6 @@ class Cursor(ABC):
62
62
  Evaluating if a record should be synced allows for filtering and stop condition on pagination
63
63
  """
64
64
 
65
- @abstractmethod
66
- def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
67
- """
68
- Evaluating which record is greater in terms of cursor. This is used to avoid having to capture all the records to close a slice
69
- """
70
-
71
65
  @abstractmethod
72
66
  def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
73
67
  """
@@ -40,12 +40,6 @@ class ResumableFullRefreshCursor(Cursor):
40
40
  """
41
41
  return True
42
42
 
43
- def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
44
- """
45
- RFR record don't have ordering to be compared between one another.
46
- """
47
- return False
48
-
49
43
  def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
50
44
  # A top-level RFR cursor only manages the state of a single partition
51
45
  return self._cursor
@@ -89,12 +89,6 @@ class SubstreamResumableFullRefreshCursor(Cursor):
89
89
  """
90
90
  return True
91
91
 
92
- def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
93
- """
94
- RFR record don't have ordering to be compared between one another.
95
- """
96
- return False
97
-
98
92
  def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]:
99
93
  if not stream_slice:
100
94
  raise ValueError("A partition needs to be provided in order to extract a state")
@@ -14,12 +14,12 @@ import requests_cache
14
14
  from requests.auth import AuthBase
15
15
 
16
16
  from airbyte_cdk.models import (
17
+ AirbyteMessageSerializer,
17
18
  AirbyteStreamStatus,
18
19
  AirbyteStreamStatusReason,
19
20
  AirbyteStreamStatusReasonType,
20
21
  Level,
21
22
  StreamDescriptor,
22
- ab_message_to_string,
23
23
  )
24
24
  from airbyte_cdk.sources.http_config import MAX_CONNECTION_POOL_SIZE
25
25
  from airbyte_cdk.sources.message import MessageRepository
@@ -396,11 +396,13 @@ class HttpClient:
396
396
  if error_resolution.response_action == ResponseAction.RATE_LIMITED:
397
397
  # TODO: Update to handle with message repository when concurrent message repository is ready
398
398
  reasons = [AirbyteStreamStatusReason(type=AirbyteStreamStatusReasonType.RATE_LIMITED)]
399
- message = ab_message_to_string(
400
- stream_status_as_airbyte_message(
401
- StreamDescriptor(name=self._name), AirbyteStreamStatus.RUNNING, reasons
399
+ message = orjson.dumps(
400
+ AirbyteMessageSerializer.dump(
401
+ stream_status_as_airbyte_message(
402
+ StreamDescriptor(name=self._name), AirbyteStreamStatus.RUNNING, reasons
403
+ )
402
404
  )
403
- )
405
+ ).decode()
404
406
 
405
407
  # Simply printing the stream status is a temporary solution and can cause future issues. Currently, the _send method is
406
408
  # wrapped with backoff decorators, and we can only emit messages by iterating record_iterator in the abstract source at the
@@ -6,7 +6,7 @@ import traceback
6
6
  from abc import ABC, abstractmethod
7
7
  from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional
8
8
 
9
- from airbyte_protocol.models import SyncMode
9
+ from airbyte_protocol_dataclasses.models import SyncMode
10
10
 
11
11
  from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
12
12
  from airbyte_cdk.models import Type as MessageType
@@ -13,7 +13,7 @@ from typing import TYPE_CHECKING, Any, final
13
13
  import pandas as pd
14
14
  import sqlalchemy
15
15
  import ulid
16
- from airbyte_protocol.models import AirbyteStateMessage
16
+ from airbyte_protocol_dataclasses.models import AirbyteStateMessage
17
17
  from pandas import Index
18
18
  from pydantic import BaseModel, Field
19
19
  from sqlalchemy import Column, Table, and_, create_engine, insert, null, select, text, update
@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Union, overload
5
5
  from airbyte_cdk.models import (
6
6
  ConfiguredAirbyteCatalog,
7
7
  ConfiguredAirbyteStream,
8
+ ConfiguredAirbyteStreamSerializer,
8
9
  SyncMode,
9
10
  )
10
11
 
@@ -41,7 +42,7 @@ class ConfiguredAirbyteStreamBuilder:
41
42
  return self
42
43
 
43
44
  def build(self) -> ConfiguredAirbyteStream:
44
- return ConfiguredAirbyteStream(**self._stream)
45
+ return ConfiguredAirbyteStreamSerializer.load(self._stream)
45
46
 
46
47
 
47
48
  class CatalogBuilder: