airbyte-cdk 6.13.1.dev4101__py3-none-any.whl → 6.13.1.dev4103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. airbyte_cdk/entrypoint.py +13 -1
  2. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +24 -51
  3. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +72 -1
  4. airbyte_cdk/sources/declarative/extractors/__init__.py +2 -0
  5. airbyte_cdk/sources/declarative/extractors/record_selector.py +5 -7
  6. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  7. airbyte_cdk/sources/declarative/interpolation/macros.py +21 -0
  8. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +40 -1
  9. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +43 -12
  10. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +52 -35
  11. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +10 -7
  12. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +9 -4
  13. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +11 -6
  14. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +13 -11
  15. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +14 -13
  16. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +6 -7
  17. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +10 -7
  18. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +1 -4
  19. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +71 -64
  20. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -4
  21. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  22. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -8
  23. airbyte_cdk/sources/file_based/file_based_source.py +4 -3
  24. airbyte_cdk/sources/file_based/file_based_stream_reader.py +5 -4
  25. {airbyte_cdk-6.13.1.dev4101.dist-info → airbyte_cdk-6.13.1.dev4103.dist-info}/METADATA +1 -1
  26. {airbyte_cdk-6.13.1.dev4101.dist-info → airbyte_cdk-6.13.1.dev4103.dist-info}/RECORD +29 -27
  27. {airbyte_cdk-6.13.1.dev4101.dist-info → airbyte_cdk-6.13.1.dev4103.dist-info}/LICENSE.txt +0 -0
  28. {airbyte_cdk-6.13.1.dev4101.dist-info → airbyte_cdk-6.13.1.dev4103.dist-info}/WHEEL +0 -0
  29. {airbyte_cdk-6.13.1.dev4101.dist-info → airbyte_cdk-6.13.1.dev4103.dist-info}/entry_points.txt +0 -0
airbyte_cdk/entrypoint.py CHANGED
@@ -5,6 +5,7 @@
5
5
  import argparse
6
6
  import importlib
7
7
  import ipaddress
8
+ import json
8
9
  import logging
9
10
  import os.path
10
11
  import socket
@@ -46,6 +47,7 @@ logger = init_logger("airbyte")
46
47
 
47
48
  VALID_URL_SCHEMES = ["https"]
48
49
  CLOUD_DEPLOYMENT_MODE = "cloud"
50
+ _HAS_LOGGED_FOR_SERIALIZATION_ERROR = False
49
51
 
50
52
 
51
53
  class AirbyteEntrypoint(object):
@@ -291,7 +293,17 @@ class AirbyteEntrypoint(object):
291
293
 
292
294
  @staticmethod
293
295
  def airbyte_message_to_string(airbyte_message: AirbyteMessage) -> str:
294
- return orjson.dumps(AirbyteMessageSerializer.dump(airbyte_message)).decode()
296
+ global _HAS_LOGGED_FOR_SERIALIZATION_ERROR
297
+ serialized_message = AirbyteMessageSerializer.dump(airbyte_message)
298
+ try:
299
+ return orjson.dumps(serialized_message).decode()
300
+ except Exception as exception:
301
+ if not _HAS_LOGGED_FOR_SERIALIZATION_ERROR:
302
+ logger.warning(
303
+ f"There was an error during the serialization of an AirbyteMessage: `{exception}`. This might impact the sync performances."
304
+ )
305
+ _HAS_LOGGED_FOR_SERIALIZATION_ERROR = True
306
+ return json.dumps(serialized_message)
295
307
 
296
308
  @classmethod
297
309
  def extract_state(cls, args: List[str]) -> Optional[Any]:
airbyte_cdk/sources/declarative/concurrent_declarative_source.py CHANGED
@@ -3,7 +3,7 @@
3
3
  #
4
4
 
5
5
  import logging
6
- from typing import Any, Callable, Generic, Iterator, List, Mapping, Optional, Tuple, Union
6
+ from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple
7
7
 
8
8
  from airbyte_cdk.models import (
9
9
  AirbyteCatalog,
@@ -28,15 +28,11 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
28
28
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
29
29
  DatetimeBasedCursor as DatetimeBasedCursorModel,
30
30
  )
31
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
32
- DeclarativeStream as DeclarativeStreamModel,
33
- )
34
31
  from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
35
- ComponentDefinition,
36
32
  ModelToComponentFactory,
37
33
  )
38
34
  from airbyte_cdk.sources.declarative.requesters import HttpRequester
39
- from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
35
+ from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
40
36
  from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
41
37
  DeclarativePartitionFactory,
42
38
  StreamSlicerPartitionGenerator,
@@ -52,7 +48,6 @@ from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
52
48
  from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
53
49
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
54
50
  from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
55
- from airbyte_cdk.sources.types import Config, StreamState
56
51
 
57
52
 
58
53
  class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
@@ -194,10 +189,11 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
194
189
  # Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
195
190
  # these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
196
191
  # so we need to treat them as synchronous
197
- if (
198
- isinstance(declarative_stream, DeclarativeStream)
199
- and name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
192
+ if isinstance(declarative_stream, DeclarativeStream) and (
193
+ name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
200
194
  == "SimpleRetriever"
195
+ or name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
196
+ == "AsyncRetriever"
201
197
  ):
202
198
  incremental_sync_component_definition = name_to_stream_mapping[
203
199
  declarative_stream.name
@@ -234,15 +230,27 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
234
230
  stream_state=stream_state,
235
231
  )
236
232
 
233
+ retriever = declarative_stream.retriever
234
+
235
+ # This is an optimization so that we don't invoke any cursor or state management flows within the
236
+ # low-code framework because state management is handled through the ConcurrentCursor.
237
+ if declarative_stream and isinstance(retriever, SimpleRetriever):
238
+ # Also a temporary hack. In the legacy Stream implementation, as part of the read,
239
+ # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
240
+ # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
241
+ # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
242
+ # still rely on a DatetimeBasedCursor that is properly initialized with state.
243
+ if retriever.cursor:
244
+ retriever.cursor.set_initial_state(stream_state=stream_state)
245
+ # We zero it out here, but since this is a cursor reference, the state is still properly
246
+ # instantiated for the other components that reference it
247
+ retriever.cursor = None
248
+
237
249
  partition_generator = StreamSlicerPartitionGenerator(
238
250
  DeclarativePartitionFactory(
239
251
  declarative_stream.name,
240
252
  declarative_stream.get_json_schema(),
241
- self._retriever_factory(
242
- name_to_stream_mapping[declarative_stream.name],
243
- config,
244
- stream_state,
245
- ),
253
+ retriever,
246
254
  self.message_repository,
247
255
  ),
248
256
  cursor,
@@ -272,11 +280,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
272
280
  DeclarativePartitionFactory(
273
281
  declarative_stream.name,
274
282
  declarative_stream.get_json_schema(),
275
- self._retriever_factory(
276
- name_to_stream_mapping[declarative_stream.name],
277
- config,
278
- {},
279
- ),
283
+ declarative_stream.retriever,
280
284
  self.message_repository,
281
285
  ),
282
286
  declarative_stream.retriever.stream_slicer,
@@ -415,34 +419,3 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
415
419
  if stream.stream.name not in concurrent_stream_names
416
420
  ]
417
421
  )
418
-
419
- def _retriever_factory(
420
- self, stream_config: ComponentDefinition, source_config: Config, stream_state: StreamState
421
- ) -> Callable[[], Retriever]:
422
- def _factory_method() -> Retriever:
423
- declarative_stream: DeclarativeStream = self._constructor.create_component(
424
- DeclarativeStreamModel,
425
- stream_config,
426
- source_config,
427
- emit_connector_builder_messages=self._emit_connector_builder_messages,
428
- )
429
-
430
- # This is an optimization so that we don't invoke any cursor or state management flows within the
431
- # low-code framework because state management is handled through the ConcurrentCursor.
432
- if (
433
- declarative_stream
434
- and declarative_stream.retriever
435
- and isinstance(declarative_stream.retriever, SimpleRetriever)
436
- ):
437
- # Also a temporary hack. In the legacy Stream implementation, as part of the read, set_initial_state() is
438
- # called to instantiate incoming state on the cursor. Although we no longer rely on the legacy low-code cursor
439
- # for concurrent checkpointing, low-code components like StopConditionPaginationStrategyDecorator and
440
- # ClientSideIncrementalRecordFilterDecorator still rely on a DatetimeBasedCursor that is properly initialized
441
- # with state.
442
- if declarative_stream.retriever.cursor:
443
- declarative_stream.retriever.cursor.set_initial_state(stream_state=stream_state)
444
- declarative_stream.retriever.cursor = None
445
-
446
- return declarative_stream.retriever
447
-
448
- return _factory_method
airbyte_cdk/sources/declarative/declarative_component_schema.yaml CHANGED
@@ -667,6 +667,28 @@ definitions:
667
667
  $parameters:
668
668
  type: object
669
669
  additionalProperties: true
670
+ CustomSchemaNormalization:
671
+ title: Custom Schema Normalization
672
+ description: Schema normalization component whose behavior is derived from a custom code implementation of the connector.
673
+ type: object
674
+ additionalProperties: true
675
+ required:
676
+ - type
677
+ - class_name
678
+ properties:
679
+ type:
680
+ type: string
681
+ enum: [ CustomSchemaNormalization ]
682
+ class_name:
683
+ title: Class Name
684
+ description: Fully-qualified name of the class that will be implementing the custom normalization. The format is `source_<name>.<package>.<class_name>`.
685
+ type: string
686
+ additionalProperties: true
687
+ examples:
688
+ - "source_amazon_seller_partner.components.LedgerDetailedViewReportsTypeTransformer"
689
+ $parameters:
690
+ type: object
691
+ additionalProperties: true
670
692
  CustomStateMigration:
671
693
  title: Custom State Migration
672
694
  description: Apply a custom transformation on the input state.
@@ -1241,6 +1263,7 @@ definitions:
1241
1263
  - "$ref": "#/definitions/KeysToLower"
1242
1264
  - "$ref": "#/definitions/KeysToSnakeCase"
1243
1265
  - "$ref": "#/definitions/FlattenFields"
1266
+ - "$ref": "#/definitions/KeysReplace"
1244
1267
  state_migrations:
1245
1268
  title: State Migrations
1246
1269
  description: Array of state migrations to be applied on the input state
@@ -1785,6 +1808,7 @@ definitions:
1785
1808
  - "$ref": "#/definitions/KeysToLower"
1786
1809
  - "$ref": "#/definitions/KeysToSnakeCase"
1787
1810
  - "$ref": "#/definitions/FlattenFields"
1811
+ - "$ref": "#/definitions/KeysReplace"
1788
1812
  schema_type_identifier:
1789
1813
  "$ref": "#/definitions/SchemaTypeIdentifier"
1790
1814
  $parameters:
@@ -1883,6 +1907,49 @@ definitions:
1883
1907
  $parameters:
1884
1908
  type: object
1885
1909
  additionalProperties: true
1910
+ KeysReplace:
1911
+ title: Keys Replace
1912
+ description: A transformation that replaces symbols in keys.
1913
+ type: object
1914
+ required:
1915
+ - type
1916
+ - old
1917
+ - new
1918
+ properties:
1919
+ type:
1920
+ type: string
1921
+ enum: [KeysReplace]
1922
+ old:
1923
+ type: string
1924
+ title: Old value
1925
+ description: Old value to replace.
1926
+ examples:
1927
+ - " "
1928
+ - "{{ record.id }}"
1929
+ - "{{ config['id'] }}"
1930
+ - "{{ stream_slice['id'] }}"
1931
+ interpolation_context:
1932
+ - config
1933
+ - record
1934
+ - stream_state
1935
+ - stream_slice
1936
+ new:
1937
+ type: string
1938
+ title: New value
1939
+ description: New value to set.
1940
+ examples:
1941
+ - "_"
1942
+ - "{{ record.id }}"
1943
+ - "{{ config['id'] }}"
1944
+ - "{{ stream_slice['id'] }}"
1945
+ interpolation_context:
1946
+ - config
1947
+ - record
1948
+ - stream_state
1949
+ - stream_slice
1950
+ $parameters:
1951
+ type: object
1952
+ additionalProperties: true
1886
1953
  IterableDecoder:
1887
1954
  title: Iterable Decoder
1888
1955
  description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key.
@@ -2555,7 +2622,11 @@ definitions:
2555
2622
  - "$ref": "#/definitions/CustomRecordFilter"
2556
2623
  - "$ref": "#/definitions/RecordFilter"
2557
2624
  schema_normalization:
2558
- "$ref": "#/definitions/SchemaNormalization"
2625
+ title: Schema Normalization
2626
+ description: Responsible for normalization according to the schema.
2627
+ anyOf:
2628
+ - "$ref": "#/definitions/SchemaNormalization"
2629
+ - "$ref": "#/definitions/CustomSchemaNormalization"
2559
2630
  default: None
2560
2631
  $parameters:
2561
2632
  type: object
airbyte_cdk/sources/declarative/extractors/__init__.py CHANGED
@@ -9,8 +9,10 @@ from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSel
9
9
  from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import (
10
10
  ResponseToFileExtractor,
11
11
  )
12
+ from airbyte_cdk.sources.declarative.extractors.type_transformer import TypeTransformer
12
13
 
13
14
  __all__ = [
15
+ "TypeTransformer",
14
16
  "HttpSelector",
15
17
  "DpathExtractor",
16
18
  "RecordFilter",
airbyte_cdk/sources/declarative/extractors/record_selector.py CHANGED
@@ -10,16 +10,14 @@ import requests
10
10
  from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
11
11
  from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
12
12
  from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
13
+ from airbyte_cdk.sources.declarative.extractors.type_transformer import (
14
+ TypeTransformer as DeclarativeTypeTransformer,
15
+ )
13
16
  from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
14
17
  from airbyte_cdk.sources.declarative.models import SchemaNormalization
15
18
  from airbyte_cdk.sources.declarative.transformations import RecordTransformation
16
19
  from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
17
- from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
18
-
19
- SCHEMA_TRANSFORMER_TYPE_MAPPING = {
20
- SchemaNormalization.None_: TransformConfig.NoTransform,
21
- SchemaNormalization.Default: TransformConfig.DefaultSchemaNormalization,
22
- }
20
+ from airbyte_cdk.sources.utils.transform import TypeTransformer
23
21
 
24
22
 
25
23
  @dataclass
@@ -38,7 +36,7 @@ class RecordSelector(HttpSelector):
38
36
  extractor: RecordExtractor
39
37
  config: Config
40
38
  parameters: InitVar[Mapping[str, Any]]
41
- schema_normalization: TypeTransformer
39
+ schema_normalization: Union[TypeTransformer, DeclarativeTypeTransformer]
42
40
  name: str
43
41
  _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="")
44
42
  record_filter: Optional[RecordFilter] = None
airbyte_cdk/sources/declarative/extractors/type_transformer.py ADDED
@@ -0,0 +1,55 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from abc import ABC, abstractmethod
6
+ from dataclasses import dataclass
7
+ from typing import Any, Dict, Mapping
8
+
9
+
10
+ @dataclass
11
+ class TypeTransformer(ABC):
12
+ """
13
+ Abstract base class for implementing type transformation logic.
14
+
15
+ This class provides a blueprint for defining custom transformations
16
+ on data records based on a provided schema. Implementing classes
17
+ must override the `transform` method to specify the transformation
18
+ logic.
19
+
20
+ Attributes:
21
+ None explicitly defined, as this is a dataclass intended to be
22
+ subclassed.
23
+
24
+ Methods:
25
+ transform(record: Dict[str, Any], schema: Mapping[str, Any]) -> None:
26
+ Abstract method that must be implemented by subclasses.
27
+ It performs a transformation on a given data record based
28
+ on the provided schema.
29
+
30
+ Usage:
31
+ To use this class, create a subclass that implements the
32
+ `transform` method with the desired transformation logic.
33
+ """
34
+
35
+ @abstractmethod
36
+ def transform(
37
+ self,
38
+ record: Dict[str, Any],
39
+ schema: Mapping[str, Any],
40
+ ) -> None:
41
+ """
42
+ Perform a transformation on a data record based on a given schema.
43
+
44
+ Args:
45
+ record (Dict[str, Any]): The data record to be transformed.
46
+ schema (Mapping[str, Any]): The schema that dictates how
47
+ the record should be transformed.
48
+
49
+ Returns:
50
+ None
51
+
52
+ Raises:
53
+ NotImplementedError: If the method is not implemented
54
+ by a subclass.
55
+ """
airbyte_cdk/sources/declarative/interpolation/macros.py CHANGED
@@ -94,6 +94,26 @@ def max(*args: typing.Any) -> typing.Any:
94
94
  return builtins.max(*args)
95
95
 
96
96
 
97
+ def min(*args: typing.Any) -> typing.Any:
98
+ """
99
+ Returns smallest object of an iterable, or two or more arguments.
100
+
101
+ min(iterable, *[, default=obj, key=func]) -> value
102
+ min(arg1, arg2, *args, *[, key=func]) -> value
103
+
104
+ Usage:
105
+ `"{{ min(2,3) }}"
106
+
107
+ With a single iterable argument, return its smallest item. The
108
+ default keyword-only argument specifies an object to return if
109
+ the provided iterable is empty.
110
+ With two or more arguments, return the smallest argument.
111
+ :param args: args to compare
112
+ :return: smallest argument
113
+ """
114
+ return builtins.min(*args)
115
+
116
+
97
117
  def day_delta(num_days: int, format: str = "%Y-%m-%dT%H:%M:%S.%f%z") -> str:
98
118
  """
99
119
  Returns datetime of now() + num_days
@@ -147,6 +167,7 @@ _macros_list = [
147
167
  today_utc,
148
168
  timestamp,
149
169
  max,
170
+ min,
150
171
  day_delta,
151
172
  duration,
152
173
  format_datetime,
airbyte_cdk/sources/declarative/models/declarative_component_schema.py CHANGED
@@ -268,6 +268,22 @@ class CustomSchemaLoader(BaseModel):
268
268
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
269
269
 
270
270
 
271
+ class CustomSchemaNormalization(BaseModel):
272
+ class Config:
273
+ extra = Extra.allow
274
+
275
+ type: Literal["CustomSchemaNormalization"]
276
+ class_name: str = Field(
277
+ ...,
278
+ description="Fully-qualified name of the class that will be implementing the custom normalization. The format is `source_<name>.<package>.<class_name>`.",
279
+ examples=[
280
+ "source_amazon_seller_partner.components.LedgerDetailedViewReportsTypeTransformer"
281
+ ],
282
+ title="Class Name",
283
+ )
284
+ parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
285
+
286
+
271
287
  class CustomStateMigration(BaseModel):
272
288
  class Config:
273
289
  extra = Extra.allow
@@ -721,6 +737,23 @@ class KeysToSnakeCase(BaseModel):
721
737
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
722
738
 
723
739
 
740
+ class KeysReplace(BaseModel):
741
+ type: Literal["KeysReplace"]
742
+ old: str = Field(
743
+ ...,
744
+ description="Old value to replace.",
745
+ examples=[" ", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
746
+ title="Old value",
747
+ )
748
+ new: str = Field(
749
+ ...,
750
+ description="New value to set.",
751
+ examples=["_", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
752
+ title="New value",
753
+ )
754
+ parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
755
+
756
+
724
757
  class FlattenFields(BaseModel):
725
758
  type: Literal["FlattenFields"]
726
759
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
@@ -1513,7 +1546,11 @@ class RecordSelector(BaseModel):
1513
1546
  description="Responsible for filtering records to be emitted by the Source.",
1514
1547
  title="Record Filter",
1515
1548
  )
1516
- schema_normalization: Optional[SchemaNormalization] = SchemaNormalization.None_
1549
+ schema_normalization: Optional[Union[SchemaNormalization, CustomSchemaNormalization]] = Field(
1550
+ SchemaNormalization.None_,
1551
+ description="Responsible for normalization according to the schema.",
1552
+ title="Schema Normalization",
1553
+ )
1517
1554
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1518
1555
 
1519
1556
 
@@ -1701,6 +1738,7 @@ class DeclarativeStream(BaseModel):
1701
1738
  KeysToLower,
1702
1739
  KeysToSnakeCase,
1703
1740
  FlattenFields,
1741
+ KeysReplace,
1704
1742
  ]
1705
1743
  ]
1706
1744
  ] = Field(
@@ -1875,6 +1913,7 @@ class DynamicSchemaLoader(BaseModel):
1875
1913
  KeysToLower,
1876
1914
  KeysToSnakeCase,
1877
1915
  FlattenFields,
1916
+ KeysReplace,
1878
1917
  ]
1879
1918
  ]
1880
1919
  ] = Field(
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py CHANGED
@@ -82,9 +82,6 @@ from airbyte_cdk.sources.declarative.extractors import (
82
82
  from airbyte_cdk.sources.declarative.extractors.record_filter import (
83
83
  ClientSideIncrementalRecordFilterDecorator,
84
84
  )
85
- from airbyte_cdk.sources.declarative.extractors.record_selector import (
86
- SCHEMA_TRANSFORMER_TYPE_MAPPING,
87
- )
88
85
  from airbyte_cdk.sources.declarative.incremental import (
89
86
  ChildPartitionResumableFullRefreshCursor,
90
87
  CursorFactory,
@@ -100,7 +97,9 @@ from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import I
100
97
  from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
101
98
  LegacyToPerPartitionStateMigration,
102
99
  )
103
- from airbyte_cdk.sources.declarative.models import CustomStateMigration
100
+ from airbyte_cdk.sources.declarative.models import (
101
+ CustomStateMigration,
102
+ )
104
103
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
105
104
  AddedFieldDefinition as AddedFieldDefinitionModel,
106
105
  )
@@ -185,6 +184,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
185
184
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
186
185
  CustomSchemaLoader as CustomSchemaLoader,
187
186
  )
187
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
188
+ CustomSchemaNormalization as CustomSchemaNormalizationModel,
189
+ )
188
190
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
189
191
  CustomTransformation as CustomTransformationModel,
190
192
  )
@@ -254,6 +256,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
254
256
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
255
257
  JwtPayload as JwtPayloadModel,
256
258
  )
259
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
260
+ KeysReplace as KeysReplaceModel,
261
+ )
257
262
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
258
263
  KeysToLower as KeysToLowerModel,
259
264
  )
@@ -308,6 +313,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
308
313
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
309
314
  ResponseToFileExtractor as ResponseToFileExtractorModel,
310
315
  )
316
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
317
+ SchemaNormalization as SchemaNormalizationModel,
318
+ )
311
319
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
312
320
  SchemaTypeIdentifier as SchemaTypeIdentifierModel,
313
321
  )
@@ -417,6 +425,9 @@ from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFiel
417
425
  from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
418
426
  FlattenFields,
419
427
  )
428
+ from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
429
+ KeysReplaceTransformation,
430
+ )
420
431
  from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
421
432
  KeysToLowerTransformation,
422
433
  )
@@ -439,6 +450,11 @@ from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
439
450
 
440
451
  ComponentDefinition = Mapping[str, Any]
441
452
 
453
+ SCHEMA_TRANSFORMER_TYPE_MAPPING = {
454
+ SchemaNormalizationModel.None_: TransformConfig.NoTransform,
455
+ SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
456
+ }
457
+
442
458
 
443
459
  class ModelToComponentFactory:
444
460
  EPOCH_DATETIME_FORMAT = "%s"
@@ -487,6 +503,7 @@ class ModelToComponentFactory:
487
503
  CustomRequesterModel: self.create_custom_component,
488
504
  CustomRetrieverModel: self.create_custom_component,
489
505
  CustomSchemaLoader: self.create_custom_component,
506
+ CustomSchemaNormalizationModel: self.create_custom_component,
490
507
  CustomStateMigration: self.create_custom_component,
491
508
  CustomPaginationStrategyModel: self.create_custom_component,
492
509
  CustomPartitionRouterModel: self.create_custom_component,
@@ -509,6 +526,7 @@ class ModelToComponentFactory:
509
526
  GzipParserModel: self.create_gzip_parser,
510
527
  KeysToLowerModel: self.create_keys_to_lower_transformation,
511
528
  KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
529
+ KeysReplaceModel: self.create_keys_replace_transformation,
512
530
  FlattenFieldsModel: self.create_flatten_fields,
513
531
  IterableDecoderModel: self.create_iterable_decoder,
514
532
  XmlDecoderModel: self.create_xml_decoder,
@@ -630,6 +648,13 @@ class ModelToComponentFactory:
630
648
  ) -> KeysToSnakeCaseTransformation:
631
649
  return KeysToSnakeCaseTransformation()
632
650
 
651
+ def create_keys_replace_transformation(
652
+ self, model: KeysReplaceModel, config: Config, **kwargs: Any
653
+ ) -> KeysReplaceTransformation:
654
+ return KeysReplaceTransformation(
655
+ old=model.old, new=model.new, parameters=model.parameters or {}
656
+ )
657
+
633
658
  def create_flatten_fields(
634
659
  self, model: FlattenFieldsModel, config: Config, **kwargs: Any
635
660
  ) -> FlattenFields:
@@ -1560,7 +1585,12 @@ class ModelToComponentFactory:
1560
1585
  )
1561
1586
 
1562
1587
  def create_http_requester(
1563
- self, model: HttpRequesterModel, decoder: Decoder, config: Config, *, name: str
1588
+ self,
1589
+ model: HttpRequesterModel,
1590
+ config: Config,
1591
+ decoder: Decoder = JsonDecoder(parameters={}),
1592
+ *,
1593
+ name: str,
1564
1594
  ) -> HttpRequester:
1565
1595
  authenticator = (
1566
1596
  self._create_component_from_model(
@@ -1976,12 +2006,11 @@ class ModelToComponentFactory:
1976
2006
  config: Config,
1977
2007
  *,
1978
2008
  name: str,
1979
- transformations: List[RecordTransformation],
1980
- decoder: Optional[Decoder] = None,
1981
- client_side_incremental_sync: Optional[Dict[str, Any]] = None,
2009
+ transformations: List[RecordTransformation] | None = None,
2010
+ decoder: Decoder | None = None,
2011
+ client_side_incremental_sync: Dict[str, Any] | None = None,
1982
2012
  **kwargs: Any,
1983
2013
  ) -> RecordSelector:
1984
- assert model.schema_normalization is not None # for mypy
1985
2014
  extractor = self._create_component_from_model(
1986
2015
  model=model.extractor, decoder=decoder, config=config
1987
2016
  )
@@ -1999,8 +2028,10 @@ class ModelToComponentFactory:
1999
2028
  else None,
2000
2029
  **client_side_incremental_sync,
2001
2030
  )
2002
- schema_normalization = TypeTransformer(
2003
- SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]
2031
+ schema_normalization = (
2032
+ TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
2033
+ if isinstance(model.schema_normalization, SchemaNormalizationModel)
2034
+ else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here
2004
2035
  )
2005
2036
 
2006
2037
  return RecordSelector(
@@ -2008,7 +2039,7 @@ class ModelToComponentFactory:
2008
2039
  name=name,
2009
2040
  config=config,
2010
2041
  record_filter=record_filter,
2011
- transformations=transformations,
2042
+ transformations=transformations or [],
2012
2043
  schema_normalization=schema_normalization,
2013
2044
  parameters=model.parameters or {},
2014
2045
  )