airbyte-cdk 6.13.1.dev4101__py3-none-any.whl → 6.13.1.dev4103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/entrypoint.py +13 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +24 -51
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +72 -1
- airbyte_cdk/sources/declarative/extractors/__init__.py +2 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +5 -7
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/interpolation/macros.py +21 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +40 -1
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +43 -12
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +52 -35
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +10 -7
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +9 -4
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +11 -6
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +13 -11
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +14 -13
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +6 -7
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +10 -7
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +1 -4
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +71 -64
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -4
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -8
- airbyte_cdk/sources/file_based/file_based_source.py +4 -3
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +5 -4
- {airbyte_cdk-6.13.1.dev4101.dist-info → airbyte_cdk-6.13.1.dev4103.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.13.1.dev4101.dist-info → airbyte_cdk-6.13.1.dev4103.dist-info}/RECORD +29 -27
- {airbyte_cdk-6.13.1.dev4101.dist-info → airbyte_cdk-6.13.1.dev4103.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.13.1.dev4101.dist-info → airbyte_cdk-6.13.1.dev4103.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.13.1.dev4101.dist-info → airbyte_cdk-6.13.1.dev4103.dist-info}/entry_points.txt +0 -0
airbyte_cdk/entrypoint.py
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
import argparse
|
6
6
|
import importlib
|
7
7
|
import ipaddress
|
8
|
+
import json
|
8
9
|
import logging
|
9
10
|
import os.path
|
10
11
|
import socket
|
@@ -46,6 +47,7 @@ logger = init_logger("airbyte")
|
|
46
47
|
|
47
48
|
VALID_URL_SCHEMES = ["https"]
|
48
49
|
CLOUD_DEPLOYMENT_MODE = "cloud"
|
50
|
+
_HAS_LOGGED_FOR_SERIALIZATION_ERROR = False
|
49
51
|
|
50
52
|
|
51
53
|
class AirbyteEntrypoint(object):
|
@@ -291,7 +293,17 @@ class AirbyteEntrypoint(object):
|
|
291
293
|
|
292
294
|
@staticmethod
|
293
295
|
def airbyte_message_to_string(airbyte_message: AirbyteMessage) -> str:
|
294
|
-
|
296
|
+
global _HAS_LOGGED_FOR_SERIALIZATION_ERROR
|
297
|
+
serialized_message = AirbyteMessageSerializer.dump(airbyte_message)
|
298
|
+
try:
|
299
|
+
return orjson.dumps(serialized_message).decode()
|
300
|
+
except Exception as exception:
|
301
|
+
if not _HAS_LOGGED_FOR_SERIALIZATION_ERROR:
|
302
|
+
logger.warning(
|
303
|
+
f"There was an error during the serialization of an AirbyteMessage: `{exception}`. This might impact the sync performances."
|
304
|
+
)
|
305
|
+
_HAS_LOGGED_FOR_SERIALIZATION_ERROR = True
|
306
|
+
return json.dumps(serialized_message)
|
295
307
|
|
296
308
|
@classmethod
|
297
309
|
def extract_state(cls, args: List[str]) -> Optional[Any]:
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import Any,
|
6
|
+
from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple
|
7
7
|
|
8
8
|
from airbyte_cdk.models import (
|
9
9
|
AirbyteCatalog,
|
@@ -28,15 +28,11 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
28
28
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
29
29
|
DatetimeBasedCursor as DatetimeBasedCursorModel,
|
30
30
|
)
|
31
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
32
|
-
DeclarativeStream as DeclarativeStreamModel,
|
33
|
-
)
|
34
31
|
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
35
|
-
ComponentDefinition,
|
36
32
|
ModelToComponentFactory,
|
37
33
|
)
|
38
34
|
from airbyte_cdk.sources.declarative.requesters import HttpRequester
|
39
|
-
from airbyte_cdk.sources.declarative.retrievers import
|
35
|
+
from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
|
40
36
|
from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
|
41
37
|
DeclarativePartitionFactory,
|
42
38
|
StreamSlicerPartitionGenerator,
|
@@ -52,7 +48,6 @@ from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
|
|
52
48
|
from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
|
53
49
|
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
|
54
50
|
from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
|
55
|
-
from airbyte_cdk.sources.types import Config, StreamState
|
56
51
|
|
57
52
|
|
58
53
|
class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
@@ -194,10 +189,11 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
194
189
|
# Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
|
195
190
|
# these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
|
196
191
|
# so we need to treat them as synchronous
|
197
|
-
if (
|
198
|
-
|
199
|
-
and name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
|
192
|
+
if isinstance(declarative_stream, DeclarativeStream) and (
|
193
|
+
name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
|
200
194
|
== "SimpleRetriever"
|
195
|
+
or name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
|
196
|
+
== "AsyncRetriever"
|
201
197
|
):
|
202
198
|
incremental_sync_component_definition = name_to_stream_mapping[
|
203
199
|
declarative_stream.name
|
@@ -234,15 +230,27 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
234
230
|
stream_state=stream_state,
|
235
231
|
)
|
236
232
|
|
233
|
+
retriever = declarative_stream.retriever
|
234
|
+
|
235
|
+
# This is an optimization so that we don't invoke any cursor or state management flows within the
|
236
|
+
# low-code framework because state management is handled through the ConcurrentCursor.
|
237
|
+
if declarative_stream and isinstance(retriever, SimpleRetriever):
|
238
|
+
# Also a temporary hack. In the legacy Stream implementation, as part of the read,
|
239
|
+
# set_initial_state() is called to instantiate incoming state on the cursor. Although we no
|
240
|
+
# longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
|
241
|
+
# like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
|
242
|
+
# still rely on a DatetimeBasedCursor that is properly initialized with state.
|
243
|
+
if retriever.cursor:
|
244
|
+
retriever.cursor.set_initial_state(stream_state=stream_state)
|
245
|
+
# We zero it out here, but since this is a cursor reference, the state is still properly
|
246
|
+
# instantiated for the other components that reference it
|
247
|
+
retriever.cursor = None
|
248
|
+
|
237
249
|
partition_generator = StreamSlicerPartitionGenerator(
|
238
250
|
DeclarativePartitionFactory(
|
239
251
|
declarative_stream.name,
|
240
252
|
declarative_stream.get_json_schema(),
|
241
|
-
|
242
|
-
name_to_stream_mapping[declarative_stream.name],
|
243
|
-
config,
|
244
|
-
stream_state,
|
245
|
-
),
|
253
|
+
retriever,
|
246
254
|
self.message_repository,
|
247
255
|
),
|
248
256
|
cursor,
|
@@ -272,11 +280,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
272
280
|
DeclarativePartitionFactory(
|
273
281
|
declarative_stream.name,
|
274
282
|
declarative_stream.get_json_schema(),
|
275
|
-
|
276
|
-
name_to_stream_mapping[declarative_stream.name],
|
277
|
-
config,
|
278
|
-
{},
|
279
|
-
),
|
283
|
+
declarative_stream.retriever,
|
280
284
|
self.message_repository,
|
281
285
|
),
|
282
286
|
declarative_stream.retriever.stream_slicer,
|
@@ -415,34 +419,3 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
415
419
|
if stream.stream.name not in concurrent_stream_names
|
416
420
|
]
|
417
421
|
)
|
418
|
-
|
419
|
-
def _retriever_factory(
|
420
|
-
self, stream_config: ComponentDefinition, source_config: Config, stream_state: StreamState
|
421
|
-
) -> Callable[[], Retriever]:
|
422
|
-
def _factory_method() -> Retriever:
|
423
|
-
declarative_stream: DeclarativeStream = self._constructor.create_component(
|
424
|
-
DeclarativeStreamModel,
|
425
|
-
stream_config,
|
426
|
-
source_config,
|
427
|
-
emit_connector_builder_messages=self._emit_connector_builder_messages,
|
428
|
-
)
|
429
|
-
|
430
|
-
# This is an optimization so that we don't invoke any cursor or state management flows within the
|
431
|
-
# low-code framework because state management is handled through the ConcurrentCursor.
|
432
|
-
if (
|
433
|
-
declarative_stream
|
434
|
-
and declarative_stream.retriever
|
435
|
-
and isinstance(declarative_stream.retriever, SimpleRetriever)
|
436
|
-
):
|
437
|
-
# Also a temporary hack. In the legacy Stream implementation, as part of the read, set_initial_state() is
|
438
|
-
# called to instantiate incoming state on the cursor. Although we no longer rely on the legacy low-code cursor
|
439
|
-
# for concurrent checkpointing, low-code components like StopConditionPaginationStrategyDecorator and
|
440
|
-
# ClientSideIncrementalRecordFilterDecorator still rely on a DatetimeBasedCursor that is properly initialized
|
441
|
-
# with state.
|
442
|
-
if declarative_stream.retriever.cursor:
|
443
|
-
declarative_stream.retriever.cursor.set_initial_state(stream_state=stream_state)
|
444
|
-
declarative_stream.retriever.cursor = None
|
445
|
-
|
446
|
-
return declarative_stream.retriever
|
447
|
-
|
448
|
-
return _factory_method
|
@@ -667,6 +667,28 @@ definitions:
|
|
667
667
|
$parameters:
|
668
668
|
type: object
|
669
669
|
additionalProperties: true
|
670
|
+
CustomSchemaNormalization:
|
671
|
+
title: Custom Schema Normalization
|
672
|
+
description: Schema normalization component whose behavior is derived from a custom code implementation of the connector.
|
673
|
+
type: object
|
674
|
+
additionalProperties: true
|
675
|
+
required:
|
676
|
+
- type
|
677
|
+
- class_name
|
678
|
+
properties:
|
679
|
+
type:
|
680
|
+
type: string
|
681
|
+
enum: [ CustomSchemaNormalization ]
|
682
|
+
class_name:
|
683
|
+
title: Class Name
|
684
|
+
description: Fully-qualified name of the class that will be implementing the custom normalization. The format is `source_<name>.<package>.<class_name>`.
|
685
|
+
type: string
|
686
|
+
additionalProperties: true
|
687
|
+
examples:
|
688
|
+
- "source_amazon_seller_partner.components.LedgerDetailedViewReportsTypeTransformer"
|
689
|
+
$parameters:
|
690
|
+
type: object
|
691
|
+
additionalProperties: true
|
670
692
|
CustomStateMigration:
|
671
693
|
title: Custom State Migration
|
672
694
|
description: Apply a custom transformation on the input state.
|
@@ -1241,6 +1263,7 @@ definitions:
|
|
1241
1263
|
- "$ref": "#/definitions/KeysToLower"
|
1242
1264
|
- "$ref": "#/definitions/KeysToSnakeCase"
|
1243
1265
|
- "$ref": "#/definitions/FlattenFields"
|
1266
|
+
- "$ref": "#/definitions/KeysReplace"
|
1244
1267
|
state_migrations:
|
1245
1268
|
title: State Migrations
|
1246
1269
|
description: Array of state migrations to be applied on the input state
|
@@ -1785,6 +1808,7 @@ definitions:
|
|
1785
1808
|
- "$ref": "#/definitions/KeysToLower"
|
1786
1809
|
- "$ref": "#/definitions/KeysToSnakeCase"
|
1787
1810
|
- "$ref": "#/definitions/FlattenFields"
|
1811
|
+
- "$ref": "#/definitions/KeysReplace"
|
1788
1812
|
schema_type_identifier:
|
1789
1813
|
"$ref": "#/definitions/SchemaTypeIdentifier"
|
1790
1814
|
$parameters:
|
@@ -1883,6 +1907,49 @@ definitions:
|
|
1883
1907
|
$parameters:
|
1884
1908
|
type: object
|
1885
1909
|
additionalProperties: true
|
1910
|
+
KeysReplace:
|
1911
|
+
title: Keys Replace
|
1912
|
+
description: A transformation that replaces symbols in keys.
|
1913
|
+
type: object
|
1914
|
+
required:
|
1915
|
+
- type
|
1916
|
+
- old
|
1917
|
+
- new
|
1918
|
+
properties:
|
1919
|
+
type:
|
1920
|
+
type: string
|
1921
|
+
enum: [KeysReplace]
|
1922
|
+
old:
|
1923
|
+
type: string
|
1924
|
+
title: Old value
|
1925
|
+
description: Old value to replace.
|
1926
|
+
examples:
|
1927
|
+
- " "
|
1928
|
+
- "{{ record.id }}"
|
1929
|
+
- "{{ config['id'] }}"
|
1930
|
+
- "{{ stream_slice['id'] }}"
|
1931
|
+
interpolation_context:
|
1932
|
+
- config
|
1933
|
+
- record
|
1934
|
+
- stream_state
|
1935
|
+
- stream_slice
|
1936
|
+
new:
|
1937
|
+
type: string
|
1938
|
+
title: New value
|
1939
|
+
description: New value to set.
|
1940
|
+
examples:
|
1941
|
+
- "_"
|
1942
|
+
- "{{ record.id }}"
|
1943
|
+
- "{{ config['id'] }}"
|
1944
|
+
- "{{ stream_slice['id'] }}"
|
1945
|
+
interpolation_context:
|
1946
|
+
- config
|
1947
|
+
- record
|
1948
|
+
- stream_state
|
1949
|
+
- stream_slice
|
1950
|
+
$parameters:
|
1951
|
+
type: object
|
1952
|
+
additionalProperties: true
|
1886
1953
|
IterableDecoder:
|
1887
1954
|
title: Iterable Decoder
|
1888
1955
|
description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key.
|
@@ -2555,7 +2622,11 @@ definitions:
|
|
2555
2622
|
- "$ref": "#/definitions/CustomRecordFilter"
|
2556
2623
|
- "$ref": "#/definitions/RecordFilter"
|
2557
2624
|
schema_normalization:
|
2558
|
-
|
2625
|
+
title: Schema Normalization
|
2626
|
+
description: Responsible for normalization according to the schema.
|
2627
|
+
anyOf:
|
2628
|
+
- "$ref": "#/definitions/SchemaNormalization"
|
2629
|
+
- "$ref": "#/definitions/CustomSchemaNormalization"
|
2559
2630
|
default: None
|
2560
2631
|
$parameters:
|
2561
2632
|
type: object
|
@@ -9,8 +9,10 @@ from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSel
|
|
9
9
|
from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import (
|
10
10
|
ResponseToFileExtractor,
|
11
11
|
)
|
12
|
+
from airbyte_cdk.sources.declarative.extractors.type_transformer import TypeTransformer
|
12
13
|
|
13
14
|
__all__ = [
|
15
|
+
"TypeTransformer",
|
14
16
|
"HttpSelector",
|
15
17
|
"DpathExtractor",
|
16
18
|
"RecordFilter",
|
@@ -10,16 +10,14 @@ import requests
|
|
10
10
|
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
|
11
11
|
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
|
12
12
|
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
|
13
|
+
from airbyte_cdk.sources.declarative.extractors.type_transformer import (
|
14
|
+
TypeTransformer as DeclarativeTypeTransformer,
|
15
|
+
)
|
13
16
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
14
17
|
from airbyte_cdk.sources.declarative.models import SchemaNormalization
|
15
18
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
16
19
|
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
17
|
-
from airbyte_cdk.sources.utils.transform import
|
18
|
-
|
19
|
-
SCHEMA_TRANSFORMER_TYPE_MAPPING = {
|
20
|
-
SchemaNormalization.None_: TransformConfig.NoTransform,
|
21
|
-
SchemaNormalization.Default: TransformConfig.DefaultSchemaNormalization,
|
22
|
-
}
|
20
|
+
from airbyte_cdk.sources.utils.transform import TypeTransformer
|
23
21
|
|
24
22
|
|
25
23
|
@dataclass
|
@@ -38,7 +36,7 @@ class RecordSelector(HttpSelector):
|
|
38
36
|
extractor: RecordExtractor
|
39
37
|
config: Config
|
40
38
|
parameters: InitVar[Mapping[str, Any]]
|
41
|
-
schema_normalization: TypeTransformer
|
39
|
+
schema_normalization: Union[TypeTransformer, DeclarativeTypeTransformer]
|
42
40
|
name: str
|
43
41
|
_name: Union[InterpolatedString, str] = field(init=False, repr=False, default="")
|
44
42
|
record_filter: Optional[RecordFilter] = None
|
@@ -0,0 +1,55 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from abc import ABC, abstractmethod
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from typing import Any, Dict, Mapping
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass
|
11
|
+
class TypeTransformer(ABC):
|
12
|
+
"""
|
13
|
+
Abstract base class for implementing type transformation logic.
|
14
|
+
|
15
|
+
This class provides a blueprint for defining custom transformations
|
16
|
+
on data records based on a provided schema. Implementing classes
|
17
|
+
must override the `transform` method to specify the transformation
|
18
|
+
logic.
|
19
|
+
|
20
|
+
Attributes:
|
21
|
+
None explicitly defined, as this is a dataclass intended to be
|
22
|
+
subclassed.
|
23
|
+
|
24
|
+
Methods:
|
25
|
+
transform(record: Dict[str, Any], schema: Mapping[str, Any]) -> None:
|
26
|
+
Abstract method that must be implemented by subclasses.
|
27
|
+
It performs a transformation on a given data record based
|
28
|
+
on the provided schema.
|
29
|
+
|
30
|
+
Usage:
|
31
|
+
To use this class, create a subclass that implements the
|
32
|
+
`transform` method with the desired transformation logic.
|
33
|
+
"""
|
34
|
+
|
35
|
+
@abstractmethod
|
36
|
+
def transform(
|
37
|
+
self,
|
38
|
+
record: Dict[str, Any],
|
39
|
+
schema: Mapping[str, Any],
|
40
|
+
) -> None:
|
41
|
+
"""
|
42
|
+
Perform a transformation on a data record based on a given schema.
|
43
|
+
|
44
|
+
Args:
|
45
|
+
record (Dict[str, Any]): The data record to be transformed.
|
46
|
+
schema (Mapping[str, Any]): The schema that dictates how
|
47
|
+
the record should be transformed.
|
48
|
+
|
49
|
+
Returns:
|
50
|
+
None
|
51
|
+
|
52
|
+
Raises:
|
53
|
+
NotImplementedError: If the method is not implemented
|
54
|
+
by a subclass.
|
55
|
+
"""
|
@@ -94,6 +94,26 @@ def max(*args: typing.Any) -> typing.Any:
|
|
94
94
|
return builtins.max(*args)
|
95
95
|
|
96
96
|
|
97
|
+
def min(*args: typing.Any) -> typing.Any:
|
98
|
+
"""
|
99
|
+
Returns smallest object of an iterable, or two or more arguments.
|
100
|
+
|
101
|
+
min(iterable, *[, default=obj, key=func]) -> value
|
102
|
+
min(arg1, arg2, *args, *[, key=func]) -> value
|
103
|
+
|
104
|
+
Usage:
|
105
|
+
`"{{ min(2,3) }}"
|
106
|
+
|
107
|
+
With a single iterable argument, return its smallest item. The
|
108
|
+
default keyword-only argument specifies an object to return if
|
109
|
+
the provided iterable is empty.
|
110
|
+
With two or more arguments, return the smallest argument.
|
111
|
+
:param args: args to compare
|
112
|
+
:return: smallest argument
|
113
|
+
"""
|
114
|
+
return builtins.min(*args)
|
115
|
+
|
116
|
+
|
97
117
|
def day_delta(num_days: int, format: str = "%Y-%m-%dT%H:%M:%S.%f%z") -> str:
|
98
118
|
"""
|
99
119
|
Returns datetime of now() + num_days
|
@@ -147,6 +167,7 @@ _macros_list = [
|
|
147
167
|
today_utc,
|
148
168
|
timestamp,
|
149
169
|
max,
|
170
|
+
min,
|
150
171
|
day_delta,
|
151
172
|
duration,
|
152
173
|
format_datetime,
|
@@ -268,6 +268,22 @@ class CustomSchemaLoader(BaseModel):
|
|
268
268
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
269
269
|
|
270
270
|
|
271
|
+
class CustomSchemaNormalization(BaseModel):
|
272
|
+
class Config:
|
273
|
+
extra = Extra.allow
|
274
|
+
|
275
|
+
type: Literal["CustomSchemaNormalization"]
|
276
|
+
class_name: str = Field(
|
277
|
+
...,
|
278
|
+
description="Fully-qualified name of the class that will be implementing the custom normalization. The format is `source_<name>.<package>.<class_name>`.",
|
279
|
+
examples=[
|
280
|
+
"source_amazon_seller_partner.components.LedgerDetailedViewReportsTypeTransformer"
|
281
|
+
],
|
282
|
+
title="Class Name",
|
283
|
+
)
|
284
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
285
|
+
|
286
|
+
|
271
287
|
class CustomStateMigration(BaseModel):
|
272
288
|
class Config:
|
273
289
|
extra = Extra.allow
|
@@ -721,6 +737,23 @@ class KeysToSnakeCase(BaseModel):
|
|
721
737
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
722
738
|
|
723
739
|
|
740
|
+
class KeysReplace(BaseModel):
|
741
|
+
type: Literal["KeysReplace"]
|
742
|
+
old: str = Field(
|
743
|
+
...,
|
744
|
+
description="Old value to replace.",
|
745
|
+
examples=[" ", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
|
746
|
+
title="Old value",
|
747
|
+
)
|
748
|
+
new: str = Field(
|
749
|
+
...,
|
750
|
+
description="New value to set.",
|
751
|
+
examples=["_", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
|
752
|
+
title="New value",
|
753
|
+
)
|
754
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
755
|
+
|
756
|
+
|
724
757
|
class FlattenFields(BaseModel):
|
725
758
|
type: Literal["FlattenFields"]
|
726
759
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
@@ -1513,7 +1546,11 @@ class RecordSelector(BaseModel):
|
|
1513
1546
|
description="Responsible for filtering records to be emitted by the Source.",
|
1514
1547
|
title="Record Filter",
|
1515
1548
|
)
|
1516
|
-
schema_normalization: Optional[SchemaNormalization] =
|
1549
|
+
schema_normalization: Optional[Union[SchemaNormalization, CustomSchemaNormalization]] = Field(
|
1550
|
+
SchemaNormalization.None_,
|
1551
|
+
description="Responsible for normalization according to the schema.",
|
1552
|
+
title="Schema Normalization",
|
1553
|
+
)
|
1517
1554
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1518
1555
|
|
1519
1556
|
|
@@ -1701,6 +1738,7 @@ class DeclarativeStream(BaseModel):
|
|
1701
1738
|
KeysToLower,
|
1702
1739
|
KeysToSnakeCase,
|
1703
1740
|
FlattenFields,
|
1741
|
+
KeysReplace,
|
1704
1742
|
]
|
1705
1743
|
]
|
1706
1744
|
] = Field(
|
@@ -1875,6 +1913,7 @@ class DynamicSchemaLoader(BaseModel):
|
|
1875
1913
|
KeysToLower,
|
1876
1914
|
KeysToSnakeCase,
|
1877
1915
|
FlattenFields,
|
1916
|
+
KeysReplace,
|
1878
1917
|
]
|
1879
1918
|
]
|
1880
1919
|
] = Field(
|
@@ -82,9 +82,6 @@ from airbyte_cdk.sources.declarative.extractors import (
|
|
82
82
|
from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
83
83
|
ClientSideIncrementalRecordFilterDecorator,
|
84
84
|
)
|
85
|
-
from airbyte_cdk.sources.declarative.extractors.record_selector import (
|
86
|
-
SCHEMA_TRANSFORMER_TYPE_MAPPING,
|
87
|
-
)
|
88
85
|
from airbyte_cdk.sources.declarative.incremental import (
|
89
86
|
ChildPartitionResumableFullRefreshCursor,
|
90
87
|
CursorFactory,
|
@@ -100,7 +97,9 @@ from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import I
|
|
100
97
|
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
|
101
98
|
LegacyToPerPartitionStateMigration,
|
102
99
|
)
|
103
|
-
from airbyte_cdk.sources.declarative.models import
|
100
|
+
from airbyte_cdk.sources.declarative.models import (
|
101
|
+
CustomStateMigration,
|
102
|
+
)
|
104
103
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
105
104
|
AddedFieldDefinition as AddedFieldDefinitionModel,
|
106
105
|
)
|
@@ -185,6 +184,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
185
184
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
186
185
|
CustomSchemaLoader as CustomSchemaLoader,
|
187
186
|
)
|
187
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
188
|
+
CustomSchemaNormalization as CustomSchemaNormalizationModel,
|
189
|
+
)
|
188
190
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
189
191
|
CustomTransformation as CustomTransformationModel,
|
190
192
|
)
|
@@ -254,6 +256,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
254
256
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
255
257
|
JwtPayload as JwtPayloadModel,
|
256
258
|
)
|
259
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
260
|
+
KeysReplace as KeysReplaceModel,
|
261
|
+
)
|
257
262
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
258
263
|
KeysToLower as KeysToLowerModel,
|
259
264
|
)
|
@@ -308,6 +313,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
308
313
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
309
314
|
ResponseToFileExtractor as ResponseToFileExtractorModel,
|
310
315
|
)
|
316
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
317
|
+
SchemaNormalization as SchemaNormalizationModel,
|
318
|
+
)
|
311
319
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
312
320
|
SchemaTypeIdentifier as SchemaTypeIdentifierModel,
|
313
321
|
)
|
@@ -417,6 +425,9 @@ from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFiel
|
|
417
425
|
from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
|
418
426
|
FlattenFields,
|
419
427
|
)
|
428
|
+
from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
|
429
|
+
KeysReplaceTransformation,
|
430
|
+
)
|
420
431
|
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
|
421
432
|
KeysToLowerTransformation,
|
422
433
|
)
|
@@ -439,6 +450,11 @@ from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
|
439
450
|
|
440
451
|
ComponentDefinition = Mapping[str, Any]
|
441
452
|
|
453
|
+
SCHEMA_TRANSFORMER_TYPE_MAPPING = {
|
454
|
+
SchemaNormalizationModel.None_: TransformConfig.NoTransform,
|
455
|
+
SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
|
456
|
+
}
|
457
|
+
|
442
458
|
|
443
459
|
class ModelToComponentFactory:
|
444
460
|
EPOCH_DATETIME_FORMAT = "%s"
|
@@ -487,6 +503,7 @@ class ModelToComponentFactory:
|
|
487
503
|
CustomRequesterModel: self.create_custom_component,
|
488
504
|
CustomRetrieverModel: self.create_custom_component,
|
489
505
|
CustomSchemaLoader: self.create_custom_component,
|
506
|
+
CustomSchemaNormalizationModel: self.create_custom_component,
|
490
507
|
CustomStateMigration: self.create_custom_component,
|
491
508
|
CustomPaginationStrategyModel: self.create_custom_component,
|
492
509
|
CustomPartitionRouterModel: self.create_custom_component,
|
@@ -509,6 +526,7 @@ class ModelToComponentFactory:
|
|
509
526
|
GzipParserModel: self.create_gzip_parser,
|
510
527
|
KeysToLowerModel: self.create_keys_to_lower_transformation,
|
511
528
|
KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
|
529
|
+
KeysReplaceModel: self.create_keys_replace_transformation,
|
512
530
|
FlattenFieldsModel: self.create_flatten_fields,
|
513
531
|
IterableDecoderModel: self.create_iterable_decoder,
|
514
532
|
XmlDecoderModel: self.create_xml_decoder,
|
@@ -630,6 +648,13 @@ class ModelToComponentFactory:
|
|
630
648
|
) -> KeysToSnakeCaseTransformation:
|
631
649
|
return KeysToSnakeCaseTransformation()
|
632
650
|
|
651
|
+
def create_keys_replace_transformation(
|
652
|
+
self, model: KeysReplaceModel, config: Config, **kwargs: Any
|
653
|
+
) -> KeysReplaceTransformation:
|
654
|
+
return KeysReplaceTransformation(
|
655
|
+
old=model.old, new=model.new, parameters=model.parameters or {}
|
656
|
+
)
|
657
|
+
|
633
658
|
def create_flatten_fields(
|
634
659
|
self, model: FlattenFieldsModel, config: Config, **kwargs: Any
|
635
660
|
) -> FlattenFields:
|
@@ -1560,7 +1585,12 @@ class ModelToComponentFactory:
|
|
1560
1585
|
)
|
1561
1586
|
|
1562
1587
|
def create_http_requester(
|
1563
|
-
self,
|
1588
|
+
self,
|
1589
|
+
model: HttpRequesterModel,
|
1590
|
+
config: Config,
|
1591
|
+
decoder: Decoder = JsonDecoder(parameters={}),
|
1592
|
+
*,
|
1593
|
+
name: str,
|
1564
1594
|
) -> HttpRequester:
|
1565
1595
|
authenticator = (
|
1566
1596
|
self._create_component_from_model(
|
@@ -1976,12 +2006,11 @@ class ModelToComponentFactory:
|
|
1976
2006
|
config: Config,
|
1977
2007
|
*,
|
1978
2008
|
name: str,
|
1979
|
-
transformations: List[RecordTransformation],
|
1980
|
-
decoder:
|
1981
|
-
client_side_incremental_sync:
|
2009
|
+
transformations: List[RecordTransformation] | None = None,
|
2010
|
+
decoder: Decoder | None = None,
|
2011
|
+
client_side_incremental_sync: Dict[str, Any] | None = None,
|
1982
2012
|
**kwargs: Any,
|
1983
2013
|
) -> RecordSelector:
|
1984
|
-
assert model.schema_normalization is not None # for mypy
|
1985
2014
|
extractor = self._create_component_from_model(
|
1986
2015
|
model=model.extractor, decoder=decoder, config=config
|
1987
2016
|
)
|
@@ -1999,8 +2028,10 @@ class ModelToComponentFactory:
|
|
1999
2028
|
else None,
|
2000
2029
|
**client_side_incremental_sync,
|
2001
2030
|
)
|
2002
|
-
schema_normalization =
|
2003
|
-
SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]
|
2031
|
+
schema_normalization = (
|
2032
|
+
TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
|
2033
|
+
if isinstance(model.schema_normalization, SchemaNormalizationModel)
|
2034
|
+
else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here
|
2004
2035
|
)
|
2005
2036
|
|
2006
2037
|
return RecordSelector(
|
@@ -2008,7 +2039,7 @@ class ModelToComponentFactory:
|
|
2008
2039
|
name=name,
|
2009
2040
|
config=config,
|
2010
2041
|
record_filter=record_filter,
|
2011
|
-
transformations=transformations,
|
2042
|
+
transformations=transformations or [],
|
2012
2043
|
schema_normalization=schema_normalization,
|
2013
2044
|
parameters=model.parameters or {},
|
2014
2045
|
)
|