airbyte-cdk 6.9.1.dev3__py3-none-any.whl → 6.9.1rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +25 -31
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -4
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +100 -2
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +1 -1
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +53 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +95 -2
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +6 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +95 -21
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +2 -1
- airbyte_cdk/sources/declarative/resolvers/__init__.py +13 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +106 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +2 -2
- airbyte_cdk-6.9.1rc2.dist-info/METADATA +108 -0
- {airbyte_cdk-6.9.1.dev3.dist-info → airbyte_cdk-6.9.1rc2.dist-info}/RECORD +18 -16
- airbyte_cdk/test/utils/manifest_only_fixtures.py +0 -81
- airbyte_cdk-6.9.1.dev3.dist-info/METADATA +0 -306
- {airbyte_cdk-6.9.1.dev3.dist-info → airbyte_cdk-6.9.1rc2.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.9.1.dev3.dist-info → airbyte_cdk-6.9.1rc2.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.9.1.dev3.dist-info → airbyte_cdk-6.9.1rc2.dist-info}/entry_points.txt +0 -0
@@ -119,6 +119,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
119
119
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
120
120
|
CheckStream as CheckStreamModel,
|
121
121
|
)
|
122
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
123
|
+
ComponentMappingDefinition as ComponentMappingDefinitionModel,
|
124
|
+
)
|
122
125
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
123
126
|
CompositeErrorHandler as CompositeErrorHandlerModel,
|
124
127
|
)
|
@@ -191,6 +194,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
191
194
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
192
195
|
GzipJsonDecoder as GzipJsonDecoderModel,
|
193
196
|
)
|
197
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
198
|
+
HttpComponentsResolver as HttpComponentsResolverModel,
|
199
|
+
)
|
194
200
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
195
201
|
HttpRequester as HttpRequesterModel,
|
196
202
|
)
|
@@ -298,6 +304,7 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
298
304
|
from airbyte_cdk.sources.declarative.partition_routers import (
|
299
305
|
CartesianProductStreamSlicer,
|
300
306
|
ListPartitionRouter,
|
307
|
+
PartitionRouter,
|
301
308
|
SinglePartitionRouter,
|
302
309
|
SubstreamPartitionRouter,
|
303
310
|
)
|
@@ -338,6 +345,10 @@ from airbyte_cdk.sources.declarative.requesters.request_options import (
|
|
338
345
|
)
|
339
346
|
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
|
340
347
|
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
|
348
|
+
from airbyte_cdk.sources.declarative.resolvers import (
|
349
|
+
ComponentMappingDefinition,
|
350
|
+
HttpComponentsResolver,
|
351
|
+
)
|
341
352
|
from airbyte_cdk.sources.declarative.retrievers import (
|
342
353
|
AsyncRetriever,
|
343
354
|
SimpleRetriever,
|
@@ -467,6 +478,8 @@ class ModelToComponentFactory:
|
|
467
478
|
WaitTimeFromHeaderModel: self.create_wait_time_from_header,
|
468
479
|
WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
|
469
480
|
AsyncRetrieverModel: self.create_async_retriever,
|
481
|
+
HttpComponentsResolverModel: self.create_http_components_resolver,
|
482
|
+
ComponentMappingDefinitionModel: self.create_components_mapping_definition,
|
470
483
|
}
|
471
484
|
|
472
485
|
# Needed for the case where we need to perform a second parse on the fields of a custom component
|
@@ -1281,19 +1294,20 @@ class ModelToComponentFactory:
|
|
1281
1294
|
parameters=model.parameters or {},
|
1282
1295
|
)
|
1283
1296
|
|
1284
|
-
def
|
1285
|
-
self,
|
1286
|
-
|
1287
|
-
|
1297
|
+
def _build_stream_slicer_from_partition_router(
|
1298
|
+
self,
|
1299
|
+
model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
|
1300
|
+
config: Config,
|
1301
|
+
) -> Optional[PartitionRouter]:
|
1288
1302
|
if (
|
1289
|
-
hasattr(model
|
1290
|
-
and isinstance(model
|
1291
|
-
and model.
|
1303
|
+
hasattr(model, "partition_router")
|
1304
|
+
and isinstance(model, SimpleRetrieverModel)
|
1305
|
+
and model.partition_router
|
1292
1306
|
):
|
1293
|
-
stream_slicer_model = model.
|
1307
|
+
stream_slicer_model = model.partition_router
|
1294
1308
|
|
1295
1309
|
if isinstance(stream_slicer_model, list):
|
1296
|
-
|
1310
|
+
return CartesianProductStreamSlicer(
|
1297
1311
|
[
|
1298
1312
|
self._create_component_from_model(model=slicer, config=config)
|
1299
1313
|
for slicer in stream_slicer_model
|
@@ -1301,9 +1315,24 @@ class ModelToComponentFactory:
|
|
1301
1315
|
parameters={},
|
1302
1316
|
)
|
1303
1317
|
else:
|
1304
|
-
|
1305
|
-
|
1306
|
-
|
1318
|
+
return self._create_component_from_model(model=stream_slicer_model, config=config) # type: ignore[no-any-return]
|
1319
|
+
# A PartitionRouter is created here because stream_slicer_model is model.partition_router
|
1320
|
+
return None
|
1321
|
+
|
1322
|
+
def _build_resumable_cursor_from_paginator(
    self,
    model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
    stream_slicer: Optional[StreamSlicer],
) -> Optional[StreamSlicer]:
    """Return a ``ResumableFullRefreshCursor`` for a paginated retriever that has no slicer.

    :param model: retriever model; only its (optional) ``paginator`` attribute is inspected.
    :param stream_slicer: slicer already built for this retriever, if any.
    :return: a new ``ResumableFullRefreshCursor`` when ``model`` has a truthy ``paginator``
        and ``stream_slicer`` is falsy; ``None`` in every other case.
    """
    paginated = hasattr(model, "paginator") and model.paginator
    if not paginated:
        return None
    if stream_slicer:
        # An existing slicer takes the place of the cursor; nothing to build here.
        return None
    # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
    return ResumableFullRefreshCursor(parameters={})
|
1331
|
+
|
1332
|
+
def _merge_stream_slicers(
|
1333
|
+
self, model: DeclarativeStreamModel, config: Config
|
1334
|
+
) -> Optional[StreamSlicer]:
|
1335
|
+
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
1307
1336
|
|
1308
1337
|
if model.incremental_sync and stream_slicer:
|
1309
1338
|
incremental_sync_model = model.incremental_sync
|
@@ -1346,15 +1375,7 @@ class ModelToComponentFactory:
|
|
1346
1375
|
),
|
1347
1376
|
partition_router=stream_slicer,
|
1348
1377
|
)
|
1349
|
-
|
1350
|
-
hasattr(model.retriever, "paginator")
|
1351
|
-
and model.retriever.paginator
|
1352
|
-
and not stream_slicer
|
1353
|
-
):
|
1354
|
-
# For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
|
1355
|
-
return ResumableFullRefreshCursor(parameters={})
|
1356
|
-
else:
|
1357
|
-
return None
|
1378
|
+
return self._build_resumable_cursor_from_paginator(model.retriever, stream_slicer)
|
1358
1379
|
|
1359
1380
|
def create_default_error_handler(
|
1360
1381
|
self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
|
@@ -2218,3 +2239,56 @@ class ModelToComponentFactory:
|
|
2218
2239
|
|
2219
2240
|
def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level:
    """Choose the log level: ``Level.DEBUG`` when connector-builder messages are emitted, else ``Level.INFO``."""
    if emit_connector_builder_messages:
        return Level.DEBUG
    return Level.INFO
|
2242
|
+
|
2243
|
+
@staticmethod
def create_components_mapping_definition(
    model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
) -> ComponentMappingDefinition:
    """Build a ``ComponentMappingDefinition`` from its declarative model.

    The model's ``value`` and every entry of its ``field_path`` are wrapped with
    ``InterpolatedString.create``; ``value_type`` is converted through
    ``_json_schema_type_name_to_type``.

    :param model: parsed mapping definition (``value``, ``field_path``, ``value_type``, ``parameters``).
    :param config: connector configuration; not used by this builder.
    :param kwargs: extra keyword arguments; accepted and ignored.
    :return: the runtime ``ComponentMappingDefinition``.
    """
    value_template = InterpolatedString.create(model.value, parameters=model.parameters or {})

    path_templates = []
    for segment in model.field_path:
        path_templates.append(
            InterpolatedString.create(segment, parameters=model.parameters or {})
        )

    python_type = ModelToComponentFactory._json_schema_type_name_to_type(model.value_type)
    return ComponentMappingDefinition(
        field_path=path_templates,  # type: ignore[arg-type] # field_path can be str and InterpolatedString
        value=value_template,
        value_type=python_type,
        parameters=model.parameters or {},
    )
|
2260
|
+
|
2261
|
+
def create_http_components_resolver(
    self, model: HttpComponentsResolverModel, config: Config
) -> HttpComponentsResolver:
    """Build an ``HttpComponentsResolver`` from its declarative model.

    The resolver's retriever is created with the partition router declared on the
    retriever model when there is one, and otherwise with the resumable cursor derived
    from the paginator (which may itself be ``None``).

    :param model: parsed resolver model (``retriever``, ``components_mapping``, ``parameters``).
    :param config: connector configuration passed on to every sub-component.
    :return: the configured ``HttpComponentsResolver``.
    """
    partition_router = self._build_stream_slicer_from_partition_router(model.retriever, config)
    # `_build_resumable_cursor_from_paginator` returns None whenever a slicer is passed in,
    # so its result alone would drop the partition router; it is only used as a fallback.
    resumable_cursor = self._build_resumable_cursor_from_paginator(
        model.retriever, partition_router
    )

    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="",
        primary_key=None,
        stream_slicer=partition_router if partition_router else resumable_cursor,
        transformations=[],
    )

    components_mapping = [
        self._create_component_from_model(
            model=components_mapping_definition_model,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                components_mapping_definition_model.value_type
            ),
            config=config,
        )
        for components_mapping_definition_model in model.components_mapping
    ]

    return HttpComponentsResolver(
        retriever=retriever,
        config=config,
        components_mapping=components_mapping,
        parameters=model.parameters or {},
    )
|
@@ -6,5 +6,6 @@ from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_
|
|
6
6
|
from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import ListPartitionRouter
|
7
7
|
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import SinglePartitionRouter
|
8
8
|
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import SubstreamPartitionRouter
|
9
|
+
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
9
10
|
|
10
|
-
__all__ = ["CartesianProductStreamSlicer", "ListPartitionRouter", "SinglePartitionRouter", "SubstreamPartitionRouter"]
|
11
|
+
__all__ = ["CartesianProductStreamSlicer", "ListPartitionRouter", "SinglePartitionRouter", "SubstreamPartitionRouter", "PartitionRouter"]
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from airbyte_cdk.sources.declarative.resolvers.components_resolver import ComponentsResolver, ComponentMappingDefinition, ResolvedComponentMappingDefinition
|
6
|
+
from airbyte_cdk.sources.declarative.resolvers.http_components_resolver import HttpComponentsResolver
|
7
|
+
from airbyte_cdk.sources.declarative.models import HttpComponentsResolver as HttpComponentsResolverModel
|
8
|
+
|
9
|
+
# Lookup from a components-resolver type name to the declarative model class for it.
COMPONENTS_RESOLVER_TYPE_MAPPING = dict(
    HttpComponentsResolver=HttpComponentsResolverModel,
)
|
12
|
+
|
13
|
+
__all__ = ["ComponentsResolver", "HttpComponentsResolver", "ComponentMappingDefinition", "ResolvedComponentMappingDefinition", "COMPONENTS_RESOLVER_TYPE_MAPPING"]
|
@@ -0,0 +1,55 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from abc import ABC, abstractmethod
|
6
|
+
from dataclasses import InitVar, dataclass
|
7
|
+
from typing import Any, Dict, Iterable, List, Mapping, Optional, Type, Union
|
8
|
+
|
9
|
+
from typing_extensions import deprecated
|
10
|
+
|
11
|
+
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
12
|
+
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
13
|
+
|
14
|
+
|
15
|
+
@dataclass(frozen=True)
class ComponentMappingDefinition:
    """Immutable description of one update to apply to a stream template.

    It names the field of the template to update (``field_path``), the value to put
    there (``value``, which may still be a plain string to interpolate) and an optional
    type for that value (``value_type``).
    """

    # Path segments locating the target field in the stream template.
    field_path: List["InterpolatedString"]
    # Value to write: an InterpolatedString or a raw string.
    value: Union["InterpolatedString", str]
    # Optional type associated with the value; None when no type is given.
    value_type: Optional[Type[Any]]
    # Init-only pseudo-field: accepted by the constructor but not stored on the instance.
    parameters: InitVar[Mapping[str, Any]]
|
25
|
+
|
26
|
+
|
27
|
+
@dataclass(frozen=True)
class ResolvedComponentMappingDefinition:
    """Immutable, resolved form of a component mapping.

    Same shape as ``ComponentMappingDefinition``, except that ``value`` is always
    annotated as an ``InterpolatedString`` rather than possibly a raw string.
    """

    # Path segments locating the target field in the stream template.
    field_path: List["InterpolatedString"]
    # Value to write, as an InterpolatedString.
    value: "InterpolatedString"
    # Optional type associated with the value; None when no type is given.
    value_type: Optional[Type[Any]]
    # Init-only pseudo-field: accepted by the constructor but not stored on the instance.
    parameters: InitVar[Mapping[str, Any]]
|
37
|
+
|
38
|
+
|
39
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass
class ComponentsResolver(ABC):
    """
    Interface for objects that resolve the components of a stream template.
    """

    @abstractmethod
    def resolve_components(
        self, stream_template_config: Dict[str, Any]
    ) -> Iterable[Dict[str, Any]]:
        """
        Populate a stream template configuration with resolved values.

        :param stream_template_config: The stream template with placeholders for components.
        :return: An iterable of resolved stream configs with populated values.
        """
|
@@ -0,0 +1,106 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from copy import deepcopy
|
6
|
+
from dataclasses import InitVar, dataclass, field
|
7
|
+
from typing import Any, Dict, Iterable, List, Mapping
|
8
|
+
|
9
|
+
import dpath
|
10
|
+
from typing_extensions import deprecated
|
11
|
+
|
12
|
+
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
13
|
+
from airbyte_cdk.sources.declarative.resolvers.components_resolver import (
|
14
|
+
ComponentMappingDefinition,
|
15
|
+
ComponentsResolver,
|
16
|
+
ResolvedComponentMappingDefinition,
|
17
|
+
)
|
18
|
+
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
19
|
+
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
20
|
+
from airbyte_cdk.sources.types import Config
|
21
|
+
|
22
|
+
|
23
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass
class HttpComponentsResolver(ComponentsResolver):
    """
    Components resolver that fills a stream template from records read through a retriever.

    Attributes:
        retriever (Retriever): Source of the records used to populate the template.
        config (Config): Configuration passed to every interpolation.
        components_mapping (List[ComponentMappingDefinition]): Mappings to resolve.
        parameters (InitVar[Mapping[str, Any]]): Init-only parameters used when building interpolated strings.
    """

    retriever: Retriever
    config: Config
    components_mapping: List[ComponentMappingDefinition]
    parameters: InitVar[Mapping[str, Any]]
    _resolved_components: List[ResolvedComponentMappingDefinition] = field(
        init=False, repr=False, default_factory=list
    )

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """
        Convert every entry of ``components_mapping`` into a resolved definition.

        Args:
            parameters (Mapping[str, Any]): Parameters for interpolation.

        Raises:
            ValueError: If a mapping's value is neither a string nor an InterpolatedString.
        """
        for mapping in self.components_mapping:
            raw_value = mapping.value
            if not isinstance(raw_value, (str, InterpolatedString)):
                raise ValueError(
                    f"Expected a string or InterpolatedString for value in mapping: {mapping}"
                )

            # Plain strings are wrapped; existing InterpolatedString objects are reused as-is.
            if isinstance(raw_value, str):
                value_template = InterpolatedString.create(raw_value, parameters=parameters)
            else:
                value_template = raw_value

            path_templates = [
                InterpolatedString.create(segment, parameters=parameters)
                for segment in mapping.field_path
            ]

            resolved = ResolvedComponentMappingDefinition(
                field_path=path_templates,
                value=value_template,
                value_type=mapping.value_type,
                parameters=parameters,
            )
            self._resolved_components.append(resolved)

    def resolve_components(
        self, stream_template_config: Dict[str, Any]
    ) -> Iterable[Dict[str, Any]]:
        """
        Produce one populated copy of the stream template per record read from the retriever.

        Args:
            stream_template_config (Dict[str, Any]): Stream template to populate.

        Yields:
            Dict[str, Any]: A deep copy of the template with every resolved mapping applied.
        """
        for components_values in self.retriever.read_records({}):
            # Each record gets its own copy so the caller's template is never mutated.
            updated_config = deepcopy(stream_template_config)
            interpolation_context: Dict[str, Any] = {
                "stream_template_config": stream_template_config,
                "components_values": components_values,
            }

            for resolved in self._resolved_components:
                if resolved.value_type:
                    valid_types = (resolved.value_type,)
                else:
                    valid_types = None

                value = resolved.value.eval(
                    self.config, valid_types=valid_types, **interpolation_context
                )
                target_path = [
                    segment.eval(self.config, **interpolation_context)
                    for segment in resolved.field_path
                ]
                dpath.set(updated_config, target_path, value)

            yield updated_config
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from abc import abstractmethod
|
6
6
|
from datetime import datetime, timedelta, timezone
|
7
|
-
from typing import Any, Callable, List, MutableMapping, Optional, Tuple
|
7
|
+
from typing import Any, Callable, List, MutableMapping, Optional, Tuple, Union
|
8
8
|
|
9
9
|
import pendulum
|
10
10
|
from pendulum.datetime import DateTime
|
@@ -202,7 +202,7 @@ class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateC
|
|
202
202
|
self._input_datetime_formats += [self._datetime_format]
|
203
203
|
self._parser = DatetimeParser()
|
204
204
|
|
205
|
-
def output_format(self, timestamp: datetime) -> str:
|
205
|
+
def output_format(self, timestamp: datetime) -> Union[str, int]:
    """Format ``timestamp`` with this converter's parser and its configured datetime format."""
    formatter = self._parser
    return formatter.format(timestamp, self._datetime_format)
|
207
207
|
|
208
208
|
def parse_timestamp(self, timestamp: str) -> datetime:
|
@@ -0,0 +1,108 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: airbyte-cdk
|
3
|
+
Version: 6.9.1rc2
|
4
|
+
Summary: A framework for writing Airbyte Connectors.
|
5
|
+
Home-page: https://airbyte.com
|
6
|
+
License: MIT
|
7
|
+
Keywords: airbyte,connector-development-kit,cdk
|
8
|
+
Author: Airbyte
|
9
|
+
Author-email: contact@airbyte.io
|
10
|
+
Requires-Python: >=3.10,<3.13
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
12
|
+
Classifier: Intended Audience :: Developers
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
18
|
+
Classifier: Topic :: Scientific/Engineering
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
20
|
+
Provides-Extra: file-based
|
21
|
+
Provides-Extra: sql
|
22
|
+
Provides-Extra: vector-db-based
|
23
|
+
Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
|
24
|
+
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
25
|
+
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
|
26
|
+
Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
|
27
|
+
Requires-Dist: backoff
|
28
|
+
Requires-Dist: cachetools
|
29
|
+
Requires-Dist: cohere (==4.21) ; extra == "vector-db-based"
|
30
|
+
Requires-Dist: cryptography (>=42.0.5,<44.0.0)
|
31
|
+
Requires-Dist: dpath (>=2.1.6,<3.0.0)
|
32
|
+
Requires-Dist: dunamai (>=1.22.0,<2.0.0)
|
33
|
+
Requires-Dist: fastavro (>=1.8.0,<1.9.0) ; extra == "file-based"
|
34
|
+
Requires-Dist: genson (==1.3.0)
|
35
|
+
Requires-Dist: isodate (>=0.6.1,<0.7.0)
|
36
|
+
Requires-Dist: jsonref (>=0.2,<0.3)
|
37
|
+
Requires-Dist: jsonschema (>=4.17.3,<4.18.0)
|
38
|
+
Requires-Dist: langchain (==0.1.16) ; extra == "vector-db-based"
|
39
|
+
Requires-Dist: langchain_core (==0.1.42)
|
40
|
+
Requires-Dist: markdown ; extra == "file-based"
|
41
|
+
Requires-Dist: nltk (==3.9.1)
|
42
|
+
Requires-Dist: numpy (<2)
|
43
|
+
Requires-Dist: openai[embeddings] (==0.27.9) ; extra == "vector-db-based"
|
44
|
+
Requires-Dist: orjson (>=3.10.7,<4.0.0)
|
45
|
+
Requires-Dist: pandas (==2.2.2)
|
46
|
+
Requires-Dist: pdf2image (==1.16.3) ; extra == "file-based"
|
47
|
+
Requires-Dist: pdfminer.six (==20221105) ; extra == "file-based"
|
48
|
+
Requires-Dist: pendulum (<3.0.0)
|
49
|
+
Requires-Dist: psutil (==6.1.0)
|
50
|
+
Requires-Dist: pyarrow (>=15.0.0,<15.1.0) ; extra == "file-based"
|
51
|
+
Requires-Dist: pydantic (>=2.7,<3.0)
|
52
|
+
Requires-Dist: pyjwt (>=2.8.0,<3.0.0)
|
53
|
+
Requires-Dist: pyrate-limiter (>=3.1.0,<3.2.0)
|
54
|
+
Requires-Dist: pytesseract (==0.3.10) ; extra == "file-based"
|
55
|
+
Requires-Dist: python-calamine (==0.2.3) ; extra == "file-based"
|
56
|
+
Requires-Dist: python-dateutil
|
57
|
+
Requires-Dist: python-snappy (==0.7.3) ; extra == "file-based"
|
58
|
+
Requires-Dist: python-ulid (>=3.0.0,<4.0.0)
|
59
|
+
Requires-Dist: pytz (==2024.1)
|
60
|
+
Requires-Dist: rapidfuzz (>=3.10.1,<4.0.0)
|
61
|
+
Requires-Dist: requests
|
62
|
+
Requires-Dist: requests_cache
|
63
|
+
Requires-Dist: serpyco-rs (>=1.10.2,<2.0.0)
|
64
|
+
Requires-Dist: sqlalchemy (>=2.0,<3.0,!=2.0.36) ; extra == "sql"
|
65
|
+
Requires-Dist: tiktoken (==0.8.0) ; extra == "vector-db-based"
|
66
|
+
Requires-Dist: unstructured.pytesseract (>=0.3.12) ; extra == "file-based"
|
67
|
+
Requires-Dist: unstructured[docx,pptx] (==0.10.27) ; extra == "file-based"
|
68
|
+
Requires-Dist: wcmatch (==10.0)
|
69
|
+
Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
|
70
|
+
Project-URL: Documentation, https://docs.airbyte.io/
|
71
|
+
Project-URL: Repository, https://github.com/airbytehq/airbyte-python-cdk
|
72
|
+
Description-Content-Type: text/markdown
|
73
|
+
|
74
|
+
# Airbyte Python CDK and Low-Code CDK
|
75
|
+
|
76
|
+
Airbyte Python CDK is a framework for building Airbyte API Source Connectors. It provides a set of
|
77
|
+
classes and helpers that make it easy to build a connector against an HTTP API (REST, GraphQL, etc),
|
78
|
+
or a generic Python source connector.
|
79
|
+
|
80
|
+
## Building Connectors with the CDK
|
81
|
+
|
82
|
+
If you're looking to build a connector, we highly recommend that you first
|
83
|
+
[start with the Connector Builder](https://docs.airbyte.com/connector-development/connector-builder-ui/overview).
|
84
|
+
It should be enough for 90% of the connectors out there. For more flexible and complex connectors, use the
|
85
|
+
[low-code CDK and `SourceDeclarativeManifest`](https://docs.airbyte.com/connector-development/config-based/low-code-cdk-overview).
|
86
|
+
|
87
|
+
For more information on building connectors, please see the [Connector Development](https://docs.airbyte.com/connector-development/) guide on [docs.airbyte.com](https://docs.airbyte.com).
|
88
|
+
|
89
|
+
## Python CDK Overview
|
90
|
+
|
91
|
+
The Airbyte CDK code lives in the `airbyte_cdk` directory. Here's a high-level overview of what's inside:
|
92
|
+
|
93
|
+
- `airbyte_cdk/connector_builder`. Internal wrapper that helps the Connector Builder platform run a declarative manifest (low-code connector). You should not use this code directly. If you need to run a `SourceDeclarativeManifest`, take a look at [`source-declarative-manifest`](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-declarative-manifest) connector implementation instead.
|
94
|
+
- `airbyte_cdk/cli/source_declarative_manifest`. This module defines the `source-declarative-manifest` (aka "SDM") connector execution logic and associated CLI.
|
95
|
+
- `airbyte_cdk/destinations`. Basic Destination connector support! If you're building a Destination connector in Python, try that. Some of our vector DB destinations like `destination-pinecone` are using that code.
|
96
|
+
- `airbyte_cdk/models` exposes `airbyte_protocol.models` as part of the `airbyte_cdk` package.
|
97
|
+
- `airbyte_cdk/sources/concurrent_source` is the Concurrent CDK implementation. It supports reading data from streams concurrently per slice / partition, useful for connectors with high throughput and high number of records.
|
98
|
+
- `airbyte_cdk/sources/declarative` is the low-code CDK. It works on top of Airbyte Python CDK, but provides a declarative manifest language to define streams, operations, etc. This makes it easier to build connectors without writing Python code.
|
99
|
+
- `airbyte_cdk/sources/file_based` is the CDK for file-based sources. Examples include S3, Azure, GCS, etc.
|
100
|
+
|
101
|
+
## Contributing
|
102
|
+
|
103
|
+
For instructions on how to contribute, please see our [Contributing Guide](docs/CONTRIBUTING.md).
|
104
|
+
|
105
|
+
## Release Management
|
106
|
+
|
107
|
+
Please see the [Release Management](docs/RELEASES.md) guide for information on how to perform releases and pre-releases.
|
108
|
+
|
@@ -62,11 +62,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
|
|
62
62
|
airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
|
63
63
|
airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
|
64
64
|
airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
|
65
|
-
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
|
65
|
+
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=9OXOFzG5PBL_MHEJg4ETE0TXfXeuhvv-at38AN3wWEM,23323
|
66
66
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
|
67
|
-
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=
|
67
|
+
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=eqzLFjbowFLQDqjz8T29qVt835yqhfaeN0PouI7g7S8,2576
|
68
68
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
69
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
69
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=kZkGoasVzufDE2BkFo_7DVO6xHL9kueNBjddtl-7kaU,124134
|
70
70
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
71
71
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
|
72
72
|
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=hNlhaB5FjNC6IfJyglj5ZJWkYD2nEAukMDmzRz5PC6o,671
|
@@ -84,7 +84,7 @@ airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=OJ9xmhNWNwwzx
|
|
84
84
|
airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=AkXPOWyp741cpYLBl9AbmVmOQmQ2BzZ2XjgsMEB6gGc,6583
|
85
85
|
airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
|
86
86
|
airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=CmZl9ddwMZFo8L7mEl_OFHN3ahIFRSYrJjMbR_cJaFA,1006
|
87
|
-
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=
|
87
|
+
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=UC74gxN4ZbNE6IP5s-q_l7DMrZ79WFdVhGfZ9eGvKD0,22033
|
88
88
|
airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
|
89
89
|
airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=3_EEZop94bMitZaJd2PF5Q2Xt9v94tYg7p7YJz8tAFc,15869
|
90
90
|
airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=hElcYijbOHjdLKOMA7W7aizEbf22r7OSApXALP875uI,15749
|
@@ -99,18 +99,18 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZ
|
|
99
99
|
airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
|
100
100
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=gcihTEnfD_6sUivxOomoY5r7VMAGqVVnK_HEsid9Y5k,6605
|
101
101
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=jf24RK-1fBhTYDpcGEakZtGNNJfG5NS8CCF5bEgNmRo,3977
|
102
|
-
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256
|
102
|
+
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=-v3GvuTVHwUonrfUwDj3wYKaZjX6hTyKmMBRgEzj-j0,15201
|
103
103
|
airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
104
104
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
|
105
105
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
106
106
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
|
107
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
107
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=6iAzpGmUrhwEUQcCL5bW-FXuLXPMeFqs_GR4B1rS3ZE,88511
|
108
108
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
109
109
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
110
|
-
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=
|
110
|
+
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=0jfi-ogL-rOVORTIYnu64wNfh1L8fYaLVDWzJ2zGdi8,8799
|
111
111
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
112
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
113
|
-
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=
|
112
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=lS2oKfkND54u66hocc2BycS-AIYIbkn4npq6CFRNokc,99573
|
113
|
+
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=1NjaZoGAIefvWwj6wx-LOKIXXWS-UnBlZFnuR7y6uYA,745
|
114
114
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
115
115
|
airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
|
116
116
|
airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
|
@@ -152,6 +152,9 @@ airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_
|
|
152
152
|
airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py,sha256=8YRiDzjYvqJ-aMmKFcjqzv_-e8OZ5QG_TbpZ-nuCu6s,2590
|
153
153
|
airbyte_cdk/sources/declarative/requesters/request_path.py,sha256=S3MeFvcaQrMbOkSY2W2VbXLNomqt_3eXqVd9ZhgNwUs,299
|
154
154
|
airbyte_cdk/sources/declarative/requesters/requester.py,sha256=iVVpXQ4KEd9OyZNwmOofMvx7_06i8ZRxGo3aNTrEQLM,4946
|
155
|
+
airbyte_cdk/sources/declarative/resolvers/__init__.py,sha256=8cL--3WCJvtKVIIAqKWoIEhmmf3FemB7PlsnO0Mx_cY,688
|
156
|
+
airbyte_cdk/sources/declarative/resolvers/components_resolver.py,sha256=KPjKc0yb9artL4ZkeqN8RmEykHH6FJgqXD7fCEnh1X0,1936
|
157
|
+
airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=ZA2vrHQKfXNMcH3x1iuyFOTGNzYDhUFT2qcaiOzSK0A,4271
|
155
158
|
airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=FVQpUGVwp2Gibk4gp07VmLKX5AafUlsZWFSrDpUDuJM,443
|
156
159
|
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=WDFnjrXLz3-YEjFhmlMkWAn9AJvnZ0mk9FyC8DAhEYk,4976
|
157
160
|
airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
|
@@ -258,7 +261,7 @@ airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py,sha256=nbdkkH
|
|
258
261
|
airbyte_cdk/sources/streams/concurrent/partitions/types.py,sha256=frPVvHtY7vLxpGEbMQzNvF1Y52ZVyct9f1DDhGoRjwY,1166
|
259
262
|
airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
260
263
|
airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py,sha256=CXHUMOhndu-LOKgsnNTItv5s5qrKpmJDeHOzlH1nBy8,6819
|
261
|
-
airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=
|
264
|
+
airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=U8G6qkGz0-4fqp_lFYO_zvSIXUafQGugIjaTfqzYrlo,7542
|
262
265
|
airbyte_cdk/sources/streams/core.py,sha256=z4Oi5qmJPjs-RdMd5tPWHvHqIjkcxhkVKTvIpfAs2uA,32211
|
263
266
|
airbyte_cdk/sources/streams/http/__init__.py,sha256=NXaNlkzZMkh5kS8S5ujEaKEE6855sk6_HljF_GFjKZI,311
|
264
267
|
airbyte_cdk/sources/streams/http/availability_strategy.py,sha256=sovoGFThZr-doMN9vJvTuJBrvkwQVIO0qTQO64pGZPY,2428
|
@@ -313,7 +316,6 @@ airbyte_cdk/test/state_builder.py,sha256=kLPql9lNzUJaBg5YYRLJlY_Hy5JLHJDVyKPMZMo
|
|
313
316
|
airbyte_cdk/test/utils/__init__.py,sha256=Hu-1XT2KDoYjDF7-_ziDwv5bY3PueGjANOCbzeOegDg,57
|
314
317
|
airbyte_cdk/test/utils/data.py,sha256=CkCR1_-rujWNmPXFR1IXTMwx1rAl06wAyIKWpDcN02w,820
|
315
318
|
airbyte_cdk/test/utils/http_mocking.py,sha256=F2hpm2q4ijojQN5u2XtgTAp8aNgHgJ64eZNkZ9BW0ig,550
|
316
|
-
airbyte_cdk/test/utils/manifest_only_fixtures.py,sha256=2O9QlAoqeSsGm6TSZzwS434-48zHhXeNHQ-sLjxva4U,2666
|
317
319
|
airbyte_cdk/test/utils/reading.py,sha256=SOTDYlps6Te9KumfTJ3vVDSm9EUXhvKtE8aD7gvdPlg,965
|
318
320
|
airbyte_cdk/utils/__init__.py,sha256=gHjOCoUkolS_nKtgFSudXUY-ObK2vUo6aNQLvW7o8q8,347
|
319
321
|
airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=wEtRnl5KRhN6eLJwrDrC4FJjyqt_4vkA1F65mdl8c24,3142
|
@@ -331,8 +333,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
|
|
331
333
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
332
334
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
333
335
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
334
|
-
airbyte_cdk-6.9.
|
335
|
-
airbyte_cdk-6.9.
|
336
|
-
airbyte_cdk-6.9.
|
337
|
-
airbyte_cdk-6.9.
|
338
|
-
airbyte_cdk-6.9.
|
336
|
+
airbyte_cdk-6.9.1rc2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
337
|
+
airbyte_cdk-6.9.1rc2.dist-info/METADATA,sha256=lJd2s0FZqap-lLBzxGM-O1w2DOnWPmC6bzcAsYg6apw,5952
|
338
|
+
airbyte_cdk-6.9.1rc2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
339
|
+
airbyte_cdk-6.9.1rc2.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
340
|
+
airbyte_cdk-6.9.1rc2.dist-info/RECORD,,
|