airbyte-cdk 6.9.1.dev2__py3-none-any.whl → 6.9.1rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20) hide show
  1. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +25 -31
  2. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -4
  3. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +100 -2
  4. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +1 -1
  5. airbyte_cdk/sources/declarative/manifest_declarative_source.py +53 -2
  6. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +95 -2
  7. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +6 -0
  8. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +95 -21
  9. airbyte_cdk/sources/declarative/partition_routers/__init__.py +2 -1
  10. airbyte_cdk/sources/declarative/resolvers/__init__.py +13 -0
  11. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  12. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +106 -0
  13. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +2 -2
  14. airbyte_cdk-6.9.1rc2.dist-info/METADATA +108 -0
  15. {airbyte_cdk-6.9.1.dev2.dist-info → airbyte_cdk-6.9.1rc2.dist-info}/RECORD +18 -16
  16. airbyte_cdk/test/utils/manifest_only_fixtures.py +0 -79
  17. airbyte_cdk-6.9.1.dev2.dist-info/METADATA +0 -306
  18. {airbyte_cdk-6.9.1.dev2.dist-info → airbyte_cdk-6.9.1rc2.dist-info}/LICENSE.txt +0 -0
  19. {airbyte_cdk-6.9.1.dev2.dist-info → airbyte_cdk-6.9.1rc2.dist-info}/WHEEL +0 -0
  20. {airbyte_cdk-6.9.1.dev2.dist-info → airbyte_cdk-6.9.1rc2.dist-info}/entry_points.txt +0 -0
@@ -119,6 +119,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
119
119
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
120
120
  CheckStream as CheckStreamModel,
121
121
  )
122
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
123
+ ComponentMappingDefinition as ComponentMappingDefinitionModel,
124
+ )
122
125
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
123
126
  CompositeErrorHandler as CompositeErrorHandlerModel,
124
127
  )
@@ -191,6 +194,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
191
194
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
192
195
  GzipJsonDecoder as GzipJsonDecoderModel,
193
196
  )
197
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
198
+ HttpComponentsResolver as HttpComponentsResolverModel,
199
+ )
194
200
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
195
201
  HttpRequester as HttpRequesterModel,
196
202
  )
@@ -298,6 +304,7 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
298
304
  from airbyte_cdk.sources.declarative.partition_routers import (
299
305
  CartesianProductStreamSlicer,
300
306
  ListPartitionRouter,
307
+ PartitionRouter,
301
308
  SinglePartitionRouter,
302
309
  SubstreamPartitionRouter,
303
310
  )
@@ -338,6 +345,10 @@ from airbyte_cdk.sources.declarative.requesters.request_options import (
338
345
  )
339
346
  from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
340
347
  from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
348
+ from airbyte_cdk.sources.declarative.resolvers import (
349
+ ComponentMappingDefinition,
350
+ HttpComponentsResolver,
351
+ )
341
352
  from airbyte_cdk.sources.declarative.retrievers import (
342
353
  AsyncRetriever,
343
354
  SimpleRetriever,
@@ -467,6 +478,8 @@ class ModelToComponentFactory:
467
478
  WaitTimeFromHeaderModel: self.create_wait_time_from_header,
468
479
  WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
469
480
  AsyncRetrieverModel: self.create_async_retriever,
481
+ HttpComponentsResolverModel: self.create_http_components_resolver,
482
+ ComponentMappingDefinitionModel: self.create_components_mapping_definition,
470
483
  }
471
484
 
472
485
  # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -1281,19 +1294,20 @@ class ModelToComponentFactory:
1281
1294
  parameters=model.parameters or {},
1282
1295
  )
1283
1296
 
1284
- def _merge_stream_slicers(
1285
- self, model: DeclarativeStreamModel, config: Config
1286
- ) -> Optional[StreamSlicer]:
1287
- stream_slicer = None
1297
+ def _build_stream_slicer_from_partition_router(
1298
+ self,
1299
+ model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
1300
+ config: Config,
1301
+ ) -> Optional[PartitionRouter]:
1288
1302
  if (
1289
- hasattr(model.retriever, "partition_router")
1290
- and isinstance(model.retriever, SimpleRetrieverModel)
1291
- and model.retriever.partition_router
1303
+ hasattr(model, "partition_router")
1304
+ and isinstance(model, SimpleRetrieverModel)
1305
+ and model.partition_router
1292
1306
  ):
1293
- stream_slicer_model = model.retriever.partition_router
1307
+ stream_slicer_model = model.partition_router
1294
1308
 
1295
1309
  if isinstance(stream_slicer_model, list):
1296
- stream_slicer = CartesianProductStreamSlicer(
1310
+ return CartesianProductStreamSlicer(
1297
1311
  [
1298
1312
  self._create_component_from_model(model=slicer, config=config)
1299
1313
  for slicer in stream_slicer_model
@@ -1301,9 +1315,24 @@ class ModelToComponentFactory:
1301
1315
  parameters={},
1302
1316
  )
1303
1317
  else:
1304
- stream_slicer = self._create_component_from_model(
1305
- model=stream_slicer_model, config=config
1306
- )
1318
+ return self._create_component_from_model(model=stream_slicer_model, config=config) # type: ignore[no-any-return]
1319
+ # A PartitionRouter is created here because stream_slicer_model is model.partition_router
1320
+ return None
1321
+
1322
+ def _build_resumable_cursor_from_paginator(
1323
+ self,
1324
+ model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
1325
+ stream_slicer: Optional[StreamSlicer],
1326
+ ) -> Optional[StreamSlicer]:
1327
+ if hasattr(model, "paginator") and model.paginator and not stream_slicer:
1328
+ # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
1329
+ return ResumableFullRefreshCursor(parameters={})
1330
+ return None
1331
+
1332
+ def _merge_stream_slicers(
1333
+ self, model: DeclarativeStreamModel, config: Config
1334
+ ) -> Optional[StreamSlicer]:
1335
+ stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
1307
1336
 
1308
1337
  if model.incremental_sync and stream_slicer:
1309
1338
  incremental_sync_model = model.incremental_sync
@@ -1346,15 +1375,7 @@ class ModelToComponentFactory:
1346
1375
  ),
1347
1376
  partition_router=stream_slicer,
1348
1377
  )
1349
- elif (
1350
- hasattr(model.retriever, "paginator")
1351
- and model.retriever.paginator
1352
- and not stream_slicer
1353
- ):
1354
- # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
1355
- return ResumableFullRefreshCursor(parameters={})
1356
- else:
1357
- return None
1378
+ return self._build_resumable_cursor_from_paginator(model.retriever, stream_slicer)
1358
1379
 
1359
1380
  def create_default_error_handler(
1360
1381
  self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
@@ -2218,3 +2239,56 @@ class ModelToComponentFactory:
2218
2239
 
2219
2240
  def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level:
2220
2241
  return Level.DEBUG if emit_connector_builder_messages else Level.INFO
2242
+
2243
+ @staticmethod
2244
+ def create_components_mapping_definition(
2245
+ model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
2246
+ ) -> ComponentMappingDefinition:
2247
+ interpolated_value = InterpolatedString.create(
2248
+ model.value, parameters=model.parameters or {}
2249
+ )
2250
+ field_path = [
2251
+ InterpolatedString.create(path, parameters=model.parameters or {})
2252
+ for path in model.field_path
2253
+ ]
2254
+ return ComponentMappingDefinition(
2255
+ field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString
2256
+ value=interpolated_value,
2257
+ value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
2258
+ parameters=model.parameters or {},
2259
+ )
2260
+
2261
+ def create_http_components_resolver(
2262
+ self, model: HttpComponentsResolverModel, config: Config
2263
+ ) -> Any:
2264
+ stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
2265
+ combined_slicers = self._build_resumable_cursor_from_paginator(
2266
+ model.retriever, stream_slicer
2267
+ )
2268
+
2269
+ retriever = self._create_component_from_model(
2270
+ model=model.retriever,
2271
+ config=config,
2272
+ name="",
2273
+ primary_key=None,
2274
+ stream_slicer=combined_slicers,
2275
+ transformations=[],
2276
+ )
2277
+
2278
+ components_mapping = [
2279
+ self._create_component_from_model(
2280
+ model=components_mapping_definition_model,
2281
+ value_type=ModelToComponentFactory._json_schema_type_name_to_type(
2282
+ components_mapping_definition_model.value_type
2283
+ ),
2284
+ config=config,
2285
+ )
2286
+ for components_mapping_definition_model in model.components_mapping
2287
+ ]
2288
+
2289
+ return HttpComponentsResolver(
2290
+ retriever=retriever,
2291
+ config=config,
2292
+ components_mapping=components_mapping,
2293
+ parameters=model.parameters or {},
2294
+ )
@@ -6,5 +6,6 @@ from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_
6
6
  from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import ListPartitionRouter
7
7
  from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import SinglePartitionRouter
8
8
  from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import SubstreamPartitionRouter
9
+ from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
9
10
 
10
- __all__ = ["CartesianProductStreamSlicer", "ListPartitionRouter", "SinglePartitionRouter", "SubstreamPartitionRouter"]
11
+ __all__ = ["CartesianProductStreamSlicer", "ListPartitionRouter", "SinglePartitionRouter", "SubstreamPartitionRouter", "PartitionRouter"]
@@ -0,0 +1,13 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from airbyte_cdk.sources.declarative.resolvers.components_resolver import ComponentsResolver, ComponentMappingDefinition, ResolvedComponentMappingDefinition
6
+ from airbyte_cdk.sources.declarative.resolvers.http_components_resolver import HttpComponentsResolver
7
+ from airbyte_cdk.sources.declarative.models import HttpComponentsResolver as HttpComponentsResolverModel
8
+
9
+ COMPONENTS_RESOLVER_TYPE_MAPPING = {
10
+ "HttpComponentsResolver": HttpComponentsResolverModel
11
+ }
12
+
13
+ __all__ = ["ComponentsResolver", "HttpComponentsResolver", "ComponentMappingDefinition", "ResolvedComponentMappingDefinition", "COMPONENTS_RESOLVER_TYPE_MAPPING"]
@@ -0,0 +1,55 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from abc import ABC, abstractmethod
6
+ from dataclasses import InitVar, dataclass
7
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Type, Union
8
+
9
+ from typing_extensions import deprecated
10
+
11
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
12
+ from airbyte_cdk.sources.source import ExperimentalClassWarning
13
+
14
+
15
+ @dataclass(frozen=True)
16
+ class ComponentMappingDefinition:
17
+ """Defines the configuration for mapping a component in a stream. This class specifies
18
+ which field in the stream template should be updated with the value, supporting dynamic interpolation
19
+ and type enforcement."""
20
+
21
+ field_path: List["InterpolatedString"]
22
+ value: Union["InterpolatedString", str]
23
+ value_type: Optional[Type[Any]]
24
+ parameters: InitVar[Mapping[str, Any]]
25
+
26
+
27
+ @dataclass(frozen=True)
28
+ class ResolvedComponentMappingDefinition:
29
+ """Defines resolved configuration for mapping a component in a stream. This class specifies
30
+ which field in the stream template should be updated with the value, supporting dynamic interpolation
31
+ and type enforcement."""
32
+
33
+ field_path: List["InterpolatedString"]
34
+ value: "InterpolatedString"
35
+ value_type: Optional[Type[Any]]
36
+ parameters: InitVar[Mapping[str, Any]]
37
+
38
+
39
+ @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
40
+ @dataclass
41
+ class ComponentsResolver(ABC):
42
+ """
43
+ Abstract base class for resolving components in a stream template.
44
+ """
45
+
46
+ @abstractmethod
47
+ def resolve_components(
48
+ self, stream_template_config: Dict[str, Any]
49
+ ) -> Iterable[Dict[str, Any]]:
50
+ """
51
+ Maps and populates values into a stream template configuration.
52
+ :param stream_template_config: The stream template with placeholders for components.
53
+ :yields: The resolved stream config with populated values.
54
+ """
55
+ pass
@@ -0,0 +1,106 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from copy import deepcopy
6
+ from dataclasses import InitVar, dataclass, field
7
+ from typing import Any, Dict, Iterable, List, Mapping
8
+
9
+ import dpath
10
+ from typing_extensions import deprecated
11
+
12
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
13
+ from airbyte_cdk.sources.declarative.resolvers.components_resolver import (
14
+ ComponentMappingDefinition,
15
+ ComponentsResolver,
16
+ ResolvedComponentMappingDefinition,
17
+ )
18
+ from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
19
+ from airbyte_cdk.sources.source import ExperimentalClassWarning
20
+ from airbyte_cdk.sources.types import Config
21
+
22
+
23
+ @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
24
+ @dataclass
25
+ class HttpComponentsResolver(ComponentsResolver):
26
+ """
27
+ Resolves and populates stream templates with components fetched via an HTTP retriever.
28
+
29
+ Attributes:
30
+ retriever (Retriever): The retriever used to fetch data from an API.
31
+ config (Config): Configuration object for the resolver.
32
+ components_mapping (List[ComponentMappingDefinition]): List of mappings to resolve.
33
+ parameters (InitVar[Mapping[str, Any]]): Additional parameters for interpolation.
34
+ """
35
+
36
+ retriever: Retriever
37
+ config: Config
38
+ components_mapping: List[ComponentMappingDefinition]
39
+ parameters: InitVar[Mapping[str, Any]]
40
+ _resolved_components: List[ResolvedComponentMappingDefinition] = field(
41
+ init=False, repr=False, default_factory=list
42
+ )
43
+
44
+ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
45
+ """
46
+ Initializes and parses component mappings, converting them to resolved definitions.
47
+
48
+ Args:
49
+ parameters (Mapping[str, Any]): Parameters for interpolation.
50
+ """
51
+ for component_mapping in self.components_mapping:
52
+ if isinstance(component_mapping.value, (str, InterpolatedString)):
53
+ interpolated_value = (
54
+ InterpolatedString.create(component_mapping.value, parameters=parameters)
55
+ if isinstance(component_mapping.value, str)
56
+ else component_mapping.value
57
+ )
58
+
59
+ field_path = [
60
+ InterpolatedString.create(path, parameters=parameters)
61
+ for path in component_mapping.field_path
62
+ ]
63
+
64
+ self._resolved_components.append(
65
+ ResolvedComponentMappingDefinition(
66
+ field_path=field_path,
67
+ value=interpolated_value,
68
+ value_type=component_mapping.value_type,
69
+ parameters=parameters,
70
+ )
71
+ )
72
+ else:
73
+ raise ValueError(
74
+ f"Expected a string or InterpolatedString for value in mapping: {component_mapping}"
75
+ )
76
+
77
+ def resolve_components(
78
+ self, stream_template_config: Dict[str, Any]
79
+ ) -> Iterable[Dict[str, Any]]:
80
+ """
81
+ Resolves components in the stream template configuration by populating values.
82
+
83
+ Args:
84
+ stream_template_config (Dict[str, Any]): Stream template to populate.
85
+
86
+ Yields:
87
+ Dict[str, Any]: Updated configurations with resolved components.
88
+ """
89
+ kwargs = {"stream_template_config": stream_template_config}
90
+
91
+ for components_values in self.retriever.read_records({}):
92
+ updated_config = deepcopy(stream_template_config)
93
+ kwargs["components_values"] = components_values # type: ignore[assignment] # component_values will always be of type Mapping[str, Any]
94
+
95
+ for resolved_component in self._resolved_components:
96
+ valid_types = (
97
+ (resolved_component.value_type,) if resolved_component.value_type else None
98
+ )
99
+ value = resolved_component.value.eval(
100
+ self.config, valid_types=valid_types, **kwargs
101
+ )
102
+
103
+ path = [path.eval(self.config, **kwargs) for path in resolved_component.field_path]
104
+ dpath.set(updated_config, path, value)
105
+
106
+ yield updated_config
@@ -4,7 +4,7 @@
4
4
 
5
5
  from abc import abstractmethod
6
6
  from datetime import datetime, timedelta, timezone
7
- from typing import Any, Callable, List, MutableMapping, Optional, Tuple
7
+ from typing import Any, Callable, List, MutableMapping, Optional, Tuple, Union
8
8
 
9
9
  import pendulum
10
10
  from pendulum.datetime import DateTime
@@ -202,7 +202,7 @@ class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateC
202
202
  self._input_datetime_formats += [self._datetime_format]
203
203
  self._parser = DatetimeParser()
204
204
 
205
- def output_format(self, timestamp: datetime) -> str:
205
+ def output_format(self, timestamp: datetime) -> Union[str, int]:
206
206
  return self._parser.format(timestamp, self._datetime_format)
207
207
 
208
208
  def parse_timestamp(self, timestamp: str) -> datetime:
@@ -0,0 +1,108 @@
1
+ Metadata-Version: 2.1
2
+ Name: airbyte-cdk
3
+ Version: 6.9.1rc2
4
+ Summary: A framework for writing Airbyte Connectors.
5
+ Home-page: https://airbyte.com
6
+ License: MIT
7
+ Keywords: airbyte,connector-development-kit,cdk
8
+ Author: Airbyte
9
+ Author-email: contact@airbyte.io
10
+ Requires-Python: >=3.10,<3.13
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Provides-Extra: file-based
21
+ Provides-Extra: sql
22
+ Provides-Extra: vector-db-based
23
+ Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
24
+ Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
25
+ Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
26
+ Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
27
+ Requires-Dist: backoff
28
+ Requires-Dist: cachetools
29
+ Requires-Dist: cohere (==4.21) ; extra == "vector-db-based"
30
+ Requires-Dist: cryptography (>=42.0.5,<44.0.0)
31
+ Requires-Dist: dpath (>=2.1.6,<3.0.0)
32
+ Requires-Dist: dunamai (>=1.22.0,<2.0.0)
33
+ Requires-Dist: fastavro (>=1.8.0,<1.9.0) ; extra == "file-based"
34
+ Requires-Dist: genson (==1.3.0)
35
+ Requires-Dist: isodate (>=0.6.1,<0.7.0)
36
+ Requires-Dist: jsonref (>=0.2,<0.3)
37
+ Requires-Dist: jsonschema (>=4.17.3,<4.18.0)
38
+ Requires-Dist: langchain (==0.1.16) ; extra == "vector-db-based"
39
+ Requires-Dist: langchain_core (==0.1.42)
40
+ Requires-Dist: markdown ; extra == "file-based"
41
+ Requires-Dist: nltk (==3.9.1)
42
+ Requires-Dist: numpy (<2)
43
+ Requires-Dist: openai[embeddings] (==0.27.9) ; extra == "vector-db-based"
44
+ Requires-Dist: orjson (>=3.10.7,<4.0.0)
45
+ Requires-Dist: pandas (==2.2.2)
46
+ Requires-Dist: pdf2image (==1.16.3) ; extra == "file-based"
47
+ Requires-Dist: pdfminer.six (==20221105) ; extra == "file-based"
48
+ Requires-Dist: pendulum (<3.0.0)
49
+ Requires-Dist: psutil (==6.1.0)
50
+ Requires-Dist: pyarrow (>=15.0.0,<15.1.0) ; extra == "file-based"
51
+ Requires-Dist: pydantic (>=2.7,<3.0)
52
+ Requires-Dist: pyjwt (>=2.8.0,<3.0.0)
53
+ Requires-Dist: pyrate-limiter (>=3.1.0,<3.2.0)
54
+ Requires-Dist: pytesseract (==0.3.10) ; extra == "file-based"
55
+ Requires-Dist: python-calamine (==0.2.3) ; extra == "file-based"
56
+ Requires-Dist: python-dateutil
57
+ Requires-Dist: python-snappy (==0.7.3) ; extra == "file-based"
58
+ Requires-Dist: python-ulid (>=3.0.0,<4.0.0)
59
+ Requires-Dist: pytz (==2024.1)
60
+ Requires-Dist: rapidfuzz (>=3.10.1,<4.0.0)
61
+ Requires-Dist: requests
62
+ Requires-Dist: requests_cache
63
+ Requires-Dist: serpyco-rs (>=1.10.2,<2.0.0)
64
+ Requires-Dist: sqlalchemy (>=2.0,<3.0,!=2.0.36) ; extra == "sql"
65
+ Requires-Dist: tiktoken (==0.8.0) ; extra == "vector-db-based"
66
+ Requires-Dist: unstructured.pytesseract (>=0.3.12) ; extra == "file-based"
67
+ Requires-Dist: unstructured[docx,pptx] (==0.10.27) ; extra == "file-based"
68
+ Requires-Dist: wcmatch (==10.0)
69
+ Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
70
+ Project-URL: Documentation, https://docs.airbyte.io/
71
+ Project-URL: Repository, https://github.com/airbytehq/airbyte-python-cdk
72
+ Description-Content-Type: text/markdown
73
+
74
+ # Airbyte Python CDK and Low-Code CDK
75
+
76
+ Airbyte Python CDK is a framework for building Airbyte API Source Connectors. It provides a set of
77
+ classes and helpers that make it easy to build a connector against an HTTP API (REST, GraphQL, etc),
78
+ or a generic Python source connector.
79
+
80
+ ## Building Connectors with the CDK
81
+
82
+ If you're looking to build a connector, we highly recommend that you first
83
+ [start with the Connector Builder](https://docs.airbyte.com/connector-development/connector-builder-ui/overview).
84
+ It should be enough for 90% of connectors out there. For more flexible and complex connectors, use the
85
+ [low-code CDK and `SourceDeclarativeManifest`](https://docs.airbyte.com/connector-development/config-based/low-code-cdk-overview).
86
+
87
+ For more information on building connectors, please see the [Connector Development](https://docs.airbyte.com/connector-development/) guide on [docs.airbyte.com](https://docs.airbyte.com).
88
+
89
+ ## Python CDK Overview
90
+
91
+ The Airbyte CDK code lives in the `airbyte_cdk` directory. Here's a high-level overview of what's inside:
92
+
93
+ - `airbyte_cdk/connector_builder`. Internal wrapper that helps the Connector Builder platform run a declarative manifest (low-code connector). You should not use this code directly. If you need to run a `SourceDeclarativeManifest`, take a look at [`source-declarative-manifest`](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-declarative-manifest) connector implementation instead.
94
+ - `airbyte_cdk/cli/source_declarative_manifest`. This module defines the `source-declarative-manifest` (aka "SDM") connector execution logic and associated CLI.
95
+ - `airbyte_cdk/destinations`. Basic Destination connector support! If you're building a Destination connector in Python, try that. Some of our vector DB destinations like `destination-pinecone` are using that code.
96
+ - `airbyte_cdk/models` exposes `airbyte_protocol.models` as part of the `airbyte_cdk` package.
97
+ - `airbyte_cdk/sources/concurrent_source` is the Concurrent CDK implementation. It supports reading data from streams concurrently per slice / partition, useful for connectors with high throughput and high number of records.
98
+ - `airbyte_cdk/sources/declarative` is the low-code CDK. It works on top of Airbyte Python CDK, but provides a declarative manifest language to define streams, operations, etc. This makes it easier to build connectors without writing Python code.
99
+ - `airbyte_cdk/sources/file_based` is the CDK for file-based sources. Examples include S3, Azure, GCS, etc.
100
+
101
+ ## Contributing
102
+
103
+ For instructions on how to contribute, please see our [Contributing Guide](docs/CONTRIBUTING.md).
104
+
105
+ ## Release Management
106
+
107
+ Please see the [Release Management](docs/RELEASES.md) guide for information on how to perform releases and pre-releases.
108
+
@@ -62,11 +62,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
62
62
  airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
63
63
  airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
64
64
  airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
65
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=F2X2ZS9eDfrohNbxG2TgPW-f4YP8IAkMjO1XHtD6NIg,23464
65
+ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=9OXOFzG5PBL_MHEJg4ETE0TXfXeuhvv-at38AN3wWEM,23323
66
66
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
67
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
67
+ airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=eqzLFjbowFLQDqjz8T29qVt835yqhfaeN0PouI7g7S8,2576
68
68
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
69
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=Q6zOEKvgAjMUsu89VmNteMhqdP0XwO9WWfA6-rMVn6A,120545
69
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=kZkGoasVzufDE2BkFo_7DVO6xHL9kueNBjddtl-7kaU,124134
70
70
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
71
71
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
72
72
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=hNlhaB5FjNC6IfJyglj5ZJWkYD2nEAukMDmzRz5PC6o,671
@@ -84,7 +84,7 @@ airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=OJ9xmhNWNwwzx
84
84
  airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=AkXPOWyp741cpYLBl9AbmVmOQmQ2BzZ2XjgsMEB6gGc,6583
85
85
  airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
86
86
  airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=CmZl9ddwMZFo8L7mEl_OFHN3ahIFRSYrJjMbR_cJaFA,1006
87
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=_UzUnSIUsDbRgbFTXgSyZEFb4ws-KdhdQPWO8mFbV7U,22028
87
+ airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=UC74gxN4ZbNE6IP5s-q_l7DMrZ79WFdVhGfZ9eGvKD0,22033
88
88
  airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
89
89
  airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=3_EEZop94bMitZaJd2PF5Q2Xt9v94tYg7p7YJz8tAFc,15869
90
90
  airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=hElcYijbOHjdLKOMA7W7aizEbf22r7OSApXALP875uI,15749
@@ -99,18 +99,18 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZ
99
99
  airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
100
100
  airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=gcihTEnfD_6sUivxOomoY5r7VMAGqVVnK_HEsid9Y5k,6605
101
101
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=jf24RK-1fBhTYDpcGEakZtGNNJfG5NS8CCF5bEgNmRo,3977
102
- airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=FEOmFo2mwdpmO8pH9jnw-sUAnijjuigZWYqH_0Gq9oQ,12919
102
+ airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=-v3GvuTVHwUonrfUwDj3wYKaZjX6hTyKmMBRgEzj-j0,15201
103
103
  airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
104
104
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
105
105
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
106
106
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
107
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=tZcVSMRJ5-5MoX4ejpXxt5E2suqiQ5AwprQWQiWHhho,85060
107
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=6iAzpGmUrhwEUQcCL5bW-FXuLXPMeFqs_GR4B1rS3ZE,88511
108
108
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
109
109
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
110
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=jVZ3ZV5YZrmDNIX5cM2mugXmnbH27zHRcD22_3oatpo,8454
110
+ airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=0jfi-ogL-rOVORTIYnu64wNfh1L8fYaLVDWzJ2zGdi8,8799
111
111
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
112
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=tO7xkv4y5iH6wGkj5As1T5ItUQxlw6cLflHAH48PKwc,96355
113
- airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=8uGos2u7TFTx_EJBdcjdUGn3Eyx6jUuEa1_VB8UP_dI,631
112
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=lS2oKfkND54u66hocc2BycS-AIYIbkn4npq6CFRNokc,99573
113
+ airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=1NjaZoGAIefvWwj6wx-LOKIXXWS-UnBlZFnuR7y6uYA,745
114
114
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
115
115
  airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
116
116
  airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
@@ -152,6 +152,9 @@ airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_
152
152
  airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py,sha256=8YRiDzjYvqJ-aMmKFcjqzv_-e8OZ5QG_TbpZ-nuCu6s,2590
153
153
  airbyte_cdk/sources/declarative/requesters/request_path.py,sha256=S3MeFvcaQrMbOkSY2W2VbXLNomqt_3eXqVd9ZhgNwUs,299
154
154
  airbyte_cdk/sources/declarative/requesters/requester.py,sha256=iVVpXQ4KEd9OyZNwmOofMvx7_06i8ZRxGo3aNTrEQLM,4946
155
+ airbyte_cdk/sources/declarative/resolvers/__init__.py,sha256=8cL--3WCJvtKVIIAqKWoIEhmmf3FemB7PlsnO0Mx_cY,688
156
+ airbyte_cdk/sources/declarative/resolvers/components_resolver.py,sha256=KPjKc0yb9artL4ZkeqN8RmEykHH6FJgqXD7fCEnh1X0,1936
157
+ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=ZA2vrHQKfXNMcH3x1iuyFOTGNzYDhUFT2qcaiOzSK0A,4271
155
158
  airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=FVQpUGVwp2Gibk4gp07VmLKX5AafUlsZWFSrDpUDuJM,443
156
159
  airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=WDFnjrXLz3-YEjFhmlMkWAn9AJvnZ0mk9FyC8DAhEYk,4976
157
160
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
@@ -258,7 +261,7 @@ airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py,sha256=nbdkkH
258
261
  airbyte_cdk/sources/streams/concurrent/partitions/types.py,sha256=frPVvHtY7vLxpGEbMQzNvF1Y52ZVyct9f1DDhGoRjwY,1166
259
262
  airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
260
263
  airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py,sha256=CXHUMOhndu-LOKgsnNTItv5s5qrKpmJDeHOzlH1nBy8,6819
261
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=syjdxEoElIOzqVS5Jrm5FOR70jsbBdttEO_3Iz12Jyo,7523
264
+ airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=U8G6qkGz0-4fqp_lFYO_zvSIXUafQGugIjaTfqzYrlo,7542
262
265
  airbyte_cdk/sources/streams/core.py,sha256=z4Oi5qmJPjs-RdMd5tPWHvHqIjkcxhkVKTvIpfAs2uA,32211
263
266
  airbyte_cdk/sources/streams/http/__init__.py,sha256=NXaNlkzZMkh5kS8S5ujEaKEE6855sk6_HljF_GFjKZI,311
264
267
  airbyte_cdk/sources/streams/http/availability_strategy.py,sha256=sovoGFThZr-doMN9vJvTuJBrvkwQVIO0qTQO64pGZPY,2428
@@ -313,7 +316,6 @@ airbyte_cdk/test/state_builder.py,sha256=kLPql9lNzUJaBg5YYRLJlY_Hy5JLHJDVyKPMZMo
313
316
  airbyte_cdk/test/utils/__init__.py,sha256=Hu-1XT2KDoYjDF7-_ziDwv5bY3PueGjANOCbzeOegDg,57
314
317
  airbyte_cdk/test/utils/data.py,sha256=CkCR1_-rujWNmPXFR1IXTMwx1rAl06wAyIKWpDcN02w,820
315
318
  airbyte_cdk/test/utils/http_mocking.py,sha256=F2hpm2q4ijojQN5u2XtgTAp8aNgHgJ64eZNkZ9BW0ig,550
316
- airbyte_cdk/test/utils/manifest_only_fixtures.py,sha256=DH_v9_LFhAjFCq2Qq-CWoLkaCwpbaEzPvFCawBbiwS4,2672
317
319
  airbyte_cdk/test/utils/reading.py,sha256=SOTDYlps6Te9KumfTJ3vVDSm9EUXhvKtE8aD7gvdPlg,965
318
320
  airbyte_cdk/utils/__init__.py,sha256=gHjOCoUkolS_nKtgFSudXUY-ObK2vUo6aNQLvW7o8q8,347
319
321
  airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=wEtRnl5KRhN6eLJwrDrC4FJjyqt_4vkA1F65mdl8c24,3142
@@ -331,8 +333,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
331
333
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
332
334
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
333
335
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
334
- airbyte_cdk-6.9.1.dev2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
335
- airbyte_cdk-6.9.1.dev2.dist-info/METADATA,sha256=wzYw-VKooxdzu5tA-Qa4bsNjkI4j7c7Lh_yRb178_J8,13485
336
- airbyte_cdk-6.9.1.dev2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
337
- airbyte_cdk-6.9.1.dev2.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
338
- airbyte_cdk-6.9.1.dev2.dist-info/RECORD,,
336
+ airbyte_cdk-6.9.1rc2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
337
+ airbyte_cdk-6.9.1rc2.dist-info/METADATA,sha256=lJd2s0FZqap-lLBzxGM-O1w2DOnWPmC6bzcAsYg6apw,5952
338
+ airbyte_cdk-6.9.1rc2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
339
+ airbyte_cdk-6.9.1rc2.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
340
+ airbyte_cdk-6.9.1rc2.dist-info/RECORD,,