airbyte-cdk 6.8.3rc1__py3-none-any.whl → 6.9.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -119,6 +119,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
119
119
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
120
120
  CheckStream as CheckStreamModel,
121
121
  )
122
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
123
+ ComponentMappingDefinition as ComponentMappingDefinitionModel,
124
+ )
122
125
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
123
126
  CompositeErrorHandler as CompositeErrorHandlerModel,
124
127
  )
@@ -185,12 +188,18 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
185
188
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
186
189
  DpathExtractor as DpathExtractorModel,
187
190
  )
191
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
192
+ DynamicSchemaLoader as DynamicSchemaLoaderModel,
193
+ )
188
194
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
189
195
  ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
190
196
  )
191
197
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
192
198
  GzipJsonDecoder as GzipJsonDecoderModel,
193
199
  )
200
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
201
+ HttpComponentsResolver as HttpComponentsResolverModel,
202
+ )
194
203
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
195
204
  HttpRequester as HttpRequesterModel,
196
205
  )
@@ -272,6 +281,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
272
281
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
273
282
  ResponseToFileExtractor as ResponseToFileExtractorModel,
274
283
  )
284
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
285
+ SchemaTypeIdentifier as SchemaTypeIdentifierModel,
286
+ )
275
287
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
276
288
  SelectiveAuthenticator as SelectiveAuthenticatorModel,
277
289
  )
@@ -285,6 +297,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
285
297
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
286
298
  SubstreamPartitionRouter as SubstreamPartitionRouterModel,
287
299
  )
300
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
301
+ TypesMap as TypesMapModel,
302
+ )
288
303
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
289
304
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
290
305
  WaitTimeFromHeader as WaitTimeFromHeaderModel,
@@ -298,6 +313,7 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
298
313
  from airbyte_cdk.sources.declarative.partition_routers import (
299
314
  CartesianProductStreamSlicer,
300
315
  ListPartitionRouter,
316
+ PartitionRouter,
301
317
  SinglePartitionRouter,
302
318
  SubstreamPartitionRouter,
303
319
  )
@@ -338,6 +354,10 @@ from airbyte_cdk.sources.declarative.requesters.request_options import (
338
354
  )
339
355
  from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
340
356
  from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
357
+ from airbyte_cdk.sources.declarative.resolvers import (
358
+ ComponentMappingDefinition,
359
+ HttpComponentsResolver,
360
+ )
341
361
  from airbyte_cdk.sources.declarative.retrievers import (
342
362
  AsyncRetriever,
343
363
  SimpleRetriever,
@@ -345,8 +365,11 @@ from airbyte_cdk.sources.declarative.retrievers import (
345
365
  )
346
366
  from airbyte_cdk.sources.declarative.schema import (
347
367
  DefaultSchemaLoader,
368
+ DynamicSchemaLoader,
348
369
  InlineSchemaLoader,
349
370
  JsonFileSchemaLoader,
371
+ SchemaTypeIdentifier,
372
+ TypesMap,
350
373
  )
351
374
  from airbyte_cdk.sources.declarative.spec import Spec
352
375
  from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
@@ -444,6 +467,9 @@ class ModelToComponentFactory:
444
467
  IterableDecoderModel: self.create_iterable_decoder,
445
468
  XmlDecoderModel: self.create_xml_decoder,
446
469
  JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
470
+ DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
471
+ SchemaTypeIdentifierModel: self.create_schema_type_identifier,
472
+ TypesMapModel: self.create_types_map,
447
473
  JwtAuthenticatorModel: self.create_jwt_authenticator,
448
474
  LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
449
475
  ListPartitionRouterModel: self.create_list_partition_router,
@@ -467,6 +493,8 @@ class ModelToComponentFactory:
467
493
  WaitTimeFromHeaderModel: self.create_wait_time_from_header,
468
494
  WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
469
495
  AsyncRetrieverModel: self.create_async_retriever,
496
+ HttpComponentsResolverModel: self.create_http_components_resolver,
497
+ ComponentMappingDefinitionModel: self.create_components_mapping_definition,
470
498
  }
471
499
 
472
500
  # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -1281,19 +1309,20 @@ class ModelToComponentFactory:
1281
1309
  parameters=model.parameters or {},
1282
1310
  )
1283
1311
 
1284
- def _merge_stream_slicers(
1285
- self, model: DeclarativeStreamModel, config: Config
1286
- ) -> Optional[StreamSlicer]:
1287
- stream_slicer = None
1312
+ def _build_stream_slicer_from_partition_router(
1313
+ self,
1314
+ model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
1315
+ config: Config,
1316
+ ) -> Optional[PartitionRouter]:
1288
1317
  if (
1289
- hasattr(model.retriever, "partition_router")
1290
- and isinstance(model.retriever, SimpleRetrieverModel)
1291
- and model.retriever.partition_router
1318
+ hasattr(model, "partition_router")
1319
+ and isinstance(model, SimpleRetrieverModel)
1320
+ and model.partition_router
1292
1321
  ):
1293
- stream_slicer_model = model.retriever.partition_router
1322
+ stream_slicer_model = model.partition_router
1294
1323
 
1295
1324
  if isinstance(stream_slicer_model, list):
1296
- stream_slicer = CartesianProductStreamSlicer(
1325
+ return CartesianProductStreamSlicer(
1297
1326
  [
1298
1327
  self._create_component_from_model(model=slicer, config=config)
1299
1328
  for slicer in stream_slicer_model
@@ -1301,9 +1330,24 @@ class ModelToComponentFactory:
1301
1330
  parameters={},
1302
1331
  )
1303
1332
  else:
1304
- stream_slicer = self._create_component_from_model(
1305
- model=stream_slicer_model, config=config
1306
- )
1333
+ return self._create_component_from_model(model=stream_slicer_model, config=config) # type: ignore[no-any-return]
1334
+ # A PartitionRouter is created here because stream_slicer_model is model.partition_router
1335
+ return None
1336
+
1337
+ def _build_resumable_cursor_from_paginator(
1338
+ self,
1339
+ model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
1340
+ stream_slicer: Optional[StreamSlicer],
1341
+ ) -> Optional[StreamSlicer]:
1342
+ if hasattr(model, "paginator") and model.paginator and not stream_slicer:
1343
+ # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
1344
+ return ResumableFullRefreshCursor(parameters={})
1345
+ return None
1346
+
1347
+ def _merge_stream_slicers(
1348
+ self, model: DeclarativeStreamModel, config: Config
1349
+ ) -> Optional[StreamSlicer]:
1350
+ stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
1307
1351
 
1308
1352
  if model.incremental_sync and stream_slicer:
1309
1353
  incremental_sync_model = model.incremental_sync
@@ -1346,15 +1390,7 @@ class ModelToComponentFactory:
1346
1390
  ),
1347
1391
  partition_router=stream_slicer,
1348
1392
  )
1349
- elif (
1350
- hasattr(model.retriever, "paginator")
1351
- and model.retriever.paginator
1352
- and not stream_slicer
1353
- ):
1354
- # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
1355
- return ResumableFullRefreshCursor(parameters={})
1356
- else:
1357
- return None
1393
+ return self._build_resumable_cursor_from_paginator(model.retriever, stream_slicer)
1358
1394
 
1359
1395
  def create_default_error_handler(
1360
1396
  self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
@@ -1553,6 +1589,63 @@ class ModelToComponentFactory:
1553
1589
  ) -> InlineSchemaLoader:
1554
1590
  return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
1555
1591
 
1592
@staticmethod
def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
    """Build a runtime ``TypesMap`` from its declarative model.

    :param model: Model carrying ``target_type`` and ``current_type``.
    :param kwargs: Accepted for factory-signature compatibility; not used here.
    :return: A ``TypesMap`` holding the two type values copied from the model.
    """
    destination_type = model.target_type
    source_type = model.current_type
    return TypesMap(target_type=destination_type, current_type=source_type)
1595
+
1596
def create_schema_type_identifier(
    self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
) -> SchemaTypeIdentifier:
    """Build a ``SchemaTypeIdentifier`` from its declarative model.

    :param model: Model providing the schema/key/type pointers and optional type mappings.
    :param config: Connector configuration forwarded when building nested components.
    :param kwargs: Accepted for factory-signature compatibility; not used here.
    :return: The assembled ``SchemaTypeIdentifier``.
    """
    # An absent or empty `types_mapping` yields an empty list.
    types_mapping = [
        self._create_component_from_model(types_map, config=config)
        for types_map in (model.types_mapping or [])
    ]

    # Pointers are copied into fresh lists; a missing schema pointer becomes an empty
    # list while a missing type pointer stays None.
    model_schema_pointer: List[Union[InterpolatedString, str]] = list(model.schema_pointer or [])
    model_key_pointer: List[Union[InterpolatedString, str]] = list(model.key_pointer)
    model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = None
    if model.type_pointer:
        model_type_pointer = list(model.type_pointer)

    return SchemaTypeIdentifier(
        schema_pointer=model_schema_pointer,
        key_pointer=model_key_pointer,
        type_pointer=model_type_pointer,
        types_mapping=types_mapping,
        parameters=model.parameters or {},
    )
1622
+
1623
def create_dynamic_schema_loader(
    self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
) -> DynamicSchemaLoader:
    """Build a ``DynamicSchemaLoader`` together with the retriever it reads from.

    :param model: Model providing the retriever and schema type identifier definitions.
    :param config: Connector configuration forwarded to every nested component.
    :param kwargs: Accepted for factory-signature compatibility; not used here.
    :return: The assembled ``DynamicSchemaLoader``.
    """
    partition_router = self._build_stream_slicer_from_partition_router(model.retriever, config)
    # `_build_resumable_cursor_from_paginator` returns None whenever a slicer is supplied,
    # so using its result alone would discard a configured partition router. Keep the
    # router when present and fall back to the paginator-based cursor only otherwise.
    retriever_slicer = partition_router or self._build_resumable_cursor_from_paginator(
        model.retriever, partition_router
    )

    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="",
        primary_key=None,
        stream_slicer=retriever_slicer,
        transformations=[],
    )
    schema_type_identifier = self._create_component_from_model(
        model.schema_type_identifier, config=config, parameters=model.parameters or {}
    )
    return DynamicSchemaLoader(
        retriever=retriever,
        config=config,
        schema_type_identifier=schema_type_identifier,
        parameters=model.parameters or {},
    )
1648
+
1556
1649
  @staticmethod
1557
1650
  def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
1558
1651
  return JsonDecoder(parameters={})
@@ -2218,3 +2311,56 @@ class ModelToComponentFactory:
2218
2311
 
2219
2312
  def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level:
2220
2313
  return Level.DEBUG if emit_connector_builder_messages else Level.INFO
2314
+
2315
@staticmethod
def create_components_mapping_definition(
    model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
) -> ComponentMappingDefinition:
    """Build a ``ComponentMappingDefinition`` from its declarative model.

    The mapped value and every ``field_path`` segment are passed through
    ``InterpolatedString.create`` with the model's parameters.

    :param model: Model providing ``value``, ``field_path``, ``value_type`` and ``parameters``.
    :param config: Unused here; part of the common factory signature.
    :param kwargs: Accepted for factory-signature compatibility; not used here.
    :return: The assembled ``ComponentMappingDefinition``.
    """

    def _interpolate(raw: Any) -> Any:
        return InterpolatedString.create(raw, parameters=model.parameters or {})

    mapped_value = _interpolate(model.value)
    path_segments = [_interpolate(segment) for segment in model.field_path]
    resolved_type = ModelToComponentFactory._json_schema_type_name_to_type(model.value_type)

    return ComponentMappingDefinition(
        field_path=path_segments,  # type: ignore[arg-type] # field_path can be str and InterpolatedString
        value=mapped_value,
        value_type=resolved_type,
        parameters=model.parameters or {},
    )
2332
+
2333
def create_http_components_resolver(
    self, model: HttpComponentsResolverModel, config: Config, **kwargs: Any
) -> HttpComponentsResolver:
    """Build an ``HttpComponentsResolver`` together with the retriever it reads from.

    :param model: Model providing the retriever and the components mapping definitions.
    :param config: Connector configuration forwarded to every nested component.
    :param kwargs: Accepted for parity with the other ``create_*`` factory methods; not used.
    :return: The assembled ``HttpComponentsResolver``.
    """
    partition_router = self._build_stream_slicer_from_partition_router(model.retriever, config)
    # `_build_resumable_cursor_from_paginator` returns None whenever a slicer is supplied,
    # so using its result alone would discard a configured partition router. Keep the
    # router when present and fall back to the paginator-based cursor only otherwise.
    retriever_slicer = partition_router or self._build_resumable_cursor_from_paginator(
        model.retriever, partition_router
    )

    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="",
        primary_key=None,
        stream_slicer=retriever_slicer,
        transformations=[],
    )

    # NOTE(review): `value_type` is forwarded as an extra keyword argument, but
    # `create_components_mapping_definition` derives it from the model itself, so this
    # looks redundant. Kept to avoid changing what nested factories receive.
    components_mapping = [
        self._create_component_from_model(
            model=components_mapping_definition_model,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                components_mapping_definition_model.value_type
            ),
            config=config,
        )
        for components_mapping_definition_model in model.components_mapping
    ]

    return HttpComponentsResolver(
        retriever=retriever,
        config=config,
        components_mapping=components_mapping,
        parameters=model.parameters or {},
    )
@@ -6,5 +6,6 @@ from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_
6
6
  from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import ListPartitionRouter
7
7
  from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import SinglePartitionRouter
8
8
  from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import SubstreamPartitionRouter
9
+ from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
9
10
 
10
- __all__ = ["CartesianProductStreamSlicer", "ListPartitionRouter", "SinglePartitionRouter", "SubstreamPartitionRouter"]
11
+ __all__ = ["CartesianProductStreamSlicer", "ListPartitionRouter", "SinglePartitionRouter", "SubstreamPartitionRouter", "PartitionRouter"]
@@ -0,0 +1,13 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from airbyte_cdk.sources.declarative.resolvers.components_resolver import ComponentsResolver, ComponentMappingDefinition, ResolvedComponentMappingDefinition
6
+ from airbyte_cdk.sources.declarative.resolvers.http_components_resolver import HttpComponentsResolver
7
+ from airbyte_cdk.sources.declarative.models import HttpComponentsResolver as HttpComponentsResolverModel
8
+
9
# Maps a components-resolver name to its declarative model class.
COMPONENTS_RESOLVER_TYPE_MAPPING = {"HttpComponentsResolver": HttpComponentsResolverModel}
12
+
13
+ __all__ = ["ComponentsResolver", "HttpComponentsResolver", "ComponentMappingDefinition", "ResolvedComponentMappingDefinition", "COMPONENTS_RESOLVER_TYPE_MAPPING"]
@@ -0,0 +1,55 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from abc import ABC, abstractmethod
6
+ from dataclasses import InitVar, dataclass
7
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Type, Union
8
+
9
+ from typing_extensions import deprecated
10
+
11
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
12
+ from airbyte_cdk.sources.source import ExperimentalClassWarning
13
+
14
+
15
@dataclass(frozen=True)
class ComponentMappingDefinition:
    """Immutable description of one value to write into a stream template.

    Names the template field to update (``field_path``), the value to put there (a plain
    or interpolated string), and an optional type associated with that value.
    """

    # Path segments locating the field inside the stream template.
    field_path: List["InterpolatedString"]
    # Value to write; either a raw string or an already-built InterpolatedString.
    value: Union["InterpolatedString", str]
    # Optional type associated with the value.
    value_type: Optional[Type[Any]]
    # Init-only argument: required by the constructor but not stored as a field.
    parameters: InitVar[Mapping[str, Any]]
25
+
26
+
27
@dataclass(frozen=True)
class ResolvedComponentMappingDefinition:
    """Immutable, resolved counterpart of a component mapping.

    Same shape as a component mapping definition, except that ``value`` is annotated as
    an ``InterpolatedString`` only, rather than a string or an ``InterpolatedString``.
    """

    # Path segments locating the field inside the stream template.
    field_path: List["InterpolatedString"]
    # Value to write, as an InterpolatedString.
    value: "InterpolatedString"
    # Optional type associated with the value.
    value_type: Optional[Type[Any]]
    # Init-only argument: required by the constructor but not stored as a field.
    parameters: InitVar[Mapping[str, Any]]
37
+
38
+
39
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass
class ComponentsResolver(ABC):
    """Interface for objects that turn a stream template into resolved stream configs."""

    @abstractmethod
    def resolve_components(
        self, stream_template_config: Dict[str, Any]
    ) -> Iterable[Dict[str, Any]]:
        """
        Populate a stream template configuration with resolved values.

        :param stream_template_config: The stream template with placeholders for components.
        :yields: The resolved stream config with populated values.
        """
@@ -0,0 +1,106 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from copy import deepcopy
6
+ from dataclasses import InitVar, dataclass, field
7
+ from typing import Any, Dict, Iterable, List, Mapping
8
+
9
+ import dpath
10
+ from typing_extensions import deprecated
11
+
12
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
13
+ from airbyte_cdk.sources.declarative.resolvers.components_resolver import (
14
+ ComponentMappingDefinition,
15
+ ComponentsResolver,
16
+ ResolvedComponentMappingDefinition,
17
+ )
18
+ from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
19
+ from airbyte_cdk.sources.source import ExperimentalClassWarning
20
+ from airbyte_cdk.sources.types import Config
21
+
22
+
23
+ @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
24
+ @dataclass
25
+ class HttpComponentsResolver(ComponentsResolver):
26
+ """
27
+ Resolves and populates stream templates with components fetched via an HTTP retriever.
28
+
29
+ Attributes:
30
+ retriever (Retriever): The retriever used to fetch data from an API.
31
+ config (Config): Configuration object for the resolver.
32
+ components_mapping (List[ComponentMappingDefinition]): List of mappings to resolve.
33
+ parameters (InitVar[Mapping[str, Any]]): Additional parameters for interpolation.
34
+ """
35
+
36
+ retriever: Retriever
37
+ config: Config
38
+ components_mapping: List[ComponentMappingDefinition]
39
+ parameters: InitVar[Mapping[str, Any]]
40
+ _resolved_components: List[ResolvedComponentMappingDefinition] = field(
41
+ init=False, repr=False, default_factory=list
42
+ )
43
+
44
+ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
45
+ """
46
+ Initializes and parses component mappings, converting them to resolved definitions.
47
+
48
+ Args:
49
+ parameters (Mapping[str, Any]): Parameters for interpolation.
50
+ """
51
+ for component_mapping in self.components_mapping:
52
+ if isinstance(component_mapping.value, (str, InterpolatedString)):
53
+ interpolated_value = (
54
+ InterpolatedString.create(component_mapping.value, parameters=parameters)
55
+ if isinstance(component_mapping.value, str)
56
+ else component_mapping.value
57
+ )
58
+
59
+ field_path = [
60
+ InterpolatedString.create(path, parameters=parameters)
61
+ for path in component_mapping.field_path
62
+ ]
63
+
64
+ self._resolved_components.append(
65
+ ResolvedComponentMappingDefinition(
66
+ field_path=field_path,
67
+ value=interpolated_value,
68
+ value_type=component_mapping.value_type,
69
+ parameters=parameters,
70
+ )
71
+ )
72
+ else:
73
+ raise ValueError(
74
+ f"Expected a string or InterpolatedString for value in mapping: {component_mapping}"
75
+ )
76
+
77
+ def resolve_components(
78
+ self, stream_template_config: Dict[str, Any]
79
+ ) -> Iterable[Dict[str, Any]]:
80
+ """
81
+ Resolves components in the stream template configuration by populating values.
82
+
83
+ Args:
84
+ stream_template_config (Dict[str, Any]): Stream template to populate.
85
+
86
+ Yields:
87
+ Dict[str, Any]: Updated configurations with resolved components.
88
+ """
89
+ kwargs = {"stream_template_config": stream_template_config}
90
+
91
+ for components_values in self.retriever.read_records({}):
92
+ updated_config = deepcopy(stream_template_config)
93
+ kwargs["components_values"] = components_values # type: ignore[assignment] # component_values will always be of type Mapping[str, Any]
94
+
95
+ for resolved_component in self._resolved_components:
96
+ valid_types = (
97
+ (resolved_component.value_type,) if resolved_component.value_type else None
98
+ )
99
+ value = resolved_component.value.eval(
100
+ self.config, valid_types=valid_types, **kwargs
101
+ )
102
+
103
+ path = [path.eval(self.config, **kwargs) for path in resolved_component.field_path]
104
+ dpath.set(updated_config, path, value)
105
+
106
+ yield updated_config
@@ -6,5 +6,6 @@ from airbyte_cdk.sources.declarative.schema.default_schema_loader import Default
6
6
  from airbyte_cdk.sources.declarative.schema.inline_schema_loader import InlineSchemaLoader
7
7
  from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
8
8
  from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
9
+ from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import DynamicSchemaLoader, TypesMap, SchemaTypeIdentifier
9
10
 
10
- __all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader"]
11
+ __all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader", "DynamicSchemaLoader", "TypesMap", "SchemaTypeIdentifier"]