airbyte-cdk 6.9.0.dev0__py3-none-any.whl → 6.9.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -86,23 +86,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
86
86
  component_factory=component_factory,
87
87
  )
88
88
 
89
+ # todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
90
+ # no longer needs to store the original incoming state. But maybe there's an edge case?
89
91
  self._state = state
90
92
 
91
- self._concurrent_streams: Optional[List[AbstractStream]]
92
- self._synchronous_streams: Optional[List[Stream]]
93
-
94
- # If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
95
- # they might depend on it. Ideally we want to have a static method on this class to get the spec without
96
- # any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
97
- # for our future improvements to the CDK.
98
- if config:
99
- self._concurrent_streams, self._synchronous_streams = self._group_streams(
100
- config=config or {}
101
- )
102
- else:
103
- self._concurrent_streams = None
104
- self._synchronous_streams = None
105
-
106
93
  concurrency_level_from_manifest = self._source_config.get("concurrency_level")
107
94
  if concurrency_level_from_manifest:
108
95
  concurrency_level_component = self._constructor.create_component(
@@ -136,17 +123,20 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
136
123
  logger: logging.Logger,
137
124
  config: Mapping[str, Any],
138
125
  catalog: ConfiguredAirbyteCatalog,
139
- state: Optional[Union[List[AirbyteStateMessage]]] = None,
126
+ state: Optional[List[AirbyteStateMessage]] = None,
140
127
  ) -> Iterator[AirbyteMessage]:
141
- # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of the concurrent
142
- # streams must be saved so that they can be removed from the catalog before starting synchronous streams
143
- if self._concurrent_streams:
128
+ concurrent_streams, _ = self._group_streams(config=config)
129
+
130
+ # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of
131
+ # the concurrent streams must be saved so that they can be removed from the catalog before starting
132
+ # synchronous streams
133
+ if len(concurrent_streams) > 0:
144
134
  concurrent_stream_names = set(
145
- [concurrent_stream.name for concurrent_stream in self._concurrent_streams]
135
+ [concurrent_stream.name for concurrent_stream in concurrent_streams]
146
136
  )
147
137
 
148
138
  selected_concurrent_streams = self._select_streams(
149
- streams=self._concurrent_streams, configured_catalog=catalog
139
+ streams=concurrent_streams, configured_catalog=catalog
150
140
  )
151
141
  # It would appear that passing in an empty set of streams causes an infinite loop in ConcurrentReadProcessor.
152
142
  # This is also evident in concurrent_source_adapter.py so I'll leave this out of scope to fix for now
@@ -165,8 +155,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
165
155
  yield from super().read(logger, config, filtered_catalog, state)
166
156
 
167
157
  def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
168
- concurrent_streams = self._concurrent_streams or []
169
- synchronous_streams = self._synchronous_streams or []
158
+ concurrent_streams, synchronous_streams = self._group_streams(config=config)
170
159
  return AirbyteCatalog(
171
160
  streams=[
172
161
  stream.as_airbyte_stream() for stream in concurrent_streams + synchronous_streams
@@ -206,7 +195,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
206
195
  # so we need to treat them as synchronous
207
196
  if (
208
197
  isinstance(declarative_stream, DeclarativeStream)
209
- and name_to_stream_mapping[declarative_stream.name].get("retriever")["type"]
198
+ and name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
210
199
  == "SimpleRetriever"
211
200
  ):
212
201
  incremental_sync_component_definition = name_to_stream_mapping[
@@ -215,7 +204,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
215
204
 
216
205
  partition_router_component_definition = (
217
206
  name_to_stream_mapping[declarative_stream.name]
218
- .get("retriever")
207
+ .get("retriever", {})
219
208
  .get("partition_router")
220
209
  )
221
210
  is_without_partition_router_or_cursor = not bool(
@@ -237,7 +226,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
237
226
  cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
238
227
  state_manager=state_manager,
239
228
  model_type=DatetimeBasedCursorModel,
240
- component_definition=incremental_sync_component_definition,
229
+ component_definition=incremental_sync_component_definition, # type: ignore # Not None because of the if condition above
241
230
  stream_name=declarative_stream.name,
242
231
  stream_namespace=declarative_stream.namespace,
243
232
  config=config or {},
@@ -320,10 +309,11 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
320
309
  def _is_datetime_incremental_without_partition_routing(
321
310
  self,
322
311
  declarative_stream: DeclarativeStream,
323
- incremental_sync_component_definition: Mapping[str, Any],
312
+ incremental_sync_component_definition: Mapping[str, Any] | None,
324
313
  ) -> bool:
325
314
  return (
326
- bool(incremental_sync_component_definition)
315
+ incremental_sync_component_definition is not None
316
+ and bool(incremental_sync_component_definition)
327
317
  and incremental_sync_component_definition.get("type", "")
328
318
  == DatetimeBasedCursorModel.__name__
329
319
  and self._stream_supports_concurrent_partition_processing(
@@ -1684,92 +1684,6 @@ definitions:
1684
1684
  $parameters:
1685
1685
  type: object
1686
1686
  additionalProperties: true
1687
- TypesMap:
1688
- title: Types Map
1689
- description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
1690
- type: object
1691
- required:
1692
- - target_type
1693
- - current_type
1694
- properties:
1695
- target_type:
1696
- anyOf:
1697
- - type: string
1698
- - type: array
1699
- items:
1700
- type: string
1701
- current_type:
1702
- anyOf:
1703
- - type: string
1704
- - type: array
1705
- items:
1706
- type: string
1707
- SchemaTypeIdentifier:
1708
- title: Schema Type Identifier
1709
- description: (This component is experimental. Use at your own risk.) Identifies schema details for dynamic schema extraction and processing.
1710
- type: object
1711
- required:
1712
- - key_pointer
1713
- properties:
1714
- type:
1715
- type: string
1716
- enum: [SchemaTypeIdentifier]
1717
- schema_pointer:
1718
- title: Schema Path
1719
- description: List of nested fields defining the schema field path to extract. Defaults to [].
1720
- type: array
1721
- default: []
1722
- items:
1723
- - type: string
1724
- interpolation_content:
1725
- - config
1726
- key_pointer:
1727
- title: Key Path
1728
- description: List of potentially nested fields describing the full path of the field key to extract.
1729
- type: array
1730
- items:
1731
- - type: string
1732
- interpolation_content:
1733
- - config
1734
- type_pointer:
1735
- title: Type Path
1736
- description: List of potentially nested fields describing the full path of the field type to extract.
1737
- type: array
1738
- items:
1739
- - type: string
1740
- interpolation_content:
1741
- - config
1742
- types_mapping:
1743
- type: array
1744
- items:
1745
- - "$ref": "#/definitions/TypesMap"
1746
- $parameters:
1747
- type: object
1748
- additionalProperties: true
1749
- DynamicSchemaLoader:
1750
- title: Dynamic Schema Loader
1751
- description: (This component is experimental. Use at your own risk.) Loads a schema by extracting data from retrieved records.
1752
- type: object
1753
- required:
1754
- - type
1755
- - retriever
1756
- - schema_type_identifier
1757
- properties:
1758
- type:
1759
- type: string
1760
- enum: [DynamicSchemaLoader]
1761
- retriever:
1762
- title: Retriever
1763
- description: Component used to coordinate how records are extracted across stream slices and request pages.
1764
- anyOf:
1765
- - "$ref": "#/definitions/AsyncRetriever"
1766
- - "$ref": "#/definitions/CustomRetriever"
1767
- - "$ref": "#/definitions/SimpleRetriever"
1768
- schema_type_identifier:
1769
- "$ref": "#/definitions/SchemaTypeIdentifier"
1770
- $parameters:
1771
- type: object
1772
- additionalProperties: true
1773
1687
  InlineSchemaLoader:
1774
1688
  title: Inline Schema Loader
1775
1689
  description: Loads a schema that is defined directly in the manifest file.
@@ -650,32 +650,6 @@ class HttpResponseFilter(BaseModel):
650
650
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
651
651
 
652
652
 
653
- class TypesMap(BaseModel):
654
- target_type: Union[str, List[str]]
655
- current_type: Union[str, List[str]]
656
-
657
-
658
- class SchemaTypeIdentifier(BaseModel):
659
- type: Optional[Literal["SchemaTypeIdentifier"]] = None
660
- schema_pointer: Optional[List[str]] = Field(
661
- [],
662
- description="List of nested fields defining the schema field path to extract. Defaults to [].",
663
- title="Schema Path",
664
- )
665
- key_pointer: List[str] = Field(
666
- ...,
667
- description="List of potentially nested fields describing the full path of the field key to extract.",
668
- title="Key Path",
669
- )
670
- type_pointer: Optional[List[str]] = Field(
671
- None,
672
- description="List of potentially nested fields describing the full path of the field type to extract.",
673
- title="Type Path",
674
- )
675
- types_mapping: Optional[List[TypesMap]] = None
676
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
677
-
678
-
679
653
  class InlineSchemaLoader(BaseModel):
680
654
  type: Literal["InlineSchemaLoader"]
681
655
  schema_: Optional[Dict[str, Any]] = Field(
@@ -848,13 +822,13 @@ class OauthConnectorInputSpecification(BaseModel):
848
822
  )
849
823
  extract_output: List[str] = Field(
850
824
  ...,
851
- description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.",
825
+ description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config. ",
852
826
  examples=[{"extract_output": ["access_token", "refresh_token", "other_field"]}],
853
827
  title="DeclarativeOAuth Extract Output",
854
828
  )
855
829
  state: Optional[State] = Field(
856
830
  None,
857
- description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity.",
831
+ description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity. ",
858
832
  examples=[{"state": {"min": 7, "max": 128}}],
859
833
  title="(Optional) DeclarativeOAuth Configurable State Query Param",
860
834
  )
@@ -878,13 +852,13 @@ class OauthConnectorInputSpecification(BaseModel):
878
852
  )
879
853
  state_key: Optional[str] = Field(
880
854
  None,
881
- description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.",
855
+ description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider. ",
882
856
  examples=[{"state_key": "my_custom_state_key_key_name"}],
883
857
  title="(Optional) DeclarativeOAuth State Key Override",
884
858
  )
885
859
  auth_code_key: Optional[str] = Field(
886
860
  None,
887
- description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.",
861
+ description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider. ",
888
862
  examples=[{"auth_code_key": "my_custom_auth_code_key_name"}],
889
863
  title="(Optional) DeclarativeOAuth Auth Code Key Override",
890
864
  )
@@ -1800,17 +1774,6 @@ class HttpRequester(BaseModel):
1800
1774
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1801
1775
 
1802
1776
 
1803
- class DynamicSchemaLoader(BaseModel):
1804
- type: Literal["DynamicSchemaLoader"]
1805
- retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
1806
- ...,
1807
- description="Component used to coordinate how records are extracted across stream slices and request pages.",
1808
- title="Retriever",
1809
- )
1810
- schema_type_identifier: SchemaTypeIdentifier
1811
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1812
-
1813
-
1814
1777
  class ParentStreamConfig(BaseModel):
1815
1778
  type: Literal["ParentStreamConfig"]
1816
1779
  parent_key: str = Field(
@@ -2018,6 +1981,5 @@ DeclarativeSource2.update_forward_refs()
2018
1981
  SelectiveAuthenticator.update_forward_refs()
2019
1982
  DeclarativeStream.update_forward_refs()
2020
1983
  SessionTokenAuthenticator.update_forward_refs()
2021
- DynamicSchemaLoader.update_forward_refs()
2022
1984
  SimpleRetriever.update_forward_refs()
2023
1985
  AsyncRetriever.update_forward_refs()
@@ -64,10 +64,6 @@ DEFAULT_MODEL_TYPES: Mapping[str, str] = {
64
64
  "AddFields.fields": "AddedFieldDefinition",
65
65
  # CustomPartitionRouter
66
66
  "CustomPartitionRouter.parent_stream_configs": "ParentStreamConfig",
67
- # DynamicSchemaLoader
68
- "DynamicSchemaLoader.retriever": "SimpleRetriever",
69
- # SchemaTypeIdentifier
70
- "SchemaTypeIdentifier.types_map": "TypesMap",
71
67
  }
72
68
 
73
69
  # We retain a separate registry for custom components to automatically insert the type if it is missing. This is intended to
@@ -188,9 +188,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
188
188
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
189
189
  DpathExtractor as DpathExtractorModel,
190
190
  )
191
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
192
- DynamicSchemaLoader as DynamicSchemaLoaderModel,
193
- )
194
191
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
195
192
  ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
196
193
  )
@@ -281,9 +278,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
281
278
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
282
279
  ResponseToFileExtractor as ResponseToFileExtractorModel,
283
280
  )
284
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
285
- SchemaTypeIdentifier as SchemaTypeIdentifierModel,
286
- )
287
281
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
288
282
  SelectiveAuthenticator as SelectiveAuthenticatorModel,
289
283
  )
@@ -297,9 +291,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
297
291
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
298
292
  SubstreamPartitionRouter as SubstreamPartitionRouterModel,
299
293
  )
300
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
301
- TypesMap as TypesMapModel,
302
- )
303
294
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
304
295
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
305
296
  WaitTimeFromHeader as WaitTimeFromHeaderModel,
@@ -365,11 +356,8 @@ from airbyte_cdk.sources.declarative.retrievers import (
365
356
  )
366
357
  from airbyte_cdk.sources.declarative.schema import (
367
358
  DefaultSchemaLoader,
368
- DynamicSchemaLoader,
369
359
  InlineSchemaLoader,
370
360
  JsonFileSchemaLoader,
371
- SchemaTypeIdentifier,
372
- TypesMap,
373
361
  )
374
362
  from airbyte_cdk.sources.declarative.spec import Spec
375
363
  from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
@@ -467,9 +455,6 @@ class ModelToComponentFactory:
467
455
  IterableDecoderModel: self.create_iterable_decoder,
468
456
  XmlDecoderModel: self.create_xml_decoder,
469
457
  JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
470
- DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
471
- SchemaTypeIdentifierModel: self.create_schema_type_identifier,
472
- TypesMapModel: self.create_types_map,
473
458
  JwtAuthenticatorModel: self.create_jwt_authenticator,
474
459
  LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
475
460
  ListPartitionRouterModel: self.create_list_partition_router,
@@ -1589,63 +1574,6 @@ class ModelToComponentFactory:
1589
1574
  ) -> InlineSchemaLoader:
1590
1575
  return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
1591
1576
 
1592
- @staticmethod
1593
- def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
1594
- return TypesMap(target_type=model.target_type, current_type=model.current_type)
1595
-
1596
- def create_schema_type_identifier(
1597
- self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
1598
- ) -> SchemaTypeIdentifier:
1599
- types_mapping = []
1600
- if model.types_mapping:
1601
- types_mapping.extend(
1602
- [
1603
- self._create_component_from_model(types_map, config=config)
1604
- for types_map in model.types_mapping
1605
- ]
1606
- )
1607
- model_schema_pointer: List[Union[InterpolatedString, str]] = (
1608
- [x for x in model.schema_pointer] if model.schema_pointer else []
1609
- )
1610
- model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer]
1611
- model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
1612
- [x for x in model.type_pointer] if model.type_pointer else None
1613
- )
1614
-
1615
- return SchemaTypeIdentifier(
1616
- schema_pointer=model_schema_pointer,
1617
- key_pointer=model_key_pointer,
1618
- type_pointer=model_type_pointer,
1619
- types_mapping=types_mapping,
1620
- parameters=model.parameters or {},
1621
- )
1622
-
1623
- def create_dynamic_schema_loader(
1624
- self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
1625
- ) -> DynamicSchemaLoader:
1626
- stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
1627
- combined_slicers = self._build_resumable_cursor_from_paginator(
1628
- model.retriever, stream_slicer
1629
- )
1630
-
1631
- retriever = self._create_component_from_model(
1632
- model=model.retriever,
1633
- config=config,
1634
- name="",
1635
- primary_key=None,
1636
- stream_slicer=combined_slicers,
1637
- transformations=[],
1638
- )
1639
- schema_type_identifier = self._create_component_from_model(
1640
- model.schema_type_identifier, config=config, parameters=model.parameters or {}
1641
- )
1642
- return DynamicSchemaLoader(
1643
- retriever=retriever,
1644
- config=config,
1645
- schema_type_identifier=schema_type_identifier,
1646
- parameters=model.parameters or {},
1647
- )
1648
-
1649
1577
  @staticmethod
1650
1578
  def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
1651
1579
  return JsonDecoder(parameters={})
@@ -6,6 +6,5 @@ from airbyte_cdk.sources.declarative.schema.default_schema_loader import Default
6
6
  from airbyte_cdk.sources.declarative.schema.inline_schema_loader import InlineSchemaLoader
7
7
  from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
8
8
  from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
9
- from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import DynamicSchemaLoader, TypesMap, SchemaTypeIdentifier
10
9
 
11
- __all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader", "DynamicSchemaLoader", "TypesMap", "SchemaTypeIdentifier"]
10
+ __all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.9.0.dev0
3
+ Version: 6.9.1.dev0
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -18,12 +18,10 @@ Classifier: Programming Language :: Python :: 3.12
18
18
  Classifier: Topic :: Scientific/Engineering
19
19
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
20
  Provides-Extra: file-based
21
- Provides-Extra: sphinx-docs
22
21
  Provides-Extra: sql
23
22
  Provides-Extra: vector-db-based
24
23
  Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
25
24
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
26
- Requires-Dist: Sphinx (>=4.2,<4.3) ; extra == "sphinx-docs"
27
25
  Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
28
26
  Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
29
27
  Requires-Dist: backoff
@@ -63,7 +61,6 @@ Requires-Dist: rapidfuzz (>=3.10.1,<4.0.0)
63
61
  Requires-Dist: requests
64
62
  Requires-Dist: requests_cache
65
63
  Requires-Dist: serpyco-rs (>=1.10.2,<2.0.0)
66
- Requires-Dist: sphinx-rtd-theme (>=1.0,<1.1) ; extra == "sphinx-docs"
67
64
  Requires-Dist: sqlalchemy (>=2.0,<3.0,!=2.0.36) ; extra == "sql"
68
65
  Requires-Dist: tiktoken (==0.8.0) ; extra == "vector-db-based"
69
66
  Requires-Dist: unstructured.pytesseract (>=0.3.12) ; extra == "file-based"
@@ -62,11 +62,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
62
62
  airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
63
63
  airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
64
64
  airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
65
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=hgKamhOh1B8RA4Fx8FmCl4ORc7eO2h_RhxbkQovh3FM,23724
65
+ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=9OXOFzG5PBL_MHEJg4ETE0TXfXeuhvv-at38AN3wWEM,23323
66
66
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
67
67
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
68
68
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
69
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=ZCYl6v0miacvpIt6M8FakkGZpEsY8SmB4_436sHEw9Y,126841
69
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=kZkGoasVzufDE2BkFo_7DVO6xHL9kueNBjddtl-7kaU,124134
70
70
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
71
71
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
72
72
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=hNlhaB5FjNC6IfJyglj5ZJWkYD2nEAukMDmzRz5PC6o,671
@@ -104,12 +104,12 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
104
104
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
105
105
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
106
106
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
107
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=qjzXe162aUcaq1n6B8KhA6Z9B8boM9yY8dAsLXll5-g,89872
107
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=6iAzpGmUrhwEUQcCL5bW-FXuLXPMeFqs_GR4B1rS3ZE,88511
108
108
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
109
109
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
110
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=KflzFl_ZKRSW9XkH16sfr5-9HnAAI0T5s8CVBrJK2Ao,8958
110
+ airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=0jfi-ogL-rOVORTIYnu64wNfh1L8fYaLVDWzJ2zGdi8,8799
111
111
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
112
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=kGU0re3R-Ujtn6Gp96KpEVRHJB5P-B8sRa0aMR_jDdk,102536
112
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=lS2oKfkND54u66hocc2BycS-AIYIbkn4npq6CFRNokc,99573
113
113
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=1NjaZoGAIefvWwj6wx-LOKIXXWS-UnBlZFnuR7y6uYA,745
114
114
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
115
115
  airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
@@ -159,9 +159,8 @@ airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=FVQpUGVwp2Gibk4gp0
159
159
  airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=WDFnjrXLz3-YEjFhmlMkWAn9AJvnZ0mk9FyC8DAhEYk,4976
160
160
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
161
161
  airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=N4swGw5mfuTXJ2R7AKX18CHzizsr69pXwt5uSHLPi48,24172
162
- airbyte_cdk/sources/declarative/schema/__init__.py,sha256=Io9vninzlEjQ2uFmWklxfwNM0cXfljtzOz5zL1OVyT4,701
162
+ airbyte_cdk/sources/declarative/schema/__init__.py,sha256=ul8L9S0-__AMEdbCLHBq-PMEeA928NVp8BB83BMotfU,517
163
163
  airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
164
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=tP5DIEMn-k2JshWeXmo53ZEudDAVb4AJ50Z5tfme_ZU,8063
165
164
  airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
166
165
  airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py,sha256=5Wl-fqW-pVf_dxJ4yGHMAFfC4JjKHYJhqFJT1xA57F4,4177
167
166
  airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLnrDLxf1PJKdUqvQq2RVnAOAzNSY,379
@@ -334,8 +333,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
334
333
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
335
334
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
336
335
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
337
- airbyte_cdk-6.9.0.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
338
- airbyte_cdk-6.9.0.dev0.dist-info/METADATA,sha256=VS_vbZgaobGxk6eCGkFZuQgCR-xzMrHzJWL2FpCnyM8,6112
339
- airbyte_cdk-6.9.0.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
340
- airbyte_cdk-6.9.0.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
341
- airbyte_cdk-6.9.0.dev0.dist-info/RECORD,,
336
+ airbyte_cdk-6.9.1.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
337
+ airbyte_cdk-6.9.1.dev0.dist-info/METADATA,sha256=pA5YAkFWlFKzRsXDIw20NDtNEH7Sk9lHgR5bMgR8DVM,5954
338
+ airbyte_cdk-6.9.1.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
339
+ airbyte_cdk-6.9.1.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
340
+ airbyte_cdk-6.9.1.dev0.dist-info/RECORD,,
@@ -1,219 +0,0 @@
1
- #
2
- # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
-
6
- from copy import deepcopy
7
- from dataclasses import InitVar, dataclass
8
- from typing import Any, List, Mapping, MutableMapping, Optional, Union
9
-
10
- import dpath
11
- from typing_extensions import deprecated
12
-
13
- from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
14
- from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
15
- from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
16
- from airbyte_cdk.sources.source import ExperimentalClassWarning
17
- from airbyte_cdk.sources.types import Config
18
-
19
- AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
20
- "string": {"type": ["null", "string"]},
21
- "boolean": {"type": ["null", "boolean"]},
22
- "date": {"type": ["null", "string"], "format": "date"},
23
- "timestamp_without_timezone": {
24
- "type": ["null", "string"],
25
- "format": "date-time",
26
- "airbyte_type": "timestamp_without_timezone",
27
- },
28
- "timestamp_with_timezone": {"type": ["null", "string"], "format": "date-time"},
29
- "time_without_timezone": {
30
- "type": ["null", "string"],
31
- "format": "time",
32
- "airbyte_type": "time_without_timezone",
33
- },
34
- "time_with_timezone": {
35
- "type": ["null", "string"],
36
- "format": "time",
37
- "airbyte_type": "time_with_timezone",
38
- },
39
- "integer": {"type": ["null", "integer"]},
40
- "number": {"type": ["null", "number"]},
41
- "array": {"type": ["null", "array"]},
42
- "object": {"type": ["null", "object"]},
43
- }
44
-
45
-
46
- @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
47
- @dataclass(frozen=True)
48
- class TypesMap:
49
- """
50
- Represents a mapping between a current type and its corresponding target type.
51
- """
52
-
53
- target_type: Union[List[str], str]
54
- current_type: Union[List[str], str]
55
-
56
-
57
- @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
58
- @dataclass
59
- class SchemaTypeIdentifier:
60
- """
61
- Identifies schema details for dynamic schema extraction and processing.
62
- """
63
-
64
- key_pointer: List[Union[InterpolatedString, str]]
65
- parameters: InitVar[Mapping[str, Any]]
66
- type_pointer: Optional[List[Union[InterpolatedString, str]]] = None
67
- types_mapping: Optional[List[TypesMap]] = None
68
- schema_pointer: Optional[List[Union[InterpolatedString, str]]] = None
69
-
70
- def __post_init__(self, parameters: Mapping[str, Any]) -> None:
71
- self.schema_pointer = (
72
- self._update_pointer(self.schema_pointer, parameters) if self.schema_pointer else []
73
- ) # type: ignore[assignment] # This is reqired field in model
74
- self.key_pointer = self._update_pointer(self.key_pointer, parameters) # type: ignore[assignment] # This is reqired field in model
75
- self.type_pointer = (
76
- self._update_pointer(self.type_pointer, parameters) if self.type_pointer else None
77
- )
78
-
79
- @staticmethod
80
- def _update_pointer(
81
- pointer: Optional[List[Union[InterpolatedString, str]]], parameters: Mapping[str, Any]
82
- ) -> Optional[List[Union[InterpolatedString, str]]]:
83
- return (
84
- [
85
- InterpolatedString.create(path, parameters=parameters)
86
- if isinstance(path, str)
87
- else path
88
- for path in pointer
89
- ]
90
- if pointer
91
- else None
92
- )
93
-
94
-
95
- @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
96
- @dataclass
97
- class DynamicSchemaLoader(SchemaLoader):
98
- """
99
- Dynamically loads a JSON Schema by extracting data from retrieved records.
100
- """
101
-
102
- retriever: Retriever
103
- config: Config
104
- parameters: InitVar[Mapping[str, Any]]
105
- schema_type_identifier: SchemaTypeIdentifier
106
-
107
- def get_json_schema(self) -> Mapping[str, Any]:
108
- """
109
- Constructs a JSON Schema based on retrieved data.
110
- """
111
- properties = {}
112
- retrieved_record = next(self.retriever.read_records({}), None) # type: ignore[call-overload] # read_records return Iterable data type
113
-
114
- raw_schema = (
115
- self._extract_data(
116
- retrieved_record, # type: ignore[arg-type] # Expected that retrieved_record will be only Mapping[str, Any]
117
- self.schema_type_identifier.schema_pointer,
118
- )
119
- if retrieved_record
120
- else []
121
- )
122
-
123
- for property_definition in raw_schema:
124
- key = self._get_key(property_definition, self.schema_type_identifier.key_pointer)
125
- value = self._get_type(
126
- property_definition,
127
- self.schema_type_identifier.type_pointer,
128
- )
129
- properties[key] = value
130
-
131
- return {
132
- "$schema": "http://json-schema.org/draft-07/schema#",
133
- "type": "object",
134
- "properties": properties,
135
- }
136
-
137
- def _get_key(
138
- self,
139
- raw_schema: MutableMapping[str, Any],
140
- field_key_path: List[Union[InterpolatedString, str]],
141
- ) -> str:
142
- """
143
- Extracts the key field from the schema using the specified path.
144
- """
145
- field_key = self._extract_data(raw_schema, field_key_path)
146
- if not isinstance(field_key, str):
147
- raise ValueError(f"Expected key to be a string. Got {field_key}")
148
- return field_key
149
-
150
- def _get_type(
151
- self,
152
- raw_schema: MutableMapping[str, Any],
153
- field_type_path: Optional[List[Union[InterpolatedString, str]]],
154
- ) -> Union[Mapping[str, Any], List[Mapping[str, Any]]]:
155
- """
156
- Determines the JSON Schema type for a field, supporting nullable and combined types.
157
- """
158
- raw_field_type = (
159
- self._extract_data(raw_schema, field_type_path, default="string")
160
- if field_type_path
161
- else "string"
162
- )
163
- mapped_field_type = self._replace_type_if_not_valid(raw_field_type)
164
- if (
165
- isinstance(mapped_field_type, list)
166
- and len(mapped_field_type) == 2
167
- and all(isinstance(item, str) for item in mapped_field_type)
168
- ):
169
- first_type = self._get_airbyte_type(mapped_field_type[0])
170
- second_type = self._get_airbyte_type(mapped_field_type[1])
171
- return {"oneOf": [first_type, second_type]}
172
- elif isinstance(mapped_field_type, str):
173
- return self._get_airbyte_type(mapped_field_type)
174
- else:
175
- raise ValueError(
176
- f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
177
- )
178
-
179
- def _replace_type_if_not_valid(
180
- self, field_type: Union[List[str], str]
181
- ) -> Union[List[str], str]:
182
- """
183
- Replaces a field type if it matches a type mapping in `types_map`.
184
- """
185
- if self.schema_type_identifier.types_mapping:
186
- for types_map in self.schema_type_identifier.types_mapping:
187
- if field_type == types_map.current_type:
188
- return types_map.target_type
189
- return field_type
190
-
191
- @staticmethod
192
- def _get_airbyte_type(field_type: str) -> Mapping[str, Any]:
193
- """
194
- Maps a field type to its corresponding Airbyte type definition.
195
- """
196
- if field_type not in AIRBYTE_DATA_TYPES:
197
- raise ValueError(f"Invalid Airbyte data type: {field_type}")
198
-
199
- return deepcopy(AIRBYTE_DATA_TYPES[field_type])
200
-
201
- def _extract_data(
202
- self,
203
- body: Mapping[str, Any],
204
- extraction_path: Optional[List[Union[InterpolatedString, str]]] = None,
205
- default: Any = None,
206
- ) -> Any:
207
- """
208
- Extracts data from the body based on the provided extraction path.
209
- """
210
-
211
- if not extraction_path:
212
- return body
213
-
214
- path = [
215
- node.eval(self.config) if not isinstance(node, str) else node
216
- for node in extraction_path
217
- ]
218
-
219
- return dpath.get(body, path, default=default) # type: ignore # extracted will be a MutableMapping, given input data structure