airbyte-cdk 6.9.2.dev4100__py3-none-any.whl → 6.9.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -56,8 +56,9 @@ from airbyte_cdk.sources.types import Config, StreamState
56
56
 
57
57
 
58
58
  class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
59
- # By default, we defer to a value of 1 which represents running a connector using the Concurrent CDK engine on only one thread.
60
- SINGLE_THREADED_CONCURRENCY_LEVEL = 1
59
+ # By default, we defer to a value of 2. A value lower than that could cause a PartitionEnqueuer to be stuck in a state of deadlock
60
+ # because it has hit the limit of futures but no partition reader is consuming them.
61
+ _LOWEST_SAFE_CONCURRENCY_LEVEL = 2
61
62
 
62
63
  def __init__(
63
64
  self,
@@ -107,8 +108,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
107
108
  concurrency_level // 2, 1
108
109
  ) # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a deadlock during startup
109
110
  else:
110
- concurrency_level = self.SINGLE_THREADED_CONCURRENCY_LEVEL
111
- initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL
111
+ concurrency_level = self._LOWEST_SAFE_CONCURRENCY_LEVEL
112
+ initial_number_of_partitions_to_generate = self._LOWEST_SAFE_CONCURRENCY_LEVEL // 2
112
113
 
113
114
  self._concurrent_source = ConcurrentSource.create(
114
115
  num_workers=concurrency_level,
@@ -327,7 +327,7 @@ definitions:
327
327
  additionalProperties: true
328
328
  ConcurrencyLevel:
329
329
  title: Concurrency Level
330
- description: Defines the amount of parallelization for the streams that are being synced. The factor of parallelization is how many partitions or streams are synced at the same time. For example, with a concurrency_level of 10, ten streams or partitions of data will processed at the same time.
330
+ description: Defines the amount of parallelization for the streams that are being synced. The factor of parallelization is how many partitions or streams are synced at the same time. For example, with a concurrency_level of 10, ten streams or partitions of data will be processed at the same time. Note that a value of 1 could create deadlock if a stream has a very high number of partitions.
331
331
  type: object
332
332
  required:
333
333
  - default_concurrency
@@ -1221,7 +1221,6 @@ definitions:
1221
1221
  - "$ref": "#/definitions/InlineSchemaLoader"
1222
1222
  - "$ref": "#/definitions/JsonFileSchemaLoader"
1223
1223
  - "$ref": "#/definitions/CustomSchemaLoader"
1224
- - "$ref": "#/definitions/DynamicSchemaLoader"
1225
1224
  # TODO we have moved the transformation to the RecordSelector level in the code but kept this here for
1226
1225
  # compatibility reason. We should eventually move this to align with the code.
1227
1226
  transformations:
@@ -1685,92 +1684,6 @@ definitions:
1685
1684
  $parameters:
1686
1685
  type: object
1687
1686
  additionalProperties: true
1688
- TypesMap:
1689
- title: Types Map
1690
- description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
1691
- type: object
1692
- required:
1693
- - target_type
1694
- - current_type
1695
- properties:
1696
- target_type:
1697
- anyOf:
1698
- - type: string
1699
- - type: array
1700
- items:
1701
- type: string
1702
- current_type:
1703
- anyOf:
1704
- - type: string
1705
- - type: array
1706
- items:
1707
- type: string
1708
- SchemaTypeIdentifier:
1709
- title: Schema Type Identifier
1710
- description: (This component is experimental. Use at your own risk.) Identifies schema details for dynamic schema extraction and processing.
1711
- type: object
1712
- required:
1713
- - key_pointer
1714
- properties:
1715
- type:
1716
- type: string
1717
- enum: [SchemaTypeIdentifier]
1718
- schema_pointer:
1719
- title: Schema Path
1720
- description: List of nested fields defining the schema field path to extract. Defaults to [].
1721
- type: array
1722
- default: []
1723
- items:
1724
- - type: string
1725
- interpolation_context:
1726
- - config
1727
- key_pointer:
1728
- title: Key Path
1729
- description: List of potentially nested fields describing the full path of the field key to extract.
1730
- type: array
1731
- items:
1732
- - type: string
1733
- interpolation_context:
1734
- - config
1735
- type_pointer:
1736
- title: Type Path
1737
- description: List of potentially nested fields describing the full path of the field type to extract.
1738
- type: array
1739
- items:
1740
- - type: string
1741
- interpolation_context:
1742
- - config
1743
- types_mapping:
1744
- type: array
1745
- items:
1746
- - "$ref": "#/definitions/TypesMap"
1747
- $parameters:
1748
- type: object
1749
- additionalProperties: true
1750
- DynamicSchemaLoader:
1751
- title: Dynamic Schema Loader
1752
- description: (This component is experimental. Use at your own risk.) Loads a schema by extracting data from retrieved records.
1753
- type: object
1754
- required:
1755
- - type
1756
- - retriever
1757
- - schema_type_identifier
1758
- properties:
1759
- type:
1760
- type: string
1761
- enum: [DynamicSchemaLoader]
1762
- retriever:
1763
- title: Retriever
1764
- description: Component used to coordinate how records are extracted across stream slices and request pages.
1765
- anyOf:
1766
- - "$ref": "#/definitions/AsyncRetriever"
1767
- - "$ref": "#/definitions/CustomRetriever"
1768
- - "$ref": "#/definitions/SimpleRetriever"
1769
- schema_type_identifier:
1770
- "$ref": "#/definitions/SchemaTypeIdentifier"
1771
- $parameters:
1772
- type: object
1773
- additionalProperties: true
1774
1687
  InlineSchemaLoader:
1775
1688
  title: Inline Schema Loader
1776
1689
  description: Loads a schema that is defined directly in the manifest file.
@@ -4,7 +4,7 @@
4
4
 
5
5
  import ast
6
6
  from functools import cache
7
- from typing import Any, Mapping, Optional, Tuple, Type
7
+ from typing import Any, Mapping, Optional, Set, Tuple, Type
8
8
 
9
9
  from jinja2 import meta
10
10
  from jinja2.environment import Template
@@ -27,7 +27,35 @@ class StreamPartitionAccessEnvironment(SandboxedEnvironment):
27
27
  def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool:
28
28
  if attr in ["_partition"]:
29
29
  return True
30
- return super().is_safe_attribute(obj, attr, value)
30
+ return super().is_safe_attribute(obj, attr, value) # type: ignore # for some reason, mypy says 'Returning Any from function declared to return "bool"'
31
+
32
+
33
+ # These aliases are used to deprecate existing keywords without breaking all existing connectors.
34
+ _ALIASES = {
35
+ "stream_interval": "stream_slice", # Use stream_interval to access incremental_sync values
36
+ "stream_partition": "stream_slice", # Use stream_partition to access partition router's values
37
+ }
38
+
39
+ # These extensions are not installed so they're not currently a problem,
40
+ # but we're still explicitly removing them from the jinja context.
41
+ # At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
42
+ _RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"] # Adds support for break continue in loops
43
+
44
+ # By default, these Python builtin functions are available in the Jinja context.
45
+ # We explicitly remove them because of the potential security risk.
46
+ # Please add a unit test to test_jinja.py when adding a restriction.
47
+ _RESTRICTED_BUILTIN_FUNCTIONS = [
48
+ "range"
49
+ ] # The range function can cause very expensive computations
50
+
51
+ _ENVIRONMENT = StreamPartitionAccessEnvironment()
52
+ _ENVIRONMENT.filters.update(**filters)
53
+ _ENVIRONMENT.globals.update(**macros)
54
+
55
+ for extension in _RESTRICTED_EXTENSIONS:
56
+ _ENVIRONMENT.extensions.pop(extension, None)
57
+ for builtin in _RESTRICTED_BUILTIN_FUNCTIONS:
58
+ _ENVIRONMENT.globals.pop(builtin, None)
31
59
 
32
60
 
33
61
  class JinjaInterpolation(Interpolation):
@@ -48,34 +76,6 @@ class JinjaInterpolation(Interpolation):
48
76
  Additional information on jinja templating can be found at https://jinja.palletsprojects.com/en/3.1.x/templates/#
49
77
  """
50
78
 
51
- # These aliases are used to deprecate existing keywords without breaking all existing connectors.
52
- ALIASES = {
53
- "stream_interval": "stream_slice", # Use stream_interval to access incremental_sync values
54
- "stream_partition": "stream_slice", # Use stream_partition to access partition router's values
55
- }
56
-
57
- # These extensions are not installed so they're not currently a problem,
58
- # but we're still explicitely removing them from the jinja context.
59
- # At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
60
- RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"] # Adds support for break continue in loops
61
-
62
- # By default, these Python builtin functions are available in the Jinja context.
63
- # We explicitely remove them because of the potential security risk.
64
- # Please add a unit test to test_jinja.py when adding a restriction.
65
- RESTRICTED_BUILTIN_FUNCTIONS = [
66
- "range"
67
- ] # The range function can cause very expensive computations
68
-
69
- def __init__(self) -> None:
70
- self._environment = StreamPartitionAccessEnvironment()
71
- self._environment.filters.update(**filters)
72
- self._environment.globals.update(**macros)
73
-
74
- for extension in self.RESTRICTED_EXTENSIONS:
75
- self._environment.extensions.pop(extension, None)
76
- for builtin in self.RESTRICTED_BUILTIN_FUNCTIONS:
77
- self._environment.globals.pop(builtin, None)
78
-
79
79
  def eval(
80
80
  self,
81
81
  input_str: str,
@@ -86,7 +86,7 @@ class JinjaInterpolation(Interpolation):
86
86
  ) -> Any:
87
87
  context = {"config": config, **additional_parameters}
88
88
 
89
- for alias, equivalent in self.ALIASES.items():
89
+ for alias, equivalent in _ALIASES.items():
90
90
  if alias in context:
91
91
  # This is unexpected. We could ignore or log a warning, but failing loudly should result in fewer surprises
92
92
  raise ValueError(
@@ -105,6 +105,7 @@ class JinjaInterpolation(Interpolation):
105
105
  raise Exception(f"Expected a string, got {input_str}")
106
106
  except UndefinedError:
107
107
  pass
108
+
108
109
  # If result is empty or resulted in an undefined error, evaluate and return the default string
109
110
  return self._literal_eval(self._eval(default, context), valid_types)
110
111
 
@@ -132,16 +133,16 @@ class JinjaInterpolation(Interpolation):
132
133
  return s
133
134
 
134
135
  @cache
135
- def _find_undeclared_variables(self, s: Optional[str]) -> set[str]:
136
+ def _find_undeclared_variables(self, s: Optional[str]) -> Set[str]:
136
137
  """
137
138
  Find undeclared variables and cache them
138
139
  """
139
- ast = self._environment.parse(s) # type: ignore # parse is able to handle None
140
+ ast = _ENVIRONMENT.parse(s) # type: ignore # parse is able to handle None
140
141
  return meta.find_undeclared_variables(ast)
141
142
 
142
143
  @cache
143
- def _compile(self, s: Optional[str]) -> Template:
144
+ def _compile(self, s: str) -> Template:
144
145
  """
145
146
  We must cache the Jinja Template ourselves because we're using `from_string` instead of a template loader
146
147
  """
147
- return self._environment.from_string(s) # type: ignore [arg-type] # Expected `str | Template` but passed `str | None`
148
+ return _ENVIRONMENT.from_string(s)
@@ -650,32 +650,6 @@ class HttpResponseFilter(BaseModel):
650
650
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
651
651
 
652
652
 
653
- class TypesMap(BaseModel):
654
- target_type: Union[str, List[str]]
655
- current_type: Union[str, List[str]]
656
-
657
-
658
- class SchemaTypeIdentifier(BaseModel):
659
- type: Optional[Literal["SchemaTypeIdentifier"]] = None
660
- schema_pointer: Optional[List[str]] = Field(
661
- [],
662
- description="List of nested fields defining the schema field path to extract. Defaults to [].",
663
- title="Schema Path",
664
- )
665
- key_pointer: List[str] = Field(
666
- ...,
667
- description="List of potentially nested fields describing the full path of the field key to extract.",
668
- title="Key Path",
669
- )
670
- type_pointer: Optional[List[str]] = Field(
671
- None,
672
- description="List of potentially nested fields describing the full path of the field type to extract.",
673
- title="Type Path",
674
- )
675
- types_mapping: Optional[List[TypesMap]] = None
676
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
677
-
678
-
679
653
  class InlineSchemaLoader(BaseModel):
680
654
  type: Literal["InlineSchemaLoader"]
681
655
  schema_: Optional[Dict[str, Any]] = Field(
@@ -848,13 +822,13 @@ class OauthConnectorInputSpecification(BaseModel):
848
822
  )
849
823
  extract_output: List[str] = Field(
850
824
  ...,
851
- description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.",
825
+ description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config. ",
852
826
  examples=[{"extract_output": ["access_token", "refresh_token", "other_field"]}],
853
827
  title="DeclarativeOAuth Extract Output",
854
828
  )
855
829
  state: Optional[State] = Field(
856
830
  None,
857
- description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity.",
831
+ description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity. ",
858
832
  examples=[{"state": {"min": 7, "max": 128}}],
859
833
  title="(Optional) DeclarativeOAuth Configurable State Query Param",
860
834
  )
@@ -878,13 +852,13 @@ class OauthConnectorInputSpecification(BaseModel):
878
852
  )
879
853
  state_key: Optional[str] = Field(
880
854
  None,
881
- description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.",
855
+ description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider. ",
882
856
  examples=[{"state_key": "my_custom_state_key_key_name"}],
883
857
  title="(Optional) DeclarativeOAuth State Key Override",
884
858
  )
885
859
  auth_code_key: Optional[str] = Field(
886
860
  None,
887
- description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.",
861
+ description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider. ",
888
862
  examples=[{"auth_code_key": "my_custom_auth_code_key_name"}],
889
863
  title="(Optional) DeclarativeOAuth Auth Code Key Override",
890
864
  )
@@ -1635,7 +1609,7 @@ class DeclarativeStream(BaseModel):
1635
1609
  primary_key: Optional[PrimaryKey] = Field(
1636
1610
  "", description="The primary key of the stream.", title="Primary Key"
1637
1611
  )
1638
- schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader, DynamicSchemaLoader]] = (
1612
+ schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader]] = (
1639
1613
  Field(
1640
1614
  None,
1641
1615
  description="Component used to retrieve the schema for the current stream.",
@@ -1800,17 +1774,6 @@ class HttpRequester(BaseModel):
1800
1774
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1801
1775
 
1802
1776
 
1803
- class DynamicSchemaLoader(BaseModel):
1804
- type: Literal["DynamicSchemaLoader"]
1805
- retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
1806
- ...,
1807
- description="Component used to coordinate how records are extracted across stream slices and request pages.",
1808
- title="Retriever",
1809
- )
1810
- schema_type_identifier: SchemaTypeIdentifier
1811
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1812
-
1813
-
1814
1777
  class ParentStreamConfig(BaseModel):
1815
1778
  type: Literal["ParentStreamConfig"]
1816
1779
  parent_key: str = Field(
@@ -2018,6 +1981,5 @@ DeclarativeSource2.update_forward_refs()
2018
1981
  SelectiveAuthenticator.update_forward_refs()
2019
1982
  DeclarativeStream.update_forward_refs()
2020
1983
  SessionTokenAuthenticator.update_forward_refs()
2021
- DynamicSchemaLoader.update_forward_refs()
2022
1984
  SimpleRetriever.update_forward_refs()
2023
1985
  AsyncRetriever.update_forward_refs()
@@ -64,10 +64,6 @@ DEFAULT_MODEL_TYPES: Mapping[str, str] = {
64
64
  "AddFields.fields": "AddedFieldDefinition",
65
65
  # CustomPartitionRouter
66
66
  "CustomPartitionRouter.parent_stream_configs": "ParentStreamConfig",
67
- # DynamicSchemaLoader
68
- "DynamicSchemaLoader.retriever": "SimpleRetriever",
69
- # SchemaTypeIdentifier
70
- "SchemaTypeIdentifier.types_map": "TypesMap",
71
67
  }
72
68
 
73
69
  # We retain a separate registry for custom components to automatically insert the type if it is missing. This is intended to
@@ -188,9 +188,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
188
188
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
189
189
  DpathExtractor as DpathExtractorModel,
190
190
  )
191
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
192
- DynamicSchemaLoader as DynamicSchemaLoaderModel,
193
- )
194
191
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
195
192
  ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
196
193
  )
@@ -281,9 +278,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
281
278
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
282
279
  ResponseToFileExtractor as ResponseToFileExtractorModel,
283
280
  )
284
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
285
- SchemaTypeIdentifier as SchemaTypeIdentifierModel,
286
- )
287
281
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
288
282
  SelectiveAuthenticator as SelectiveAuthenticatorModel,
289
283
  )
@@ -297,9 +291,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
297
291
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
298
292
  SubstreamPartitionRouter as SubstreamPartitionRouterModel,
299
293
  )
300
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
301
- TypesMap as TypesMapModel,
302
- )
303
294
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
304
295
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
305
296
  WaitTimeFromHeader as WaitTimeFromHeaderModel,
@@ -365,11 +356,8 @@ from airbyte_cdk.sources.declarative.retrievers import (
365
356
  )
366
357
  from airbyte_cdk.sources.declarative.schema import (
367
358
  DefaultSchemaLoader,
368
- DynamicSchemaLoader,
369
359
  InlineSchemaLoader,
370
360
  JsonFileSchemaLoader,
371
- SchemaTypeIdentifier,
372
- TypesMap,
373
361
  )
374
362
  from airbyte_cdk.sources.declarative.spec import Spec
375
363
  from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
@@ -467,9 +455,6 @@ class ModelToComponentFactory:
467
455
  IterableDecoderModel: self.create_iterable_decoder,
468
456
  XmlDecoderModel: self.create_xml_decoder,
469
457
  JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
470
- DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
471
- SchemaTypeIdentifierModel: self.create_schema_type_identifier,
472
- TypesMapModel: self.create_types_map,
473
458
  JwtAuthenticatorModel: self.create_jwt_authenticator,
474
459
  LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
475
460
  ListPartitionRouterModel: self.create_list_partition_router,
@@ -1589,63 +1574,6 @@ class ModelToComponentFactory:
1589
1574
  ) -> InlineSchemaLoader:
1590
1575
  return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
1591
1576
 
1592
- @staticmethod
1593
- def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
1594
- return TypesMap(target_type=model.target_type, current_type=model.current_type)
1595
-
1596
- def create_schema_type_identifier(
1597
- self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
1598
- ) -> SchemaTypeIdentifier:
1599
- types_mapping = []
1600
- if model.types_mapping:
1601
- types_mapping.extend(
1602
- [
1603
- self._create_component_from_model(types_map, config=config)
1604
- for types_map in model.types_mapping
1605
- ]
1606
- )
1607
- model_schema_pointer: List[Union[InterpolatedString, str]] = (
1608
- [x for x in model.schema_pointer] if model.schema_pointer else []
1609
- )
1610
- model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer]
1611
- model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
1612
- [x for x in model.type_pointer] if model.type_pointer else None
1613
- )
1614
-
1615
- return SchemaTypeIdentifier(
1616
- schema_pointer=model_schema_pointer,
1617
- key_pointer=model_key_pointer,
1618
- type_pointer=model_type_pointer,
1619
- types_mapping=types_mapping,
1620
- parameters=model.parameters or {},
1621
- )
1622
-
1623
- def create_dynamic_schema_loader(
1624
- self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
1625
- ) -> DynamicSchemaLoader:
1626
- stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
1627
- combined_slicers = self._build_resumable_cursor_from_paginator(
1628
- model.retriever, stream_slicer
1629
- )
1630
-
1631
- retriever = self._create_component_from_model(
1632
- model=model.retriever,
1633
- config=config,
1634
- name="",
1635
- primary_key=None,
1636
- stream_slicer=combined_slicers,
1637
- transformations=[],
1638
- )
1639
- schema_type_identifier = self._create_component_from_model(
1640
- model.schema_type_identifier, config=config, parameters=model.parameters or {}
1641
- )
1642
- return DynamicSchemaLoader(
1643
- retriever=retriever,
1644
- config=config,
1645
- schema_type_identifier=schema_type_identifier,
1646
- parameters=model.parameters or {},
1647
- )
1648
-
1649
1577
  @staticmethod
1650
1578
  def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
1651
1579
  return JsonDecoder(parameters={})
@@ -6,6 +6,5 @@ from airbyte_cdk.sources.declarative.schema.default_schema_loader import Default
6
6
  from airbyte_cdk.sources.declarative.schema.inline_schema_loader import InlineSchemaLoader
7
7
  from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
8
8
  from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
9
- from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import DynamicSchemaLoader, TypesMap, SchemaTypeIdentifier
10
9
 
11
- __all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader", "DynamicSchemaLoader", "TypesMap", "SchemaTypeIdentifier"]
10
+ __all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader"]
@@ -138,12 +138,22 @@ class HttpClient:
138
138
  cache_dir = os.getenv(ENV_REQUEST_CACHE_PATH)
139
139
  # Use in-memory cache if cache_dir is not set
140
140
  # This is a non-obvious interface, but it ensures we don't write sql files when running unit tests
141
- if cache_dir:
142
- sqlite_path = str(Path(cache_dir) / self.cache_filename)
143
- else:
144
- sqlite_path = "file::memory:?cache=shared"
141
+ # Use in-memory cache if cache_dir is not set
142
+ # This is a non-obvious interface, but it ensures we don't write sql files when running unit tests
143
+ sqlite_path = (
144
+ str(Path(cache_dir) / self.cache_filename)
145
+ if cache_dir
146
+ else "file::memory:?cache=shared"
147
+ )
148
+ # By using `PRAGMA synchronous=OFF` and `PRAGMA journal_mode=WAL`, we reduce the possible occurrences of `database table is locked` errors.
149
+ # Note that those were blindly added at the same time and one or the other might be sufficient to prevent the issues but we have seen good results with both. Feel free to revisit given more information.
150
+ # There are strong signals that `fast_save` might create problems but if the sync crashes, we start back from the beginning in terms of sqlite anyway so the impact should be minimal. Signals are:
151
+ # * https://github.com/requests-cache/requests-cache/commit/7fa89ffda300331c37d8fad7f773348a3b5b0236#diff-f43db4a5edf931647c32dec28ea7557aae4cae8444af4b26c8ecbe88d8c925aaR238
152
+ # * https://github.com/requests-cache/requests-cache/commit/7fa89ffda300331c37d8fad7f773348a3b5b0236#diff-2e7f95b7d7be270ff1a8118f817ea3e6663cdad273592e536a116c24e6d23c18R164-R168
153
+ # * `If the application running SQLite crashes, the data will be safe, but the database [might become corrupted](https://www.sqlite.org/howtocorrupt.html#cfgerr) if the operating system crashes or the computer loses power before that data has been written to the disk surface.` in [this description](https://www.sqlite.org/pragma.html#pragma_synchronous).
154
+ backend = requests_cache.SQLiteCache(sqlite_path, fast_save=True, wal=True)
145
155
  return CachedLimiterSession(
146
- sqlite_path, backend="sqlite", api_budget=self._api_budget, match_headers=True
156
+ sqlite_path, backend=backend, api_budget=self._api_budget, match_headers=True
147
157
  )
148
158
  else:
149
159
  return LimiterSession(api_budget=self._api_budget)
@@ -0,0 +1,60 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+
4
+ import importlib.util
5
+ from pathlib import Path
6
+ from types import ModuleType
7
+ from typing import Optional
8
+
9
+ import pytest
10
+
11
+ # The following fixtures are used to load a manifest-only connector's components module and manifest file.
12
+ # They can be accessed from any test file in the connector's unit_tests directory by importing them as follows:
13
+
14
+ # from airbyte_cdk.test.utils.manifest_only_fixtures import components_module, connector_dir, manifest_path
15
+
16
+ # individual components can then be referenced as: components_module.<CustomComponentClass>
17
+
18
+
19
+ @pytest.fixture(scope="session")
20
+ def connector_dir(request: pytest.FixtureRequest) -> Path:
21
+ """Return the connector's root directory."""
22
+
23
+ current_dir = Path(request.config.invocation_params.dir)
24
+
25
+ # If the tests are run locally from the connector's unit_tests directory, return the parent (connector) directory
26
+ if current_dir.name == "unit_tests":
27
+ return current_dir.parent
28
+ # In CI, the tests are run from the connector directory itself
29
+ return current_dir
30
+
31
+
32
+ @pytest.fixture(scope="session")
33
+ def components_module(connector_dir: Path) -> Optional[ModuleType]:
34
+ """Load and return the components module from the connector directory.
35
+
36
+ This assumes the components module is located at <connector_dir>/components.py.
37
+ """
38
+ components_path = connector_dir / "components.py"
39
+ if not components_path.exists():
40
+ return None
41
+
42
+ components_spec = importlib.util.spec_from_file_location("components", components_path)
43
+ if components_spec is None:
44
+ return None
45
+
46
+ components_module = importlib.util.module_from_spec(components_spec)
47
+ if components_spec.loader is None:
48
+ return None
49
+
50
+ components_spec.loader.exec_module(components_module)
51
+ return components_module
52
+
53
+
54
+ @pytest.fixture(scope="session")
55
+ def manifest_path(connector_dir: Path) -> Path:
56
+ """Return the path to the connector's manifest file."""
57
+ path = connector_dir / "manifest.yaml"
58
+ if not path.exists():
59
+ raise FileNotFoundError(f"Manifest file not found at {path}")
60
+ return path
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.9.2.dev4100
3
+ Version: 6.9.3rc1
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -62,11 +62,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
62
62
  airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
63
63
  airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
64
64
  airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
65
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=9OXOFzG5PBL_MHEJg4ETE0TXfXeuhvv-at38AN3wWEM,23323
65
+ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=PxP4p2686wsf1gjsumGKnh2o2Jjnrqg8QLGijEIrp-A,23412
66
66
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
67
67
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
68
68
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
69
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=rBwFwaPXRtHTDMsjxjX2VDksJ1EroS4qYucWcDHfjHc,126897
69
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=Z1v19wOXYpuffvcmZ5TZyU4kSCFyt3Hba7qfY-2o46U,124229
70
70
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
71
71
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
72
72
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=hNlhaB5FjNC6IfJyglj5ZJWkYD2nEAukMDmzRz5PC6o,671
@@ -97,19 +97,19 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py,sha256=UrF
97
97
  airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py,sha256=i2L0gREX8nHA-pKokdVqwBf4aJgWP71KOxIABj_DHcY,1857
98
98
  airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZnZ_hB7rvBSZxG9s0RSrzsOkDWbBY0_P6qu5lEfc,3212
99
99
  airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
100
- airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=gcihTEnfD_6sUivxOomoY5r7VMAGqVVnK_HEsid9Y5k,6605
100
+ airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=BtsY_jtT4MihFqeQgc05HXj3Ndt-e2ESQgGwbg3Sdxc,6430
101
101
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=jf24RK-1fBhTYDpcGEakZtGNNJfG5NS8CCF5bEgNmRo,3977
102
102
  airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=-v3GvuTVHwUonrfUwDj3wYKaZjX6hTyKmMBRgEzj-j0,15201
103
103
  airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
104
104
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
105
105
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
106
106
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
107
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=cQk4epWUcGRA2AC0VwwuBzer3kwjtSO4GHWfB0YHECs,89893
107
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=6iAzpGmUrhwEUQcCL5bW-FXuLXPMeFqs_GR4B1rS3ZE,88511
108
108
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
109
109
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
110
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=KflzFl_ZKRSW9XkH16sfr5-9HnAAI0T5s8CVBrJK2Ao,8958
110
+ airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=0jfi-ogL-rOVORTIYnu64wNfh1L8fYaLVDWzJ2zGdi8,8799
111
111
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
112
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=kGU0re3R-Ujtn6Gp96KpEVRHJB5P-B8sRa0aMR_jDdk,102536
112
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=lS2oKfkND54u66hocc2BycS-AIYIbkn4npq6CFRNokc,99573
113
113
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=1NjaZoGAIefvWwj6wx-LOKIXXWS-UnBlZFnuR7y6uYA,745
114
114
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
115
115
  airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
@@ -159,9 +159,8 @@ airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=FVQpUGVwp2Gibk4gp0
159
159
  airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=WDFnjrXLz3-YEjFhmlMkWAn9AJvnZ0mk9FyC8DAhEYk,4976
160
160
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
161
161
  airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=N4swGw5mfuTXJ2R7AKX18CHzizsr69pXwt5uSHLPi48,24172
162
- airbyte_cdk/sources/declarative/schema/__init__.py,sha256=Io9vninzlEjQ2uFmWklxfwNM0cXfljtzOz5zL1OVyT4,701
162
+ airbyte_cdk/sources/declarative/schema/__init__.py,sha256=ul8L9S0-__AMEdbCLHBq-PMEeA928NVp8BB83BMotfU,517
163
163
  airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
164
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=tP5DIEMn-k2JshWeXmo53ZEudDAVb4AJ50Z5tfme_ZU,8063
165
164
  airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
166
165
  airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py,sha256=5Wl-fqW-pVf_dxJ4yGHMAFfC4JjKHYJhqFJT1xA57F4,4177
167
166
  airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLnrDLxf1PJKdUqvQq2RVnAOAzNSY,379
@@ -277,7 +276,7 @@ airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py,sha
277
276
  airbyte_cdk/sources/streams/http/error_handlers/response_models.py,sha256=xGIVELBFY0TmH9aUq1ikoqJz8oHLr6di2JLvKWVEO-s,2236
278
277
  airbyte_cdk/sources/streams/http/exceptions.py,sha256=njC7MlMJoFYcSGz4mIp6-bqLFTr6vC8ej25X0oSeyjE,1824
279
278
  airbyte_cdk/sources/streams/http/http.py,sha256=JAMpiTdS9HFNOlwayWNvQdxoqs2rpW9wdYlhFHv_1Q4,28496
280
- airbyte_cdk/sources/streams/http/http_client.py,sha256=Jqmbd3jL8jjnOfA1325-cpG3nE80YDMDwyxPZ08D7wo,21341
279
+ airbyte_cdk/sources/streams/http/http_client.py,sha256=dyNrbcahEnDfGTrhqkr1XdfPiuVVRlKOdj-TJ5WRVrk,22923
281
280
  airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
282
281
  airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
283
282
  airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=nxI94yJ3bGfpDO8RR3QvOJ-PSW0n9CElSAkgl5ae80Y,10321
@@ -317,6 +316,7 @@ airbyte_cdk/test/state_builder.py,sha256=kLPql9lNzUJaBg5YYRLJlY_Hy5JLHJDVyKPMZMo
317
316
  airbyte_cdk/test/utils/__init__.py,sha256=Hu-1XT2KDoYjDF7-_ziDwv5bY3PueGjANOCbzeOegDg,57
318
317
  airbyte_cdk/test/utils/data.py,sha256=CkCR1_-rujWNmPXFR1IXTMwx1rAl06wAyIKWpDcN02w,820
319
318
  airbyte_cdk/test/utils/http_mocking.py,sha256=F2hpm2q4ijojQN5u2XtgTAp8aNgHgJ64eZNkZ9BW0ig,550
319
+ airbyte_cdk/test/utils/manifest_only_fixtures.py,sha256=zpOL9UkuXprXHF-wAiBeH6CLpoLEaF3Vh6_9rcItj3Y,2135
320
320
  airbyte_cdk/test/utils/reading.py,sha256=SOTDYlps6Te9KumfTJ3vVDSm9EUXhvKtE8aD7gvdPlg,965
321
321
  airbyte_cdk/utils/__init__.py,sha256=gHjOCoUkolS_nKtgFSudXUY-ObK2vUo6aNQLvW7o8q8,347
322
322
  airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=wEtRnl5KRhN6eLJwrDrC4FJjyqt_4vkA1F65mdl8c24,3142
@@ -334,8 +334,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
334
334
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
335
335
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
336
336
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
337
- airbyte_cdk-6.9.2.dev4100.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
338
- airbyte_cdk-6.9.2.dev4100.dist-info/METADATA,sha256=MlRINdXtRATUHEZkZ61QvvV0_c4u0j962cpT1HXSul0,5957
339
- airbyte_cdk-6.9.2.dev4100.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
340
- airbyte_cdk-6.9.2.dev4100.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
341
- airbyte_cdk-6.9.2.dev4100.dist-info/RECORD,,
337
+ airbyte_cdk-6.9.3rc1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
338
+ airbyte_cdk-6.9.3rc1.dist-info/METADATA,sha256=t_8bgC-oSwCBbpuCrYoLrS-mw15JcqfJAw0Ajv9GXhY,5952
339
+ airbyte_cdk-6.9.3rc1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
340
+ airbyte_cdk-6.9.3rc1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
341
+ airbyte_cdk-6.9.3rc1.dist-info/RECORD,,
@@ -1,219 +0,0 @@
1
- #
2
- # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
-
6
- from copy import deepcopy
7
- from dataclasses import InitVar, dataclass
8
- from typing import Any, List, Mapping, MutableMapping, Optional, Union
9
-
10
- import dpath
11
- from typing_extensions import deprecated
12
-
13
- from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
14
- from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
15
- from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
16
- from airbyte_cdk.sources.source import ExperimentalClassWarning
17
- from airbyte_cdk.sources.types import Config
18
-
19
- AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
20
- "string": {"type": ["null", "string"]},
21
- "boolean": {"type": ["null", "boolean"]},
22
- "date": {"type": ["null", "string"], "format": "date"},
23
- "timestamp_without_timezone": {
24
- "type": ["null", "string"],
25
- "format": "date-time",
26
- "airbyte_type": "timestamp_without_timezone",
27
- },
28
- "timestamp_with_timezone": {"type": ["null", "string"], "format": "date-time"},
29
- "time_without_timezone": {
30
- "type": ["null", "string"],
31
- "format": "time",
32
- "airbyte_type": "time_without_timezone",
33
- },
34
- "time_with_timezone": {
35
- "type": ["null", "string"],
36
- "format": "time",
37
- "airbyte_type": "time_with_timezone",
38
- },
39
- "integer": {"type": ["null", "integer"]},
40
- "number": {"type": ["null", "number"]},
41
- "array": {"type": ["null", "array"]},
42
- "object": {"type": ["null", "object"]},
43
- }
44
-
45
-
46
- @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
47
- @dataclass(frozen=True)
48
- class TypesMap:
49
- """
50
- Represents a mapping between a current type and its corresponding target type.
51
- """
52
-
53
- target_type: Union[List[str], str]
54
- current_type: Union[List[str], str]
55
-
56
-
57
- @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
58
- @dataclass
59
- class SchemaTypeIdentifier:
60
- """
61
- Identifies schema details for dynamic schema extraction and processing.
62
- """
63
-
64
- key_pointer: List[Union[InterpolatedString, str]]
65
- parameters: InitVar[Mapping[str, Any]]
66
- type_pointer: Optional[List[Union[InterpolatedString, str]]] = None
67
- types_mapping: Optional[List[TypesMap]] = None
68
- schema_pointer: Optional[List[Union[InterpolatedString, str]]] = None
69
-
70
- def __post_init__(self, parameters: Mapping[str, Any]) -> None:
71
- self.schema_pointer = (
72
- self._update_pointer(self.schema_pointer, parameters) if self.schema_pointer else []
73
- ) # type: ignore[assignment] # This is a required field in the model
74
- self.key_pointer = self._update_pointer(self.key_pointer, parameters) # type: ignore[assignment] # This is a required field in the model
75
- self.type_pointer = (
76
- self._update_pointer(self.type_pointer, parameters) if self.type_pointer else None
77
- )
78
-
79
- @staticmethod
80
- def _update_pointer(
81
- pointer: Optional[List[Union[InterpolatedString, str]]], parameters: Mapping[str, Any]
82
- ) -> Optional[List[Union[InterpolatedString, str]]]:
83
- return (
84
- [
85
- InterpolatedString.create(path, parameters=parameters)
86
- if isinstance(path, str)
87
- else path
88
- for path in pointer
89
- ]
90
- if pointer
91
- else None
92
- )
93
-
94
-
95
- @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
96
- @dataclass
97
- class DynamicSchemaLoader(SchemaLoader):
98
- """
99
- Dynamically loads a JSON Schema by extracting data from retrieved records.
100
- """
101
-
102
- retriever: Retriever
103
- config: Config
104
- parameters: InitVar[Mapping[str, Any]]
105
- schema_type_identifier: SchemaTypeIdentifier
106
-
107
- def get_json_schema(self) -> Mapping[str, Any]:
108
- """
109
- Constructs a JSON Schema based on retrieved data.
110
- """
111
- properties = {}
112
- retrieved_record = next(self.retriever.read_records({}), None) # type: ignore[call-overload] # read_records return Iterable data type
113
-
114
- raw_schema = (
115
- self._extract_data(
116
- retrieved_record, # type: ignore[arg-type] # Expected that retrieved_record will be only Mapping[str, Any]
117
- self.schema_type_identifier.schema_pointer,
118
- )
119
- if retrieved_record
120
- else []
121
- )
122
-
123
- for property_definition in raw_schema:
124
- key = self._get_key(property_definition, self.schema_type_identifier.key_pointer)
125
- value = self._get_type(
126
- property_definition,
127
- self.schema_type_identifier.type_pointer,
128
- )
129
- properties[key] = value
130
-
131
- return {
132
- "$schema": "http://json-schema.org/draft-07/schema#",
133
- "type": "object",
134
- "properties": properties,
135
- }
136
-
137
- def _get_key(
138
- self,
139
- raw_schema: MutableMapping[str, Any],
140
- field_key_path: List[Union[InterpolatedString, str]],
141
- ) -> str:
142
- """
143
- Extracts the key field from the schema using the specified path.
144
- """
145
- field_key = self._extract_data(raw_schema, field_key_path)
146
- if not isinstance(field_key, str):
147
- raise ValueError(f"Expected key to be a string. Got {field_key}")
148
- return field_key
149
-
150
- def _get_type(
151
- self,
152
- raw_schema: MutableMapping[str, Any],
153
- field_type_path: Optional[List[Union[InterpolatedString, str]]],
154
- ) -> Union[Mapping[str, Any], List[Mapping[str, Any]]]:
155
- """
156
- Determines the JSON Schema type for a field, supporting nullable and combined types.
157
- """
158
- raw_field_type = (
159
- self._extract_data(raw_schema, field_type_path, default="string")
160
- if field_type_path
161
- else "string"
162
- )
163
- mapped_field_type = self._replace_type_if_not_valid(raw_field_type)
164
- if (
165
- isinstance(mapped_field_type, list)
166
- and len(mapped_field_type) == 2
167
- and all(isinstance(item, str) for item in mapped_field_type)
168
- ):
169
- first_type = self._get_airbyte_type(mapped_field_type[0])
170
- second_type = self._get_airbyte_type(mapped_field_type[1])
171
- return {"oneOf": [first_type, second_type]}
172
- elif isinstance(mapped_field_type, str):
173
- return self._get_airbyte_type(mapped_field_type)
174
- else:
175
- raise ValueError(
176
- f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
177
- )
178
-
179
- def _replace_type_if_not_valid(
180
- self, field_type: Union[List[str], str]
181
- ) -> Union[List[str], str]:
182
- """
183
- Replaces a field type if it matches the `current_type` of an entry in `types_mapping`; otherwise returns it unchanged.
184
- """
185
- if self.schema_type_identifier.types_mapping:
186
- for types_map in self.schema_type_identifier.types_mapping:
187
- if field_type == types_map.current_type:
188
- return types_map.target_type
189
- return field_type
190
-
191
- @staticmethod
192
- def _get_airbyte_type(field_type: str) -> Mapping[str, Any]:
193
- """
194
- Maps a field type to its corresponding Airbyte type definition.
195
- """
196
- if field_type not in AIRBYTE_DATA_TYPES:
197
- raise ValueError(f"Invalid Airbyte data type: {field_type}")
198
-
199
- return deepcopy(AIRBYTE_DATA_TYPES[field_type])
200
-
201
- def _extract_data(
202
- self,
203
- body: Mapping[str, Any],
204
- extraction_path: Optional[List[Union[InterpolatedString, str]]] = None,
205
- default: Any = None,
206
- ) -> Any:
207
- """
208
- Extracts data from the body based on the provided extraction path.
209
- """
210
-
211
- if not extraction_path:
212
- return body
213
-
214
- path = [
215
- node.eval(self.config) if not isinstance(node, str) else node
216
- for node in extraction_path
217
- ]
218
-
219
- return dpath.get(body, path, default=default) # type: ignore # extracted will be a MutableMapping, given input data structure