airbyte-cdk 6.12.1.dev0__py3-none-any.whl → 6.12.3__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
@@ -197,6 +197,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
      ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
  )
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+     FlattenFields as FlattenFieldsModel,
+ )
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
      GzipJsonDecoder as GzipJsonDecoderModel,
  )
@@ -236,6 +239,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
      KeysToLower as KeysToLowerModel,
  )
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+     KeysToSnakeCase as KeysToSnakeCaseModel,
+ )
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
      LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
  )
@@ -323,6 +329,9 @@ from airbyte_cdk.sources.declarative.partition_routers import (
      SinglePartitionRouter,
      SubstreamPartitionRouter,
  )
+ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
+     AsyncJobPartitionRouter,
+ )
  from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
      ParentStreamConfig,
  )
@@ -387,9 +396,15 @@ from airbyte_cdk.sources.declarative.transformations import (
      RemoveFields,
  )
  from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
+ from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
+     FlattenFields,
+ )
  from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
      KeysToLowerTransformation,
  )
+ from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
+     KeysToSnakeCaseTransformation,
+ )
  from airbyte_cdk.sources.message import (
      InMemoryMessageRepository,
      LogAppenderMessageRepositoryDecorator,
@@ -472,6 +487,8 @@ class ModelToComponentFactory:
              JsonlDecoderModel: self.create_jsonl_decoder,
              GzipJsonDecoderModel: self.create_gzipjson_decoder,
              KeysToLowerModel: self.create_keys_to_lower_transformation,
+             KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
+             FlattenFieldsModel: self.create_flatten_fields,
              IterableDecoderModel: self.create_iterable_decoder,
              XmlDecoderModel: self.create_xml_decoder,
              JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
@@ -587,6 +604,16 @@ class ModelToComponentFactory:
      ) -> KeysToLowerTransformation:
          return KeysToLowerTransformation()

+     def create_keys_to_snake_transformation(
+         self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
+     ) -> KeysToSnakeCaseTransformation:
+         return KeysToSnakeCaseTransformation()
+
+     def create_flatten_fields(
+         self, model: FlattenFieldsModel, config: Config, **kwargs: Any
+     ) -> FlattenFields:
+         return FlattenFields()
+
      @staticmethod
      def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
          if not value_type:
@@ -1638,6 +1665,13 @@ class ModelToComponentFactory:
              model.retriever, stream_slicer
          )

+         schema_transformations = []
+         if model.schema_transformations:
+             for transformation_model in model.schema_transformations:
+                 schema_transformations.append(
+                     self._create_component_from_model(model=transformation_model, config=config)
+                 )
+
          retriever = self._create_component_from_model(
              model=model.retriever,
              config=config,
@@ -1652,6 +1686,7 @@ class ModelToComponentFactory:
          return DynamicSchemaLoader(
              retriever=retriever,
              config=config,
+             schema_transformations=schema_transformations,
              schema_type_identifier=schema_type_identifier,
              parameters=model.parameters or {},
          )
@@ -1765,7 +1800,8 @@ class ModelToComponentFactory:
          return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore
              config,
              InterpolatedString.create(
-                 model.token_refresh_endpoint, parameters=model.parameters or {}
+                 model.token_refresh_endpoint, # type: ignore
+                 parameters=model.parameters or {},
              ).eval(config),
              access_token_name=InterpolatedString.create(
                  model.access_token_name or "access_token", parameters=model.parameters or {}
@@ -1799,6 +1835,7 @@ class ModelToComponentFactory:
          # ignore type error because fixing it would have a lot of dependencies, revisit later
          return DeclarativeOauth2Authenticator( # type: ignore
              access_token_name=model.access_token_name or "access_token",
+             access_token_value=model.access_token_value,
              client_id=model.client_id,
              client_secret=model.client_secret,
              expires_in_name=model.expires_in_name or "expires_in",
@@ -2228,22 +2265,28 @@ class ModelToComponentFactory:
              urls_extractor=urls_extractor,
          )

-         return AsyncRetriever(
+         async_job_partition_router = AsyncJobPartitionRouter(
              job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
                  job_repository,
                  stream_slices,
-                 JobTracker(
-                     1
-                 ), # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
+                 JobTracker(1),
+                 # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
                  self._message_repository,
-                 has_bulk_parent=False, # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
+                 has_bulk_parent=False,
+                 # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
              ),
-             record_selector=record_selector,
              stream_slicer=stream_slicer,
              config=config,
              parameters=model.parameters or {},
          )

+         return AsyncRetriever(
+             record_selector=record_selector,
+             stream_slicer=async_job_partition_router,
+             config=config,
+             parameters=model.parameters or {},
+         )
+
      @staticmethod
      def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
          return Spec(
@@ -2353,7 +2396,7 @@ class ModelToComponentFactory:
              config=config,
              name="",
              primary_key=None,
-             stream_slicer=combined_slicers,
+             stream_slicer=stream_slicer if stream_slicer else combined_slicers,
              transformations=[],
          )

@@ -2,10 +2,18 @@
  # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
  #

+ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import AsyncJobPartitionRouter
  from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import CartesianProductStreamSlicer
  from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import ListPartitionRouter
  from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import SinglePartitionRouter
  from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import SubstreamPartitionRouter
  from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter

- __all__ = ["CartesianProductStreamSlicer", "ListPartitionRouter", "SinglePartitionRouter", "SubstreamPartitionRouter", "PartitionRouter"]
+ __all__ = [
+     "AsyncJobPartitionRouter",
+     "CartesianProductStreamSlicer",
+     "ListPartitionRouter",
+     "SinglePartitionRouter",
+     "SubstreamPartitionRouter",
+     "PartitionRouter"
+ ]
@@ -0,0 +1,65 @@
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+
+ from dataclasses import InitVar, dataclass, field
+ from typing import Any, Callable, Iterable, Mapping, Optional
+
+ from airbyte_cdk.models import FailureType
+ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
+     AsyncJobOrchestrator,
+     AsyncPartition,
+ )
+ from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
+     SinglePartitionRouter,
+ )
+ from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
+ from airbyte_cdk.sources.types import Config, StreamSlice
+ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
+
+
+ @dataclass
+ class AsyncJobPartitionRouter(StreamSlicer):
+     """
+     Partition router that creates async jobs in a source API, periodically polls for job
+     completion, and supplies the completed job URL locations as stream slices so that
+     records can be extracted.
+     """
+
+     config: Config
+     parameters: InitVar[Mapping[str, Any]]
+     job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
+     stream_slicer: StreamSlicer = field(
+         default_factory=lambda: SinglePartitionRouter(parameters={})
+     )
+
+     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+         self._job_orchestrator_factory = self.job_orchestrator_factory
+         self._job_orchestrator: Optional[AsyncJobOrchestrator] = None
+         self._parameters = parameters
+
+     def stream_slices(self) -> Iterable[StreamSlice]:
+         slices = self.stream_slicer.stream_slices()
+         self._job_orchestrator = self._job_orchestrator_factory(slices)
+
+         for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
+             yield StreamSlice(
+                 partition=dict(completed_partition.stream_slice.partition)
+                 | {"partition": completed_partition},
+                 cursor_slice=completed_partition.stream_slice.cursor_slice,
+             )
+
+     def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
+         """
+         This method of fetching records extends beyond what a PartitionRouter/StreamSlicer should
+         be responsible for. However, this was added in because the JobOrchestrator is required to
+         retrieve records. And without defining fetch_records() on this class, we're stuck with either
+         passing the JobOrchestrator to the AsyncRetriever or storing it on multiple classes.
+         """
+
+         if not self._job_orchestrator:
+             raise AirbyteTracedException(
+                 message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
+                 internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
+                 failure_type=FailureType.system_error,
+             )
+
+         return self._job_orchestrator.fetch_records(partition=partition)
@@ -88,19 +88,25 @@ class HttpComponentsResolver(ComponentsResolver):
          """
          kwargs = {"stream_template_config": stream_template_config}

-         for components_values in self.retriever.read_records({}):
-             updated_config = deepcopy(stream_template_config)
-             kwargs["components_values"] = components_values # type: ignore[assignment] # component_values will always be of type Mapping[str, Any]
-
-             for resolved_component in self._resolved_components:
-                 valid_types = (
-                     (resolved_component.value_type,) if resolved_component.value_type else None
-                 )
-                 value = resolved_component.value.eval(
-                     self.config, valid_types=valid_types, **kwargs
-                 )
+         for stream_slice in self.retriever.stream_slices():
+             for components_values in self.retriever.read_records(
+                 records_schema={}, stream_slice=stream_slice
+             ):
+                 updated_config = deepcopy(stream_template_config)
+                 kwargs["components_values"] = components_values # type: ignore[assignment] # component_values will always be of type Mapping[str, Any]
+                 kwargs["stream_slice"] = stream_slice # type: ignore[assignment] # stream_slice will always be of type Mapping[str, Any]
+
+                 for resolved_component in self._resolved_components:
+                     valid_types = (
+                         (resolved_component.value_type,) if resolved_component.value_type else None
+                     )
+                     value = resolved_component.value.eval(
+                         self.config, valid_types=valid_types, **kwargs
+                     )

-                 path = [path.eval(self.config, **kwargs) for path in resolved_component.field_path]
-                 dpath.set(updated_config, path, value)
+                     path = [
+                         path.eval(self.config, **kwargs) for path in resolved_component.field_path
+                     ]
+                     dpath.set(updated_config, path, value)

-             yield updated_config
+                 yield updated_config
@@ -1,8 +1,8 @@
  # Copyright (c) 2024 Airbyte, Inc., all rights reserved.


- from dataclasses import InitVar, dataclass, field
- from typing import Any, Callable, Iterable, Mapping, Optional
+ from dataclasses import InitVar, dataclass
+ from typing import Any, Iterable, Mapping, Optional

  from typing_extensions import deprecated

@@ -12,9 +12,10 @@ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
      AsyncPartition,
  )
  from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
- from airbyte_cdk.sources.declarative.partition_routers import SinglePartitionRouter
+ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
+     AsyncJobPartitionRouter,
+ )
  from airbyte_cdk.sources.declarative.retrievers import Retriever
- from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
  from airbyte_cdk.sources.source import ExperimentalClassWarning
  from airbyte_cdk.sources.streams.core import StreamData
  from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
@@ -29,15 +30,10 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
  class AsyncRetriever(Retriever):
      config: Config
      parameters: InitVar[Mapping[str, Any]]
-     job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
      record_selector: RecordSelector
-     stream_slicer: StreamSlicer = field(
-         default_factory=lambda: SinglePartitionRouter(parameters={})
-     )
+     stream_slicer: AsyncJobPartitionRouter

      def __post_init__(self, parameters: Mapping[str, Any]) -> None:
-         self._job_orchestrator_factory = self.job_orchestrator_factory
-         self.__job_orchestrator: Optional[AsyncJobOrchestrator] = None
          self._parameters = parameters

      @property
@@ -54,17 +50,6 @@ class AsyncRetriever(Retriever):
          """
          pass

-     @property
-     def _job_orchestrator(self) -> AsyncJobOrchestrator:
-         if not self.__job_orchestrator:
-             raise AirbyteTracedException(
-                 message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
-                 internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
-                 failure_type=FailureType.system_error,
-             )
-
-         return self.__job_orchestrator
-
      def _get_stream_state(self) -> StreamState:
          """
          Gets the current state of the stream.
@@ -99,15 +84,7 @@ class AsyncRetriever(Retriever):
          return stream_slice["partition"] # type: ignore # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices

      def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
-         slices = self.stream_slicer.stream_slices()
-         self.__job_orchestrator = self._job_orchestrator_factory(slices)
-
-         for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
-             yield StreamSlice(
-                 partition=dict(completed_partition.stream_slice.partition)
-                 | {"partition": completed_partition},
-                 cursor_slice=completed_partition.stream_slice.cursor_slice,
-             )
+         return self.stream_slicer.stream_slices()

      def read_records(
          self,
@@ -116,7 +93,7 @@ class AsyncRetriever(Retriever):
      ) -> Iterable[StreamData]:
          stream_state: StreamState = self._get_stream_state()
          partition: AsyncPartition = self._validate_and_get_stream_slice_partition(stream_slice)
-         records: Iterable[Mapping[str, Any]] = self._job_orchestrator.fetch_records(partition)
+         records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(partition)

          yield from self.record_selector.filter_and_transform(
              all_data=records,
@@ -4,7 +4,7 @@


  from copy import deepcopy
- from dataclasses import InitVar, dataclass
+ from dataclasses import InitVar, dataclass, field
  from typing import Any, List, Mapping, MutableMapping, Optional, Union

  import dpath
@@ -13,8 +13,9 @@ from typing_extensions import deprecated
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
  from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
  from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
+ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
  from airbyte_cdk.sources.source import ExperimentalClassWarning
- from airbyte_cdk.sources.types import Config
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState

  AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
      "string": {"type": ["null", "string"]},
@@ -103,6 +104,7 @@ class DynamicSchemaLoader(SchemaLoader):
      config: Config
      parameters: InitVar[Mapping[str, Any]]
      schema_type_identifier: SchemaTypeIdentifier
+     schema_transformations: List[RecordTransformation] = field(default_factory=lambda: [])

      def get_json_schema(self) -> Mapping[str, Any]:
          """
@@ -128,12 +130,27 @@ class DynamicSchemaLoader(SchemaLoader):
              )
              properties[key] = value

+         transformed_properties = self._transform(properties, {})
+
          return {
              "$schema": "http://json-schema.org/draft-07/schema#",
              "type": "object",
-             "properties": properties,
+             "properties": transformed_properties,
          }

+     def _transform(
+         self,
+         properties: Mapping[str, Any],
+         stream_state: StreamState,
+         stream_slice: Optional[StreamSlice] = None,
+     ) -> Mapping[str, Any]:
+         for transformation in self.schema_transformations:
+             transformation.transform(
+                 properties, # type: ignore # properties has type Mapping[str, Any], but Dict[str, Any] expected
+                 config=self.config,
+             )
+         return properties
+
      def _get_key(
          self,
          raw_schema: MutableMapping[str, Any],
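Note: a minimal sketch (not part of the diff) of what the new schema_transformations hook enables. The RecordTransformation classes added in this release can now rewrite the dynamically discovered schema properties before DynamicSchemaLoader returns them, for example snake_casing the property names:

from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
    KeysToSnakeCaseTransformation,
)

# Properties as DynamicSchemaLoader might have discovered them from the source API.
properties = {
    "UserId": {"type": ["null", "string"]},
    "CreatedAt": {"type": ["null", "string"]},
}
# The loader's new _transform() step calls transform() on each configured transformation,
# mutating the properties mapping in place.
KeysToSnakeCaseTransformation().transform(properties, config={})
assert set(properties) == {"user_id", "created_at"}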
@@ -0,0 +1,50 @@
+ #
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ #
+
+ from dataclasses import dataclass
+ from typing import Any, Dict, Optional
+
+ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
+
+
+ @dataclass
+ class FlattenFields(RecordTransformation):
+     def transform(
+         self,
+         record: Dict[str, Any],
+         config: Optional[Config] = None,
+         stream_state: Optional[StreamState] = None,
+         stream_slice: Optional[StreamSlice] = None,
+     ) -> None:
+         transformed_record = self.flatten_record(record)
+         record.clear()
+         record.update(transformed_record)
+
+     def flatten_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
+         stack = [(record, "_")]
+         transformed_record: Dict[str, Any] = {}
+         force_with_parent_name = False
+
+         while stack:
+             current_record, parent_key = stack.pop()
+
+             if isinstance(current_record, dict):
+                 for current_key, value in current_record.items():
+                     new_key = (
+                         f"{parent_key}.{current_key}"
+                         if (current_key in transformed_record or force_with_parent_name)
+                         else current_key
+                     )
+                     stack.append((value, new_key))
+
+             elif isinstance(current_record, list):
+                 for i, item in enumerate(current_record):
+                     force_with_parent_name = True
+                     stack.append((item, f"{parent_key}.{i}"))
+
+             else:
+                 transformed_record[parent_key] = current_record
+
+         return transformed_record
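Note: a minimal usage sketch (not part of the diff) of the new FlattenFields transformation. It mutates the record in place, promoting nested keys to the top level and falling back to dotted, parent-prefixed keys for list items or on key collisions:

from airbyte_cdk.sources.declarative.transformations.flatten_fields import FlattenFields

record = {"id": 1, "user": {"name": "alice", "age": 30}}
FlattenFields().transform(record)  # the record is mutated in place
assert record == {"id": 1, "name": "alice", "age": 30}

record = {"id": 1, "tags": ["a", "b"]}
FlattenFields().transform(record)  # list items get positional, dotted keys
assert record == {"id": 1, "tags.0": "a", "tags.1": "b"}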
@@ -0,0 +1,68 @@
+ #
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ #
+
+ import re
+ from dataclasses import dataclass
+ from typing import Any, Dict, List, Optional
+
+ import unidecode
+
+ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
+
+
+ @dataclass
+ class KeysToSnakeCaseTransformation(RecordTransformation):
+     token_pattern: re.Pattern[str] = re.compile(
+         r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)"
+     )
+
+     def transform(
+         self,
+         record: Dict[str, Any],
+         config: Optional[Config] = None,
+         stream_state: Optional[StreamState] = None,
+         stream_slice: Optional[StreamSlice] = None,
+     ) -> None:
+         transformed_record = self._transform_record(record)
+         record.clear()
+         record.update(transformed_record)
+
+     def _transform_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
+         transformed_record = {}
+         for key, value in record.items():
+             transformed_key = self.process_key(key)
+             transformed_value = value
+
+             if isinstance(value, dict):
+                 transformed_value = self._transform_record(value)
+
+             transformed_record[transformed_key] = transformed_value
+         return transformed_record
+
+     def process_key(self, key: str) -> str:
+         key = self.normalize_key(key)
+         tokens = self.tokenize_key(key)
+         tokens = self.filter_tokens(tokens)
+         return self.tokens_to_snake_case(tokens)
+
+     def normalize_key(self, key: str) -> str:
+         return unidecode.unidecode(key)
+
+     def tokenize_key(self, key: str) -> List[str]:
+         tokens = []
+         for match in self.token_pattern.finditer(key):
+             token = match.group(0) if match.group("NoToken") is None else ""
+             tokens.append(token)
+         return tokens
+
+     def filter_tokens(self, tokens: List[str]) -> List[str]:
+         if len(tokens) >= 3:
+             tokens = tokens[:1] + [t for t in tokens[1:-1] if t] + tokens[-1:]
+         if tokens and tokens[0].isdigit():
+             tokens.insert(0, "")
+         return tokens
+
+     def tokens_to_snake_case(self, tokens: List[str]) -> str:
+         return "_".join(token.lower() for token in tokens)
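Note: a minimal usage sketch (not part of the diff) of the new KeysToSnakeCaseTransformation. Keys are unidecoded, tokenized, and joined with underscores; nested dictionaries are converted recursively:

from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
    KeysToSnakeCaseTransformation,
)

record = {"CustomerID": 1, "First Name": "Ana", "détails": {"SomeKey": "x"}}
KeysToSnakeCaseTransformation().transform(record)  # the record is mutated in place
assert record == {"customer_id": 1, "first_name": "Ana", "details": {"some_key": "x"}}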
@@ -223,17 +223,17 @@ class Stream(ABC):
                  record_counter += 1

                  checkpoint_interval = self.state_checkpoint_interval
+                 checkpoint = checkpoint_reader.get_checkpoint()
                  if (
                      should_checkpoint
                      and checkpoint_interval
                      and record_counter % checkpoint_interval == 0
+                     and checkpoint is not None
                  ):
-                     checkpoint = checkpoint_reader.get_checkpoint()
-                     if checkpoint:
-                         airbyte_state_message = self._checkpoint_state(
-                             checkpoint, state_manager=state_manager
-                         )
-                         yield airbyte_state_message
+                     airbyte_state_message = self._checkpoint_state(
+                         checkpoint, state_manager=state_manager
+                     )
+                     yield airbyte_state_message

                  if internal_config.is_limit_reached(record_counter):
                      break
@@ -262,7 +262,7 @@ class HttpClient:
          user_backoff_handler = user_defined_backoff_handler(max_tries=max_tries, max_time=max_time)(
              self._send
          )
-         rate_limit_backoff_handler = rate_limit_default_backoff_handler()
+         rate_limit_backoff_handler = rate_limit_default_backoff_handler(max_tries=max_tries)
          backoff_handler = http_client_default_backoff_handler(
              max_tries=max_tries, max_time=max_time
          )
@@ -472,7 +472,9 @@ class HttpClient:

          elif retry_endlessly:
              raise RateLimitBackoffException(
-                 request=request, response=response or exc, error_message=error_message
+                 request=request,
+                 response=(response if response is not None else exc),
+                 error_message=error_message,
              )

          raise DefaultBackoffException(
@@ -54,7 +54,16 @@ class AbstractOauth2Authenticator(AuthBase):

      def get_auth_header(self) -> Mapping[str, Any]:
          """HTTP header to set on the requests"""
-         return {"Authorization": f"Bearer {self.get_access_token()}"}
+         token = (
+             self.access_token
+             if (
+                 not self.get_token_refresh_endpoint()
+                 or not self.get_refresh_token()
+                 and self.access_token
+             )
+             else self.get_access_token()
+         )
+         return {"Authorization": f"Bearer {token}"}

      def get_access_token(self) -> str:
          """Returns the access token"""
@@ -121,7 +130,7 @@ class AbstractOauth2Authenticator(AuthBase):
          try:
              response = requests.request(
                  method="POST",
-                 url=self.get_token_refresh_endpoint(),
+                 url=self.get_token_refresh_endpoint(), # type: ignore # returns None, if not provided, but str | bytes is expected.
                  data=self.build_refresh_request_body(),
              )
              if response.ok:
@@ -198,7 +207,7 @@ class AbstractOauth2Authenticator(AuthBase):
          return None

      @abstractmethod
-     def get_token_refresh_endpoint(self) -> str:
+     def get_token_refresh_endpoint(self) -> Optional[str]:
          """Returns the endpoint to refresh the access token"""

      @abstractmethod
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: airbyte-cdk
- Version: 6.12.1.dev0
+ Version: 6.12.3
  Summary: A framework for writing Airbyte Connectors.
  Home-page: https://airbyte.com
  License: MIT
@@ -22,6 +22,7 @@ Provides-Extra: sql
  Provides-Extra: vector-db-based
  Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
+ Requires-Dist: Unidecode (>=1.3,<2.0)
  Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
  Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
  Requires-Dist: backoff
@@ -56,7 +57,7 @@ Requires-Dist: python-calamine (==0.2.3) ; extra == "file-based"
  Requires-Dist: python-dateutil
  Requires-Dist: python-snappy (==0.7.3) ; extra == "file-based"
  Requires-Dist: python-ulid (>=3.0.0,<4.0.0)
- Requires-Dist: pytz (==2024.1)
+ Requires-Dist: pytz (==2024.2)
  Requires-Dist: rapidfuzz (>=3.10.1,<4.0.0)
  Requires-Dist: requests
  Requires-Dist: requests_cache