airbyte-cdk 6.34.0.dev1__py3-none-any.whl → 6.34.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +12 -16
  2. airbyte_cdk/connector_builder/message_grouper.py +448 -0
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
  5. airbyte_cdk/sources/declarative/auth/oauth.py +1 -6
  6. airbyte_cdk/sources/declarative/auth/token.py +8 -3
  7. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +19 -30
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +85 -203
  9. airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
  10. airbyte_cdk/sources/declarative/decoders/__init__.py +4 -0
  11. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -7
  12. airbyte_cdk/sources/declarative/decoders/json_decoder.py +58 -12
  13. airbyte_cdk/sources/declarative/extractors/record_selector.py +3 -12
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +25 -56
  15. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -12
  16. airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -9
  17. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -150
  18. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +84 -234
  19. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  20. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
  21. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +18 -26
  22. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -8
  23. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -16
  24. airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
  25. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
  26. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
  27. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -4
  28. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
  29. airbyte_cdk/sources/file_based/file_based_source.py +37 -70
  30. airbyte_cdk/sources/file_based/file_based_stream_reader.py +12 -107
  31. airbyte_cdk/sources/file_based/stream/__init__.py +1 -10
  32. airbyte_cdk/sources/streams/call_rate.py +47 -185
  33. airbyte_cdk/sources/streams/http/http.py +2 -1
  34. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +56 -217
  35. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +73 -144
  36. airbyte_cdk/utils/datetime_helpers.py +66 -48
  37. airbyte_cdk/utils/mapping_helpers.py +26 -126
  38. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/METADATA +1 -1
  39. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/RECORD +43 -52
  40. airbyte_cdk/connector_builder/test_reader/__init__.py +0 -7
  41. airbyte_cdk/connector_builder/test_reader/helpers.py +0 -591
  42. airbyte_cdk/connector_builder/test_reader/message_grouper.py +0 -160
  43. airbyte_cdk/connector_builder/test_reader/reader.py +0 -441
  44. airbyte_cdk/connector_builder/test_reader/types.py +0 -75
  45. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +0 -81
  46. airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -47
  47. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +0 -85
  48. airbyte_cdk/sources/specs/transfer_modes.py +0 -26
  49. airbyte_cdk/sources/streams/permissions/identities_stream.py +0 -75
  50. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE.txt +0 -0
  51. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE_SHORT +0 -0
  52. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/WHEEL +0 -0
  53. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/entry_points.txt +0 -0
@@ -1,81 +0,0 @@
1
- #
2
- # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
- from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
6
- AbstractFileBasedSpec,
7
- DeliverRawFiles,
8
- )
9
- from airbyte_cdk.sources.specs.transfer_modes import DeliverPermissions
10
-
11
- DELIVERY_TYPE_KEY = "delivery_type"
12
- DELIVERY_TYPE_PERMISSION_TRANSFER_MODE_VALUE = "use_permissions_transfer"
13
- DELIVERY_TYPE_FILES_TRANSFER_MODE_VALUE = "use_file_transfer"
14
- PRESERVE_DIRECTORY_STRUCTURE_KEY = "preserve_directory_structure"
15
- INCLUDE_IDENTITIES_STREAM_KEY = "include_identities_stream"
16
-
17
-
18
- def use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
19
- """Returns `True` if the configuration uses file transfer mode."""
20
- return (
21
- hasattr(parsed_config.delivery_method, DELIVERY_TYPE_KEY)
22
- and parsed_config.delivery_method.delivery_type == DELIVERY_TYPE_FILES_TRANSFER_MODE_VALUE
23
- )
24
-
25
-
26
- def preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
27
- """
28
- Determines whether to preserve directory structure during file transfer.
29
-
30
- When enabled, files maintain their subdirectory paths in the destination.
31
- When disabled, files are flattened to the root of the destination.
32
-
33
- Args:
34
- parsed_config: The parsed configuration containing delivery method settings
35
-
36
- Returns:
37
- True if directory structure should be preserved (default), False otherwise
38
- """
39
- if (
40
- use_file_transfer(parsed_config)
41
- and hasattr(parsed_config.delivery_method, PRESERVE_DIRECTORY_STRUCTURE_KEY)
42
- and isinstance(parsed_config.delivery_method, DeliverRawFiles)
43
- ):
44
- return parsed_config.delivery_method.preserve_directory_structure
45
- return True
46
-
47
-
48
- def use_permissions_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
49
- """
50
- Determines whether to use permissions transfer to sync ACLs and Identities
51
-
52
- Args:
53
- parsed_config: The parsed configuration containing delivery method settings
54
-
55
- Returns:
56
- True if permissions transfer should be enabled, False otherwise
57
- """
58
- return (
59
- hasattr(parsed_config.delivery_method, DELIVERY_TYPE_KEY)
60
- and parsed_config.delivery_method.delivery_type
61
- == DELIVERY_TYPE_PERMISSION_TRANSFER_MODE_VALUE
62
- )
63
-
64
-
65
- def include_identities_stream(parsed_config: AbstractFileBasedSpec) -> bool:
66
- """
67
- There are scenarios where user may not have access to identities but still is valuable to get ACLs
68
-
69
- Args:
70
- parsed_config: The parsed configuration containing delivery method settings
71
-
72
- Returns:
73
- True if we should include Identities stream.
74
- """
75
- if (
76
- use_permissions_transfer(parsed_config)
77
- and hasattr(parsed_config.delivery_method, INCLUDE_IDENTITIES_STREAM_KEY)
78
- and isinstance(parsed_config.delivery_method, DeliverPermissions)
79
- ):
80
- return parsed_config.delivery_method.include_identities_stream
81
- return False
@@ -1,47 +0,0 @@
1
- #
2
- # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
- from functools import cache
6
- from typing import Any, Dict, Iterable, Mapping, MutableMapping, Optional
7
-
8
- from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType
9
- from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy
10
- from airbyte_cdk.sources.file_based.exceptions import FileBasedErrorsCollector
11
- from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
12
- from airbyte_cdk.sources.streams.core import JsonSchema
13
- from airbyte_cdk.sources.streams.permissions.identities_stream import IdentitiesStream
14
-
15
-
16
- class FileIdentitiesStream(IdentitiesStream):
17
- """
18
- The identities stream. A full refresh stream to sync identities from a certain domain.
19
- The stream reader manage the logic to get such data, which is implemented on connector side.
20
- """
21
-
22
- is_resumable = False
23
-
24
- def __init__(
25
- self,
26
- catalog_schema: Optional[Mapping[str, Any]],
27
- stream_reader: AbstractFileBasedStreamReader,
28
- discovery_policy: AbstractDiscoveryPolicy,
29
- errors_collector: FileBasedErrorsCollector,
30
- ) -> None:
31
- super().__init__()
32
- self.catalog_schema = catalog_schema
33
- self.stream_reader = stream_reader
34
- self._discovery_policy = discovery_policy
35
- self.errors_collector = errors_collector
36
- self._cursor: MutableMapping[str, Any] = {}
37
-
38
- @property
39
- def primary_key(self) -> PrimaryKeyType:
40
- return None
41
-
42
- def load_identity_groups(self) -> Iterable[Dict[str, Any]]:
43
- return self.stream_reader.load_identity_groups(logger=self.logger)
44
-
45
- @cache
46
- def get_json_schema(self) -> JsonSchema:
47
- return self.stream_reader.identities_schema
@@ -1,85 +0,0 @@
1
- #
2
- # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
- import traceback
6
- from typing import Any, Dict, Iterable
7
-
8
- from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
9
- from airbyte_cdk.models import Type as MessageType
10
- from airbyte_cdk.sources.file_based.stream import DefaultFileBasedStream
11
- from airbyte_cdk.sources.file_based.types import StreamSlice
12
- from airbyte_cdk.sources.streams.core import JsonSchema
13
- from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
14
-
15
-
16
- class PermissionsFileBasedStream(DefaultFileBasedStream):
17
- """
18
- A specialized stream for handling file-based ACL permissions.
19
-
20
- This stream works with the stream_reader to:
21
- 1. Fetch ACL permissions for each file in the source
22
- 2. Transform permissions into a standardized format
23
- 3. Generate records containing permission information
24
-
25
- The stream_reader is responsible for the actual implementation of permission retrieval
26
- and schema definition, while this class handles the streaming interface.
27
- """
28
-
29
- def _filter_schema_invalid_properties(
30
- self, configured_catalog_json_schema: Dict[str, Any]
31
- ) -> Dict[str, Any]:
32
- return self.stream_reader.file_permissions_schema
33
-
34
- def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]:
35
- """
36
- Yield permissions records from all remote files
37
- """
38
-
39
- for file in stream_slice["files"]:
40
- no_permissions = False
41
- file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
42
- try:
43
- permissions_record = self.stream_reader.get_file_acl_permissions(
44
- file, logger=self.logger
45
- )
46
- if not permissions_record:
47
- no_permissions = True
48
- self.logger.warning(
49
- f"Unable to fetch permissions. stream={self.name} file={file.uri}"
50
- )
51
- continue
52
- permissions_record = self.transform_record(
53
- permissions_record, file, file_datetime_string
54
- )
55
- yield stream_data_to_airbyte_message(
56
- self.name, permissions_record, is_file_transfer_message=False
57
- )
58
- except Exception as e:
59
- self.logger.error(f"Failed to retrieve permissions for file {file.uri}: {str(e)}")
60
- yield AirbyteMessage(
61
- type=MessageType.LOG,
62
- log=AirbyteLogMessage(
63
- level=Level.ERROR,
64
- message=f"Error retrieving files permissions: stream={self.name} file={file.uri}",
65
- stack_trace=traceback.format_exc(),
66
- ),
67
- )
68
- finally:
69
- if no_permissions:
70
- yield AirbyteMessage(
71
- type=MessageType.LOG,
72
- log=AirbyteLogMessage(
73
- level=Level.WARN,
74
- message=f"Unable to fetch permissions. stream={self.name} file={file.uri}",
75
- ),
76
- )
77
-
78
- def _get_raw_json_schema(self) -> JsonSchema:
79
- """
80
- Retrieve the raw JSON schema for file permissions from the stream reader.
81
-
82
- Returns:
83
- The file permissions schema that defines the structure of permission records
84
- """
85
- return self.stream_reader.file_permissions_schema
@@ -1,26 +0,0 @@
1
- #
2
- # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
- from typing import Literal
6
-
7
- from pydantic.v1 import AnyUrl, BaseModel, Field
8
-
9
- from airbyte_cdk import OneOfOptionConfig
10
-
11
-
12
- class DeliverPermissions(BaseModel):
13
- class Config(OneOfOptionConfig):
14
- title = "Replicate Permissions ACL"
15
- description = "Sends one identity stream and one for more permissions (ACL) streams to the destination. This data can be used in downstream systems to recreate permission restrictions mirroring the original source."
16
- discriminator = "delivery_type"
17
-
18
- delivery_type: Literal["use_permissions_transfer"] = Field(
19
- "use_permissions_transfer", const=True
20
- )
21
-
22
- include_identities_stream: bool = Field(
23
- title="Include Identity Stream",
24
- description="This data can be used in downstream systems to recreate permission restrictions mirroring the original source",
25
- default=True,
26
- )
@@ -1,75 +0,0 @@
1
- #
2
- # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
- import traceback
6
- from abc import ABC, abstractmethod
7
- from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional
8
-
9
- from airbyte_protocol_dataclasses.models import SyncMode
10
-
11
- from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
12
- from airbyte_cdk.models import Type as MessageType
13
- from airbyte_cdk.sources.streams import Stream
14
- from airbyte_cdk.sources.streams.checkpoint import Cursor
15
- from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
16
- from airbyte_cdk.utils.traced_exception import AirbyteTracedException
17
-
18
-
19
- class IdentitiesStream(Stream, ABC):
20
- """
21
- The identities stream. A full refresh stream to sync identities from a certain domain.
22
- The load_identity_groups method manage the logic to get such data.
23
- """
24
-
25
- IDENTITIES_STREAM_NAME = "identities"
26
-
27
- is_resumable = False
28
-
29
- def __init__(self) -> None:
30
- super().__init__()
31
- self._cursor: MutableMapping[str, Any] = {}
32
-
33
- @property
34
- def state(self) -> MutableMapping[str, Any]:
35
- return self._cursor
36
-
37
- @state.setter
38
- def state(self, value: MutableMapping[str, Any]) -> None:
39
- """State setter, accept state serialized by state getter."""
40
- self._cursor = value
41
-
42
- def read_records(
43
- self,
44
- sync_mode: SyncMode,
45
- cursor_field: Optional[List[str]] = None,
46
- stream_slice: Optional[Mapping[str, Any]] = None,
47
- stream_state: Optional[Mapping[str, Any]] = None,
48
- ) -> Iterable[Mapping[str, Any] | AirbyteMessage]:
49
- try:
50
- identity_groups = self.load_identity_groups()
51
- for record in identity_groups:
52
- yield stream_data_to_airbyte_message(self.name, record)
53
- except AirbyteTracedException as exc:
54
- # Re-raise the exception to stop the whole sync immediately as this is a fatal error
55
- raise exc
56
- except Exception as e:
57
- yield AirbyteMessage(
58
- type=MessageType.LOG,
59
- log=AirbyteLogMessage(
60
- level=Level.ERROR,
61
- message=f"Error trying to read identities: {e} stream={self.name}",
62
- stack_trace=traceback.format_exc(),
63
- ),
64
- )
65
-
66
- @abstractmethod
67
- def load_identity_groups(self) -> Iterable[Dict[str, Any]]:
68
- raise NotImplementedError("Implement this method to read identity records")
69
-
70
- @property
71
- def name(self) -> str:
72
- return self.IDENTITIES_STREAM_NAME
73
-
74
- def get_cursor(self) -> Optional[Cursor]:
75
- return None