airbyte-cdk 6.45.0__py3-none-any.whl → 6.45.0.dev4101__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +6 -45
- airbyte_cdk/connector_builder/main.py +2 -5
- airbyte_cdk/models/__init__.py +1 -0
- airbyte_cdk/models/airbyte_protocol.py +1 -3
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
- airbyte_cdk/sources/declarative/async_job/job.py +0 -6
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +6 -22
- airbyte_cdk/sources/declarative/checks/__init__.py +2 -5
- airbyte_cdk/sources/declarative/checks/check_stream.py +11 -113
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +8 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +50 -210
- airbyte_cdk/sources/declarative/extractors/record_selector.py +6 -1
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +1 -2
- airbyte_cdk/sources/declarative/interpolation/macros.py +4 -8
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -23
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +43 -142
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +4 -16
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +50 -263
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -5
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -25
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +30 -101
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +9 -4
- airbyte_cdk/sources/declarative/transformations/add_fields.py +1 -3
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +32 -14
- airbyte_cdk/sources/file_based/file_record_data.py +24 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -15
- airbyte_cdk/sources/file_based/schema_helpers.py +11 -1
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +0 -1
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +16 -31
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +1 -3
- airbyte_cdk/sources/streams/concurrent/default_stream.py +3 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +0 -4
- airbyte_cdk/sources/types.py +11 -2
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +8 -8
- {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/RECORD +44 -50
- airbyte_cdk/models/file_transfer_record_message.py +0 -13
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -150
- airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py +0 -13
- airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py +0 -40
- airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py +0 -69
- airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py +0 -58
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/__init__.py +0 -10
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py +0 -33
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py +0 -19
- {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/entry_points.txt +0 -0
@@ -1,69 +0,0 @@
|
|
1
|
-
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
-
|
3
|
-
from dataclasses import InitVar, dataclass
|
4
|
-
from enum import Enum
|
5
|
-
from typing import Any, Iterable, List, Mapping, Optional
|
6
|
-
|
7
|
-
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import GroupByKey
|
8
|
-
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import (
|
9
|
-
RecordMergeStrategy,
|
10
|
-
)
|
11
|
-
from airbyte_cdk.sources.types import Config, Record
|
12
|
-
|
13
|
-
|
14
|
-
class PropertyLimitType(Enum):
    """
    The heuristic used to measure the size of the current chunk of properties, which in turn decides when
    the chunk is full and a new one should be started.
    """

    # Size is measured as the number of characters in each property name.
    characters = "characters"
    # Size is measured as the number of properties.
    property_count = "property_count"
|
22
|
-
|
23
|
-
|
24
|
-
@dataclass
class PropertyChunking:
    """
    Defines the behavior for how the complete list of properties to query for are broken down into smaller groups
    that will be used for multiple requests to the target API.
    """

    # How the size of a property is measured: its character length for PropertyLimitType.characters,
    # otherwise every property counts as 1.
    property_limit_type: PropertyLimitType
    # Maximum accumulated size of one chunk. When falsy, no chunking is performed.
    property_limit: Optional[int]
    # Strategy used to compute the merge key of a record. Defaults to GroupByKey on "id" when not provided.
    record_merge_strategy: Optional[RecordMergeStrategy]
    parameters: InitVar[Mapping[str, Any]]
    config: Config

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # Fall back to grouping records by their "id" field when no merge strategy was supplied.
        self._record_merge_strategy = self.record_merge_strategy or GroupByKey(
            key="id", config=self.config, parameters=parameters
        )

    def get_request_property_chunks(
        self, property_fields: Iterable[str], always_include_properties: Optional[List[str]]
    ) -> Iterable[List[str]]:
        """
        Lazily split property_fields into chunks whose accumulated size does not exceed property_limit.

        :param property_fields: The properties to distribute across chunks
        :param always_include_properties: Properties prepended to every chunk (appended to the single chunk when
          no property_limit is configured). They do not count toward the size of a chunk.
        :return: The chunks of properties, each as a new list. At least one chunk is always produced, even when
          property_fields is empty.
        """
        if not self.property_limit:
            # No limit configured: everything goes out in a single chunk.
            single_property_chunk = list(property_fields)
            if always_include_properties:
                single_property_chunk.extend(always_include_properties)
            yield single_property_chunk
            return

        current_chunk = list(always_include_properties) if always_include_properties else []
        chunk_size = 0
        fields_in_chunk = 0
        for property_field in property_fields:
            # If property_limit_type is not defined, we default to property_count which is just an incrementing count
            property_field_size = (
                len(property_field)
                if self.property_limit_type == PropertyLimitType.characters
                else 1
            )
            # Only flush a chunk that already holds at least one property field. Otherwise a property that is
            # larger than the limit on its own would cause a chunk without any property field to be emitted.
            if fields_in_chunk and chunk_size + property_field_size > self.property_limit:
                yield current_chunk
                current_chunk = list(always_include_properties) if always_include_properties else []
                chunk_size = 0
                fields_in_chunk = 0
            current_chunk.append(property_field)
            chunk_size += property_field_size
            fields_in_chunk += 1
        yield current_chunk

    def get_merge_key(self, record: Record) -> Optional[str]:
        """
        Return the key used to merge the given record, as computed by the configured record merge strategy.
        """
        return self._record_merge_strategy.get_group_key(record=record)
|
@@ -1,58 +0,0 @@
|
|
1
|
-
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
-
|
3
|
-
from dataclasses import InitVar, dataclass
|
4
|
-
from typing import Any, Iterable, List, Mapping, Optional, Union
|
5
|
-
|
6
|
-
from airbyte_cdk.sources.declarative.requesters.query_properties import (
|
7
|
-
PropertiesFromEndpoint,
|
8
|
-
PropertyChunking,
|
9
|
-
)
|
10
|
-
from airbyte_cdk.sources.types import Config, StreamSlice
|
11
|
-
|
12
|
-
|
13
|
-
@dataclass
class QueryProperties:
    """
    Low-code component responsible for injecting additional property values into outbound API requests.
    The properties are either listed statically in the manifest or retrieved dynamically by requesting them
    from a partner API. The full set of properties can optionally be split into smaller chunks to satisfy
    API restrictions around the total amount of data retrieved.
    """

    property_list: Optional[Union[List[str], PropertiesFromEndpoint]]
    always_include_properties: Optional[List[str]]
    property_chunking: Optional[PropertyChunking]
    config: Config
    parameters: InitVar[Mapping[str, Any]]

    def get_request_property_chunks(
        self, stream_slice: Optional[StreamSlice] = None
    ) -> Iterable[List[str]]:
        """
        Resolve the total set of properties, either dynamically or from the static list, and split it into
        chunks when property chunking is configured.

        :param stream_slice: The StreamSlice of the current partition being processed during the sync. It is
          passed along because subcomponents of QueryProperties can interpolate the top-level StreamSlice object
        """
        fields: Union[Iterable[str], List[str]]
        source = self.property_list
        if isinstance(source, PropertiesFromEndpoint):
            fields = source.get_properties_from_endpoint(stream_slice=stream_slice)
        else:
            fields = source or []

        chunker = self.property_chunking
        if not chunker:
            # Without chunking, all the properties are emitted as one chunk.
            yield list(fields)
            return

        yield from chunker.get_request_property_chunks(
            property_fields=fields, always_include_properties=self.always_include_properties
        )

    # NOTE: slated for removal later; kept for now so the discussion thread on the PR does not get hidden
    def has_multiple_chunks(self, stream_slice: Optional[StreamSlice]) -> bool:
        """
        Return True when at least two property chunks are produced for the given stream slice.
        """
        chunk_iterator = iter(self.get_request_property_chunks(stream_slice=stream_slice))
        exhausted = object()
        # Pull at most two chunks: running out before the second one means there is no more than one chunk.
        for _ in range(2):
            if next(chunk_iterator, exhausted) is exhausted:
                return False
        return True
|
@@ -1,10 +0,0 @@
|
|
1
|
-
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
-
|
3
|
-
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.group_by_key import (
|
4
|
-
GroupByKey,
|
5
|
-
)
|
6
|
-
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import (
|
7
|
-
RecordMergeStrategy,
|
8
|
-
)
|
9
|
-
|
10
|
-
__all__ = ["GroupByKey", "RecordMergeStrategy"]
|
@@ -1,33 +0,0 @@
|
|
1
|
-
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
-
|
3
|
-
from dataclasses import InitVar, dataclass
|
4
|
-
from typing import Any, List, Mapping, Optional, Union
|
5
|
-
|
6
|
-
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import (
|
7
|
-
RecordMergeStrategy,
|
8
|
-
)
|
9
|
-
from airbyte_cdk.sources.types import Config, Record
|
10
|
-
|
11
|
-
|
12
|
-
@dataclass
class GroupByKey(RecordMergeStrategy):
    """
    Record merge strategy that combines records together according to values on the record for one or many keys.
    """

    # A single field name or a list of field names whose values identify the group a record belongs to.
    key: Union[str, List[str]]
    parameters: InitVar[Mapping[str, Any]]
    config: Config

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # Normalize to a list so get_group_key handles the single-key and multi-key cases the same way.
        self._keys = [self.key] if isinstance(self.key, str) else self.key

    def get_group_key(self, record: Record) -> Optional[str]:
        """
        Build the group key of a record by joining the values of the configured keys with a comma.

        :param record: The record whose data is looked up for each configured key
        :return: The comma-separated key values, or None as soon as one of the keys is absent from the record
          or has a None or empty-string value
        """
        resolved_keys = []
        for key in self._keys:
            key_value = record.data.get(key)
            # Only None and the empty string count as missing; a value such as 0 is a legitimate identifier.
            if key_value is None or key_value == "":
                return None
            # str.join only accepts strings, so non-string values (e.g. integer ids) are converted first.
            resolved_keys.append(str(key_value))
        return ",".join(resolved_keys)
|
@@ -1,19 +0,0 @@
|
|
1
|
-
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
-
|
3
|
-
from abc import ABC, abstractmethod
|
4
|
-
from dataclasses import dataclass
|
5
|
-
from typing import Optional
|
6
|
-
|
7
|
-
from airbyte_cdk.sources.types import Record
|
8
|
-
|
9
|
-
|
10
|
-
@dataclass
class RecordMergeStrategy(ABC):
    """
    Interface describing how records whose complete set of fields had to be fetched over multiple requests
    should be merged back into a single record.
    """

    @abstractmethod
    def get_group_key(self, record: Record) -> Optional[str]:
        """
        Return the key identifying the group the given record belongs to, or None when there is none.
        """
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|