airbyte-cdk 6.41.9__py3-none-any.whl → 6.41.9.dev4101__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/models/__init__.py +1 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -0
- airbyte_cdk/sources/declarative/async_job/job.py +0 -6
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +6 -22
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +22 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +39 -64
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +1 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +25 -45
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +24 -45
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -5
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +61 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +17 -4
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +2 -8
- airbyte_cdk/sources/streams/concurrent/default_stream.py +3 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +0 -4
- airbyte_cdk/sources/types.py +11 -0
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +8 -1
- {airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/RECORD +26 -25
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -150
- {airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py
@@ -1,150 +0,0 @@
-#
-# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-#
-
-from dataclasses import dataclass
-from typing import Any, Iterable, Mapping, Optional
-
-from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
-from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
-
-
-@dataclass
-class GroupingPartitionRouter(PartitionRouter):
-    """
-    A partition router that groups partitions from an underlying partition router into batches of a specified size.
-    This is useful for APIs that support filtering by multiple partition keys in a single request.
-
-    Attributes:
-        group_size (int): The number of partitions to include in each group.
-        underlying_partition_router (PartitionRouter): The partition router whose output will be grouped.
-        deduplicate (bool): If True, ensures unique partitions within each group by removing duplicates based on the partition key.
-        config (Config): The connector configuration.
-        parameters (Mapping[str, Any]): Additional parameters for interpolation and configuration.
-    """
-
-    group_size: int
-    underlying_partition_router: PartitionRouter
-    config: Config
-    deduplicate: bool = True
-
-    def __post_init__(self) -> None:
-        self._state: Optional[Mapping[str, StreamState]] = {}
-
-    def stream_slices(self) -> Iterable[StreamSlice]:
-        """
-        Lazily groups partitions from the underlying partition router into batches of size `group_size`.
-
-        This method processes partitions one at a time from the underlying router, maintaining a batch buffer.
-        When the buffer reaches `group_size` or the underlying router is exhausted, it yields a grouped slice.
-        If deduplication is enabled, it tracks seen partition keys to ensure uniqueness within the current batch.
-
-        Yields:
-            Iterable[StreamSlice]: An iterable of StreamSlice objects, where each slice contains a batch of partition values.
-        """
-        batch = []
-        seen_keys = set()
-
-        # Iterate over partitions lazily from the underlying router
-        for partition in self.underlying_partition_router.stream_slices():
-            # Extract the partition key (assuming single key-value pair, e.g., {"board_ids": value})
-            partition_keys = list(partition.partition.keys())
-            # skip parent_slice as it is part of SubstreamPartitionRouter partition
-            if "parent_slice" in partition_keys:
-                partition_keys.remove("parent_slice")
-            if len(partition_keys) != 1:
-                raise ValueError(
-                    f"GroupingPartitionRouter expects a single partition key-value pair. Got {partition.partition}"
-                )
-            key = partition.partition[partition_keys[0]]
-
-            # Skip duplicates if deduplication is enabled
-            if self.deduplicate and key in seen_keys:
-                continue
-
-            # Add partition to the batch
-            batch.append(partition)
-            if self.deduplicate:
-                seen_keys.add(key)
-
-            # Yield the batch when it reaches the group_size
-            if len(batch) == self.group_size:
-                self._state = self.underlying_partition_router.get_stream_state()
-                yield self._create_grouped_slice(batch)
-                batch = []  # Reset the batch
-
-        self._state = self.underlying_partition_router.get_stream_state()
-        # Yield any remaining partitions if the batch isn't empty
-        if batch:
-            yield self._create_grouped_slice(batch)
-
-    def _create_grouped_slice(self, batch: list[StreamSlice]) -> StreamSlice:
-        """
-        Creates a grouped StreamSlice from a batch of partitions, aggregating extra fields into a dictionary with list values.
-
-        Args:
-            batch (list[StreamSlice]): A list of StreamSlice objects to group.
-
-        Returns:
-            StreamSlice: A single StreamSlice with combined partition and extra field values.
-        """
-        # Combine partition values into a single dict with lists
-        grouped_partition = {
-            key: [p.partition.get(key) for p in batch] for key in batch[0].partition.keys()
-        }
-
-        # Aggregate extra fields into a dict with list values
-        extra_fields_dict = (
-            {
-                key: [p.extra_fields.get(key) for p in batch]
-                for key in set().union(*(p.extra_fields.keys() for p in batch if p.extra_fields))
-            }
-            if any(p.extra_fields for p in batch)
-            else {}
-        )
-        return StreamSlice(
-            partition=grouped_partition,
-            cursor_slice={},  # Cursor is managed by the underlying router or incremental sync
-            extra_fields=extra_fields_dict,
-        )
-
-    def get_request_params(
-        self,
-        stream_state: Optional[StreamState] = None,
-        stream_slice: Optional[StreamSlice] = None,
-        next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
-        return {}
-
-    def get_request_headers(
-        self,
-        stream_state: Optional[StreamState] = None,
-        stream_slice: Optional[StreamSlice] = None,
-        next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
-        return {}
-
-    def get_request_body_data(
-        self,
-        stream_state: Optional[StreamState] = None,
-        stream_slice: Optional[StreamSlice] = None,
-        next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
-        return {}
-
-    def get_request_body_json(
-        self,
-        stream_state: Optional[StreamState] = None,
-        stream_slice: Optional[StreamSlice] = None,
-        next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
-        return {}
-
-    def set_initial_state(self, stream_state: StreamState) -> None:
-        """Delegate state initialization to the underlying partition router."""
-        self.underlying_partition_router.set_initial_state(stream_state)
-        self._state = self.underlying_partition_router.get_stream_state()
-
-    def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
-        """Delegate state retrieval to the underlying partition router."""
-        return self._state
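For context on what this deletion removes: GroupingPartitionRouter buffered slices from its underlying router and emitted them in fixed-size, optionally deduplicated groups with list-valued partition keys. The sketch below is a minimal standalone illustration of that batching logic only; group_partitions is a hypothetical helper, plain single-key dicts stand in for StreamSlice objects, and no Airbyte imports are used.

from typing import Any, Dict, Iterable, Iterator, List


def group_partitions(
    partitions: Iterable[Dict[str, Any]],
    group_size: int,
    deduplicate: bool = True,
) -> Iterator[Dict[str, List[Any]]]:
    """Batch single-key partition dicts into groups of at most `group_size`,
    mirroring the batching/deduplication behavior of the removed router."""
    batch: List[Dict[str, Any]] = []
    seen_values = set()

    for partition in partitions:
        # Each partition is expected to hold a single key-value pair,
        # e.g. {"board_ids": 1}.
        [(key_name, value)] = partition.items()

        # Skip values already emitted when deduplication is on
        # (as in the removed code, the "seen" set spans the whole stream).
        if deduplicate and value in seen_values:
            continue
        batch.append(partition)
        if deduplicate:
            seen_values.add(value)

        # Emit a grouped partition once the buffer is full.
        if len(batch) == group_size:
            yield {key_name: [p[key_name] for p in batch]}
            batch = []

    # Emit whatever is left over as a final, smaller group.
    if batch:
        key_name = next(iter(batch[0]))
        yield {key_name: [p[key_name] for p in batch]}


slices = [{"board_ids": i} for i in [1, 1, 2, 3, 4, 5]]
print(list(group_partitions(slices, group_size=2)))
# [{'board_ids': [1, 2]}, {'board_ids': [3, 4]}, {'board_ids': [5]}]

The duplicate board id is dropped before batching, so each emitted group contains unique values, matching the deduplicate=True default of the removed component.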
{airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/LICENSE.txt — File without changes
{airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/LICENSE_SHORT — File without changes
{airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/WHEEL — File without changes
{airbyte_cdk-6.41.9.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/entry_points.txt — File without changes