airbyte-cdk 6.41.8__py3-none-any.whl → 6.41.9.dev4101__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. airbyte_cdk/models/__init__.py +1 -0
  2. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -0
  3. airbyte_cdk/sources/declarative/async_job/job.py +0 -6
  4. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
  5. airbyte_cdk/sources/declarative/async_job/job_tracker.py +6 -22
  6. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +22 -0
  7. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +39 -64
  8. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +1 -2
  9. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +25 -45
  10. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +24 -45
  11. airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
  12. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +61 -0
  13. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +17 -4
  14. airbyte_cdk/sources/file_based/file_types/file_transfer.py +2 -8
  15. airbyte_cdk/sources/streams/concurrent/default_stream.py +3 -0
  16. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +0 -4
  17. airbyte_cdk/sources/types.py +11 -0
  18. airbyte_cdk/sources/utils/files_directory.py +15 -0
  19. airbyte_cdk/sources/utils/record_helper.py +8 -1
  20. {airbyte_cdk-6.41.8.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/METADATA +2 -2
  21. {airbyte_cdk-6.41.8.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/RECORD +25 -24
  22. airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -150
  23. {airbyte_cdk-6.41.8.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/LICENSE.txt +0 -0
  24. {airbyte_cdk-6.41.8.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/LICENSE_SHORT +0 -0
  25. {airbyte_cdk-6.41.8.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/WHEEL +0 -0
  26. {airbyte_cdk-6.41.8.dist-info → airbyte_cdk-6.41.9.dev4101.dist-info}/entry_points.txt +0 -0
@@ -1,150 +0,0 @@
1
- #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
- from dataclasses import dataclass
6
- from typing import Any, Iterable, Mapping, Optional
7
-
8
- from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
9
- from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
10
-
11
-
12
- @dataclass
13
- class GroupingPartitionRouter(PartitionRouter):
14
- """
15
- A partition router that groups partitions from an underlying partition router into batches of a specified size.
16
- This is useful for APIs that support filtering by multiple partition keys in a single request.
17
-
18
- Attributes:
19
- group_size (int): The number of partitions to include in each group.
20
- underlying_partition_router (PartitionRouter): The partition router whose output will be grouped.
21
- deduplicate (bool): If True, ensures unique partitions within each group by removing duplicates based on the partition key.
22
- config (Config): The connector configuration.
23
- parameters (Mapping[str, Any]): Additional parameters for interpolation and configuration.
24
- """
25
-
26
- group_size: int
27
- underlying_partition_router: PartitionRouter
28
- config: Config
29
- deduplicate: bool = True
30
-
31
- def __post_init__(self) -> None:
32
- self._state: Optional[Mapping[str, StreamState]] = {}
33
-
34
- def stream_slices(self) -> Iterable[StreamSlice]:
35
- """
36
- Lazily groups partitions from the underlying partition router into batches of size `group_size`.
37
-
38
- This method processes partitions one at a time from the underlying router, maintaining a batch buffer.
39
- When the buffer reaches `group_size` or the underlying router is exhausted, it yields a grouped slice.
40
- If deduplication is enabled, it tracks seen partition keys to ensure uniqueness within the current batch.
41
-
42
- Yields:
43
- Iterable[StreamSlice]: An iterable of StreamSlice objects, where each slice contains a batch of partition values.
44
- """
45
- batch = []
46
- seen_keys = set()
47
-
48
- # Iterate over partitions lazily from the underlying router
49
- for partition in self.underlying_partition_router.stream_slices():
50
- # Extract the partition key (assuming single key-value pair, e.g., {"board_ids": value})
51
- partition_keys = list(partition.partition.keys())
52
- # skip parent_slice as it is part of SubstreamPartitionRouter partition
53
- if "parent_slice" in partition_keys:
54
- partition_keys.remove("parent_slice")
55
- if len(partition_keys) != 1:
56
- raise ValueError(
57
- f"GroupingPartitionRouter expects a single partition key-value pair. Got {partition.partition}"
58
- )
59
- key = partition.partition[partition_keys[0]]
60
-
61
- # Skip duplicates if deduplication is enabled
62
- if self.deduplicate and key in seen_keys:
63
- continue
64
-
65
- # Add partition to the batch
66
- batch.append(partition)
67
- if self.deduplicate:
68
- seen_keys.add(key)
69
-
70
- # Yield the batch when it reaches the group_size
71
- if len(batch) == self.group_size:
72
- self._state = self.underlying_partition_router.get_stream_state()
73
- yield self._create_grouped_slice(batch)
74
- batch = [] # Reset the batch
75
-
76
- self._state = self.underlying_partition_router.get_stream_state()
77
- # Yield any remaining partitions if the batch isn't empty
78
- if batch:
79
- yield self._create_grouped_slice(batch)
80
-
81
- def _create_grouped_slice(self, batch: list[StreamSlice]) -> StreamSlice:
82
- """
83
- Creates a grouped StreamSlice from a batch of partitions, aggregating extra fields into a dictionary with list values.
84
-
85
- Args:
86
- batch (list[StreamSlice]): A list of StreamSlice objects to group.
87
-
88
- Returns:
89
- StreamSlice: A single StreamSlice with combined partition and extra field values.
90
- """
91
- # Combine partition values into a single dict with lists
92
- grouped_partition = {
93
- key: [p.partition.get(key) for p in batch] for key in batch[0].partition.keys()
94
- }
95
-
96
- # Aggregate extra fields into a dict with list values
97
- extra_fields_dict = (
98
- {
99
- key: [p.extra_fields.get(key) for p in batch]
100
- for key in set().union(*(p.extra_fields.keys() for p in batch if p.extra_fields))
101
- }
102
- if any(p.extra_fields for p in batch)
103
- else {}
104
- )
105
- return StreamSlice(
106
- partition=grouped_partition,
107
- cursor_slice={}, # Cursor is managed by the underlying router or incremental sync
108
- extra_fields=extra_fields_dict,
109
- )
110
-
111
- def get_request_params(
112
- self,
113
- stream_state: Optional[StreamState] = None,
114
- stream_slice: Optional[StreamSlice] = None,
115
- next_page_token: Optional[Mapping[str, Any]] = None,
116
- ) -> Mapping[str, Any]:
117
- return {}
118
-
119
- def get_request_headers(
120
- self,
121
- stream_state: Optional[StreamState] = None,
122
- stream_slice: Optional[StreamSlice] = None,
123
- next_page_token: Optional[Mapping[str, Any]] = None,
124
- ) -> Mapping[str, Any]:
125
- return {}
126
-
127
- def get_request_body_data(
128
- self,
129
- stream_state: Optional[StreamState] = None,
130
- stream_slice: Optional[StreamSlice] = None,
131
- next_page_token: Optional[Mapping[str, Any]] = None,
132
- ) -> Mapping[str, Any]:
133
- return {}
134
-
135
- def get_request_body_json(
136
- self,
137
- stream_state: Optional[StreamState] = None,
138
- stream_slice: Optional[StreamSlice] = None,
139
- next_page_token: Optional[Mapping[str, Any]] = None,
140
- ) -> Mapping[str, Any]:
141
- return {}
142
-
143
- def set_initial_state(self, stream_state: StreamState) -> None:
144
- """Delegate state initialization to the underlying partition router."""
145
- self.underlying_partition_router.set_initial_state(stream_state)
146
- self._state = self.underlying_partition_router.get_stream_state()
147
-
148
- def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
149
- """Delegate state retrieval to the underlying partition router."""
150
- return self._state