airbyte-cdk 6.36.1__py3-none-any.whl → 6.37.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/models.py +16 -14
- airbyte_cdk/connector_builder/test_reader/helpers.py +120 -22
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +16 -3
- airbyte_cdk/connector_builder/test_reader/types.py +9 -1
- airbyte_cdk/entrypoint.py +7 -7
- airbyte_cdk/sources/declarative/auth/token_provider.py +1 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +83 -17
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +42 -23
- airbyte_cdk/sources/http_logger.py +3 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -0
- {airbyte_cdk-6.36.1.dist-info → airbyte_cdk-6.37.0.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.36.1.dist-info → airbyte_cdk-6.37.0.dev0.dist-info}/RECORD +16 -16
- {airbyte_cdk-6.36.1.dist-info → airbyte_cdk-6.37.0.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.36.1.dist-info → airbyte_cdk-6.37.0.dev0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.36.1.dist-info → airbyte_cdk-6.37.0.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.36.1.dist-info → airbyte_cdk-6.37.0.dev0.dist-info}/entry_points.txt +0 -0
@@ -21,20 +21,6 @@ class HttpRequest:
|
|
21
21
|
body: Optional[str] = None
|
22
22
|
|
23
23
|
|
24
|
-
@dataclass
|
25
|
-
class StreamReadPages:
|
26
|
-
records: List[object]
|
27
|
-
request: Optional[HttpRequest] = None
|
28
|
-
response: Optional[HttpResponse] = None
|
29
|
-
|
30
|
-
|
31
|
-
@dataclass
|
32
|
-
class StreamReadSlices:
|
33
|
-
pages: List[StreamReadPages]
|
34
|
-
slice_descriptor: Optional[Dict[str, Any]]
|
35
|
-
state: Optional[List[Dict[str, Any]]] = None
|
36
|
-
|
37
|
-
|
38
24
|
@dataclass
|
39
25
|
class LogMessage:
|
40
26
|
message: str
|
@@ -46,11 +32,27 @@ class LogMessage:
|
|
46
32
|
@dataclass
|
47
33
|
class AuxiliaryRequest:
|
48
34
|
title: str
|
35
|
+
type: str
|
49
36
|
description: str
|
50
37
|
request: HttpRequest
|
51
38
|
response: HttpResponse
|
52
39
|
|
53
40
|
|
41
|
+
@dataclass
|
42
|
+
class StreamReadPages:
|
43
|
+
records: List[object]
|
44
|
+
request: Optional[HttpRequest] = None
|
45
|
+
response: Optional[HttpResponse] = None
|
46
|
+
|
47
|
+
|
48
|
+
@dataclass
|
49
|
+
class StreamReadSlices:
|
50
|
+
pages: List[StreamReadPages]
|
51
|
+
slice_descriptor: Optional[Dict[str, Any]]
|
52
|
+
state: Optional[List[Dict[str, Any]]] = None
|
53
|
+
auxiliary_requests: Optional[List[AuxiliaryRequest]] = None
|
54
|
+
|
55
|
+
|
54
56
|
@dataclass
|
55
57
|
class StreamRead(object):
|
56
58
|
logs: List[LogMessage]
|
@@ -28,7 +28,7 @@ from airbyte_cdk.utils.schema_inferrer import (
|
|
28
28
|
SchemaInferrer,
|
29
29
|
)
|
30
30
|
|
31
|
-
from .types import LOG_MESSAGES_OUTPUT_TYPE
|
31
|
+
from .types import ASYNC_AUXILIARY_REQUEST_TYPES, LOG_MESSAGES_OUTPUT_TYPE
|
32
32
|
|
33
33
|
# -------
|
34
34
|
# Parsers
|
@@ -226,7 +226,8 @@ def should_close_page(
|
|
226
226
|
at_least_one_page_in_group
|
227
227
|
and is_log_message(message)
|
228
228
|
and (
|
229
|
-
is_page_http_request(json_message)
|
229
|
+
is_page_http_request(json_message)
|
230
|
+
or message.log.message.startswith(SliceLogger.SLICE_LOG_PREFIX) # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
|
230
231
|
)
|
231
232
|
)
|
232
233
|
|
@@ -330,6 +331,10 @@ def is_auxiliary_http_request(message: Optional[Dict[str, Any]]) -> bool:
|
|
330
331
|
return is_http_log(message) and message.get("http", {}).get("is_auxiliary", False)
|
331
332
|
|
332
333
|
|
334
|
+
def is_async_auxiliary_request(message: AuxiliaryRequest) -> bool:
|
335
|
+
return message.type in ASYNC_AUXILIARY_REQUEST_TYPES
|
336
|
+
|
337
|
+
|
333
338
|
def is_log_message(message: AirbyteMessage) -> bool:
|
334
339
|
"""
|
335
340
|
Determines whether the provided message is of type LOG.
|
@@ -413,6 +418,7 @@ def handle_current_slice(
|
|
413
418
|
current_slice_pages: List[StreamReadPages],
|
414
419
|
current_slice_descriptor: Optional[Dict[str, Any]] = None,
|
415
420
|
latest_state_message: Optional[Dict[str, Any]] = None,
|
421
|
+
auxiliary_requests: Optional[List[AuxiliaryRequest]] = None,
|
416
422
|
) -> StreamReadSlices:
|
417
423
|
"""
|
418
424
|
Handles the current slice by packaging its pages, descriptor, and state into a StreamReadSlices instance.
|
@@ -421,6 +427,7 @@ def handle_current_slice(
|
|
421
427
|
current_slice_pages (List[StreamReadPages]): The pages to be included in the slice.
|
422
428
|
current_slice_descriptor (Optional[Dict[str, Any]]): Descriptor for the current slice, optional.
|
423
429
|
latest_state_message (Optional[Dict[str, Any]]): The latest state message, optional.
|
430
|
+
auxiliary_requests (Optional[List[AuxiliaryRequest]]): The auxiliary requests to include, optional.
|
424
431
|
|
425
432
|
Returns:
|
426
433
|
StreamReadSlices: An object containing the current slice's pages, descriptor, and state.
|
@@ -429,6 +436,7 @@ def handle_current_slice(
|
|
429
436
|
pages=current_slice_pages,
|
430
437
|
slice_descriptor=current_slice_descriptor,
|
431
438
|
state=[latest_state_message] if latest_state_message else [],
|
439
|
+
auxiliary_requests=auxiliary_requests if auxiliary_requests else [],
|
432
440
|
)
|
433
441
|
|
434
442
|
|
@@ -486,29 +494,24 @@ def handle_auxiliary_request(json_message: Dict[str, JsonType]) -> AuxiliaryRequ
|
|
486
494
|
Raises:
|
487
495
|
ValueError: If any of the "airbyte_cdk", "stream", or "http" fields is not a dictionary.
|
488
496
|
"""
|
489
|
-
airbyte_cdk = json_message.get("airbyte_cdk", {})
|
490
|
-
|
491
|
-
if not isinstance(airbyte_cdk, dict):
|
492
|
-
raise ValueError(
|
493
|
-
f"Expected airbyte_cdk to be a dict, got {airbyte_cdk} of type {type(airbyte_cdk)}"
|
494
|
-
)
|
495
|
-
|
496
|
-
stream = airbyte_cdk.get("stream", {})
|
497
497
|
|
498
|
-
|
499
|
-
|
498
|
+
airbyte_cdk = get_airbyte_cdk_from_message(json_message)
|
499
|
+
stream = get_stream_from_airbyte_cdk(airbyte_cdk)
|
500
|
+
title_prefix = get_auxiliary_request_title_prefix(stream)
|
501
|
+
http = get_http_property_from_message(json_message)
|
502
|
+
request_type = get_auxiliary_request_type(stream, http)
|
500
503
|
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
raise ValueError(f"Expected http to be a dict, got {http} of type {type(http)}")
|
504
|
+
title = title_prefix + str(http.get("title", None))
|
505
|
+
description = str(http.get("description", None))
|
506
|
+
request = create_request_from_log_message(json_message)
|
507
|
+
response = create_response_from_log_message(json_message)
|
506
508
|
|
507
509
|
return AuxiliaryRequest(
|
508
|
-
title=
|
509
|
-
|
510
|
-
|
511
|
-
|
510
|
+
title=title,
|
511
|
+
type=request_type,
|
512
|
+
description=description,
|
513
|
+
request=request,
|
514
|
+
response=response,
|
512
515
|
)
|
513
516
|
|
514
517
|
|
@@ -558,7 +561,8 @@ def handle_log_message(
|
|
558
561
|
at_least_one_page_in_group,
|
559
562
|
current_page_request,
|
560
563
|
current_page_response,
|
561
|
-
auxiliary_request
|
564
|
+
auxiliary_request,
|
565
|
+
log_message,
|
562
566
|
)
|
563
567
|
|
564
568
|
|
@@ -589,3 +593,97 @@ def handle_record_message(
|
|
589
593
|
datetime_format_inferrer.accumulate(message.record) # type: ignore
|
590
594
|
|
591
595
|
return records_count
|
596
|
+
|
597
|
+
|
598
|
+
# -------
|
599
|
+
# Reusable Getters
|
600
|
+
# -------
|
601
|
+
|
602
|
+
|
603
|
+
def get_airbyte_cdk_from_message(json_message: Dict[str, JsonType]) -> dict: # type: ignore
|
604
|
+
"""
|
605
|
+
Retrieves the "airbyte_cdk" dictionary from the provided JSON message.
|
606
|
+
|
607
|
+
This function validates that the extracted "airbyte_cdk" is of type dict,
|
608
|
+
raising a ValueError if the validation fails.
|
609
|
+
|
610
|
+
Parameters:
|
611
|
+
json_message (Dict[str, JsonType]): A dictionary representing the JSON message.
|
612
|
+
|
613
|
+
Returns:
|
614
|
+
dict: The "airbyte_cdk" dictionary extracted from the JSON message.
|
615
|
+
|
616
|
+
Raises:
|
617
|
+
ValueError: If the "airbyte_cdk" field is not a dictionary.
|
618
|
+
"""
|
619
|
+
airbyte_cdk = json_message.get("airbyte_cdk", {})
|
620
|
+
|
621
|
+
if not isinstance(airbyte_cdk, dict):
|
622
|
+
raise ValueError(
|
623
|
+
f"Expected airbyte_cdk to be a dict, got {airbyte_cdk} of type {type(airbyte_cdk)}"
|
624
|
+
)
|
625
|
+
|
626
|
+
return airbyte_cdk
|
627
|
+
|
628
|
+
|
629
|
+
def get_stream_from_airbyte_cdk(airbyte_cdk: dict) -> dict: # type: ignore
|
630
|
+
"""
|
631
|
+
Retrieves the "stream" dictionary from the provided "airbyte_cdk" dictionary.
|
632
|
+
|
633
|
+
This function ensures that the extracted "stream" is of type dict,
|
634
|
+
raising a ValueError if the validation fails.
|
635
|
+
|
636
|
+
Parameters:
|
637
|
+
airbyte_cdk (dict): The dictionary representing the Airbyte CDK data.
|
638
|
+
|
639
|
+
Returns:
|
640
|
+
dict: The "stream" dictionary extracted from the Airbyte CDK data.
|
641
|
+
|
642
|
+
Raises:
|
643
|
+
ValueError: If the "stream" field is not a dictionary.
|
644
|
+
"""
|
645
|
+
|
646
|
+
stream = airbyte_cdk.get("stream", {})
|
647
|
+
|
648
|
+
if not isinstance(stream, dict):
|
649
|
+
raise ValueError(f"Expected stream to be a dict, got {stream} of type {type(stream)}")
|
650
|
+
|
651
|
+
return stream
|
652
|
+
|
653
|
+
|
654
|
+
def get_auxiliary_request_title_prefix(stream: dict) -> str: # type: ignore
|
655
|
+
"""
|
656
|
+
Generates a title prefix based on the stream type.
|
657
|
+
"""
|
658
|
+
return "Parent stream: " if stream.get("is_substream", False) else ""
|
659
|
+
|
660
|
+
|
661
|
+
def get_http_property_from_message(json_message: Dict[str, JsonType]) -> dict: # type: ignore
|
662
|
+
"""
|
663
|
+
Retrieves the "http" dictionary from the provided JSON message.
|
664
|
+
|
665
|
+
This function validates that the extracted "http" is of type dict,
|
666
|
+
raising a ValueError if the validation fails.
|
667
|
+
|
668
|
+
Parameters:
|
669
|
+
json_message (Dict[str, JsonType]): A dictionary representing the JSON message.
|
670
|
+
|
671
|
+
Returns:
|
672
|
+
dict: The "http" dictionary extracted from the JSON message.
|
673
|
+
|
674
|
+
Raises:
|
675
|
+
ValueError: If the "http" field is not a dictionary.
|
676
|
+
"""
|
677
|
+
http = json_message.get("http", {})
|
678
|
+
|
679
|
+
if not isinstance(http, dict):
|
680
|
+
raise ValueError(f"Expected http to be a dict, got {http} of type {type(http)}")
|
681
|
+
|
682
|
+
return http
|
683
|
+
|
684
|
+
|
685
|
+
def get_auxiliary_request_type(stream: dict, http: dict) -> str: # type: ignore
|
686
|
+
"""
|
687
|
+
Determines the type of the auxiliary request based on the stream and HTTP properties.
|
688
|
+
"""
|
689
|
+
return "PARENT_STREAM" if stream.get("is_substream", False) else str(http.get("type", None))
|
@@ -6,6 +6,7 @@
|
|
6
6
|
from typing import Any, Dict, Iterator, List, Mapping, Optional
|
7
7
|
|
8
8
|
from airbyte_cdk.connector_builder.models import (
|
9
|
+
AuxiliaryRequest,
|
9
10
|
HttpRequest,
|
10
11
|
HttpResponse,
|
11
12
|
StreamReadPages,
|
@@ -24,6 +25,7 @@ from .helpers import (
|
|
24
25
|
handle_current_slice,
|
25
26
|
handle_log_message,
|
26
27
|
handle_record_message,
|
28
|
+
is_async_auxiliary_request,
|
27
29
|
is_config_update_message,
|
28
30
|
is_log_message,
|
29
31
|
is_record_message,
|
@@ -89,6 +91,7 @@ def get_message_groups(
|
|
89
91
|
current_page_request: Optional[HttpRequest] = None
|
90
92
|
current_page_response: Optional[HttpResponse] = None
|
91
93
|
latest_state_message: Optional[Dict[str, Any]] = None
|
94
|
+
slice_auxiliary_requests: List[AuxiliaryRequest] = []
|
92
95
|
|
93
96
|
while records_count < limit and (message := next(messages, None)):
|
94
97
|
json_message = airbyte_message_to_json(message)
|
@@ -106,6 +109,7 @@ def get_message_groups(
|
|
106
109
|
current_slice_pages,
|
107
110
|
current_slice_descriptor,
|
108
111
|
latest_state_message,
|
112
|
+
slice_auxiliary_requests,
|
109
113
|
)
|
110
114
|
current_slice_descriptor = parse_slice_description(message.log.message) # type: ignore
|
111
115
|
current_slice_pages = []
|
@@ -118,7 +122,8 @@ def get_message_groups(
|
|
118
122
|
at_least_one_page_in_group,
|
119
123
|
current_page_request,
|
120
124
|
current_page_response,
|
121
|
-
|
125
|
+
auxiliary_request,
|
126
|
+
log_message,
|
122
127
|
) = handle_log_message(
|
123
128
|
message,
|
124
129
|
json_message,
|
@@ -126,8 +131,15 @@ def get_message_groups(
|
|
126
131
|
current_page_request,
|
127
132
|
current_page_response,
|
128
133
|
)
|
129
|
-
|
130
|
-
|
134
|
+
|
135
|
+
if auxiliary_request:
|
136
|
+
if is_async_auxiliary_request(auxiliary_request):
|
137
|
+
slice_auxiliary_requests.append(auxiliary_request)
|
138
|
+
else:
|
139
|
+
yield auxiliary_request
|
140
|
+
|
141
|
+
if log_message:
|
142
|
+
yield log_message
|
131
143
|
elif is_trace_with_error(message):
|
132
144
|
if message.trace is not None:
|
133
145
|
yield message.trace
|
@@ -157,4 +169,5 @@ def get_message_groups(
|
|
157
169
|
current_slice_pages,
|
158
170
|
current_slice_descriptor,
|
159
171
|
latest_state_message,
|
172
|
+
slice_auxiliary_requests,
|
160
173
|
)
|
@@ -71,5 +71,13 @@ LOG_MESSAGES_OUTPUT_TYPE = tuple[
|
|
71
71
|
bool,
|
72
72
|
HttpRequest | None,
|
73
73
|
HttpResponse | None,
|
74
|
-
AuxiliaryRequest |
|
74
|
+
AuxiliaryRequest | None,
|
75
|
+
AirbyteLogMessage | None,
|
76
|
+
]
|
77
|
+
|
78
|
+
ASYNC_AUXILIARY_REQUEST_TYPES = [
|
79
|
+
"ASYNC_CREATE",
|
80
|
+
"ASYNC_POLL",
|
81
|
+
"ASYNC_ABORT",
|
82
|
+
"ASYNC_DELETE",
|
75
83
|
]
|
airbyte_cdk/entrypoint.py
CHANGED
@@ -37,8 +37,8 @@ from airbyte_cdk.sources import Source
|
|
37
37
|
from airbyte_cdk.sources.connector_state_manager import HashableStreamDescriptor
|
38
38
|
from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit, split_config
|
39
39
|
|
40
|
-
|
41
|
-
from airbyte_cdk.utils import is_cloud_environment, message_utils
|
40
|
+
from airbyte_cdk.utils import PrintBuffer, is_cloud_environment, message_utils # add PrintBuffer back once fixed
|
41
|
+
# from airbyte_cdk.utils import is_cloud_environment, message_utils
|
42
42
|
from airbyte_cdk.utils.airbyte_secrets_utils import get_secrets, update_secrets
|
43
43
|
from airbyte_cdk.utils.constants import ENV_REQUEST_CACHE_PATH
|
44
44
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
@@ -337,11 +337,11 @@ def launch(source: Source, args: List[str]) -> None:
|
|
337
337
|
parsed_args = source_entrypoint.parse_args(args)
|
338
338
|
# temporarily removes the PrintBuffer because we're seeing weird print behavior for concurrent syncs
|
339
339
|
# Refer to: https://github.com/airbytehq/oncall/issues/6235
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
340
|
+
with PrintBuffer():
|
341
|
+
for message in source_entrypoint.run(parsed_args):
|
342
|
+
# simply printing is creating issues for concurrent CDK as Python uses two different instructions to print: one for the message and
|
343
|
+
# the other for the line break. Adding `\n` to the message ensures that both are printed at the same time
|
344
|
+
print(f"{message}\n", end="", flush=True)
|
345
345
|
|
346
346
|
|
347
347
|
def _init_internal_request_filter() -> None:
|
@@ -95,6 +95,10 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
95
95
|
# the oldest partitions can be efficiently removed, maintaining the most recent partitions.
|
96
96
|
self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
|
97
97
|
self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
|
98
|
+
|
99
|
+
# Parent-state tracking: store each partition’s parent state in creation order
|
100
|
+
self._partition_parent_state_map: OrderedDict[str, Mapping[str, Any]] = OrderedDict()
|
101
|
+
|
98
102
|
self._finished_partitions: set[str] = set()
|
99
103
|
self._lock = threading.Lock()
|
100
104
|
self._timer = Timer()
|
@@ -155,11 +159,62 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
155
159
|
and self._semaphore_per_partition[partition_key]._value == 0
|
156
160
|
):
|
157
161
|
self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])
|
158
|
-
|
162
|
+
|
163
|
+
self._check_and_update_parent_state()
|
164
|
+
|
165
|
+
self._emit_state_message()
|
166
|
+
|
167
|
+
def _check_and_update_parent_state(self) -> None:
|
168
|
+
"""
|
169
|
+
Pop the leftmost partition state from _partition_parent_state_map only if
|
170
|
+
*all partitions* up to (and including) that partition key in _semaphore_per_partition
|
171
|
+
are fully finished (i.e. in _finished_partitions and semaphore._value == 0).
|
172
|
+
Additionally, delete finished semaphores with a value of 0 to free up memory,
|
173
|
+
as they are only needed to track errors and completion status.
|
174
|
+
"""
|
175
|
+
last_closed_state = None
|
176
|
+
|
177
|
+
while self._partition_parent_state_map:
|
178
|
+
# Look at the earliest partition key in creation order
|
179
|
+
earliest_key = next(iter(self._partition_parent_state_map))
|
180
|
+
|
181
|
+
# Verify ALL partitions from the left up to earliest_key are finished
|
182
|
+
all_left_finished = True
|
183
|
+
for p_key, sem in list(
|
184
|
+
self._semaphore_per_partition.items()
|
185
|
+
): # Use list to allow modification during iteration
|
186
|
+
# If any earlier partition is still not finished, we must stop
|
187
|
+
if p_key not in self._finished_partitions or sem._value != 0:
|
188
|
+
all_left_finished = False
|
189
|
+
break
|
190
|
+
# Once we've reached earliest_key in the semaphore order, we can stop checking
|
191
|
+
if p_key == earliest_key:
|
192
|
+
break
|
193
|
+
|
194
|
+
# If the partitions up to earliest_key are not all finished, break the while-loop
|
195
|
+
if not all_left_finished:
|
196
|
+
break
|
197
|
+
|
198
|
+
# Pop the leftmost entry from parent-state map
|
199
|
+
_, closed_parent_state = self._partition_parent_state_map.popitem(last=False)
|
200
|
+
last_closed_state = closed_parent_state
|
201
|
+
|
202
|
+
# Clean up finished semaphores with value 0 up to and including earliest_key
|
203
|
+
for p_key in list(self._semaphore_per_partition.keys()):
|
204
|
+
sem = self._semaphore_per_partition[p_key]
|
205
|
+
if p_key in self._finished_partitions and sem._value == 0:
|
206
|
+
del self._semaphore_per_partition[p_key]
|
207
|
+
logger.debug(f"Deleted finished semaphore for partition {p_key} with value 0")
|
208
|
+
if p_key == earliest_key:
|
209
|
+
break
|
210
|
+
|
211
|
+
# Update _parent_state if we popped at least one partition
|
212
|
+
if last_closed_state is not None:
|
213
|
+
self._parent_state = last_closed_state
|
159
214
|
|
160
215
|
def ensure_at_least_one_state_emitted(self) -> None:
|
161
216
|
"""
|
162
|
-
The platform
|
217
|
+
The platform expects at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
|
163
218
|
called.
|
164
219
|
"""
|
165
220
|
if not any(
|
@@ -201,13 +256,19 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
201
256
|
|
202
257
|
slices = self._partition_router.stream_slices()
|
203
258
|
self._timer.start()
|
204
|
-
for partition in
|
205
|
-
|
259
|
+
for partition, last, parent_state in iterate_with_last_flag_and_state(
|
260
|
+
slices, self._partition_router.get_stream_state
|
261
|
+
):
|
262
|
+
yield from self._generate_slices_from_partition(partition, parent_state)
|
206
263
|
|
207
|
-
def _generate_slices_from_partition(
|
264
|
+
def _generate_slices_from_partition(
|
265
|
+
self, partition: StreamSlice, parent_state: Mapping[str, Any]
|
266
|
+
) -> Iterable[StreamSlice]:
|
208
267
|
# Ensure the maximum number of partitions is not exceeded
|
209
268
|
self._ensure_partition_limit()
|
210
269
|
|
270
|
+
partition_key = self._to_partition_key(partition.partition)
|
271
|
+
|
211
272
|
cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
|
212
273
|
if not cursor:
|
213
274
|
cursor = self._create_cursor(
|
@@ -216,18 +277,26 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
216
277
|
)
|
217
278
|
with self._lock:
|
218
279
|
self._number_of_partitions += 1
|
219
|
-
self._cursor_per_partition[
|
220
|
-
|
221
|
-
|
222
|
-
|
280
|
+
self._cursor_per_partition[partition_key] = cursor
|
281
|
+
self._semaphore_per_partition[partition_key] = threading.Semaphore(0)
|
282
|
+
|
283
|
+
with self._lock:
|
284
|
+
if (
|
285
|
+
len(self._partition_parent_state_map) == 0
|
286
|
+
or self._partition_parent_state_map[
|
287
|
+
next(reversed(self._partition_parent_state_map))
|
288
|
+
]
|
289
|
+
!= parent_state
|
290
|
+
):
|
291
|
+
self._partition_parent_state_map[partition_key] = deepcopy(parent_state)
|
223
292
|
|
224
293
|
for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
|
225
294
|
cursor.stream_slices(),
|
226
295
|
lambda: None,
|
227
296
|
):
|
228
|
-
self._semaphore_per_partition[
|
297
|
+
self._semaphore_per_partition[partition_key].release()
|
229
298
|
if is_last_slice:
|
230
|
-
self._finished_partitions.add(
|
299
|
+
self._finished_partitions.add(partition_key)
|
231
300
|
yield StreamSlice(
|
232
301
|
partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
|
233
302
|
)
|
@@ -257,9 +326,9 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
257
326
|
while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
|
258
327
|
# Try removing finished partitions first
|
259
328
|
for partition_key in list(self._cursor_per_partition.keys()):
|
260
|
-
if (
|
261
|
-
partition_key in self.
|
262
|
-
|
329
|
+
if partition_key in self._finished_partitions and (
|
330
|
+
partition_key not in self._semaphore_per_partition
|
331
|
+
or self._semaphore_per_partition[partition_key]._value == 0
|
263
332
|
):
|
264
333
|
oldest_partition = self._cursor_per_partition.pop(
|
265
334
|
partition_key
|
@@ -338,9 +407,6 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
338
407
|
self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
|
339
408
|
self._create_cursor(state["cursor"])
|
340
409
|
)
|
341
|
-
self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
|
342
|
-
threading.Semaphore(0)
|
343
|
-
)
|
344
410
|
|
345
411
|
# set default state for missing partitions if it is per partition with fallback to global
|
346
412
|
if self._GLOBAL_STATE_KEY in stream_state:
|
@@ -2629,6 +2629,47 @@ class ModelToComponentFactory:
|
|
2629
2629
|
transformations: List[RecordTransformation],
|
2630
2630
|
**kwargs: Any,
|
2631
2631
|
) -> AsyncRetriever:
|
2632
|
+
def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever:
|
2633
|
+
record_selector = RecordSelector(
|
2634
|
+
extractor=download_extractor,
|
2635
|
+
name=name,
|
2636
|
+
record_filter=None,
|
2637
|
+
transformations=transformations,
|
2638
|
+
schema_normalization=TypeTransformer(TransformConfig.NoTransform),
|
2639
|
+
config=config,
|
2640
|
+
parameters={},
|
2641
|
+
)
|
2642
|
+
paginator = (
|
2643
|
+
self._create_component_from_model(
|
2644
|
+
model=model.download_paginator, decoder=decoder, config=config, url_base=""
|
2645
|
+
)
|
2646
|
+
if model.download_paginator
|
2647
|
+
else NoPagination(parameters={})
|
2648
|
+
)
|
2649
|
+
maximum_number_of_slices = self._limit_slices_fetched or 5
|
2650
|
+
|
2651
|
+
if self._limit_slices_fetched or self._emit_connector_builder_messages:
|
2652
|
+
return SimpleRetrieverTestReadDecorator(
|
2653
|
+
requester=download_requester,
|
2654
|
+
record_selector=record_selector,
|
2655
|
+
primary_key=None,
|
2656
|
+
name=job_download_components_name,
|
2657
|
+
paginator=paginator,
|
2658
|
+
config=config,
|
2659
|
+
parameters={},
|
2660
|
+
maximum_number_of_slices=maximum_number_of_slices,
|
2661
|
+
)
|
2662
|
+
|
2663
|
+
return SimpleRetriever(
|
2664
|
+
requester=download_requester,
|
2665
|
+
record_selector=record_selector,
|
2666
|
+
primary_key=None,
|
2667
|
+
name=job_download_components_name,
|
2668
|
+
paginator=paginator,
|
2669
|
+
config=config,
|
2670
|
+
parameters={},
|
2671
|
+
)
|
2672
|
+
|
2632
2673
|
decoder = (
|
2633
2674
|
self._create_component_from_model(model=model.decoder, config=config)
|
2634
2675
|
if model.decoder
|
@@ -2682,29 +2723,7 @@ class ModelToComponentFactory:
|
|
2682
2723
|
config=config,
|
2683
2724
|
name=job_download_components_name,
|
2684
2725
|
)
|
2685
|
-
download_retriever =
|
2686
|
-
requester=download_requester,
|
2687
|
-
record_selector=RecordSelector(
|
2688
|
-
extractor=download_extractor,
|
2689
|
-
name=name,
|
2690
|
-
record_filter=None,
|
2691
|
-
transformations=transformations,
|
2692
|
-
schema_normalization=TypeTransformer(TransformConfig.NoTransform),
|
2693
|
-
config=config,
|
2694
|
-
parameters={},
|
2695
|
-
),
|
2696
|
-
primary_key=None,
|
2697
|
-
name=job_download_components_name,
|
2698
|
-
paginator=(
|
2699
|
-
self._create_component_from_model(
|
2700
|
-
model=model.download_paginator, decoder=decoder, config=config, url_base=""
|
2701
|
-
)
|
2702
|
-
if model.download_paginator
|
2703
|
-
else NoPagination(parameters={})
|
2704
|
-
),
|
2705
|
-
config=config,
|
2706
|
-
parameters={},
|
2707
|
-
)
|
2726
|
+
download_retriever = _get_download_retriever()
|
2708
2727
|
abort_requester = (
|
2709
2728
|
self._create_component_from_model(
|
2710
2729
|
model=model.abort_requester,
|
@@ -15,11 +15,14 @@ def format_http_message(
|
|
15
15
|
description: str,
|
16
16
|
stream_name: Optional[str],
|
17
17
|
is_auxiliary: bool | None = None,
|
18
|
+
type: Optional[str] = None,
|
18
19
|
) -> LogMessage:
|
20
|
+
request_type: str = type if type else "HTTP"
|
19
21
|
request = response.request
|
20
22
|
log_message = {
|
21
23
|
"http": {
|
22
24
|
"title": title,
|
25
|
+
"type": request_type,
|
23
26
|
"description": description,
|
24
27
|
"request": {
|
25
28
|
"method": request.method,
|
@@ -9,12 +9,12 @@ airbyte_cdk/connector_builder/README.md,sha256=Hw3wvVewuHG9-QgsAq1jDiKuLlStDxKBz
|
|
9
9
|
airbyte_cdk/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
10
10
|
airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=BntqkP63RBPvGCtB3CrLLtYplfSlBR42kwXyyk4YGas,4268
|
11
11
|
airbyte_cdk/connector_builder/main.py,sha256=ubAPE0Oo5gjZOa-KMtLLJQkc8_inUpFR3sIb2DEh2No,3722
|
12
|
-
airbyte_cdk/connector_builder/models.py,sha256=
|
12
|
+
airbyte_cdk/connector_builder/models.py,sha256=9pIZ98LW_d6fRS39VdnUOf3cxGt4TkC5MJ0_OrzcCRk,1578
|
13
13
|
airbyte_cdk/connector_builder/test_reader/__init__.py,sha256=iTwBMoI9vaJotEgpqZbFjlxRcbxXYypSVJ9YxeHk7wc,120
|
14
|
-
airbyte_cdk/connector_builder/test_reader/helpers.py,sha256=
|
15
|
-
airbyte_cdk/connector_builder/test_reader/message_grouper.py,sha256=
|
14
|
+
airbyte_cdk/connector_builder/test_reader/helpers.py,sha256=Iczn-_iczS2CaIAunWwyFcX0uLTra8Wh9JVfzm1Gfxo,26765
|
15
|
+
airbyte_cdk/connector_builder/test_reader/message_grouper.py,sha256=84BAEPIBHMq3WCfO14WNvh_q7OsjGgDt0q1FTu8eW-w,6918
|
16
16
|
airbyte_cdk/connector_builder/test_reader/reader.py,sha256=GurMB4ITO_PntvhIHSJkXbhynLilI4DObY5A2axavXo,20667
|
17
|
-
airbyte_cdk/connector_builder/test_reader/types.py,sha256=
|
17
|
+
airbyte_cdk/connector_builder/test_reader/types.py,sha256=hPZG3jO03kBaPyW94NI3JHRS1jxXGSNBcN1HFzOxo5Y,2528
|
18
18
|
airbyte_cdk/destinations/__init__.py,sha256=FyDp28PT_YceJD5HDFhA-mrGfX9AONIyMQ4d68CHNxQ,213
|
19
19
|
airbyte_cdk/destinations/destination.py,sha256=CIq-yb8C_0QvcKCtmStaHfiqn53GEfRAIGGCkJhKP1Q,5880
|
20
20
|
airbyte_cdk/destinations/vector_db_based/README.md,sha256=QAe8c_1Afme4r2TCE10cTSaxUE3zgCBuArSuRQqK8tA,2115
|
@@ -26,7 +26,7 @@ airbyte_cdk/destinations/vector_db_based/indexer.py,sha256=beiSi2Uu67EoTr7yQSaCJ
|
|
26
26
|
airbyte_cdk/destinations/vector_db_based/test_utils.py,sha256=MkqLiOJ5QyKbV4rNiJhe-BHM7FD-ADHQ4bQGf4c5lRY,1932
|
27
27
|
airbyte_cdk/destinations/vector_db_based/utils.py,sha256=FOyEo8Lc-fY8UyhpCivhZtIqBRyxf3cUt6anmK03fUY,1127
|
28
28
|
airbyte_cdk/destinations/vector_db_based/writer.py,sha256=nZ00xPiohElJmYktEZZIhr0m5EDETCHGhg0Lb2S7A20,5095
|
29
|
-
airbyte_cdk/entrypoint.py,sha256=
|
29
|
+
airbyte_cdk/entrypoint.py,sha256=2b8lsoJkMIVtQE0vKkpsjHYnIUJm9G6HDuEN_lc4SP0,18569
|
30
30
|
airbyte_cdk/exception_handler.py,sha256=D_doVl3Dt60ASXlJsfviOCswxGyKF2q0RL6rif3fNks,2013
|
31
31
|
airbyte_cdk/logger.py,sha256=qi4UGuSYQQGaFaTVJlMD9lLppwqLXt1XBhwSXo-Q5IA,3660
|
32
32
|
airbyte_cdk/models/__init__.py,sha256=MOTiuML2wShBaMSIwikdjyye2uUWBjo4J1QFSbnoiM4,2075
|
@@ -60,7 +60,7 @@ airbyte_cdk/sources/declarative/auth/jwt.py,sha256=SICqNsN2Cn_EgKadIgWuZpQxuMHyz
|
|
60
60
|
airbyte_cdk/sources/declarative/auth/oauth.py,sha256=SUfib1oSzlyRRnOSg8Bui73mfyrcyr9OssdchbKdu4s,14162
|
61
61
|
airbyte_cdk/sources/declarative/auth/selective_authenticator.py,sha256=qGwC6YsCldr1bIeKG6Qo-A9a5cTdHw-vcOn3OtQrS4c,1540
|
62
62
|
airbyte_cdk/sources/declarative/auth/token.py,sha256=2EnE78EhBOY9hbeZnQJ9AuFaM-G7dccU-oKo_LThRQk,11070
|
63
|
-
airbyte_cdk/sources/declarative/auth/token_provider.py,sha256=
|
63
|
+
airbyte_cdk/sources/declarative/auth/token_provider.py,sha256=Jzuxlmt1_-_aFC_n0OmP8L1nDOacLzbEVVx3kjdX_W8,3104
|
64
64
|
airbyte_cdk/sources/declarative/checks/__init__.py,sha256=nsVV5Bo0E_tBNd8A4Xdsdb-75PpcLo5RQu2RQ_Gv-ME,806
|
65
65
|
airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py,sha256=HUktywjI8pqOeED08UGqponUSwxs2TOAECTowlWlrRE,2138
|
66
66
|
airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQrilWCfJmncBzXCZ18ptRNip3XA,2139
|
@@ -92,7 +92,7 @@ airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=HCqx7IyENM_
|
|
92
92
|
airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
|
93
93
|
airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
|
94
94
|
airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
|
95
|
-
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=
|
95
|
+
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=MT5JbdEbnPzk3VWZGGvThe4opoX5dHhSXFrnTRYC6dg,22210
|
96
96
|
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=Rbe6lJLTtZ5en33MwZiB9-H9-AwDMNHgwBZs8EqhYqk,22172
|
97
97
|
airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
|
98
98
|
airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=2tsE6FgXzemf4fZZ4uGtd8QpRBl9GJ2CRqSNJE5p0EI,16077
|
@@ -119,7 +119,7 @@ airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=958MMX6_Z
|
|
119
119
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
120
120
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
|
121
121
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
122
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
122
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=dj7b3s8jaDV7Tb7EXVSzkvC8QY-mxyOf48rxkSMws6A,134851
|
123
123
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
|
124
124
|
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
|
125
125
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
@@ -250,7 +250,7 @@ airbyte_cdk/sources/file_based/stream/identities_stream.py,sha256=DwgNU-jDp5vZ_W
|
|
250
250
|
airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py,sha256=i0Jn0zuAPomLa4pHSu9TQ3gAN5xXhNzPTYVwUDiDEyE,3523
|
251
251
|
airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
|
252
252
|
airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
|
253
|
-
airbyte_cdk/sources/http_logger.py,sha256=
|
253
|
+
airbyte_cdk/sources/http_logger.py,sha256=H93kPAujHhPmXNX0JSFG3D-SL6yEFA5PtKot9Hu3TYA,1690
|
254
254
|
airbyte_cdk/sources/message/__init__.py,sha256=y98fzHsQBwXwp2zEa4K5mxGFqjnx9lDn9O0pTk-VS4U,395
|
255
255
|
airbyte_cdk/sources/message/repository.py,sha256=SG7avgti_-dj8FcRHTTrhgLLGJbElv14_zIB0SH8AIc,4763
|
256
256
|
airbyte_cdk/sources/source.py,sha256=KIBBH5VLEb8BZ8B9aROlfaI6OLoJqKDPMJ10jkAR7nk,3611
|
@@ -303,7 +303,7 @@ airbyte_cdk/sources/streams/http/http.py,sha256=0uariNq8OFnlX7iqOHwBhecxA-Hfd5hS
|
|
303
303
|
airbyte_cdk/sources/streams/http/http_client.py,sha256=tDE0ROtxjGMVphvsw8INvGMtZ97hIF-v47pZ3jIyiwc,23011
|
304
304
|
airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
|
305
305
|
airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
|
306
|
-
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=
|
306
|
+
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=P9U8vtcrZ3m0InSG2W0H4gTYTxjQxkIe6mhF9xvO8Ug,18824
|
307
307
|
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py,sha256=Y3n7J-sk5yGjv_OxtY6Z6k0PEsFZmtIRi-x0KCbaHdA,1010
|
308
308
|
airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py,sha256=C2j2uVfi9d-3KgHO3NGxIiFdfASjHOtsd6g_LWPYOAs,20311
|
309
309
|
airbyte_cdk/sources/streams/http/requests_native_auth/token.py,sha256=h5PTzcdH-RQLeCg7xZ45w_484OPUDSwNWl_iMJQmZoI,2526
|
@@ -360,9 +360,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
|
|
360
360
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
361
361
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
362
362
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
363
|
-
airbyte_cdk-6.
|
364
|
-
airbyte_cdk-6.
|
365
|
-
airbyte_cdk-6.
|
366
|
-
airbyte_cdk-6.
|
367
|
-
airbyte_cdk-6.
|
368
|
-
airbyte_cdk-6.
|
363
|
+
airbyte_cdk-6.37.0.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
364
|
+
airbyte_cdk-6.37.0.dev0.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
365
|
+
airbyte_cdk-6.37.0.dev0.dist-info/METADATA,sha256=kcKo6BoaqSRqsOMLq2fmnSa6iVkhtMy4YMEL4Dk1WYU,6015
|
366
|
+
airbyte_cdk-6.37.0.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
367
|
+
airbyte_cdk-6.37.0.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
368
|
+
airbyte_cdk-6.37.0.dev0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|