airbyte_cdk-6.41.3-py3-none-any.whl → airbyte_cdk-6.41.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/async_job/job.py +6 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +4 -1
- airbyte_cdk/test/mock_http/mocker.py +13 -8
- {airbyte_cdk-6.41.3.dist-info → airbyte_cdk-6.41.5.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.41.3.dist-info → airbyte_cdk-6.41.5.dist-info}/RECORD +10 -10
- {airbyte_cdk-6.41.3.dist-info → airbyte_cdk-6.41.5.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.41.3.dist-info → airbyte_cdk-6.41.5.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.41.3.dist-info → airbyte_cdk-6.41.5.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.41.3.dist-info → airbyte_cdk-6.41.5.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/async_job/job.py:

```diff
@@ -34,6 +34,12 @@ class AsyncJob:
 
     def status(self) -> AsyncJobStatus:
         if self._timer.has_timed_out():
+            # TODO: we should account the fact that,
+            # certain APIs could send the `Timeout` status,
+            # thus we should not return `Timeout` in that case,
+            # but act based on the scenario.
+
+            # the default behavior is to return `Timeout` status and retry.
             return AsyncJobStatus.TIMED_OUT
         return self._status
 
```
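The effect of this hunk: once a job's timer expires, `status()` reports `TIMED_OUT` no matter what the API last returned, and the orchestrator changes below turn that into a retry instead of a failed sync. A minimal sketch of that precedence; `_ToyJob` is a stand-in, only the `AsyncJobStatus` import is the real CDK enum (assuming a `RUNNING` member, which this diff itself doesn't show):

```python
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus


class _ToyJob:
    def __init__(self, api_status: AsyncJobStatus, timed_out: bool) -> None:
        self._status = api_status
        self._timed_out = timed_out  # stands in for self._timer.has_timed_out()

    def status(self) -> AsyncJobStatus:
        # The timer takes precedence: a stale job reports TIMED_OUT even if the
        # API last said RUNNING, which lets the orchestrator retry it.
        if self._timed_out:
            return AsyncJobStatus.TIMED_OUT
        return self._status


assert _ToyJob(AsyncJobStatus.RUNNING, timed_out=True).status() is AsyncJobStatus.TIMED_OUT
```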
airbyte_cdk/sources/declarative/async_job/job_orchestrator.py:

```diff
@@ -44,16 +44,21 @@ class AsyncPartition:
     This bucket of api_jobs is a bit useless for this iteration but should become interesting when we will be able to split jobs
     """
 
-    _MAX_NUMBER_OF_ATTEMPTS = 3
+    _DEFAULT_MAX_JOB_RETRY = 3
 
-    def __init__(self, jobs: List[AsyncJob], stream_slice: StreamSlice) -> None:
+    def __init__(
+        self, jobs: List[AsyncJob], stream_slice: StreamSlice, job_max_retry: Optional[int] = None
+    ) -> None:
         self._attempts_per_job = {job: 1 for job in jobs}
         self._stream_slice = stream_slice
+        self._job_max_retry = (
+            job_max_retry if job_max_retry is not None else self._DEFAULT_MAX_JOB_RETRY
+        )
 
     def has_reached_max_attempt(self) -> bool:
         return any(
             map(
-                lambda attempt_count: attempt_count >= self._MAX_NUMBER_OF_ATTEMPTS,
+                lambda attempt_count: attempt_count >= self._job_max_retry,
                 self._attempts_per_job.values(),
             )
         )
@@ -62,7 +67,7 @@ class AsyncPartition:
         current_attempt_count = self._attempts_per_job.pop(job_to_replace, None)
         if current_attempt_count is None:
             raise ValueError("Could not find job to replace")
-        elif current_attempt_count >= self._MAX_NUMBER_OF_ATTEMPTS:
+        elif current_attempt_count >= self._job_max_retry:
             raise ValueError(f"Max attempt reached for job in partition {self._stream_slice}")
 
         new_attempt_count = current_attempt_count + 1
```
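Taken together, these two hunks make the retry cap injectable: every job starts with one recorded attempt, and `None` falls back to the class default of 3. A self-contained toy mirroring the diff's names (not the CDK class):

```python
from typing import Dict, List, Optional

_DEFAULT_MAX_JOB_RETRY = 3


class _ToyPartition:
    def __init__(self, job_ids: List[str], job_max_retry: Optional[int] = None) -> None:
        # Every job starts with one attempt; None defers to the default of 3.
        self._attempts_per_job: Dict[str, int] = {job_id: 1 for job_id in job_ids}
        self._job_max_retry = (
            job_max_retry if job_max_retry is not None else _DEFAULT_MAX_JOB_RETRY
        )

    def has_reached_max_attempt(self) -> bool:
        # True as soon as any job in the partition has used up its attempts.
        return any(
            attempt_count >= self._job_max_retry
            for attempt_count in self._attempts_per_job.values()
        )


assert _ToyPartition(["job-1"], job_max_retry=1).has_reached_max_attempt()
assert not _ToyPartition(["job-1"]).has_reached_max_attempt()  # default cap of 3
```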
airbyte_cdk/sources/declarative/async_job/job_orchestrator.py (continued):

```diff
@@ -155,6 +160,7 @@ class AsyncJobOrchestrator:
         message_repository: MessageRepository,
         exceptions_to_break_on: Iterable[Type[Exception]] = tuple(),
         has_bulk_parent: bool = False,
+        job_max_retry: Optional[int] = None,
     ) -> None:
         """
         If the stream slices provided as a parameters relies on a async job streams that relies on the same JobTracker, `has_bulk_parent`
@@ -175,11 +181,12 @@ class AsyncJobOrchestrator:
         self._message_repository = message_repository
         self._exceptions_to_break_on: Tuple[Type[Exception], ...] = tuple(exceptions_to_break_on)
         self._has_bulk_parent = has_bulk_parent
+        self._job_max_retry = job_max_retry
 
         self._non_breaking_exceptions: List[Exception] = []
 
     def _replace_failed_jobs(self, partition: AsyncPartition) -> None:
-        failed_status_jobs = (AsyncJobStatus.FAILED,)
+        failed_status_jobs = (AsyncJobStatus.FAILED, AsyncJobStatus.TIMED_OUT)
         jobs_to_replace = [job for job in partition.jobs if job.status() in failed_status_jobs]
         for job in jobs_to_replace:
             new_job = self._start_job(job.job_parameters(), job.api_job_id())
```
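With `AsyncJobStatus.TIMED_OUT` added to `failed_status_jobs`, timed-out jobs are now replaced and retried exactly like failed ones. A small illustration of the widened filter using the real enum; the status snapshot is made up:

```python
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus

failed_status_jobs = (AsyncJobStatus.FAILED, AsyncJobStatus.TIMED_OUT)

# Hypothetical snapshot of job statuses within a partition:
statuses = [AsyncJobStatus.COMPLETED, AsyncJobStatus.TIMED_OUT, AsyncJobStatus.FAILED]
to_replace = [status for status in statuses if status in failed_status_jobs]
assert to_replace == [AsyncJobStatus.TIMED_OUT, AsyncJobStatus.FAILED]
```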
airbyte_cdk/sources/declarative/async_job/job_orchestrator.py (continued):

```diff
@@ -214,7 +221,7 @@ class AsyncJobOrchestrator:
             for _slice in self._slice_iterator:
                 at_least_one_slice_consumed_from_slice_iterator_during_current_iteration = True
                 job = self._start_job(_slice)
-                self._running_partitions.append(AsyncPartition([job], _slice))
+                self._running_partitions.append(AsyncPartition([job], _slice, self._job_max_retry))
                 if self._has_bulk_parent and self._slice_iterator.has_next():
                     break
         except ConcurrentJobLimitReached:
@@ -363,7 +370,7 @@ class AsyncJobOrchestrator:
                     self._reallocate_partition(current_running_partitions, partition)
 
             # We only remove completed / timeout jobs jobs as we want failed jobs to be re-allocated in priority
-            self._remove_completed_or_timed_out_jobs(partition)
+            self._remove_completed_jobs(partition)
 
         # update the referenced list with running partitions
         self._running_partitions = current_running_partitions
@@ -378,11 +385,7 @@ class AsyncJobOrchestrator:
     def _stop_timed_out_jobs(self, partition: AsyncPartition) -> None:
         for job in partition.jobs:
             if job.status() == AsyncJobStatus.TIMED_OUT:
-                self._abort_job(job, free_job_allocation=True)
-                raise AirbyteTracedException(
-                    internal_message=f"Job {job.api_job_id()} has timed out. Try increasing the `polling job timeout`.",
-                    failure_type=FailureType.config_error,
-                )
+                self._abort_job(job, free_job_allocation=False)
 
     def _abort_job(self, job: AsyncJob, free_job_allocation: bool = True) -> None:
         try:
```
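The `_stop_timed_out_jobs` hunk is the behavioral core of this release: 6.41.3 aborted a timed-out job, freed its budget slot, and raised a config error that failed the whole sync, while 6.41.5 aborts it quietly and keeps the allocation so `_replace_failed_jobs` can retry it. A stub-based sketch of the new control flow; `_ToyJob` and `abort_job` are stand-ins, only `AsyncJobStatus` is the real enum:

```python
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus

aborted = []


def abort_job(job, free_job_allocation: bool) -> None:
    # Records the call the orchestrator would make on the real job repository.
    aborted.append((job, free_job_allocation))


class _ToyJob:
    def __init__(self, status: AsyncJobStatus) -> None:
        self._status = status

    def status(self) -> AsyncJobStatus:
        return self._status


jobs = [_ToyJob(AsyncJobStatus.TIMED_OUT), _ToyJob(AsyncJobStatus.RUNNING)]
for job in jobs:
    if job.status() == AsyncJobStatus.TIMED_OUT:
        abort_job(job, free_job_allocation=False)  # 6.41.5: keep the budget slot
        # 6.41.3 additionally raised AirbyteTracedException(config_error) here,
        # terminating the sync on the first timeout.

assert len(aborted) == 1 and aborted[0][1] is False
```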
airbyte_cdk/sources/declarative/async_job/job_orchestrator.py (continued):

```diff
@@ -392,7 +395,7 @@ class AsyncJobOrchestrator:
         except Exception as exception:
             LOGGER.warning(f"Could not free budget for job {job.api_job_id()}: {exception}")
 
-    def _remove_completed_or_timed_out_jobs(self, partition: AsyncPartition) -> None:
+    def _remove_completed_jobs(self, partition: AsyncPartition) -> None:
         """
         Remove completed or timed out jobs from the partition.
 
@@ -400,7 +403,7 @@ class AsyncJobOrchestrator:
             partition (AsyncPartition): The partition to process.
         """
         for job in partition.jobs:
-            if job.status() in [AsyncJobStatus.COMPLETED, AsyncJobStatus.TIMED_OUT]:
+            if job.status() == AsyncJobStatus.COMPLETED:
                 self._job_tracker.remove_job(job.api_job_id())
 
     def _reallocate_partition(
@@ -415,10 +418,7 @@ class AsyncJobOrchestrator:
             current_running_partitions (list): The list of currently running partitions.
             partition (AsyncPartition): The partition to reallocate.
         """
-        for job in partition.jobs:
-            if job.status() != AsyncJobStatus.TIMED_OUT:
-                # allow the FAILED jobs to be re-allocated for partition
-                current_running_partitions.insert(0, partition)
+        current_running_partitions.insert(0, partition)
 
     def _process_partitions_with_errors(self, partition: AsyncPartition) -> None:
         """
```
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py:

```diff
@@ -3073,8 +3073,11 @@ class ModelToComponentFactory:
                 stream_slices,
                 self._job_tracker,
                 self._message_repository,
-                has_bulk_parent=False,
                 # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
+                has_bulk_parent=False,
+                # set the `job_max_retry` to 1 for the `Connector Builder`` use-case.
+                # `None` == default retry is set to 3 attempts, under the hood.
+                job_max_retry=1 if self._emit_connector_builder_messages else None,
             ),
             stream_slicer=stream_slicer,
             config=config,
```
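In practice this means the Connector Builder path gives each async job a single attempt, so errors surface to the user immediately, while regular syncs keep the 3-attempt default. The decision reduced to a function; `resolve_job_max_retry` is a hypothetical name, not a CDK symbol:

```python
from typing import Optional


def resolve_job_max_retry(emit_connector_builder_messages: bool) -> Optional[int]:
    # Builder runs fail fast with a single attempt; None defers to
    # AsyncPartition._DEFAULT_MAX_JOB_RETRY (3) at runtime.
    return 1 if emit_connector_builder_messages else None


assert resolve_job_max_retry(True) == 1
assert resolve_job_max_retry(False) is None
```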
airbyte_cdk/test/mock_http/mocker.py:

```diff
@@ -2,9 +2,10 @@
 
 import contextlib
 import functools
+from collections import defaultdict
 from enum import Enum
 from types import TracebackType
-from typing import Callable, List, Optional, Union
+from typing import Callable, Dict, Iterable, List, Optional, Union
 
 import requests_mock
 
@@ -40,7 +41,7 @@ class HttpMocker(contextlib.ContextDecorator):
 
     def __init__(self) -> None:
        self._mocker = requests_mock.Mocker()
-        self._matchers: List[HttpRequestMatcher] = []
+        self._matchers: Dict[SupportedHttpMethods, List[HttpRequestMatcher]] = defaultdict(list)
 
     def __enter__(self) -> "HttpMocker":
         self._mocker.__enter__()
@@ -55,7 +56,7 @@ class HttpMocker(contextlib.ContextDecorator):
         self._mocker.__exit__(exc_type, exc_val, exc_tb)
 
     def _validate_all_matchers_called(self) -> None:
-        for matcher in self._matchers:
+        for matcher in self._get_matchers():
             if not matcher.has_expected_match_count():
                 raise ValueError(f"Invalid number of matches for `{matcher}`")
 
```
airbyte_cdk/test/mock_http/mocker.py (continued):

```diff
@@ -69,9 +70,9 @@ class HttpMocker(contextlib.ContextDecorator):
             responses = [responses]
 
         matcher = HttpRequestMatcher(request, len(responses))
-        if matcher in self._matchers:
+        if matcher in self._matchers[method]:
             raise ValueError(f"Request {matcher.request} already mocked")
-        self._matchers.append(matcher)
+        self._matchers[method].append(matcher)
 
         getattr(self._mocker, method)(
             requests_mock.ANY,
@@ -129,7 +130,7 @@ class HttpMocker(contextlib.ContextDecorator):
 
     def assert_number_of_calls(self, request: HttpRequest, number_of_calls: int) -> None:
         corresponding_matchers = list(
-            filter(lambda matcher: matcher.request is request, self._matchers)
+            filter(lambda matcher: matcher.request is request, self._get_matchers())
         )
         if len(corresponding_matchers) != 1:
             raise ValueError(
@@ -150,7 +151,7 @@ class HttpMocker(contextlib.ContextDecorator):
                 result = f(*args, **kwargs)
             except requests_mock.NoMockAddress as no_mock_exception:
                 matchers_as_string = "\n\t".join(
-                    map(lambda matcher: str(matcher.request), self._matchers)
+                    map(lambda matcher: str(matcher.request), self._get_matchers())
                 )
                 raise ValueError(
                     f"No matcher matches {no_mock_exception.args[0]} with headers `{no_mock_exception.request.headers}` "
@@ -175,6 +176,10 @@ class HttpMocker(contextlib.ContextDecorator):
 
         return wrapper
 
+    def _get_matchers(self) -> Iterable[HttpRequestMatcher]:
+        for matchers in self._matchers.values():
+            yield from matchers
+
     def clear_all_matchers(self) -> None:
         """Clears all stored matchers by resetting the _matchers list to an empty state."""
-        self._matchers = []
+        self._matchers = defaultdict(list)
```
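Keying matchers by HTTP method rather than keeping one flat list means the duplicate-registration check only collides within a verb, so one test can now mock the same URL for two different methods. A hedged usage sketch; the endpoint is made up, and it assumes the decorator injects the `http_mocker` argument the way the CDK's own connector tests use it:

```python
import requests

from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse


@HttpMocker()
def test_same_url_two_methods(http_mocker: HttpMocker) -> None:
    # Same URL, two verbs: with the old flat matcher list the second
    # registration raised "already mocked"; now each method has its own bucket.
    http_mocker.get(HttpRequest("https://api.example.com/jobs"), HttpResponse('{"jobs": []}'))
    http_mocker.post(HttpRequest("https://api.example.com/jobs"), HttpResponse('{"id": "123"}', 201))

    assert requests.get("https://api.example.com/jobs").json() == {"jobs": []}
    assert requests.post("https://api.example.com/jobs").status_code == 201


test_same_url_two_methods()
```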
airbyte_cdk-6.41.5.dist-info/RECORD:

```diff
@@ -48,8 +48,8 @@ airbyte_cdk/sources/config.py,sha256=wtwFF_7G_S2KB0IE2W5LBs7RO5e7EbgCAMzJpTcYTKo
 airbyte_cdk/sources/connector_state_manager.py,sha256=hw3TJJWl3UJKSDsH-PypFQU7mD0ifffh1Noz-t_THr8,7486
 airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
 airbyte_cdk/sources/declarative/async_job/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-airbyte_cdk/sources/declarative/async_job/job.py,sha256=
-airbyte_cdk/sources/declarative/async_job/job_orchestrator.py,sha256=
+airbyte_cdk/sources/declarative/async_job/job.py,sha256=aR5UZAkNUYA1I1zjUMAcvdzCFL3lXXOllkFmlhEKgkc,2001
+airbyte_cdk/sources/declarative/async_job/job_orchestrator.py,sha256=tcHvB5QdBnx4XQmFvr4Swdq2DLRPst5w5M-OIJHnp5c,22034
 airbyte_cdk/sources/declarative/async_job/job_tracker.py,sha256=oAaqKxj5dGKeF5wkqiOZbu5gW6JvtaROxirDU2KqT1o,2565
 airbyte_cdk/sources/declarative/async_job/repository.py,sha256=2OkWiZp5IKTOi_SIpP1U-Rw3gH36LBy_a8CgXoENTtg,1044
 airbyte_cdk/sources/declarative/async_job/status.py,sha256=mkExR-uOAO1ckUnclaUOa74l2N9CdhLbVFM6KDoBgBM,715
@@ -120,7 +120,7 @@ airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=nlVvHC511
 airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
 airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
 airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
-airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
+airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=72haNs6JXWSbe9Vwya2mJo3GFBvzYwjLlReWmvO2uPo,147623
 airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
 airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
 airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -331,7 +331,7 @@ airbyte_cdk/test/catalog_builder.py,sha256=-y05Cz1x0Dlk6oE9LSKhCozssV2gYBNtMdV5Y
 airbyte_cdk/test/entrypoint_wrapper.py,sha256=9XBii_YguQp0d8cykn3hy102FsJcwIBQzSB7co5ho0s,9802
 airbyte_cdk/test/mock_http/__init__.py,sha256=jE5kC6CQ0OXkTqKhciDnNVZHesBFVIA2YvkdFGwva7k,322
 airbyte_cdk/test/mock_http/matcher.py,sha256=4Qj8UnJKZIs-eodshryce3SN1Ayc8GZpBETmP6hTEyc,1446
-airbyte_cdk/test/mock_http/mocker.py,sha256=
+airbyte_cdk/test/mock_http/mocker.py,sha256=XgsjMtVoeMpRELPyALgrkHFauH9H5irxrz1Kcxh2yFY,8013
 airbyte_cdk/test/mock_http/request.py,sha256=tdB8cqk2vLgCDTOKffBKsM06llYs4ZecgtH6DKyx6yY,4112
 airbyte_cdk/test/mock_http/response.py,sha256=s4-cQQqTtmeej0pQDWqmG0vUWpHS-93lIWMpW3zSVyU,662
 airbyte_cdk/test/mock_http/response_builder.py,sha256=debPx_lRYBaQVSwCoKLa0F8KFk3h0qG7bWxFBATa0cc,7958
```
airbyte_cdk-6.41.5.dist-info/RECORD (continued):

```diff
@@ -358,9 +358,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.41.3.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
-airbyte_cdk-6.41.3.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
-airbyte_cdk-6.41.3.dist-info/METADATA,sha256=
-airbyte_cdk-6.41.3.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-airbyte_cdk-6.41.3.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
-airbyte_cdk-6.41.3.dist-info/RECORD,,
+airbyte_cdk-6.41.5.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.41.5.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
+airbyte_cdk-6.41.5.dist-info/METADATA,sha256=lU55s01kRtTNVELjzwH0ZFZcKc9mDaAoegbKH5sRuAs,6071
+airbyte_cdk-6.41.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+airbyte_cdk-6.41.5.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
+airbyte_cdk-6.41.5.dist-info/RECORD,,
```
The four remaining dist-info files (LICENSE.txt, LICENSE_SHORT, WHEEL, entry_points.txt) are unchanged apart from the directory rename.