airbyte-cdk 6.41.2__py3-none-any.whl → 6.41.3__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
@@ -179,7 +179,7 @@ class AsyncJobOrchestrator:
         self._non_breaking_exceptions: List[Exception] = []
 
     def _replace_failed_jobs(self, partition: AsyncPartition) -> None:
-        failed_status_jobs = (AsyncJobStatus.FAILED, AsyncJobStatus.TIMED_OUT)
+        failed_status_jobs = (AsyncJobStatus.FAILED,)
         jobs_to_replace = [job for job in partition.jobs if job.status() in failed_status_jobs]
         for job in jobs_to_replace:
             new_job = self._start_job(job.job_parameters(), job.api_job_id())
@@ -359,14 +359,11 @@ class AsyncJobOrchestrator:
                     self._process_partitions_with_errors(partition)
                 case _:
                     self._stop_timed_out_jobs(partition)
+                    # re-allocate FAILED jobs, but TIMEOUT jobs are not re-allocated
+                    self._reallocate_partition(current_running_partitions, partition)
 
-                    # job will be restarted in `_start_job`
-                    current_running_partitions.insert(0, partition)
-
-            for job in partition.jobs:
-                # We only remove completed jobs as we want failed/timed out jobs to be re-allocated in priority
-                if job.status() == AsyncJobStatus.COMPLETED:
-                    self._job_tracker.remove_job(job.api_job_id())
+            # We only remove completed / timed-out jobs as we want failed jobs to be re-allocated in priority
+            self._remove_completed_or_timed_out_jobs(partition)
 
         # update the referenced list with running partitions
         self._running_partitions = current_running_partitions
@@ -381,8 +378,11 @@ class AsyncJobOrchestrator:
     def _stop_timed_out_jobs(self, partition: AsyncPartition) -> None:
         for job in partition.jobs:
             if job.status() == AsyncJobStatus.TIMED_OUT:
-                # we don't free allocation here because it is expected to retry the job
-                self._abort_job(job, free_job_allocation=False)
+                self._abort_job(job, free_job_allocation=True)
+                raise AirbyteTracedException(
+                    internal_message=f"Job {job.api_job_id()} has timed out. Try increasing the `polling job timeout`.",
+                    failure_type=FailureType.config_error,
+                )
 
     def _abort_job(self, job: AsyncJob, free_job_allocation: bool = True) -> None:
         try:
@@ -392,6 +392,34 @@ class AsyncJobOrchestrator:
         except Exception as exception:
             LOGGER.warning(f"Could not free budget for job {job.api_job_id()}: {exception}")
 
+    def _remove_completed_or_timed_out_jobs(self, partition: AsyncPartition) -> None:
+        """
+        Remove completed or timed out jobs from the partition.
+
+        Args:
+            partition (AsyncPartition): The partition to process.
+        """
+        for job in partition.jobs:
+            if job.status() in [AsyncJobStatus.COMPLETED, AsyncJobStatus.TIMED_OUT]:
+                self._job_tracker.remove_job(job.api_job_id())
+
+    def _reallocate_partition(
+        self,
+        current_running_partitions: List[AsyncPartition],
+        partition: AsyncPartition,
+    ) -> None:
+        """
+        Reallocate the partition by starting a new job for each job in the
+        partition.
+        Args:
+            current_running_partitions (list): The list of currently running partitions.
+            partition (AsyncPartition): The partition to reallocate.
+        """
+        for job in partition.jobs:
+            if job.status() != AsyncJobStatus.TIMED_OUT:
+                # allow the FAILED jobs to be re-allocated for partition
+                current_running_partitions.insert(0, partition)
+
     def _process_partitions_with_errors(self, partition: AsyncPartition) -> None:
         """
         Process a partition with status errors (FAILED and TIMEOUT).
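
Taken together, the orchestrator hunks above change the retry policy: a timed-out job is now aborted, its slot in the job tracker is freed, and a config error is raised instead of a silent retry, while FAILED jobs are still re-inserted at the front of the running partitions so they get replaced first. The toy sketch below mirrors that control flow only for illustration; ToyJob, ToyPartition, the plain set tracker and the RuntimeError are hypothetical stand-ins, not CDK classes (the CDK raises AirbyteTracedException).

from enum import Enum, auto
from typing import List, Set


class Status(Enum):
    RUNNING = auto()
    COMPLETED = auto()
    FAILED = auto()
    TIMED_OUT = auto()


class ToyJob:
    """Hypothetical stand-in for AsyncJob: just an id and a status."""

    def __init__(self, job_id: str, status: Status) -> None:
        self.job_id = job_id
        self.status = status


class ToyPartition:
    """Hypothetical stand-in for AsyncPartition: a bag of jobs."""

    def __init__(self, jobs: List[ToyJob]) -> None:
        self.jobs = jobs


def process_partition(partition: ToyPartition, running: List[ToyPartition], tracker: Set[str]) -> None:
    # 1. Timed-out jobs are aborted, their allocation is freed, and the sync fails fast
    #    (the CDK raises AirbyteTracedException with failure_type=config_error here).
    for job in partition.jobs:
        if job.status is Status.TIMED_OUT:
            tracker.discard(job.job_id)
            raise RuntimeError(f"Job {job.job_id} has timed out; try increasing the polling job timeout.")

    # 2. Partitions whose jobs did not time out (still running or FAILED) stay in the running
    #    list, inserted at the front so failed jobs are replaced with priority.
    if any(job.status is not Status.TIMED_OUT for job in partition.jobs):
        running.insert(0, partition)

    # 3. Completed (or timed-out) jobs are removed from the tracker so their budget is released.
    for job in partition.jobs:
        if job.status in (Status.COMPLETED, Status.TIMED_OUT):
            tracker.discard(job.job_id)


if __name__ == "__main__":
    tracker = {"a", "b"}
    running: List[ToyPartition] = []
    partition = ToyPartition([ToyJob("a", Status.FAILED), ToyJob("b", Status.COMPLETED)])
    process_partition(partition, running, tracker)
    print(len(running), tracker)  # 1 {'a'}: the failed job's partition is queued again, "b" is released

The key behavioural difference from 6.41.2 is step 1: a timeout no longer keeps the job allocated for a silent retry but surfaces as an actionable configuration error.
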
@@ -273,12 +273,59 @@ class AsyncHttpJobRepository(AsyncJobRepository):
         del self._create_job_response_by_id[job_id]
         del self._polling_job_response_by_id[job_id]
 
+    def _get_creation_response_interpolation_context(self, job: AsyncJob) -> Dict[str, Any]:
+        """
+        Returns the interpolation context for the creation response.
+
+        Args:
+            job (AsyncJob): The job for which to get the creation response interpolation context.
+
+        Returns:
+            Dict[str, Any]: The interpolation context as a dictionary.
+        """
+        # TODO: currently we support only JsonDecoder to decode the response to track the ids or the status
+        # of the Jobs. We should consider to add the support of other decoders like XMLDecoder, in the future
+        creation_response_context = dict(self._create_job_response_by_id[job.api_job_id()].json())
+        if not "headers" in creation_response_context:
+            creation_response_context["headers"] = self._create_job_response_by_id[
+                job.api_job_id()
+            ].headers
+        if not "request" in creation_response_context:
+            creation_response_context["request"] = self._create_job_response_by_id[
+                job.api_job_id()
+            ].request
+        return creation_response_context
+
+    def _get_polling_response_interpolation_context(self, job: AsyncJob) -> Dict[str, Any]:
+        """
+        Returns the interpolation context for the polling response.
+
+        Args:
+            job (AsyncJob): The job for which to get the polling response interpolation context.
+
+        Returns:
+            Dict[str, Any]: The interpolation context as a dictionary.
+        """
+        # TODO: currently we support only JsonDecoder to decode the response to track the ids or the status
+        # of the Jobs. We should consider to add the support of other decoders like XMLDecoder, in the future
+        polling_response_context = dict(self._polling_job_response_by_id[job.api_job_id()].json())
+        if not "headers" in polling_response_context:
+            polling_response_context["headers"] = self._polling_job_response_by_id[
+                job.api_job_id()
+            ].headers
+        if not "request" in polling_response_context:
+            polling_response_context["request"] = self._polling_job_response_by_id[
+                job.api_job_id()
+            ].request
+        return polling_response_context
+
     def _get_create_job_stream_slice(self, job: AsyncJob) -> StreamSlice:
-        creation_response = self._create_job_response_by_id[job.api_job_id()].json()
         stream_slice = StreamSlice(
             partition={},
             cursor_slice={},
-            extra_fields={"creation_response": creation_response},
+            extra_fields={
+                "creation_response": self._get_creation_response_interpolation_context(job),
+            },
         )
         return stream_slice
 
@@ -286,11 +333,12 @@ class AsyncHttpJobRepository(AsyncJobRepository):
         if not self.download_target_requester:
             url_response = self._polling_job_response_by_id[job.api_job_id()]
         else:
-            polling_response = self._polling_job_response_by_id[job.api_job_id()].json()
             stream_slice: StreamSlice = StreamSlice(
                 partition={},
                 cursor_slice={},
-                extra_fields={"polling_response": polling_response},
+                extra_fields={
+                    "polling_response": self._get_polling_response_interpolation_context(job),
+                },
             )
             url_response = self.download_target_requester.send_request(stream_slice=stream_slice)  # type: ignore # we expect download_target_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
         if not url_response:
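
The AsyncHttpJobRepository hunks above move the raw .json() calls into two helpers that build richer interpolation contexts: the decoded JSON body is kept as-is, and "headers" and "request" are added only when the body does not already define those keys, so existing interpolations keep working while response metadata becomes reachable too. Below is a minimal self-contained sketch of that merge logic, assuming a response object with .json(), .headers and .request attributes as on requests.Response; FakeResponse and build_interpolation_context are hypothetical names used only for this sketch, not CDK API.

from typing import Any, Dict


class FakeResponse:
    """Hypothetical stand-in for requests.Response, used only for this sketch."""

    def __init__(self, body: Dict[str, Any], headers: Dict[str, str], request: Any) -> None:
        self._body = body
        self.headers = headers
        self.request = request

    def json(self) -> Dict[str, Any]:
        return self._body


def build_interpolation_context(response: FakeResponse) -> Dict[str, Any]:
    # Start from the decoded JSON body (the CDK currently assumes a JsonDecoder here).
    context = dict(response.json())
    # Backfill "headers"/"request" only when the body does not already use those keys,
    # so fields coming from the API are never silently overwritten.
    if "headers" not in context:
        context["headers"] = response.headers
    if "request" not in context:
        context["request"] = response.request
    return context


if __name__ == "__main__":
    response = FakeResponse(
        body={"id": "job-123", "status": "pending"},
        headers={"Location": "https://api.example.com/jobs/job-123"},
        request=None,
    )
    context = build_interpolation_context(response)
    print(context["headers"]["Location"])  # the header is now part of the interpolation context

With these contexts passed through extra_fields, a declarative download_target_requester can interpolate values from creation_response or polling_response headers in addition to body fields.
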
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: airbyte-cdk
-Version: 6.41.2
+Version: 6.41.3
 Summary: A framework for writing Airbyte Connectors.
 Home-page: https://airbyte.com
 License: MIT
@@ -49,7 +49,7 @@ airbyte_cdk/sources/connector_state_manager.py,sha256=hw3TJJWl3UJKSDsH-PypFQU7mD
 airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
 airbyte_cdk/sources/declarative/async_job/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 airbyte_cdk/sources/declarative/async_job/job.py,sha256=V4Z6NohXwTlOavDbD-tUUQxOr7Lzpb_r4tRC64AfvDE,1702
-airbyte_cdk/sources/declarative/async_job/job_orchestrator.py,sha256=nUimSsq1nbEe3UPqsVC9mj8Zh2GYptJUZHQSVTbpWIc,21026
+airbyte_cdk/sources/declarative/async_job/job_orchestrator.py,sha256=4uNdRLAx6PA5ESrvsxg9sYwQBDwRCIBh58E9byqfl3k,22207
 airbyte_cdk/sources/declarative/async_job/job_tracker.py,sha256=oAaqKxj5dGKeF5wkqiOZbu5gW6JvtaROxirDU2KqT1o,2565
 airbyte_cdk/sources/declarative/async_job/repository.py,sha256=2OkWiZp5IKTOi_SIpP1U-Rw3gH36LBy_a8CgXoENTtg,1044
 airbyte_cdk/sources/declarative/async_job/status.py,sha256=mkExR-uOAO1ckUnclaUOa74l2N9CdhLbVFM6KDoBgBM,715
@@ -143,7 +143,7 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.
 airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py,sha256=q0YkeYUUWO6iErUy0vjqiOkhg8_9d5YcCmtlpXAJJ9E,1314
 airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py,sha256=Tan66odx8VHzfdyyXMQkXz2pJYksllGqvxmpoajgcK4,669
 airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py,sha256=E-fQbt4ShfxZVoqfnmOx69C6FUPWZz8BIqI3DN9Kcjs,7935
-airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=4egnsWqzOhizHEvxhs43eAJazT211FtUmPHOr-8SsMQ,12037
+airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=uDyLvNsJ183oh3TT-O1PDOgpGt7OD1uqpLTDWTyb9PA,14271
 airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=uEhUmLGVuwfadKz0c1vunrr66ZNYWmotKZWiaPYPDzw,17402
 airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
 airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=SB-Af3CRb4mJwhm4EKNxzl_PK2w5QS4tqrSNNMO2IV4,12760
@@ -358,9 +358,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.41.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
-airbyte_cdk-6.41.2.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
-airbyte_cdk-6.41.2.dist-info/METADATA,sha256=9Ouq9vugM669lqFm4ZeuMIhZDfgFWTTkRkOWNxvQ_Aw,6071
-airbyte_cdk-6.41.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-airbyte_cdk-6.41.2.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
-airbyte_cdk-6.41.2.dist-info/RECORD,,
+airbyte_cdk-6.41.3.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.41.3.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
+airbyte_cdk-6.41.3.dist-info/METADATA,sha256=hjK2dh6W_Zj1foDgJ-XjA2oxzhI3-wKRrvp5G-lYoyM,6071
+airbyte_cdk-6.41.3.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+airbyte_cdk-6.41.3.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
+airbyte_cdk-6.41.3.dist-info/RECORD,,