fairagro-middleware-api-client 8.6.4.dev7__tar.gz → 8.7.1.dev8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17)
  1. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/PKG-INFO +1 -1
  2. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/src/middleware/api_client/api_client.py +88 -15
  3. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/tests/unit/test_client.py +67 -16
  4. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/.gitignore +0 -0
  5. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/README.md +0 -0
  6. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/example_client_config.yaml +0 -0
  7. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/pyproject.toml +0 -0
  8. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/src/middleware/api_client/__init__.py +0 -0
  9. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/src/middleware/api_client/config.py +0 -0
  10. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/src/middleware/api_client/models.py +0 -0
  11. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/src/middleware/api_client/py.typed +0 -0
  12. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/tests/conftest.py +0 -0
  13. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/tests/integration/conftest.py +0 -0
  14. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/tests/integration/test_create_arcs.py +0 -0
  15. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/tests/unit/test_api_client_config.py +0 -0
  16. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/tests/unit/test_client_config.py +0 -0
  17. {fairagro_middleware_api_client-8.6.4.dev7 → fairagro_middleware_api_client-8.7.1.dev8}/tests/unit/test_retry_logic.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fairagro-middleware-api-client
3
- Version: 8.6.4.dev7
3
+ Version: 8.7.1.dev8
4
4
  Summary: The FAIRagro advanced middleware API client
5
5
  Requires-Python: >=3.12
6
6
  Requires-Dist: httpx>=0.28.1
@@ -13,9 +13,11 @@ from typing import TYPE_CHECKING, Any, cast
13
13
  import httpx
14
14
  from pydantic import BaseModel, ValidationError
15
15
 
16
+ from middleware.shared.api_models.common.models import HarvestStatus as SharedHarvestStatus
16
17
  from middleware.shared.api_models.v3.models import (
17
18
  CreateArcRequest,
18
19
  CreateHarvestRequest,
20
+ PatchHarvestRequest,
19
21
  SubmitHarvestArcRequest,
20
22
  )
21
23
 
@@ -226,12 +228,12 @@ class ApiClient:
226
228
  or status_code >= HTTPStatus.INTERNAL_SERVER_ERROR
227
229
  )
228
230
 
229
- async def _cancel_harvest_safely(self, rdi: str, harvest_id: str) -> None:
230
- """Try cancelling a harvest and suppress cancellation failures."""
231
+ async def _fail_harvest_safely(self, rdi: str, harvest_id: str) -> None:
232
+ """Try marking a harvest as failed and suppress any secondary failures."""
231
233
  try:
232
- await self.cancel_harvest(harvest_id)
234
+ await self.fail_harvest(harvest_id)
233
235
  except ApiClientError:
234
- logger.warning("[%s] Failed to cancel harvest %s", rdi, harvest_id)
236
+ logger.warning("[%s] Failed to mark harvest %s as failed", rdi, harvest_id)
235
237
 
236
238
  @classmethod
237
239
  async def _cancel_pending_arc_tasks(cls, pending_tasks: set[asyncio.Task[None]]) -> None:
@@ -267,12 +269,26 @@ class ApiClient:
267
269
  """Submit all ARCs in bounded parallelism and return number of skipped ARC submissions."""
268
270
  pending_tasks: set[asyncio.Task[None]] = set()
269
271
  failed_submissions = 0
272
+ seen_identifiers: set[str] = set()
270
273
 
271
- async def submit_one(arc_item: "ARC | dict[str, Any] | str") -> None:
272
- await self.submit_arc_in_harvest(harvest_id, arc_item)
274
+ async def submit_one(arc_item: dict[str, Any]) -> None:
275
+ request = SubmitHarvestArcRequest(arc=arc_item)
276
+ await self._post(f"v3/harvests/{harvest_id}/arcs", request)
273
277
 
274
278
  async for arc in arcs:
275
- task = asyncio.create_task(submit_one(arc))
279
+ serialized = self._serialize_arc(arc)
280
+ identifier = self._extract_identifier_from_rocrate(serialized)
281
+ if identifier is not None:
282
+ if identifier in seen_identifiers:
283
+ await self._cancel_pending_arc_tasks(pending_tasks)
284
+ raise ApiClientError(
285
+ f"Duplicate ARC identifier '{identifier}' submitted more than once "
286
+ f"in harvest {harvest_id}. This is likely a client-side data error "
287
+ "(two different ARCs sharing the same identifier)."
288
+ )
289
+ seen_identifiers.add(identifier)
290
+
291
+ task = asyncio.create_task(submit_one(serialized))
276
292
  pending_tasks.add(task)
277
293
 
278
294
  if len(pending_tasks) >= self._config.max_concurrency:
@@ -441,10 +457,38 @@ class ApiClient:
441
457
  """DELETE request, ignoring a 204 No Content response."""
442
458
  await self._request_with_retries("DELETE", path)
443
459
 
460
+ async def _patch(self, path: str, body: BaseModel) -> Any:
461
+ """PATCH with a Pydantic request body."""
462
+ return await self._request_with_retries(
463
+ "PATCH",
464
+ path,
465
+ content=body.model_dump_json(),
466
+ headers={"content-type": "application/json"},
467
+ )
468
+
444
469
  # ------------------------------------------------------------------
445
470
  # Helper
446
471
  # ------------------------------------------------------------------
447
472
 
473
+ @staticmethod
474
+ def _extract_identifier_from_rocrate(arc_content: dict[str, Any]) -> str | None:
475
+ """Extract the RO-Crate identifier from a serialized ARC dict.
476
+
477
+ Looks for the Root Data Entity (``@id == "./"``), then returns its
478
+ ``identifier`` field. Returns ``None`` when the field is absent or
479
+ the dict does not follow the RO-Crate structure — validation is left
480
+ to the server.
481
+ """
482
+ graph = arc_content.get("@graph")
483
+ if isinstance(graph, list):
484
+ for item in graph:
485
+ if item.get("@id") == "./":
486
+ identifier = item.get("identifier")
487
+ if isinstance(identifier, list):
488
+ identifier = identifier[0] if identifier else None
489
+ return str(identifier) if identifier else None
490
+ return None
491
+
448
492
  @classmethod
449
493
  def _serialize_arc(cls, arc: "ARC | dict[str, Any] | str") -> dict[str, Any]:
450
494
  """Serialize an ARC object, dict, or JSON string to a plain RO-Crate JSON dict."""
@@ -452,7 +496,10 @@ class ApiClient:
452
496
  return arc
453
497
  if isinstance(arc, str):
454
498
  try:
455
- return cast(dict[str, Any], json.loads(arc))
499
+ data = json.loads(arc)
500
+ if not isinstance(data, dict):
501
+ raise ApiClientError(f"JSON string must represent a dictionary, got {type(data).__name__}")
502
+ return cast(dict[str, Any], data)
456
503
  except json.JSONDecodeError as e:
457
504
  raise ApiClientError(f"Invalid JSON string provided for ARC: {e}") from e
458
505
  return cast(dict[str, Any], json.loads(arc.ToROCrateJsonString()))
@@ -571,15 +618,39 @@ class ApiClient:
571
618
  data = await self._post_empty(f"v3/harvests/{harvest_id}/complete")
572
619
  return self._parse_harvest_response(data)
573
620
 
574
- async def cancel_harvest(self, harvest_id: str) -> None:
575
- """Cancel (delete) a harvest run.
621
+ async def cancel_harvest(self, harvest_id: str) -> HarvestResult:
622
+ """Mark a harvest run as cancelled.
623
+
624
+ Uses ``PATCH /v3/harvests/{harvest_id}`` with ``status=CANCELLED``.
625
+
626
+ Args:
627
+ harvest_id: Harvest identifier.
628
+
629
+ Returns:
630
+ Updated :class:`HarvestResult`.
631
+ """
632
+ data = await self._patch(
633
+ f"v3/harvests/{harvest_id}",
634
+ PatchHarvestRequest(status=SharedHarvestStatus.CANCELLED),
635
+ )
636
+ return self._parse_harvest_response(data)
637
+
638
+ async def fail_harvest(self, harvest_id: str) -> HarvestResult:
639
+ """Mark a harvest run as failed.
576
640
 
577
- Uses ``DELETE /v3/harvests/{harvest_id}``.
641
+ Uses ``PATCH /v3/harvests/{harvest_id}`` with ``status=FAILED``.
578
642
 
579
643
  Args:
580
644
  harvest_id: Harvest identifier.
645
+
646
+ Returns:
647
+ Updated :class:`HarvestResult`.
581
648
  """
582
- await self._delete(f"v3/harvests/{harvest_id}")
649
+ data = await self._patch(
650
+ f"v3/harvests/{harvest_id}",
651
+ PatchHarvestRequest(status=SharedHarvestStatus.FAILED),
652
+ )
653
+ return self._parse_harvest_response(data)
583
654
 
584
655
  async def submit_arc_in_harvest(
585
656
  self,
@@ -632,7 +703,7 @@ class ApiClient:
632
703
 
633
704
  Raises:
634
705
  ApiClientError: On catastrophic HTTP or serialization errors. The
635
- harvest is cancelled before the exception propagates.
706
+ harvest is marked as failed before the exception propagates.
636
707
 
637
708
  Example::
638
709
 
@@ -650,8 +721,10 @@ class ApiClient:
650
721
  try:
651
722
  failed_submissions = await self._submit_arcs_parallel(harvest_id, arcs)
652
723
  except Exception:
653
- logger.warning("[%s] Catastrophic error during ARC submission, cancelling harvest %s", rdi, harvest_id)
654
- await self._cancel_harvest_safely(rdi, harvest_id)
724
+ logger.warning(
725
+ "[%s] Catastrophic error during ARC submission, marking harvest %s as failed", rdi, harvest_id
726
+ )
727
+ await self._fail_harvest_safely(rdi, harvest_id)
655
728
  raise
656
729
 
657
730
  if failed_submissions > 0:
@@ -482,14 +482,31 @@ async def test_complete_harvest(client_config: Config) -> None:
482
482
  @pytest.mark.asyncio
483
483
  @respx.mock
484
484
  async def test_cancel_harvest(client_config: Config) -> None:
485
- """Test cancelling a harvest run."""
486
- route = respx.delete(f"{client_config.api_url}v3/harvests/harvest-456").mock(
487
- return_value=httpx.Response(http.HTTPStatus.NO_CONTENT)
485
+ """Test cancelling a harvest run via PATCH."""
486
+ cancelled_response = {**_HARVEST_RESPONSE, "status": "CANCELLED"}
487
+ route = respx.patch(f"{client_config.api_url}v3/harvests/harvest-456").mock(
488
+ return_value=httpx.Response(http.HTTPStatus.OK, json=cancelled_response)
488
489
  )
489
490
  async with ApiClient(client_config) as client:
490
- await client.cancel_harvest("harvest-456")
491
+ result = await client.cancel_harvest("harvest-456")
491
492
  assert route.called
493
+ assert isinstance(result, HarvestResult)
494
+ assert result.status == "CANCELLED"
495
+
496
+
497
+ @pytest.mark.asyncio
498
+ @respx.mock
499
+ async def test_fail_harvest(client_config: Config) -> None:
500
+ """Test marking a harvest run as failed via PATCH."""
501
+ failed_response = {**_HARVEST_RESPONSE, "status": "FAILED"}
502
+ route = respx.patch(f"{client_config.api_url}v3/harvests/harvest-456").mock(
503
+ return_value=httpx.Response(http.HTTPStatus.OK, json=failed_response)
504
+ )
505
+ async with ApiClient(client_config) as client:
506
+ result = await client.fail_harvest("harvest-456")
492
507
  assert route.called
508
+ assert isinstance(result, HarvestResult)
509
+ assert result.status == "FAILED"
493
510
 
494
511
 
495
512
  @pytest.mark.asyncio
@@ -676,22 +693,57 @@ async def test_harvest_arcs_continues_on_item_error(client_config: Config) -> No
676
693
  @pytest.mark.asyncio
677
694
  @respx.mock
678
695
  async def test_harvest_arcs_cancels_on_catastrophic_error(client_config: Config) -> None:
679
- """harvest_arcs cancels the harvest on catastrophic submission errors."""
696
+ """harvest_arcs marks the harvest as failed on catastrophic submission errors."""
697
+ failed_response = {**_HARVEST_RESPONSE, "status": "FAILED"}
680
698
  respx.post(f"{client_config.api_url}v3/harvests").mock(
681
699
  return_value=httpx.Response(http.HTTPStatus.OK, json=_HARVEST_RESPONSE)
682
700
  )
683
701
  respx.post(f"{client_config.api_url}v3/harvests/harvest-456/arcs").mock(
684
702
  return_value=httpx.Response(http.HTTPStatus.INTERNAL_SERVER_ERROR, text="server unavailable")
685
703
  )
686
- cancel_route = respx.delete(f"{client_config.api_url}v3/harvests/harvest-456").mock(
687
- return_value=httpx.Response(http.HTTPStatus.NO_CONTENT)
704
+ fail_route = respx.patch(f"{client_config.api_url}v3/harvests/harvest-456").mock(
705
+ return_value=httpx.Response(http.HTTPStatus.OK, json=failed_response)
688
706
  )
689
707
 
690
708
  async with ApiClient(client_config) as client:
691
709
  with pytest.raises(ApiClientError):
692
710
  await client.harvest_arcs("test-rdi", _arc_gen({"id": "arc-1"}))
693
711
 
694
- assert cancel_route.called
712
+ assert fail_route.called
713
+
714
+
715
+ @pytest.mark.asyncio
716
+ @respx.mock
717
+ async def test_harvest_arcs_fails_on_duplicate_identifier(client_config: Config) -> None:
718
+ """harvest_arcs marks harvest as failed when the same ARC identifier appears twice."""
719
+ # Two ARC dicts that share the same RO-Crate identifier — simulates a
720
+ # client-side data error where two distinct ARCs were accidentally given
721
+ # the same identifier field.
722
+ arc_a = {
723
+ "@context": "https://w3id.org/ro/crate/1.1/context",
724
+ "@graph": [{"@id": "./", "@type": "Dataset", "identifier": "duplicate-arc", "name": "ARC A"}],
725
+ }
726
+ arc_b = {
727
+ "@context": "https://w3id.org/ro/crate/1.1/context",
728
+ "@graph": [{"@id": "./", "@type": "Dataset", "identifier": "duplicate-arc", "name": "ARC B"}],
729
+ }
730
+ failed_response = {**_HARVEST_RESPONSE, "status": "FAILED"}
731
+ respx.post(f"{client_config.api_url}v3/harvests").mock(
732
+ return_value=httpx.Response(http.HTTPStatus.OK, json=_HARVEST_RESPONSE)
733
+ )
734
+ # The first ARC is submitted successfully before the duplicate is detected.
735
+ respx.post(f"{client_config.api_url}v3/harvests/harvest-456/arcs").mock(
736
+ return_value=httpx.Response(http.HTTPStatus.OK, json=_ARC_RESPONSE)
737
+ )
738
+ fail_route = respx.patch(f"{client_config.api_url}v3/harvests/harvest-456").mock(
739
+ return_value=httpx.Response(http.HTTPStatus.OK, json=failed_response)
740
+ )
741
+
742
+ async with ApiClient(client_config) as client:
743
+ with pytest.raises(ApiClientError, match="Duplicate ARC identifier 'duplicate-arc'"):
744
+ await client.harvest_arcs("test-rdi", _arc_gen(arc_a, arc_b))
745
+
746
+ assert fail_route.called
695
747
 
696
748
 
697
749
  @pytest.mark.asyncio
@@ -725,13 +777,12 @@ async def test_harvest_arcs_with_json_string(client_config: Config) -> None:
725
777
  @respx.mock
726
778
  async def test_harvest_arcs_with_invalid_json_string(client_config: Config) -> None:
727
779
  """harvest_arcs raises ApiClientError when JSON string is invalid."""
728
- # Mock the harvest creation endpoint to prevent actual HTTP requests
780
+ failed_response = {**_HARVEST_RESPONSE, "status": "FAILED"}
729
781
  respx.post(f"{client_config.api_url}v3/harvests").mock(
730
782
  return_value=httpx.Response(http.HTTPStatus.OK, json=_HARVEST_RESPONSE)
731
783
  )
732
- # Mock the harvest cancellation endpoint
733
- respx.delete(f"{client_config.api_url}v3/harvests/harvest-456").mock(
734
- return_value=httpx.Response(http.HTTPStatus.NO_CONTENT)
784
+ respx.patch(f"{client_config.api_url}v3/harvests/harvest-456").mock(
785
+ return_value=httpx.Response(http.HTTPStatus.OK, json=failed_response)
735
786
  )
736
787
 
737
788
  async with ApiClient(client_config) as client:
@@ -743,16 +794,16 @@ async def test_harvest_arcs_with_invalid_json_string(client_config: Config) -> N
743
794
  @pytest.mark.asyncio
744
795
  @respx.mock
745
796
  async def test_harvest_arcs_cancel_failure_does_not_mask_original_error(client_config: Config) -> None:
746
- """If cancel itself raises, the original submission error is still propagated."""
797
+ """If fail_harvest itself raises, the original submission error is still propagated."""
747
798
  respx.post(f"{client_config.api_url}v3/harvests").mock(
748
799
  return_value=httpx.Response(http.HTTPStatus.OK, json=_HARVEST_RESPONSE)
749
800
  )
750
801
  respx.post(f"{client_config.api_url}v3/harvests/harvest-456/arcs").mock(
751
802
  return_value=httpx.Response(http.HTTPStatus.INTERNAL_SERVER_ERROR, text="arc error")
752
803
  )
753
- # Also make the cancel fail
754
- respx.delete(f"{client_config.api_url}v3/harvests/harvest-456").mock(
755
- return_value=httpx.Response(http.HTTPStatus.INTERNAL_SERVER_ERROR, text="cancel error")
804
+ # Also make the fail call fail
805
+ respx.patch(f"{client_config.api_url}v3/harvests/harvest-456").mock(
806
+ return_value=httpx.Response(http.HTTPStatus.INTERNAL_SERVER_ERROR, text="fail error")
756
807
  )
757
808
 
758
809
  async with ApiClient(client_config) as client: