fairagro-middleware-api-client 8.7.1.dev11__tar.gz → 8.8.1.dev12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/PKG-INFO +1 -1
- fairagro_middleware_api_client-8.8.1.dev12/spec/harvest-client/design.md +62 -0
- fairagro_middleware_api_client-8.8.1.dev12/spec/harvest-client/spec.md +50 -0
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/src/middleware/api_client/__init__.py +15 -1
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/src/middleware/api_client/api_client.py +72 -36
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/src/middleware/api_client/models.py +57 -1
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/tests/unit/test_client.py +3 -28
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/.gitignore +0 -0
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/README.md +0 -0
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/example_client_config.yaml +0 -0
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/pyproject.toml +0 -0
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/src/middleware/api_client/config.py +0 -0
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/src/middleware/api_client/py.typed +0 -0
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/tests/conftest.py +0 -0
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/tests/integration/conftest.py +0 -0
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/tests/integration/test_create_arcs.py +0 -0
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/tests/unit/test_api_client_config.py +0 -0
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/tests/unit/test_client_config.py +0 -0
- {fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/tests/unit/test_retry_logic.py +0 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Harvest Client — Design
|
|
2
|
+
|
|
3
|
+
## Module Overview
|
|
4
|
+
|
|
5
|
+
`ApiClient` (`api_client.py`) orchestrates the harvest lifecycle.
|
|
6
|
+
`HarvestResult`, `HarvestStatistics`, `HarvestError`, and `HarvestErrorType`
|
|
7
|
+
(`models.py`) are the stable public types exposed to harvesters.
|
|
8
|
+
|
|
9
|
+
```text
|
|
10
|
+
harvester
|
|
11
|
+
└─→ ApiClient.harvest_arcs(rdi, arcs)
|
|
12
|
+
├─→ create_harvest → HarvestResult (RUNNING)
|
|
13
|
+
├─→ _submit_arcs_parallel
|
|
14
|
+
│ ├─→ duplicate check (client-side) → HarvestError(DUPLICATE)
|
|
15
|
+
│ └─→ POST v3/harvests/{id}/arcs → HarvestError(SUBMISSION_FAILED) on error
|
|
16
|
+
└─→ complete_harvest → HarvestResult (COMPLETED)
|
|
17
|
+
└─→ inject client_errors via model_copy → HarvestResult.errors
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Key Decisions
|
|
21
|
+
|
|
22
|
+
1. **`HarvestStatistics` is a typed Pydantic model, not `dict`**
|
|
23
|
+
— The server serializes its internal `HarvestStatistics` via `model_dump()`
|
|
24
|
+
before sending it over the wire. The field names and types are stable and
|
|
25
|
+
known. A typed model gives consumers validated, IDE-navigable fields rather
|
|
26
|
+
than requiring dict key lookups with no type safety.
|
|
27
|
+
|
|
28
|
+
2. **`HarvestError` is a client-facing type in `models.py`, independent of any server model**
|
|
29
|
+
— Per-item errors are currently generated client-side. When the server
|
|
30
|
+
persists them natively (issue #240), `_parse_harvest_response` will
|
|
31
|
+
populate `HarvestResult.errors` from the server response automatically —
|
|
32
|
+
the type and consumer interface remain unchanged.
|
|
33
|
+
|
|
34
|
+
3. **`arc_id: str | None` in `HarvestError`**
|
|
35
|
+
— `DUPLICATE` errors always carry an ARC identifier; `SUBMISSION_FAILED`
|
|
36
|
+
errors carry one whenever it is extractable from the RO-Crate. Future
|
|
37
|
+
error categories — such as harvest-level timeouts or config failures —
|
|
38
|
+
may not be associated with any specific ARC. `None` is the semantically
|
|
39
|
+
correct representation; an empty string would be an invisible sentinel
|
|
40
|
+
value that callers would need to treat specially.
|
|
41
|
+
|
|
42
|
+
4. **Client-side error collection as compatibility shim until issue #240**
|
|
43
|
+
— `harvest_arcs()` collects errors from `_submit_arcs_parallel()` and
|
|
44
|
+
merges them into the server response via `model_copy(update=...)`.
|
|
45
|
+
This shim will be removed once the server persists and returns per-item errors
|
|
46
|
+
natively. The `model_copy` merge is additive: if the server already
|
|
47
|
+
returns errors in its response (post-#240), client-side errors are
|
|
48
|
+
appended rather than overwriting.
|
|
49
|
+
|
|
50
|
+
5. **Duplicate detection is performed client-side before the HTTP request**
|
|
51
|
+
— Submitting both duplicates would cause the server to process two ARCs
|
|
52
|
+
with the same identifier in the same harvest run, resulting in an opaque
|
|
53
|
+
conflict. Client-side detection gives an explicit `DUPLICATE` error,
|
|
54
|
+
prevents the wasted round-trip, and avoids requiring the server to handle
|
|
55
|
+
intra-harvest identity conflicts.
|
|
56
|
+
|
|
57
|
+
6. **Item-level failures are non-fatal; harvest-level failures are fatal**
|
|
58
|
+
— A submission failure for one ARC (e.g. server 422 on bad content) must
|
|
59
|
+
not abort the entire harvest because the remaining ARCs may be valid. A
|
|
60
|
+
catastrophic failure (e.g. 401 Unauthorized, harvest already closed) means
|
|
61
|
+
no further submissions will succeed, so the harvest is aborted, marked
|
|
62
|
+
`FAILED`, and the exception propagates to the caller.
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Harvest Client
|
|
2
|
+
|
|
3
|
+
Manage the full lifecycle of a harvest run — creation, parallel ARC
|
|
4
|
+
submission, error collection, and finalization — on behalf of a harvester
|
|
5
|
+
process. The client returns a typed result that captures both statistics
|
|
6
|
+
and per-item errors so harvesters can produce complete reports.
|
|
7
|
+
|
|
8
|
+
## Requirements
|
|
9
|
+
|
|
10
|
+
- [ ] Create a harvest run for a given RDI, submit all ARCs from an async
|
|
11
|
+
source in bounded parallelism, and return the completed harvest result
|
|
12
|
+
as a single operation.
|
|
13
|
+
- [ ] Accept an optional expected-dataset count at the start of a harvest to
|
|
14
|
+
enable progress tracking on the server side.
|
|
15
|
+
- [ ] Return typed harvest statistics (submitted, new, updated, unchanged,
|
|
16
|
+
missing counts, and optional expected-dataset count) as structured
|
|
17
|
+
fields rather than an opaque mapping.
|
|
18
|
+
- [ ] Record per-item errors encountered during submission and include them
|
|
19
|
+
in the returned harvest result.
|
|
20
|
+
- [ ] Classify each per-item error into one of the following categories:
|
|
21
|
+
`duplicate` (two ARCs share the same identifier) or `submission_failed`
|
|
22
|
+
(the server rejected or could not process the ARC).
|
|
23
|
+
- [ ] Each per-item error carries: the error category, a human-readable
|
|
24
|
+
message, and an ISO 8601 timestamp of when the error occurred.
|
|
25
|
+
- [ ] Optionally associate a per-item error with an ARC identifier; errors
|
|
26
|
+
that do not relate to a specific ARC (e.g. harvest-level failures) may
|
|
27
|
+
omit the identifier.
|
|
28
|
+
- [ ] Detect duplicate ARC identifiers before submission and record them as
|
|
29
|
+
`duplicate` errors; do not submit the duplicate.
|
|
30
|
+
- [ ] Skip individual ARC submission failures and continue the harvest with
|
|
31
|
+
remaining items; record each failure as a `submission_failed` error.
|
|
32
|
+
- [ ] Abort the entire harvest on catastrophic errors (e.g. authentication
|
|
33
|
+
failure, invalid harvest state) and mark the harvest as failed before
|
|
34
|
+
propagating the exception to the caller.
|
|
35
|
+
|
|
36
|
+
## Edge Cases
|
|
37
|
+
|
|
38
|
+
ARC with no extractable RO-Crate identifier → submitted normally; any
|
|
39
|
+
resulting error records no ARC identifier (`null`).
|
|
40
|
+
|
|
41
|
+
Two ARCs share the same identifier → the second is skipped; a `duplicate`
|
|
42
|
+
error is recorded for it; the first continues to be submitted normally.
|
|
43
|
+
|
|
44
|
+
Catastrophic error during submission → remaining tasks are cancelled; the
|
|
45
|
+
harvest is transitioned to `FAILED`; the exception propagates to the caller.
|
|
46
|
+
|
|
47
|
+
No per-item errors → the returned result contains an empty errors list.
|
|
48
|
+
|
|
49
|
+
`expected_datasets` not provided → harvest is created without a progress
|
|
50
|
+
denominator; statistics show raw counts only.
|
|
@@ -2,7 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
from .api_client import ApiClient, ApiClientError
|
|
4
4
|
from .config import Config
|
|
5
|
-
from .models import
|
|
5
|
+
from .models import (
|
|
6
|
+
ArcEventSummary,
|
|
7
|
+
ArcLifecycleStatus,
|
|
8
|
+
ArcMetadata,
|
|
9
|
+
ArcResult,
|
|
10
|
+
ArcStatus,
|
|
11
|
+
HarvestError,
|
|
12
|
+
HarvestErrorType,
|
|
13
|
+
HarvestResult,
|
|
14
|
+
HarvestStatistics,
|
|
15
|
+
HarvestStatus,
|
|
16
|
+
)
|
|
6
17
|
|
|
7
18
|
__all__ = [
|
|
8
19
|
"Config",
|
|
@@ -14,5 +25,8 @@ __all__ = [
|
|
|
14
25
|
"ArcMetadata",
|
|
15
26
|
"ArcEventSummary",
|
|
16
27
|
"HarvestResult",
|
|
28
|
+
"HarvestStatistics",
|
|
17
29
|
"HarvestStatus",
|
|
30
|
+
"HarvestError",
|
|
31
|
+
"HarvestErrorType",
|
|
18
32
|
]
|
|
@@ -7,6 +7,7 @@ import ssl
|
|
|
7
7
|
import threading
|
|
8
8
|
from collections.abc import AsyncGenerator, AsyncIterator
|
|
9
9
|
from contextlib import asynccontextmanager
|
|
10
|
+
from datetime import UTC, datetime
|
|
10
11
|
from http import HTTPStatus
|
|
11
12
|
from typing import TYPE_CHECKING, Any, cast
|
|
12
13
|
|
|
@@ -22,7 +23,7 @@ from middleware.shared.api_models.v3.models import (
|
|
|
22
23
|
)
|
|
23
24
|
|
|
24
25
|
from .config import Config
|
|
25
|
-
from .models import ArcResult, HarvestResult
|
|
26
|
+
from .models import ArcResult, HarvestError, HarvestErrorType, HarvestResult, HarvestStatus
|
|
26
27
|
|
|
27
28
|
if TYPE_CHECKING:
|
|
28
29
|
from arctrl import ARC # type: ignore[import-untyped]
|
|
@@ -246,29 +247,43 @@ class ApiClient:
|
|
|
246
247
|
self,
|
|
247
248
|
harvest_id: str,
|
|
248
249
|
done_tasks: set[asyncio.Task[None]],
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
250
|
+
task_identifiers: dict[asyncio.Task[None], str | None],
|
|
251
|
+
) -> tuple[list[HarvestError], Exception | None]:
|
|
252
|
+
"""Return (errors, catastrophic_error) for completed submission tasks."""
|
|
253
|
+
errors: list[HarvestError] = []
|
|
252
254
|
|
|
253
255
|
for done_task in done_tasks:
|
|
256
|
+
arc_id = task_identifiers.pop(done_task, None)
|
|
254
257
|
try:
|
|
255
258
|
done_task.result()
|
|
256
259
|
except Exception as e: # noqa: BLE001
|
|
257
260
|
if self._is_catastrophic_harvest_error(e):
|
|
258
|
-
return
|
|
259
|
-
|
|
261
|
+
return errors, e
|
|
262
|
+
errors.append(
|
|
263
|
+
HarvestError(
|
|
264
|
+
arc_id=arc_id,
|
|
265
|
+
error_type=HarvestErrorType.SUBMISSION_FAILED,
|
|
266
|
+
message=str(e),
|
|
267
|
+
timestamp=datetime.now(UTC).isoformat(),
|
|
268
|
+
)
|
|
269
|
+
)
|
|
260
270
|
logger.warning("Skipping failed ARC submission in harvest %s: %s", harvest_id, e)
|
|
261
271
|
|
|
262
|
-
return
|
|
272
|
+
return errors, None
|
|
263
273
|
|
|
264
274
|
async def _submit_arcs_parallel(
|
|
265
275
|
self,
|
|
266
276
|
harvest_id: str,
|
|
267
277
|
arcs: "AsyncGenerator[ARC | dict[str, Any] | str, None] | AsyncIterator[ARC | dict[str, Any] | str]",
|
|
268
|
-
) ->
|
|
269
|
-
"""Submit all ARCs in bounded parallelism and return
|
|
278
|
+
) -> list[HarvestError]:
|
|
279
|
+
"""Submit all ARCs in bounded parallelism and return per-item errors.
|
|
280
|
+
|
|
281
|
+
Compatibility shim (issue #240): duplicate detection and submission
|
|
282
|
+
failures are recorded client-side until the server persists them natively.
|
|
283
|
+
"""
|
|
270
284
|
pending_tasks: set[asyncio.Task[None]] = set()
|
|
271
|
-
|
|
285
|
+
task_identifiers: dict[asyncio.Task[None], str | None] = {}
|
|
286
|
+
errors: list[HarvestError] = []
|
|
272
287
|
seen_identifiers: set[str] = set()
|
|
273
288
|
|
|
274
289
|
async def submit_one(arc_item: dict[str, Any]) -> None:
|
|
@@ -281,35 +296,43 @@ class ApiClient:
|
|
|
281
296
|
if identifier is not None:
|
|
282
297
|
if identifier in seen_identifiers:
|
|
283
298
|
logger.error(
|
|
284
|
-
"
|
|
285
|
-
"
|
|
299
|
+
"Duplicate ARC identifier '%s' in harvest %s — "
|
|
300
|
+
"two ARCs share the same identifier (client-side data error).",
|
|
286
301
|
identifier,
|
|
287
302
|
harvest_id,
|
|
288
303
|
)
|
|
289
|
-
|
|
304
|
+
errors.append(
|
|
305
|
+
HarvestError(
|
|
306
|
+
arc_id=identifier,
|
|
307
|
+
error_type=HarvestErrorType.DUPLICATE,
|
|
308
|
+
message=f"Duplicate ARC identifier '{identifier}' — two ARCs share the same identifier",
|
|
309
|
+
timestamp=datetime.now(UTC).isoformat(),
|
|
310
|
+
)
|
|
311
|
+
)
|
|
290
312
|
continue
|
|
291
313
|
seen_identifiers.add(identifier)
|
|
292
314
|
|
|
293
315
|
task = asyncio.create_task(submit_one(serialized))
|
|
316
|
+
task_identifiers[task] = identifier
|
|
294
317
|
pending_tasks.add(task)
|
|
295
318
|
|
|
296
319
|
if len(pending_tasks) >= self._config.max_concurrency:
|
|
297
320
|
done, pending = await asyncio.wait(pending_tasks, return_when=asyncio.FIRST_COMPLETED)
|
|
298
321
|
pending_tasks = pending
|
|
299
|
-
|
|
300
|
-
|
|
322
|
+
new_errors, catastrophic_error = self._process_completed_arc_tasks(harvest_id, done, task_identifiers)
|
|
323
|
+
errors.extend(new_errors)
|
|
301
324
|
if catastrophic_error is not None:
|
|
302
325
|
await self._cancel_pending_arc_tasks(pending_tasks)
|
|
303
326
|
raise catastrophic_error
|
|
304
327
|
|
|
305
328
|
if pending_tasks:
|
|
306
329
|
done, _ = await asyncio.wait(pending_tasks)
|
|
307
|
-
|
|
308
|
-
|
|
330
|
+
new_errors, catastrophic_error = self._process_completed_arc_tasks(harvest_id, done, task_identifiers)
|
|
331
|
+
errors.extend(new_errors)
|
|
309
332
|
if catastrophic_error is not None:
|
|
310
333
|
raise catastrophic_error
|
|
311
334
|
|
|
312
|
-
return
|
|
335
|
+
return errors
|
|
313
336
|
|
|
314
337
|
def __init__(self, config: Config) -> None:
|
|
315
338
|
"""Initialize the ApiClient.
|
|
@@ -484,7 +507,7 @@ class ApiClient:
|
|
|
484
507
|
graph = arc_content.get("@graph")
|
|
485
508
|
if isinstance(graph, list):
|
|
486
509
|
for item in graph:
|
|
487
|
-
if item.get("@id") == "./":
|
|
510
|
+
if isinstance(item, dict) and item.get("@id") == "./":
|
|
488
511
|
identifier = item.get("identifier")
|
|
489
512
|
if isinstance(identifier, list):
|
|
490
513
|
identifier = identifier[0] if identifier else None
|
|
@@ -572,25 +595,32 @@ class ApiClient:
|
|
|
572
595
|
data = await self._post("v3/harvests", request)
|
|
573
596
|
return self._parse_harvest_response(data)
|
|
574
597
|
|
|
575
|
-
async def list_harvests(
|
|
576
|
-
|
|
598
|
+
async def list_harvests(
|
|
599
|
+
self,
|
|
600
|
+
rdi: str | None = None,
|
|
601
|
+
status: HarvestStatus | None = None,
|
|
602
|
+
limit: int = 20,
|
|
603
|
+
offset: int = 0,
|
|
604
|
+
) -> list[HarvestResult]:
|
|
605
|
+
"""List harvest runs, newest first.
|
|
577
606
|
|
|
578
|
-
|
|
607
|
+
.. note::
|
|
608
|
+
Not yet implemented — requires server-side changes (status filter,
|
|
609
|
+
guaranteed newest-first sort order). Tracked in GitHub issue #242.
|
|
579
610
|
|
|
580
611
|
Args:
|
|
581
612
|
rdi: Optional RDI filter.
|
|
613
|
+
status: Optional status filter (e.g. ``HarvestStatus.RUNNING``).
|
|
614
|
+
limit: Maximum number of results to return (default 20).
|
|
615
|
+
offset: Number of records to skip for pagination (default 0).
|
|
582
616
|
|
|
583
|
-
|
|
584
|
-
|
|
617
|
+
Raises:
|
|
618
|
+
NotImplementedError: Always — pending server-side support.
|
|
585
619
|
"""
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
try:
|
|
591
|
-
return [HarvestResult.model_validate(d) for d in data]
|
|
592
|
-
except ValidationError as e:
|
|
593
|
-
raise ApiClientError(f"Invalid harvest list response from API: {e}") from e
|
|
620
|
+
raise NotImplementedError(
|
|
621
|
+
"list_harvests requires server-side changes (status filter, guaranteed "
|
|
622
|
+
"newest-first sort order). See GitHub issue #242."
|
|
623
|
+
)
|
|
594
624
|
|
|
595
625
|
async def get_harvest(self, harvest_id: str) -> HarvestResult:
|
|
596
626
|
"""Get a single harvest run by ID.
|
|
@@ -721,7 +751,7 @@ class ApiClient:
|
|
|
721
751
|
logger.info("[%s] Started harvest %s for RDI %s", rdi, harvest_id, rdi)
|
|
722
752
|
|
|
723
753
|
try:
|
|
724
|
-
|
|
754
|
+
client_errors = await self._submit_arcs_parallel(harvest_id, arcs)
|
|
725
755
|
except Exception:
|
|
726
756
|
logger.warning(
|
|
727
757
|
"[%s] Catastrophic error during ARC submission, marking harvest %s as failed", rdi, harvest_id
|
|
@@ -729,15 +759,21 @@ class ApiClient:
|
|
|
729
759
|
await self._fail_harvest_safely(rdi, harvest_id)
|
|
730
760
|
raise
|
|
731
761
|
|
|
732
|
-
if
|
|
762
|
+
if client_errors:
|
|
733
763
|
logger.warning(
|
|
734
|
-
"[%s] Harvest %s
|
|
764
|
+
"[%s] Harvest %s has %d per-item error(s)",
|
|
735
765
|
rdi,
|
|
736
766
|
harvest_id,
|
|
737
|
-
|
|
767
|
+
len(client_errors),
|
|
738
768
|
)
|
|
739
769
|
|
|
740
770
|
result = await self.complete_harvest(harvest_id)
|
|
771
|
+
# Compatibility shim (issue #240): inject client-side errors into the result
|
|
772
|
+
# until the server persists and returns them natively via the harvest response.
|
|
773
|
+
# When the server supports it, result.errors will already be populated here
|
|
774
|
+
# and this merge can be removed.
|
|
775
|
+
if client_errors:
|
|
776
|
+
result = result.model_copy(update={"errors": result.errors + client_errors})
|
|
741
777
|
logger.info("[%s] Completed harvest %s", rdi, harvest_id)
|
|
742
778
|
return result
|
|
743
779
|
|
|
@@ -40,6 +40,32 @@ class HarvestStatus(StrEnum):
|
|
|
40
40
|
CANCELLED = "CANCELLED"
|
|
41
41
|
|
|
42
42
|
|
|
43
|
+
class HarvestErrorType(StrEnum):
|
|
44
|
+
"""Category of a per-item error recorded during a harvest run."""
|
|
45
|
+
|
|
46
|
+
DUPLICATE = "duplicate"
|
|
47
|
+
SUBMISSION_FAILED = "submission_failed"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class HarvestError(BaseModel):
|
|
51
|
+
"""A single per-item error recorded during a harvest run.
|
|
52
|
+
|
|
53
|
+
Once the server persists errors natively (issue #240), ``HarvestResult.errors`` is
|
|
54
|
+
populated directly from the server response returned by any harvest
|
|
55
|
+
query method. Until then, :meth:`~middleware.api_client.ApiClient.harvest_arcs`
|
|
56
|
+
collects errors client-side and injects them into the returned
|
|
57
|
+
:class:`HarvestResult` as a compatibility shim.
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
arc_id: Annotated[
|
|
61
|
+
str | None,
|
|
62
|
+
Field(description="ARC identifier (RO-Crate identifier field), None if not applicable or not extractable"),
|
|
63
|
+
] = None
|
|
64
|
+
error_type: Annotated[HarvestErrorType, Field(description="Category of the error")]
|
|
65
|
+
message: Annotated[str, Field(description="Human-readable error description")]
|
|
66
|
+
timestamp: Annotated[str, Field(description="ISO 8601 timestamp when the error occurred")] = ""
|
|
67
|
+
|
|
68
|
+
|
|
43
69
|
class ArcEventSummary(BaseModel):
|
|
44
70
|
"""Summary of a single event recorded against an ARC."""
|
|
45
71
|
|
|
@@ -76,6 +102,26 @@ class ArcResult(BaseModel):
|
|
|
76
102
|
client_id: Annotated[str | None, Field(description="Authenticated client identifier")] = None
|
|
77
103
|
|
|
78
104
|
|
|
105
|
+
class HarvestStatistics(BaseModel):
|
|
106
|
+
"""Statistics for a completed harvest run.
|
|
107
|
+
|
|
108
|
+
Mirrors the server-side ``HarvestStatistics`` wire format so that
|
|
109
|
+
:attr:`~middleware.api_client.HarvestResult.statistics` is
|
|
110
|
+
validated and typed rather than an opaque ``dict``.
|
|
111
|
+
"""
|
|
112
|
+
|
|
113
|
+
expected_datasets: Annotated[
|
|
114
|
+
int | None,
|
|
115
|
+
Field(description="Number of datasets expected to be harvested, as reported by the client."),
|
|
116
|
+
] = None
|
|
117
|
+
arcs_submitted: Annotated[int, Field(description="Total ARCs submitted")] = 0
|
|
118
|
+
arcs_new: Annotated[int, Field(description="New ARCs created")] = 0
|
|
119
|
+
arcs_updated: Annotated[int, Field(description="Existing ARCs updated")] = 0
|
|
120
|
+
arcs_unchanged: Annotated[int, Field(description="ARCs with no changes")] = 0
|
|
121
|
+
arcs_missing: Annotated[int, Field(description="ARCs marked as missing")] = 0
|
|
122
|
+
errors: Annotated[int, Field(description="Number of errors encountered")] = 0
|
|
123
|
+
|
|
124
|
+
|
|
79
125
|
class HarvestResult(BaseModel):
|
|
80
126
|
"""Result returned by harvest-related methods on :class:`~middleware.api_client.ApiClient`.
|
|
81
127
|
|
|
@@ -88,6 +134,16 @@ class HarvestResult(BaseModel):
|
|
|
88
134
|
status: Annotated[HarvestStatus, Field(description="Current harvest status")]
|
|
89
135
|
started_at: Annotated[str, Field(description="ISO 8601 start timestamp")]
|
|
90
136
|
completed_at: Annotated[str | None, Field(description="ISO 8601 completion timestamp")] = None
|
|
91
|
-
statistics: Annotated[
|
|
137
|
+
statistics: Annotated[HarvestStatistics, Field(description="Harvest statistics")] = Field(
|
|
138
|
+
default_factory=HarvestStatistics
|
|
139
|
+
)
|
|
140
|
+
errors: Annotated[
|
|
141
|
+
list[HarvestError],
|
|
142
|
+
Field(
|
|
143
|
+
description="Per-item errors encountered during the harvest run. "
|
|
144
|
+
"Populated client-side by harvest_arcs() until the server supports "
|
|
145
|
+
"error persistence natively (issue #240)."
|
|
146
|
+
),
|
|
147
|
+
] = Field(default_factory=list)
|
|
92
148
|
message: Annotated[str, Field(description="Human-readable result message")] = ""
|
|
93
149
|
client_id: Annotated[str | None, Field(description="Authenticated client identifier")] = None
|
|
@@ -359,12 +359,12 @@ async def test_global_max_concurrency_limits_parallel_requests(client_config: Co
|
|
|
359
359
|
await asyncio.sleep(0.02)
|
|
360
360
|
async with counter_lock:
|
|
361
361
|
in_flight -= 1
|
|
362
|
-
return httpx.Response(http.HTTPStatus.OK, json=
|
|
362
|
+
return httpx.Response(http.HTTPStatus.OK, json=_HARVEST_RESPONSE)
|
|
363
363
|
|
|
364
|
-
route = respx.get(f"{client_config.api_url}v3/harvests").mock(side_effect=slow_response)
|
|
364
|
+
route = respx.get(f"{client_config.api_url}v3/harvests/harvest-456").mock(side_effect=slow_response)
|
|
365
365
|
|
|
366
366
|
async with ApiClient(client_config) as client:
|
|
367
|
-
await asyncio.gather(*(client.
|
|
367
|
+
await asyncio.gather(*(client.get_harvest("harvest-456") for _ in range(6)))
|
|
368
368
|
|
|
369
369
|
assert route.call_count == 6 # noqa: PLR2004
|
|
370
370
|
assert peak_in_flight <= 2 # noqa: PLR2004
|
|
@@ -426,31 +426,6 @@ async def test_create_harvest_503_not_retried(client_config: Config) -> None:
|
|
|
426
426
|
assert route.call_count == 1
|
|
427
427
|
|
|
428
428
|
|
|
429
|
-
@pytest.mark.asyncio
|
|
430
|
-
@respx.mock
|
|
431
|
-
async def test_list_harvests(client_config: Config) -> None:
|
|
432
|
-
"""Test listing harvest runs."""
|
|
433
|
-
respx.get(f"{client_config.api_url}v3/harvests").mock(
|
|
434
|
-
return_value=httpx.Response(http.HTTPStatus.OK, json=[_HARVEST_RESPONSE, _HARVEST_RESPONSE])
|
|
435
|
-
)
|
|
436
|
-
async with ApiClient(client_config) as client:
|
|
437
|
-
harvests = await client.list_harvests()
|
|
438
|
-
assert len(harvests) == 2 # noqa: PLR2004
|
|
439
|
-
assert all(isinstance(h, HarvestResult) for h in harvests)
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
@pytest.mark.asyncio
|
|
443
|
-
@respx.mock
|
|
444
|
-
async def test_list_harvests_with_rdi_filter(client_config: Config) -> None:
|
|
445
|
-
"""Test listing harvest runs filtered by RDI."""
|
|
446
|
-
route = respx.get(f"{client_config.api_url}v3/harvests").mock(
|
|
447
|
-
return_value=httpx.Response(http.HTTPStatus.OK, json=[_HARVEST_RESPONSE])
|
|
448
|
-
)
|
|
449
|
-
async with ApiClient(client_config) as client:
|
|
450
|
-
await client.list_harvests(rdi="test-rdi")
|
|
451
|
-
assert "rdi=test-rdi" in str(route.calls.last.request.url)
|
|
452
|
-
|
|
453
|
-
|
|
454
429
|
@pytest.mark.asyncio
|
|
455
430
|
@respx.mock
|
|
456
431
|
async def test_get_harvest(client_config: Config) -> None:
|
{fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/.gitignore
RENAMED
|
File without changes
|
{fairagro_middleware_api_client-8.7.1.dev11 → fairagro_middleware_api_client-8.8.1.dev12}/README.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|