fairagro-middleware-api-client 8.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fairagro_middleware_api_client-8.6.2.dist-info/METADATA +164 -0
- fairagro_middleware_api_client-8.6.2.dist-info/RECORD +8 -0
- fairagro_middleware_api_client-8.6.2.dist-info/WHEEL +4 -0
- middleware/api_client/__init__.py +18 -0
- middleware/api_client/api_client.py +681 -0
- middleware/api_client/config.py +71 -0
- middleware/api_client/models.py +93 -0
- middleware/api_client/py.typed +0 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fairagro-middleware-api-client
|
|
3
|
+
Version: 8.6.2
|
|
4
|
+
Summary: The FAIRagro advanced middleware API client
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Requires-Dist: httpx>=0.28.1
|
|
7
|
+
Requires-Dist: pydantic>=2.12.5
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
# Middleware API Client
|
|
11
|
+
|
|
12
|
+
Python client for the FAIRagro Middleware API with certificate-based authentication (mTLS).
|
|
13
|
+
|
|
14
|
+
## Features
|
|
15
|
+
|
|
16
|
+
- ✅ Certificate-based authentication (mutual TLS)
|
|
17
|
+
- ✅ Configuration via YAML files, environment variables, or Docker secrets
|
|
18
|
+
- ✅ Async context manager support
|
|
19
|
+
- ✅ Comprehensive error handling
|
|
20
|
+
- ✅ Type-safe with Pydantic models
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
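This package is part of the FAIRagro Advanced Middleware project. Besides `httpx` and `pydantic`, the client imports the project-local `middleware.shared` package, which is not declared in this wheel's `Requires-Dist` and therefore has to be made available separately.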
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
### 1. Create Configuration File
|
|
29
|
+
|
|
30
|
+
```yaml
|
|
31
|
+
# config.yaml
|
|
32
|
+
log_level: INFO
|
|
33
|
+
api_url: https://your-api-server:8000
|
|
34
|
+
client_cert_path: /path/to/client-cert.pem
|
|
35
|
+
client_key_path: /path/to/client-key.pem
|
|
36
|
+
ca_cert_path: /path/to/ca-cert.pem # optional
|
|
37
|
+
timeout: 30.0
|
|
38
|
+
verify_ssl: true
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### 2. Use the Client
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
import asyncio
|
|
45
|
+
from pathlib import Path
|
|
46
|
+
from arctrl import ARC, ArcInvestigation
|
|
47
|
+
from middleware.api_client import Config, ApiClient
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def main():
|
|
51
|
+
# Load configuration
|
|
52
|
+
config = Config.from_yaml_file(Path("config.yaml"))
|
|
53
|
+
|
|
54
|
+
# Create ARC object
|
|
55
|
+
inv = ArcInvestigation.create(identifier="my-arc", title="My ARC")
|
|
56
|
+
arc = ARC.from_arc_investigation(inv)
|
|
57
|
+
|
|
58
|
+
# Use client with context manager
|
|
59
|
+
async with ApiClient(config) as client:
|
|
60
|
+
# Send a single ARC
|
|
61
|
+
response = await client.create_or_update_arc(
|
|
62
|
+
rdi="my-rdi",
|
|
63
|
+
arc=arc,
|
|
64
|
+
)
|
|
65
|
+
print(f"ARC status: {response.status}")
|
|
66
|
+
|
|
67
|
+
# Or run a harvest workflow
|
|
68
|
+
async def arc_stream():
|
|
69
|
+
yield arc
|
|
70
|
+
|
|
71
|
+
harvest = await client.harvest_arcs(
|
|
72
|
+
rdi="my-rdi",
|
|
73
|
+
arcs=arc_stream(),
|
|
74
|
+
expected_datasets=1,
|
|
75
|
+
)
|
|
76
|
+
print(f"Harvest status: {harvest.status}")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
asyncio.run(main())
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Configuration Options
|
|
83
|
+
|
|
84
|
+
| Option | Type | Required | Default | Description |
|
|
85
|
+
| ------ | ---- | -------- | ------- | ----------- |
|
|
86
|
+
| `log_level` | string | No | INFO | Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) |
|
|
87
|
+
| `api_url` | string | Yes | - | Base URL of the Middleware API |
|
|
88
|
+
| `client_cert_path` | string | No | null | Path to client certificate (PEM format) |
|
|
89
|
+
| `client_key_path` | string | No | null | Path to client private key (PEM format) |
|
|
90
|
+
| `ca_cert_path` | string | No | null | Path to CA certificate for server verification |
|
|
91
|
+
| `timeout` | float | No | 30.0 | Request timeout in seconds |
|
|
92
|
+
| `verify_ssl` | bool | No | true | Enable SSL certificate verification |
|
|
93
|
+
| `max_concurrency` | int | No | 10 | Maximum concurrent API requests (also default for `harvest_arcs`) |
|
|
94
|
+
|
|
95
|
+
## API Methods
|
|
96
|
+
|
|
97
|
+
### `create_or_update_arc(rdi: str, arc: ARC | dict) -> ArcResult`
|
|
98
|
+
|
|
99
|
+
Create or update one ARC in the Middleware API.
|
|
100
|
+
|
|
101
|
+
**Parameters:**
|
|
102
|
+
|
|
103
|
+
- `rdi` (str): The RDI identifier (e.g., "edaphobase").
|
|
104
|
+
- `arc` (ARC | dict): ARC object from arctrl or pre-serialised RO-Crate dict.
|
|
105
|
+
|
|
106
|
+
**Returns:**
|
|
107
|
+
|
|
108
|
+
- `ArcResult`: Contains the result of the operation.
|
|
109
|
+
|
|
110
|
+
**Raises:**
|
|
111
|
+
|
|
112
|
+
- `ApiClientError`: If the request fails due to HTTP errors or network issues.
|
|
113
|
+
|
|
114
|
+
**Example:**
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from arctrl import ARC, ArcInvestigation
|
|
118
|
+
|
|
119
|
+
inv = ArcInvestigation.create(identifier="my-arc-001", title="My ARC")
|
|
120
|
+
arc = ARC.from_arc_investigation(inv)
|
|
121
|
+
|
|
122
|
+
response = await client.create_or_update_arc(
|
|
123
|
+
rdi="edaphobase",
|
|
124
|
+
arc=arc,
|
|
125
|
+
)
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### `harvest_arcs(rdi: str, arcs: AsyncIterator[ARC | dict], expected_datasets: int | None = None) -> HarvestResult`
|
|
129
|
+
|
|
130
|
+
Convenience workflow to create a harvest, upload all ARCs from an async iterator, and complete the harvest.
|
|
131
|
+
|
|
132
|
+
- Uses `config.max_concurrency` by default.
|
|
133
|
+
- Continues on item-level submission errors and skips failed items.
|
|
134
|
+
- Cancels the harvest only for catastrophic errors.
|
|
135
|
+
|
|
136
|
+
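Errors reported by the client are raised as `ApiClientError` exceptions, which carry an optional `status_code` attribute holding the HTTP status (or `None` when no HTTP status is available):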
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
from middleware.api_client import ApiClientError
|
|
140
|
+
|
|
141
|
+
try:
|
|
142
|
+
response = await client.create_or_update_arc(
|
|
143
|
+
rdi="my-rdi",
|
|
144
|
+
arc=arc,
|
|
145
|
+
)
|
|
146
|
+
except ApiClientError as e:
|
|
147
|
+
print(f"API Error: {e}")
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Configuration via Environment Variables
|
|
151
|
+
|
|
152
|
+
You can override configuration values using environment variables:
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
export API_URL="https://production-api:8000"
|
|
156
|
+
export CLIENT_CERT_PATH="/secure/certs/prod-cert.pem"
|
|
157
|
+
export CLIENT_KEY_PATH="/secure/certs/prod-key.pem"
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
Or use Docker secrets in `/run/secrets/`.
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
|
|
164
|
+
This is part of the FAIRagro Advanced Middleware project.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
middleware/api_client/__init__.py,sha256=y04kcl61sXjg7RDWKqEUV_q9ZTXH5hGqK8iX0fCiyAk,461
|
|
2
|
+
middleware/api_client/api_client.py,sha256=Yf5EQ5eeiB0LvHBnvOPSL7xo29JH3BHKUExfiYEEPfo,25833
|
|
3
|
+
middleware/api_client/config.py,sha256=j15cqxFDoT0WV3y5TqVBl_h6i22ixeI5gmfVT_8zkY8,2741
|
|
4
|
+
middleware/api_client/models.py,sha256=RZL5ZyMVv2DOljWbVZvnaHQ_AmmLsmXHbONBYdm-iwk,3721
|
|
5
|
+
middleware/api_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
fairagro_middleware_api_client-8.6.2.dist-info/METADATA,sha256=HX02Mb1XJZV77Z6zovQmecfUdOGcyNO1praQDpu0nvo,4580
|
|
7
|
+
fairagro_middleware_api_client-8.6.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
8
|
+
fairagro_middleware_api_client-8.6.2.dist-info/RECORD,,
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""The FAIRagro Middleware API Client package."""
|
|
2
|
+
|
|
3
|
+
from .api_client import ApiClient, ApiClientError
|
|
4
|
+
from .config import Config
|
|
5
|
+
from .models import ArcEventSummary, ArcLifecycleStatus, ArcMetadata, ArcResult, ArcStatus, HarvestResult, HarvestStatus
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"Config",
|
|
9
|
+
"ApiClient",
|
|
10
|
+
"ApiClientError",
|
|
11
|
+
"ArcResult",
|
|
12
|
+
"ArcStatus",
|
|
13
|
+
"ArcLifecycleStatus",
|
|
14
|
+
"ArcMetadata",
|
|
15
|
+
"ArcEventSummary",
|
|
16
|
+
"HarvestResult",
|
|
17
|
+
"HarvestStatus",
|
|
18
|
+
]
|
|
@@ -0,0 +1,681 @@
|
|
|
1
|
+
"""Client for the FAIRagro Middleware API (v3)."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import ssl
|
|
7
|
+
import threading
|
|
8
|
+
from collections.abc import AsyncGenerator, AsyncIterator
|
|
9
|
+
from contextlib import asynccontextmanager
|
|
10
|
+
from http import HTTPStatus
|
|
11
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
from pydantic import BaseModel, ValidationError
|
|
15
|
+
|
|
16
|
+
from middleware.shared.api_models.v3.models import (
|
|
17
|
+
CreateArcRequest,
|
|
18
|
+
CreateHarvestRequest,
|
|
19
|
+
SubmitHarvestArcRequest,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from .config import Config
|
|
23
|
+
from .models import ArcResult, HarvestResult
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from arctrl import ARC # type: ignore[import-untyped]
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ApiClientError(Exception):
|
|
32
|
+
"""Base exception for ApiClient errors."""
|
|
33
|
+
|
|
34
|
+
def __init__(self, message: str, status_code: int | None = None) -> None:
|
|
35
|
+
"""Initialize with message and optional status code."""
|
|
36
|
+
super().__init__(message)
|
|
37
|
+
self.status_code = status_code
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ApiClient:
|
|
41
|
+
"""Client for the FAIRagro Middleware API (v3).
|
|
42
|
+
|
|
43
|
+
The v3 API is synchronous from the client's perspective: every call
|
|
44
|
+
returns the final result immediately — no task polling required.
|
|
45
|
+
GitLab synchronisation is triggered in the background by the server.
|
|
46
|
+
|
|
47
|
+
Example::
|
|
48
|
+
|
|
49
|
+
config = Config(api_url="https://api.example.com")
|
|
50
|
+
async with ApiClient(config) as client:
|
|
51
|
+
# Simple ARC submission
|
|
52
|
+
arc_response = await client.create_or_update_arc("my-rdi", arc_dict)
|
|
53
|
+
|
|
54
|
+
# Harvest-based batch submission
|
|
55
|
+
harvest = await client.create_harvest("my-rdi", expected_datasets=42)
|
|
56
|
+
for arc in arcs:
|
|
57
|
+
await client.submit_arc_in_harvest(harvest.harvest_id, arc)
|
|
58
|
+
await client.complete_harvest(harvest.harvest_id)
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
_IDEMPOTENT_METHODS = frozenset({"GET", "HEAD", "OPTIONS", "DELETE"})
|
|
62
|
+
_HTTP_ERROR_BODY_MAX_CHARS = 500
|
|
63
|
+
_global_request_limiter: asyncio.Semaphore | None = None
|
|
64
|
+
_global_max_concurrency: int | None = None
|
|
65
|
+
_global_in_flight_requests: int = 0
|
|
66
|
+
_global_state_lock = threading.Lock()
|
|
67
|
+
|
|
68
|
+
@classmethod
def _configure_global_request_limiter(cls, max_concurrency: int) -> None:
    """Install or reuse the class-wide semaphore that bounds concurrent requests.

    A new semaphore is created when none exists yet or when *max_concurrency*
    differs from the currently configured value.

    Args:
        max_concurrency: Number of slots the shared semaphore should offer.

    Raises:
        ApiClientError: If a different limit is requested while the in-flight
            request counter is greater than zero.
    """
    with cls._global_state_lock:
        configured = cls._global_max_concurrency
        limit_differs = configured != max_concurrency

        if configured is not None and limit_differs and cls._global_in_flight_requests > 0:
            msg = (
                "Cannot change ApiClient max_concurrency while requests are in flight. "
                "Please reuse one max_concurrency value per process or wait for ongoing requests to finish."
            )
            raise ApiClientError(msg)

        # Nothing to do when a limiter with the requested size already exists.
        if cls._global_request_limiter is not None and not limit_differs:
            return

        cls._global_request_limiter = asyncio.Semaphore(max_concurrency)
        cls._global_max_concurrency = max_concurrency
|
|
86
|
+
|
|
87
|
+
@classmethod
@asynccontextmanager
async def _acquire_request_slot(cls) -> AsyncGenerator[None, None]:
    """Hold one slot of the class-wide request limiter for the ``async with`` body.

    The in-flight counter is incremented after the slot is acquired and
    decremented again (before the slot is released) when the body exits,
    whether normally or with an exception.

    Raises:
        ApiClientError: If no limiter has been configured on the class.
    """
    semaphore = cls._global_request_limiter
    if semaphore is None:
        msg = "ApiClient request limiter is not configured"
        raise ApiClientError(msg)

    await semaphore.acquire()
    # The counter is guarded by the same lock the limiter configuration uses.
    with cls._global_state_lock:
        cls._global_in_flight_requests += 1

    try:
        yield
    finally:
        with cls._global_state_lock:
            cls._global_in_flight_requests -= 1
        semaphore.release()
|
|
106
|
+
|
|
107
|
+
@classmethod
def _should_retry_http_status(cls, method: str, status_code: int) -> bool:
    """Tell whether *status_code* justifies retrying a request sent with *method*.

    Only methods listed in ``_IDEMPOTENT_METHODS`` are retried, and only for
    502, 503 and 504 responses.
    """
    if method not in cls._IDEMPOTENT_METHODS:
        return False
    return status_code in {
        httpx.codes.BAD_GATEWAY,
        httpx.codes.SERVICE_UNAVAILABLE,
        httpx.codes.GATEWAY_TIMEOUT,
    }
|
|
112
|
+
|
|
113
|
+
@classmethod
def _should_retry_request_error(cls, method: str, error: httpx.RequestError) -> bool:
    """Tell whether a transport-level *error* justifies retrying *method*.

    Retries are limited to methods in ``_IDEMPOTENT_METHODS``; timeouts
    (``httpx.TimeoutException``) are never retried.
    """
    timed_out = isinstance(error, httpx.TimeoutException)
    return method in cls._IDEMPOTENT_METHODS and not timed_out
|
|
119
|
+
|
|
120
|
+
@classmethod
def _should_retry_failure(
    cls,
    method: str,
    *,
    status_code: int | None = None,
    request_error: httpx.RequestError | None = None,
) -> bool:
    """Decide whether a failed request may be retried.

    Args:
        method: HTTP method of the request.
        status_code: Response status, when a response was received. Takes
            precedence over *request_error* if both are given.
        request_error: Transport error, when no response was received.

    Returns:
        ``True`` if the failure is retryable, ``False`` otherwise (including
        when neither a status code nor a request error is supplied).
    """
    if status_code is None and request_error is None:
        return False
    if status_code is not None:
        return cls._should_retry_http_status(method, status_code)
    return cls._should_retry_request_error(method, request_error)
|
|
134
|
+
|
|
135
|
+
@classmethod
def _build_failure_error_message(
    cls,
    failure: httpx.HTTPStatusError | httpx.RequestError,
    *,
    retryable: bool,
    max_retries: int,
) -> tuple[str, int | None]:
    """Turn a request failure into ``(message, status_code)`` for an ApiClientError.

    Args:
        failure: The httpx exception describing the failure.
        retryable: Whether the failure was of a retryable kind; selects the
            "failed after N retries" wording.
        max_retries: Retry limit quoted in the retry-exhausted message.

    Returns:
        The error message and the HTTP status code, which is ``None`` for
        failures that are not ``httpx.HTTPStatusError``.
    """
    if not isinstance(failure, httpx.HTTPStatusError):
        if retryable:
            return f"Request failed after {max_retries} retries: {failure}", None
        return f"Request failed: {failure}", None

    response = failure.response
    status_code = response.status_code
    if retryable:
        return f"Request failed after {max_retries} retries: HTTP {status_code}", status_code
    return cls._format_http_error_message(status_code, response.text), status_code
|
|
153
|
+
|
|
154
|
+
@classmethod
def _should_retry_or_raise_failure(
    cls,
    failure: httpx.HTTPStatusError | httpx.RequestError,
    *,
    method: str,
    attempt: int,
    max_retries: int,
) -> bool:
    """Return ``True`` when the caller should retry; otherwise raise.

    Args:
        failure: The httpx exception caught by the caller.
        method: HTTP method of the failed request.
        attempt: Zero-based index of the attempt that just failed.
        max_retries: Maximum number of retries allowed.

    Returns:
        ``True`` if the failure is retryable and attempts remain. The function
        never returns ``False``.

    Raises:
        ApiClientError: If the failure is permanent or retries are exhausted;
            chained from *failure*.
    """
    is_status_failure = isinstance(failure, httpx.HTTPStatusError)

    retryable = cls._should_retry_failure(
        method,
        status_code=failure.response.status_code if is_status_failure else None,
        request_error=failure if isinstance(failure, httpx.RequestError) else None,
    )

    if retryable and attempt < max_retries:
        if is_status_failure:
            logger.warning("Transient HTTP error %d from server, will retry", failure.response.status_code)
        else:
            logger.warning("Request error: %s. Retrying...", failure)
        return True

    msg, normalized_status_code = cls._build_failure_error_message(
        failure,
        retryable=retryable,
        max_retries=max_retries,
    )
    logger.error(msg)
    raise ApiClientError(msg, status_code=normalized_status_code) from failure
|
|
187
|
+
|
|
188
|
+
@classmethod
|
|
189
|
+
def _format_http_error_message(cls, status_code: int, response_text: str) -> str:
|
|
190
|
+
"""Build a safe and concise HTTP error message."""
|
|
191
|
+
response_excerpt = " ".join(response_text.splitlines()).strip()
|
|
192
|
+
if len(response_excerpt) > cls._HTTP_ERROR_BODY_MAX_CHARS:
|
|
193
|
+
response_excerpt = response_excerpt[: cls._HTTP_ERROR_BODY_MAX_CHARS] + "..."
|
|
194
|
+
return f"HTTP error {status_code}: {response_excerpt}"
|
|
195
|
+
|
|
196
|
+
@classmethod
def _parse_json_response(cls, resp: httpx.Response, method: str, path: str) -> Any:
    """Decode the JSON body of *resp*.

    Args:
        resp: Response to decode.
        method: HTTP method, used only in the error message.
        path: Request path, used only in the error message.

    Returns:
        The decoded JSON value, or ``None`` for a 204 No Content response.

    Raises:
        ApiClientError: If the body is not valid JSON; carries the response
            status code.
    """
    if resp.status_code != HTTPStatus.NO_CONTENT:
        try:
            return resp.json()
        except ValueError as e:
            msg = f"Invalid JSON response from API for {method} {path}"
            logger.error(msg)
            raise ApiClientError(msg, status_code=resp.status_code) from e
    return None
|
|
207
|
+
|
|
208
|
+
@classmethod
def _is_catastrophic_harvest_error(cls, error: Exception) -> bool:
    """Tell whether *error* should abort the whole harvest instead of one item.

    Catastrophic are: any exception that is not an ``ApiClientError``, an
    ``ApiClientError`` without a status code, the statuses 401, 403, 404 and
    409, and every status of 500 or above.
    """
    if not isinstance(error, ApiClientError) or error.status_code is None:
        return True

    status_code = error.status_code
    if status_code >= HTTPStatus.INTERNAL_SERVER_ERROR:
        return True

    return status_code in {
        HTTPStatus.UNAUTHORIZED,
        HTTPStatus.FORBIDDEN,
        HTTPStatus.NOT_FOUND,
        HTTPStatus.CONFLICT,
    }
|
|
228
|
+
|
|
229
|
+
async def _cancel_harvest_safely(self, rdi: str, harvest_id: str) -> None:
    """Cancel *harvest_id* on a best-effort basis.

    An ``ApiClientError`` raised by the cancellation is logged as a warning
    (tagged with *rdi*) and not propagated.
    """
    try:
        await self.cancel_harvest(harvest_id)
    except ApiClientError:
        # Deliberately swallowed: the caller is already handling a failure.
        logger.warning("[%s] Failed to cancel harvest %s", rdi, harvest_id)
    return None
|
|
235
|
+
|
|
236
|
+
@classmethod
|
|
237
|
+
async def _cancel_pending_arc_tasks(cls, pending_tasks: set[asyncio.Task[None]]) -> None:
|
|
238
|
+
"""Cancel and await remaining ARC submission tasks."""
|
|
239
|
+
for pending_task in pending_tasks:
|
|
240
|
+
pending_task.cancel()
|
|
241
|
+
await asyncio.gather(*pending_tasks, return_exceptions=True)
|
|
242
|
+
|
|
243
|
+
def _process_completed_arc_tasks(
    self,
    harvest_id: str,
    done_tasks: set[asyncio.Task[None]],
) -> tuple[int, Exception | None]:
    """Return ``(failed_count, catastrophic_error)`` for completed submission tasks.

    Every task in *done_tasks* is inspected, even after a catastrophic error
    has been found. Retrieving each result marks the task's exception as
    consumed, so asyncio does not later log "Task exception was never
    retrieved" for tasks that finished in the same batch.

    Args:
        harvest_id: Harvest identifier, used in log messages.
        done_tasks: Finished submission tasks to inspect.

    Returns:
        The number of non-catastrophic failures seen before any catastrophic
        error, and the first catastrophic error encountered (or ``None``).
    """
    failed_submissions = 0
    catastrophic_error: Exception | None = None

    for done_task in done_tasks:
        try:
            done_task.result()
        except Exception as e:  # noqa: BLE001
            if catastrophic_error is not None:
                # Already aborting; the exception only needed to be retrieved.
                continue
            if self._is_catastrophic_harvest_error(e):
                catastrophic_error = e
                continue
            failed_submissions += 1
            logger.warning("Skipping failed ARC submission in harvest %s: %s", harvest_id, e)

    return failed_submissions, catastrophic_error
|
|
261
|
+
|
|
262
|
+
async def _submit_arcs_parallel(
|
|
263
|
+
self,
|
|
264
|
+
harvest_id: str,
|
|
265
|
+
arcs: "AsyncGenerator[ARC | dict[str, Any], None] | AsyncIterator[ARC | dict[str, Any]]",
|
|
266
|
+
) -> int:
|
|
267
|
+
"""Submit all ARCs in bounded parallelism and return number of skipped ARC submissions."""
|
|
268
|
+
pending_tasks: set[asyncio.Task[None]] = set()
|
|
269
|
+
failed_submissions = 0
|
|
270
|
+
|
|
271
|
+
async def submit_one(arc_item: "ARC | dict[str, Any]") -> None:
|
|
272
|
+
await self.submit_arc_in_harvest(harvest_id, arc_item)
|
|
273
|
+
|
|
274
|
+
async for arc in arcs:
|
|
275
|
+
task = asyncio.create_task(submit_one(arc))
|
|
276
|
+
pending_tasks.add(task)
|
|
277
|
+
|
|
278
|
+
if len(pending_tasks) >= self._config.max_concurrency:
|
|
279
|
+
done, pending = await asyncio.wait(pending_tasks, return_when=asyncio.FIRST_COMPLETED)
|
|
280
|
+
pending_tasks = pending
|
|
281
|
+
failed_delta, catastrophic_error = self._process_completed_arc_tasks(harvest_id, done)
|
|
282
|
+
failed_submissions += failed_delta
|
|
283
|
+
if catastrophic_error is not None:
|
|
284
|
+
await self._cancel_pending_arc_tasks(pending_tasks)
|
|
285
|
+
raise catastrophic_error
|
|
286
|
+
|
|
287
|
+
if pending_tasks:
|
|
288
|
+
done, _ = await asyncio.wait(pending_tasks)
|
|
289
|
+
failed_delta, catastrophic_error = self._process_completed_arc_tasks(harvest_id, done)
|
|
290
|
+
failed_submissions += failed_delta
|
|
291
|
+
if catastrophic_error is not None:
|
|
292
|
+
raise catastrophic_error
|
|
293
|
+
|
|
294
|
+
return failed_submissions
|
|
295
|
+
|
|
296
|
+
def __init__(self, config: Config) -> None:
    """Set up the client from *config* without opening any connection.

    The class-wide request limiter is configured from
    ``config.max_concurrency``, and every configured certificate/key path is
    checked for existence. The HTTP client itself is created later, on first
    use.

    Args:
        config: Configuration object containing API URL and certificate paths.

    Raises:
        ApiClientError: If a configured certificate, key or CA file does not
            exist, or if the limiter cannot be reconfigured.
    """
    self._config = config
    self._client: httpx.AsyncClient | None = None

    self._configure_global_request_limiter(config.max_concurrency)

    cert_path = config.client_cert_path
    key_path = config.client_key_path
    ca_path = config.ca_cert_path

    # Checked in this order so the first missing file determines the message.
    configured_files = (
        ("Client certificate", cert_path),
        ("Client key", key_path),
        ("CA certificate", ca_path),
    )
    for label, file_path in configured_files:
        if file_path is not None and not file_path.exists():
            raise ApiClientError(f"{label} not found: {file_path}")

    logger.debug(
        "ApiClient initialized with API URL: %s, cert: %s, key: %s",
        config.api_url,
        cert_path,
        key_path,
    )
|
|
327
|
+
|
|
328
|
+
# ------------------------------------------------------------------
|
|
329
|
+
# HTTP infrastructure
|
|
330
|
+
# ------------------------------------------------------------------
|
|
331
|
+
|
|
332
|
+
def _get_client(self) -> httpx.AsyncClient:
    """Return the shared httpx.AsyncClient, creating it on first call.

    TLS setup, derived from the configuration:

    * A client certificate is loaded whenever both ``client_cert_path`` and
      ``client_key_path`` are set. This now also applies when ``verify_ssl``
      is disabled; previously the certificate was silently dropped in that
      case, so mutual TLS could not work against a server with an
      unverifiable certificate.
    * With ``verify_ssl`` enabled, ``ca_cert_path`` (if set) is passed as the
      ``cafile`` of the default SSL context.
    * With ``verify_ssl`` disabled, hostname checking and certificate
      verification are turned off.
    * Without a CA file or client certificate, the plain booleans ``True`` /
      ``False`` are passed to httpx as before.
    """
    if self._client is not None:
        return self._client

    cfg = self._config
    has_client_cert = bool(cfg.client_cert_path and cfg.client_key_path)
    use_custom_ca = bool(cfg.verify_ssl and cfg.ca_cert_path)

    verify: bool | ssl.SSLContext
    if not has_client_cert and not use_custom_ca:
        verify = bool(cfg.verify_ssl)
    else:
        ctx = ssl.create_default_context(cafile=str(cfg.ca_cert_path) if use_custom_ca else None)
        if not cfg.verify_ssl:
            # check_hostname must be cleared before verify_mode may be CERT_NONE.
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
        if has_client_cert:
            ctx.load_cert_chain(
                str(cfg.client_cert_path),
                str(cfg.client_key_path),
            )
        verify = ctx

    self._client = httpx.AsyncClient(
        base_url=cfg.api_url,
        verify=verify,
        timeout=cfg.timeout,
        follow_redirects=cfg.follow_redirects,
        headers={"accept": "application/json"},
    )
    logger.debug("Created new httpx.AsyncClient instance")

    return self._client
|
|
365
|
+
|
|
366
|
+
async def _request_with_retries(
    self,
    method: str,
    path: str,
    **kwargs: Any,
) -> Any:
    """Send one HTTP request, retrying retryable failures with exponential backoff.

    The request itself runs while holding a slot of the class-wide request
    limiter. Before retry number ``k`` (1-based) the coroutine sleeps for
    ``retry_backoff_factor * 2 ** (k - 1)`` seconds.

    Args:
        method: HTTP method; upper-cased before use.
        path: Request path; leading slashes are stripped.
        **kwargs: Passed through to ``httpx.AsyncClient.request``.

    Returns:
        Parsed JSON body for responses with content, or ``None`` for 204.

    Raises:
        ApiClientError: On permanent HTTP errors or exhausted retries.
    """
    http_client = self._get_client()
    path = path.lstrip("/")
    method = method.upper()
    retry_limit = self._config.max_retries

    for attempt in range(retry_limit + 1):
        if attempt > 0:
            backoff = self._config.retry_backoff_factor * (2 ** (attempt - 1))
            logger.info("Retrying %s %s in %.1fs (attempt %d/%d)", method, path, backoff, attempt, retry_limit)
            await asyncio.sleep(backoff)

        try:
            logger.debug("Sending %s request to %s (attempt %d)", method, path, attempt + 1)
            async with self._acquire_request_slot():
                resp = await http_client.request(method, path, **kwargs)

            # Retry on transient server-side errors before raising
            retryable = self._should_retry_failure(method, status_code=resp.status_code)
            if retryable and attempt < retry_limit:
                logger.warning("Transient HTTP error %d from server, will retry", resp.status_code)
                continue

            resp.raise_for_status()
            logger.debug("%s %s succeeded with status %d", method, path, resp.status_code)

            return self._parse_json_response(resp, method, path)

        except (httpx.HTTPStatusError, httpx.RequestError) as e:
            # Either raises ApiClientError or returns True to request another attempt.
            if self._should_retry_or_raise_failure(
                e,
                method=method,
                attempt=attempt,
                max_retries=retry_limit,
            ):
                continue

    raise ApiClientError("Request failed for an unknown reason")  # pragma: no cover
|
|
418
|
+
|
|
419
|
+
async def _post(self, path: str, body: BaseModel) -> Any:
    """POST *body*, serialised with ``model_dump_json()``, to *path* as JSON.

    Returns whatever ``_request_with_retries`` returns for the request.
    """
    payload = body.model_dump_json()
    json_headers = {"content-type": "application/json"}
    return await self._request_with_retries("POST", path, content=payload, headers=json_headers)
|
|
427
|
+
|
|
428
|
+
async def _post_empty(self, path: str) -> Any:
|
|
429
|
+
"""POST with an empty body (e.g. trigger endpoints)."""
|
|
430
|
+
return await self._request_with_retries(
|
|
431
|
+
"POST",
|
|
432
|
+
path,
|
|
433
|
+
headers={"content-type": "application/json"},
|
|
434
|
+
)
|
|
435
|
+
|
|
436
|
+
async def _get(self, path: str, *, params: dict[str, str] | None = None) -> Any:
|
|
437
|
+
"""GET request."""
|
|
438
|
+
return await self._request_with_retries("GET", path, params=params)
|
|
439
|
+
|
|
440
|
+
async def _delete(self, path: str) -> None:
|
|
441
|
+
"""DELETE request, ignoring a 204 No Content response."""
|
|
442
|
+
await self._request_with_retries("DELETE", path)
|
|
443
|
+
|
|
444
|
+
# ------------------------------------------------------------------
|
|
445
|
+
# Helper
|
|
446
|
+
# ------------------------------------------------------------------
|
|
447
|
+
|
|
448
|
+
@classmethod
|
|
449
|
+
def _serialize_arc(cls, arc: "ARC | dict[str, Any]") -> dict[str, Any]:
|
|
450
|
+
"""Serialize an ARC object to a plain RO-Crate JSON dict."""
|
|
451
|
+
if isinstance(arc, dict):
|
|
452
|
+
return arc
|
|
453
|
+
return cast(dict[str, Any], json.loads(arc.ToROCrateJsonString()))
|
|
454
|
+
|
|
455
|
+
@classmethod
def _parse_arc_response(cls, data: Any) -> ArcResult:
    """Validate *data* as an :class:`ArcResult`.

    Raises:
        ApiClientError: If *data* does not pass validation.
    """
    try:
        result = ArcResult.model_validate(data)
    except ValidationError as e:
        raise ApiClientError(f"Invalid ARC response from API: {e}") from e
    return result
|
|
461
|
+
|
|
462
|
+
@classmethod
def _parse_harvest_response(cls, data: Any) -> HarvestResult:
    """Validate *data* as a :class:`HarvestResult`.

    Raises:
        ApiClientError: If *data* does not pass validation.
    """
    try:
        result = HarvestResult.model_validate(data)
    except ValidationError as e:
        raise ApiClientError(f"Invalid harvest response from API: {e}") from e
    return result
|
|
468
|
+
|
|
469
|
+
# ------------------------------------------------------------------
|
|
470
|
+
# ARC endpoints (v3)
|
|
471
|
+
# ------------------------------------------------------------------
|
|
472
|
+
|
|
473
|
+
async def create_or_update_arc(
    self,
    rdi: str,
    arc: "ARC | dict[str, Any]",
) -> ArcResult:
    """Create or update one ARC via ``POST /v3/arcs``.

    The ARC is serialised to an RO-Crate dict, wrapped in a
    ``CreateArcRequest`` together with *rdi*, and posted; the response is
    validated as an :class:`ArcResult`.

    Args:
        rdi: RDI identifier.
        arc: ARC object or a pre-serialised RO-Crate JSON dict.

    Returns:
        :class:`ArcResult` with the result of the operation.
    """
    logger.info("Creating/updating ARC for RDI: %s", rdi)
    payload = CreateArcRequest(rdi=rdi, arc=self._serialize_arc(arc))
    response_data = await self._post("v3/arcs", payload)
    return self._parse_arc_response(response_data)
|
|
496
|
+
|
|
497
|
+
# ------------------------------------------------------------------
|
|
498
|
+
# Harvest endpoints (v3)
|
|
499
|
+
# ------------------------------------------------------------------
|
|
500
|
+
|
|
501
|
+
async def create_harvest(
    self,
    rdi: str,
    expected_datasets: int | None = None,
) -> HarvestResult:
    """Start a new harvest run via ``POST /v3/harvests``.

    Args:
        rdi: RDI identifier.
        expected_datasets: Optional hint about how many datasets will be submitted.

    Returns:
        :class:`HarvestResult` with the newly created harvest.
    """
    payload = CreateHarvestRequest(rdi=rdi, expected_datasets=expected_datasets)
    response_data = await self._post("v3/harvests", payload)
    return self._parse_harvest_response(response_data)
|
|
520
|
+
|
|
521
|
+
async def list_harvests(self, rdi: str | None = None) -> list[HarvestResult]:
    """List harvest runs via ``GET /v3/harvests``.

    Args:
        rdi: Optional RDI filter; sent as the ``rdi`` query parameter when
            non-empty.

    Returns:
        List of :class:`HarvestResult` objects.

    Raises:
        ApiClientError: If the response body is not a JSON array or one of its
            items fails validation.
    """
    params: dict[str, str] | None = {"rdi": rdi} if rdi else None
    data = await self._get("v3/harvests", params=params)

    # A non-list body (e.g. None from a 204, or a JSON object) would otherwise
    # surface as a raw TypeError or be iterated key by key.
    if not isinstance(data, list):
        raise ApiClientError(
            f"Invalid harvest list response from API: expected a JSON array, got {type(data).__name__}"
        )

    try:
        return [HarvestResult.model_validate(item) for item in data]
    except ValidationError as e:
        raise ApiClientError(f"Invalid harvest list response from API: {e}") from e
|
|
540
|
+
|
|
541
|
+
async def get_harvest(self, harvest_id: str) -> HarvestResult:
    """Fetch one harvest run via ``GET /v3/harvests/{harvest_id}``.

    Args:
        harvest_id: Harvest identifier.

    Returns:
        :class:`HarvestResult`.
    """
    endpoint = f"v3/harvests/{harvest_id}"
    response_data = await self._get(endpoint)
    return self._parse_harvest_response(response_data)
|
|
554
|
+
|
|
555
|
+
async def complete_harvest(self, harvest_id: str) -> HarvestResult:
    """Mark a harvest run as completed.

    Issues a body-less ``POST /v3/harvests/{harvest_id}/complete`` and parses
    the response via ``_parse_harvest_response``.

    Args:
        harvest_id: Harvest identifier.

    Returns:
        Updated :class:`HarvestResult`.
    """
    endpoint = f"v3/harvests/{harvest_id}/complete"
    return self._parse_harvest_response(await self._post_empty(endpoint))
|
|
568
|
+
|
|
569
|
+
async def cancel_harvest(self, harvest_id: str) -> None:
    """Cancel (delete) a harvest run.

    Issues ``DELETE /v3/harvests/{harvest_id}``; any response payload is
    discarded.

    Args:
        harvest_id: Harvest identifier.
    """
    endpoint = f"v3/harvests/{harvest_id}"
    await self._delete(endpoint)
|
|
578
|
+
|
|
579
|
+
async def submit_arc_in_harvest(
    self,
    harvest_id: str,
    arc: "ARC | dict[str, Any]",
) -> ArcResult:
    """Submit one ARC as part of an active harvest run.

    Issues ``POST /v3/harvests/{harvest_id}/arcs``. No RDI is sent with the
    request; the server resolves it from the harvest run.

    Args:
        harvest_id: Harvest identifier.
        arc: ARC object or a pre-serialised RO-Crate JSON dict; it is passed
            through ``_serialize_arc`` before being sent.

    Returns:
        :class:`ArcResult` with the result of the operation.
    """
    payload = SubmitHarvestArcRequest(arc=self._serialize_arc(arc))
    response = await self._post(f"v3/harvests/{harvest_id}/arcs", payload)
    return self._parse_arc_response(response)
|
|
600
|
+
|
|
601
|
+
async def harvest_arcs(
    self,
    rdi: str,
    arcs: "AsyncGenerator[ARC | dict[str, Any], None] | AsyncIterator[ARC | dict[str, Any]]",
    expected_datasets: int | None = None,
) -> HarvestResult:
    """Run a full harvest: create it, upload every ARC, then complete it.

    Steps:

    1. Create a new harvest for *rdi* via :meth:`create_harvest`.
    2. Hand *arcs* to ``_submit_arcs_parallel``, which submits them as part
       of that harvest and returns the number of skipped submissions.
    3. Call :meth:`complete_harvest` once submission has finished.

    ARC submission is best-effort: item-level errors are logged and skipped,
    and the harvest continues with the remaining items. Catastrophic errors
    (for example auth or harvest-state failures) abort the harvest.

    Args:
        rdi: RDI identifier for the harvest.
        arcs: Async generator or async iterator yielding ARC objects or
            pre-serialised RO-Crate dicts.
        expected_datasets: Optional hint about the total number of ARCs.

    Returns:
        :class:`HarvestResult` of the completed harvest.

    Raises:
        ApiClientError: On catastrophic HTTP or serialization errors. The
            harvest is cancelled before the exception propagates.

    Example::

        async def my_arcs() -> AsyncGenerator[dict, None]:
            for arc in source:
                yield arc

        async with ApiClient(config) as client:
            result = await client.harvest_arcs("my-rdi", my_arcs())
    """
    created = await self.create_harvest(rdi, expected_datasets=expected_datasets)
    harvest_id = created.harvest_id
    logger.info("[%s] Started harvest %s for RDI %s", rdi, harvest_id, rdi)

    try:
        skipped = await self._submit_arcs_parallel(harvest_id, arcs)
    except Exception:
        # Anything escaping the submission helper is treated as fatal:
        # cancel the harvest, then let the caller see the original error.
        logger.warning("[%s] Catastrophic error during ARC submission, cancelling harvest %s", rdi, harvest_id)
        await self._cancel_harvest_safely(rdi, harvest_id)
        raise
    else:
        if skipped > 0:
            logger.warning(
                "[%s] Harvest %s completed with %d skipped ARC submissions",
                rdi,
                harvest_id,
                skipped,
            )

        # Failures while completing are not covered by the cancel-on-error path above.
        completed = await self.complete_harvest(harvest_id)
        logger.info("[%s] Completed harvest %s", rdi, harvest_id)
        return completed
|
|
663
|
+
|
|
664
|
+
# ------------------------------------------------------------------
|
|
665
|
+
# Lifecycle
|
|
666
|
+
# ------------------------------------------------------------------
|
|
667
|
+
|
|
668
|
+
async def aclose(self) -> None:
    """Close the underlying HTTP client and release its connections.

    Does nothing when no client is currently held. After a successful
    close, ``self._client`` is reset to ``None`` so repeated calls are safe.
    """
    client = self._client
    if client is None:
        return

    logger.debug("Closing httpx.AsyncClient")
    await client.aclose()
    self._client = None
|
|
674
|
+
|
|
675
|
+
async def __aenter__(self) -> "ApiClient":
    """Enter the ``async with`` block and hand back this client instance."""
    return self
|
|
678
|
+
|
|
679
|
+
async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
    """Leave the ``async with`` block, closing the client via :meth:`aclose`.

    The exception arguments are not inspected, and ``None`` is returned, so
    an exception raised inside the block keeps propagating.
    """
    await self.aclose()
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Configuration module for the Middleware API Client."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Annotated
|
|
5
|
+
|
|
6
|
+
from pydantic import Field, field_validator
|
|
7
|
+
|
|
8
|
+
from middleware.shared.config.config_base import ConfigBase
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Config(ConfigBase):
    """Settings for the Middleware API Client.

    Extends :class:`ConfigBase` with the options needed to reach the
    Middleware API: the base URL, optional certificate paths for
    certificate-based authentication, and timeout, concurrency and retry
    tuning. The ``polling_*`` fields are flagged as deprecated.
    """

    api_url: Annotated[
        str,
        Field(description="Base URL of the Middleware API (e.g., https://api.example.com)"),
    ]
    client_cert_path: Annotated[
        Path | None,
        Field(description="Path to the client certificate file in PEM format (optional)"),
    ] = None
    client_key_path: Annotated[
        Path | None,
        Field(description="Path to the client private key file in PEM format (optional)"),
    ] = None
    ca_cert_path: Annotated[
        Path | None,
        Field(description="Path to the CA certificate file for server verification (optional)"),
    ] = None
    timeout: Annotated[
        float,
        Field(description="Request timeout in seconds", gt=0),
    ] = 30.0
    verify_ssl: Annotated[
        bool,
        Field(description="Enable SSL certificate verification"),
    ] = True
    follow_redirects: Annotated[
        bool,
        Field(description="Follow HTTP redirects for API requests"),
    ] = True
    max_concurrency: Annotated[
        int,
        Field(description="Maximum number of concurrent API requests across the ApiClient package", ge=1),
    ] = 10

    # Retry parameters
    max_retries: Annotated[
        int,
        Field(description="Maximum number of retries for transient HTTP errors", ge=0),
    ] = 3
    retry_backoff_factor: Annotated[
        float,
        Field(description="Backoff factor for retries", gt=0),
    ] = 2.0

    # Polling parameters (deprecated)
    polling_initial_delay: Annotated[
        float,
        Field(description="Initial delay in seconds between polling requests", gt=0, deprecated=True),
    ] = 1.0
    polling_max_delay: Annotated[
        float,
        Field(description="Maximum delay in seconds between polling requests", gt=0, deprecated=True),
    ] = 30.0
    polling_backoff_factor: Annotated[
        float,
        Field(description="Factor to increase delay between polls", gt=1.0, deprecated=True),
    ] = 1.5
    polling_timeout: Annotated[
        float,
        Field(description="Total timeout for polling in minutes", gt=0, deprecated=True),
    ] = 90.0

    @field_validator("api_url")
    @classmethod
    def ensure_trailing_slash(cls, v: str) -> str:
        """Return *v* unchanged if it ends with ``/``, otherwise with ``/`` appended."""
        return v if v.endswith("/") else f"{v}/"
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Stable public types exposed by the Middleware API Client.
|
|
2
|
+
|
|
3
|
+
These types are intentionally independent of the server-side API models so
|
|
4
|
+
that the client's public interface remains stable across server API version
|
|
5
|
+
changes. All mapping from server wire-format to these types happens inside
|
|
6
|
+
:class:`~middleware.api_client.ApiClient` and is not visible to consumers.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from enum import StrEnum
|
|
10
|
+
from typing import Annotated
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, Field
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ArcStatus(StrEnum):
|
|
16
|
+
"""Operation status of a single ARC submission."""
|
|
17
|
+
|
|
18
|
+
CREATED = "created"
|
|
19
|
+
UPDATED = "updated"
|
|
20
|
+
DELETED = "deleted"
|
|
21
|
+
REQUESTED = "requested"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ArcLifecycleStatus(StrEnum):
|
|
25
|
+
"""Lifecycle status of an ARC in the system."""
|
|
26
|
+
|
|
27
|
+
ACTIVE = "ACTIVE"
|
|
28
|
+
PROCESSING = "PROCESSING"
|
|
29
|
+
MISSING = "MISSING"
|
|
30
|
+
DELETED = "DELETED"
|
|
31
|
+
INVALID = "INVALID"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class HarvestStatus(StrEnum):
|
|
35
|
+
"""Status of a harvest run."""
|
|
36
|
+
|
|
37
|
+
RUNNING = "RUNNING"
|
|
38
|
+
COMPLETED = "COMPLETED"
|
|
39
|
+
FAILED = "FAILED"
|
|
40
|
+
CANCELLED = "CANCELLED"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ArcEventSummary(BaseModel):
    """One entry of the event log recorded against an ARC.

    All three fields are required strings.
    """

    timestamp: Annotated[
        str,
        Field(description="ISO 8601 timestamp of the event"),
    ]
    type: Annotated[
        str,
        Field(description="Event type identifier"),
    ]
    message: Annotated[
        str,
        Field(description="Human-readable event message"),
    ]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class ArcMetadata(BaseModel):
    """Metadata snapshot carried inside an ARC result.

    All fields are required; timestamps are kept as strings rather than
    parsed into ``datetime`` objects.
    """

    arc_hash: Annotated[
        str,
        Field(description="SHA-256 content hash of the ARC"),
    ]
    status: Annotated[
        ArcLifecycleStatus,
        Field(description="Lifecycle status"),
    ]
    first_seen: Annotated[
        str,
        Field(description="ISO 8601 timestamp of first submission"),
    ]
    last_seen: Annotated[
        str,
        Field(description="ISO 8601 timestamp of latest submission"),
    ]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class ArcResult(BaseModel):
    """Result of a single ARC submission.

    Returned by :meth:`~middleware.api_client.ApiClient.create_or_update_arc`
    and :meth:`~middleware.api_client.ApiClient.submit_arc_in_harvest`.

    This is the stable, client-facing type. The underlying server response
    model may change between server versions; the mapping layer inside
    :class:`~middleware.api_client.ApiClient` ensures this type stays
    compatible.
    """

    arc_id: Annotated[
        str,
        Field(description="ARC identifier"),
    ]
    status: Annotated[
        ArcStatus,
        Field(description="Operation status"),
    ]
    metadata: Annotated[
        ArcMetadata,
        Field(description="ARC metadata snapshot"),
    ]
    # A fresh empty list is created per instance when no events are supplied.
    events: Annotated[
        list[ArcEventSummary],
        Field(description="Event log entries"),
    ] = Field(default_factory=list)
    message: Annotated[
        str,
        Field(description="Human-readable result message"),
    ] = ""
    client_id: Annotated[
        str | None,
        Field(description="Authenticated client identifier"),
    ] = None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class HarvestResult(BaseModel):
    """Result of the harvest-related methods on :class:`~middleware.api_client.ApiClient`.

    This is the stable, client-facing type; it is deliberately kept separate
    from the server-side response models so that the client's public
    interface does not change when the server API does.
    """

    harvest_id: Annotated[
        str,
        Field(description="Unique harvest run identifier"),
    ]
    rdi: Annotated[
        str,
        Field(description="RDI identifier"),
    ]
    status: Annotated[
        HarvestStatus,
        Field(description="Current harvest status"),
    ]
    started_at: Annotated[
        str,
        Field(description="ISO 8601 start timestamp"),
    ]
    completed_at: Annotated[
        str | None,
        Field(description="ISO 8601 completion timestamp"),
    ] = None
    # A fresh empty dict is created per instance when no statistics are supplied.
    statistics: Annotated[
        dict,
        Field(description="Harvest statistics"),
    ] = Field(default_factory=dict)
    message: Annotated[
        str,
        Field(description="Human-readable result message"),
    ] = ""
    client_id: Annotated[
        str | None,
        Field(description="Authenticated client identifier"),
    ] = None
|
|
File without changes
|