netrias_client 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of netrias_client might be problematic.
- netrias_client/__init__.py +9 -0
- netrias_client/_adapter.py +288 -0
- netrias_client/_client.py +251 -0
- netrias_client/_config.py +95 -0
- netrias_client/_core.py +560 -0
- netrias_client/_discovery.py +437 -0
- netrias_client/_errors.py +33 -0
- netrias_client/_gateway_bypass.py +208 -0
- netrias_client/_http.py +126 -0
- netrias_client/_io.py +28 -0
- netrias_client/_logging.py +46 -0
- netrias_client/_models.py +72 -0
- netrias_client/_validators.py +173 -0
- netrias_client/scripts.py +313 -0
- netrias_client-0.0.1.dist-info/METADATA +222 -0
- netrias_client-0.0.1.dist-info/RECORD +19 -0
- netrias_client-0.0.1.dist-info/WHEEL +4 -0
- netrias_client-0.0.1.dist-info/entry_points.txt +5 -0
- netrias_client-0.0.1.dist-info/licenses/LICENSE +21 -0
netrias_client/_core.py
ADDED
@@ -0,0 +1,560 @@
"""Core harmonization workflow functions.

'why': unify sync/async paths via a single async implementation
"""
from __future__ import annotations

import asyncio
import json
import logging
import time
from collections.abc import Mapping, Sequence
from pathlib import Path
from typing import Final, TypeAlias, cast

import httpx

from ._errors import NetriasAPIUnavailable
from ._http import build_harmonize_payload, fetch_job_status, submit_harmonize_job
from ._io import stream_download_to_file
from ._models import HarmonizationResult, Settings
from ._validators import validate_manifest_path, validate_output_path, validate_source_path

JSONPrimitive: TypeAlias = str | int | float | bool | None
JSONValue: TypeAlias = JSONPrimitive | Mapping[str, "JSONValue"] | Sequence["JSONValue"]
JOB_POLL_INTERVAL_SECONDS: Final[float] = 3.0
_MESSAGE_KEYS: Final[tuple[str, ...]] = (
    "message",
    "detail",
    "error",
    "description",
    "statusMessage",
)


class HarmonizationJobError(RuntimeError):
    """Raised when the harmonization job fails before producing a result."""


async def _harmonize_async(
    settings: Settings,
    source_path: Path,
    manifest: Path | Mapping[str, object],
    output_path: Path | None = None,
    manifest_output_path: Path | None = None,
    logger: logging.Logger | None = None,
) -> HarmonizationResult:
    """Execute harmonization using the asynchronous job API."""

    logger = logger or logging.getLogger("netrias_client")
    csv_path = validate_source_path(source_path)
    manifest_input = _resolve_manifest(manifest, manifest_output_path)
    dest = validate_output_path(output_path, source_name=csv_path.stem, allow_versioning=True)

    started = time.perf_counter()
    status_label = "error"
    logger.info("harmonize start: file=%s", csv_path)

    try:
        payload = build_harmonize_payload(csv_path, manifest_input)
        job_payload = await _submit_job_response(
            base_url=settings.harmonization_url,
            api_key=settings.api_key,
            timeout=settings.timeout,
            payload=payload,
            csv_path=csv_path,
            logger=logger,
        )
        job_id = _require_job_id(job_payload, csv_path, logger)
        logger.info("harmonize job queued: file=%s job_id=%s", csv_path, job_id)
        final_payload = await _resolve_final_payload(
            base_url=settings.harmonization_url,
            api_key=settings.api_key,
            job_id=job_id,
            timeout=settings.timeout,
            csv_path=csv_path,
            logger=logger,
        )
        final_url = _require_final_url(final_payload, csv_path, logger)
    except HarmonizationJobError as exc:
        status_label = "failed"
        return HarmonizationResult(file_path=dest, status="failed", description=str(exc))
    else:
        result = await _download_final(final_url, dest, settings.timeout, csv_path, logger)
        status_label = result.status
        return result
    finally:
        elapsed = time.perf_counter() - started
        logger.info(
            "harmonize finished: file=%s status=%s duration=%.2fs",
            csv_path,
            status_label,
            elapsed,
        )


def harmonize(
    settings: Settings,
    source_path: Path,
    manifest: Path | Mapping[str, object],
    output_path: Path | None = None,
    manifest_output_path: Path | None = None,
    logger: logging.Logger | None = None,
) -> HarmonizationResult:
    """Sync wrapper: run the async harmonize workflow and block until completion."""

    return asyncio.run(
        _harmonize_async(
            settings=settings,
            source_path=source_path,
            manifest=manifest,
            output_path=output_path,
            manifest_output_path=manifest_output_path,
            logger=logger,
        )
    )


async def harmonize_async(
    settings: Settings,
    source_path: Path,
    manifest: Path | Mapping[str, object],
    output_path: Path | None = None,
    manifest_output_path: Path | None = None,
    logger: logging.Logger | None = None,
) -> HarmonizationResult:
    """Async counterpart to `harmonize` with identical validation and result semantics."""

    return await _harmonize_async(
        settings=settings,
        source_path=source_path,
        manifest=manifest,
        output_path=output_path,
        manifest_output_path=manifest_output_path,
        logger=logger,
    )


def _resolve_manifest(
    manifest: Path | Mapping[str, object], manifest_output_path: Path | None
) -> Path | Mapping[str, object]:
    if isinstance(manifest, Path):
        return _manifest_from_path(manifest, manifest_output_path)
    return _manifest_from_mapping(manifest, manifest_output_path)


def _manifest_from_path(
    manifest_path: Path, manifest_output_path: Path | None
) -> Path:
    validated = validate_manifest_path(manifest_path)
    if manifest_output_path is None or manifest_output_path == validated:
        return validated
    manifest_output_path.parent.mkdir(parents=True, exist_ok=True)
    _ = manifest_output_path.write_text(
        validated.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    return manifest_output_path


def _manifest_from_mapping(
    manifest: Mapping[str, object], manifest_output_path: Path | None
) -> Path | Mapping[str, object]:
    normalized = _normalize_manifest_mapping(manifest)
    if manifest_output_path is None:
        return normalized
    manifest_output_path.parent.mkdir(parents=True, exist_ok=True)
    _ = manifest_output_path.write_text(
        json.dumps(normalized, indent=2),
        encoding="utf-8",
    )
    return manifest_output_path


def _normalize_manifest_mapping(manifest: Mapping[str, object]) -> dict[str, object]:
    try:
        serialized = json.dumps(manifest)
    except TypeError as exc:  # pragma: no cover - guarded by tests
        raise ValueError("manifest mapping must be JSON-serializable") from exc
    return cast(dict[str, object], json.loads(serialized))


async def _submit_job_response(
    base_url: str,
    api_key: str,
    timeout: float,
    payload: bytes,
    csv_path: Path,
    logger: logging.Logger,
) -> Mapping[str, JSONValue]:
    response = await _submit_job_http(
        base_url=base_url,
        api_key=api_key,
        timeout=timeout,
        payload=payload,
        csv_path=csv_path,
        logger=logger,
    )
    _ensure_submit_success(response, csv_path, logger)
    payload_mapping = _json_mapping(response)
    if not payload_mapping:
        logger.error("harmonize submit response was not JSON: file=%s", csv_path)
        raise HarmonizationJobError("harmonization job response was not JSON")
    return payload_mapping


async def _submit_job_http(
    base_url: str,
    api_key: str,
    timeout: float,
    payload: bytes,
    csv_path: Path,
    logger: logging.Logger,
) -> httpx.Response:
    try:
        return await submit_harmonize_job(
            base_url=base_url,
            api_key=api_key,
            payload_gz=payload,
            timeout=timeout,
        )
    except httpx.TimeoutException as exc:
        logger.error("harmonize submit timeout: file=%s err=%s", csv_path, exc)
        raise HarmonizationJobError("harmonization submit request timed out") from exc
    except httpx.HTTPError as exc:
        logger.error("harmonize submit transport error: file=%s err=%s", csv_path, exc)
        raise NetriasAPIUnavailable(f"transport error: {exc}") from exc


def _ensure_submit_success(response: httpx.Response, csv_path: Path, logger: logging.Logger) -> None:
    if response.status_code < 400:
        return
    message, payload_for_log = _error_description(
        status=response.status_code,
        body_text=response.text,
        default="harmonization submit failed",
    )
    logger.error(
        "harmonize submit failed: file=%s status=%s body=%s",
        csv_path,
        response.status_code,
        _formatted_body(payload_for_log),
    )
    raise HarmonizationJobError(message)


def _require_job_id(
    payload: Mapping[str, JSONValue],
    csv_path: Path,
    logger: logging.Logger,
) -> str:
    job_id = _string_field(payload, "jobId")
    if job_id:
        return job_id
    logger.error("harmonize submit response missing jobId: file=%s body=%s", csv_path, payload)
    raise HarmonizationJobError("harmonization job response missing jobId")


async def _resolve_final_payload(
    base_url: str,
    api_key: str,
    job_id: str,
    timeout: float,
    csv_path: Path,
    logger: logging.Logger,
) -> Mapping[str, JSONValue]:
    started = time.monotonic()
    deadline = started + timeout
    poll_interval = max(1.0, min(JOB_POLL_INTERVAL_SECONDS, timeout / 60 if timeout else JOB_POLL_INTERVAL_SECONDS))

    while time.monotonic() < deadline:
        elapsed = time.monotonic() - started
        response = await _job_status_http(
            base_url=base_url,
            api_key=api_key,
            job_id=job_id,
            timeout=timeout,
            csv_path=csv_path,
            logger=logger,
        )

        payload = _interpret_job_status(response, csv_path, logger)
        if payload is None:
            logger.info(
                "harmonize job polling: file=%s job_id=%s status=pending elapsed=%.2fs",
                csv_path,
                job_id,
                elapsed,
            )
            await asyncio.sleep(poll_interval)
            continue
        logger.info(
            "harmonize job polling: file=%s job_id=%s status=%s elapsed=%.2fs",
            csv_path,
            job_id,
            payload.get("status"),
            elapsed,
        )
        return payload

    total_elapsed = time.monotonic() - started
    logger.error("harmonize job polling timed out: file=%s elapsed=%.2fs", csv_path, total_elapsed)
    raise HarmonizationJobError("harmonization job polling timed out")


async def _job_status_http(
    base_url: str,
    api_key: str,
    job_id: str,
    timeout: float,
    csv_path: Path,
    logger: logging.Logger,
) -> httpx.Response:
    try:
        return await fetch_job_status(
            base_url=base_url,
            api_key=api_key,
            job_id=job_id,
            timeout=timeout,
        )
    except httpx.TimeoutException as exc:
        logger.error("harmonize job status timeout: file=%s err=%s", csv_path, exc)
        raise HarmonizationJobError("harmonization job status timed out") from exc
    except httpx.HTTPError as exc:
        logger.error("harmonize job status transport error: file=%s err=%s", csv_path, exc)
        raise NetriasAPIUnavailable(f"transport error: {exc}") from exc


def _interpret_job_status(response: httpx.Response, csv_path: Path, logger: logging.Logger) -> Mapping[str, JSONValue] | None:
    if response.status_code == 404:
        return None

    payload = _validated_status_payload(response, csv_path, logger)
    state = _job_state(payload)
    if state == "FAILED":
        message = _job_failure_message(payload)
        logger.error("harmonize job failed: file=%s message=%s", csv_path, message)
        raise HarmonizationJobError(message)
    if state == "SUCCEEDED":
        return payload
    return None


def _validated_status_payload(response: httpx.Response, csv_path: Path, logger: logging.Logger) -> Mapping[str, JSONValue]:
    if response.status_code >= 400:
        message, payload_for_log = _error_description(
            status=response.status_code,
            body_text=response.text,
            default="harmonization job status failed",
        )
        logger.error(
            "harmonize job status failed: file=%s status=%s body=%s",
            csv_path,
            response.status_code,
            _formatted_body(payload_for_log),
        )
        raise HarmonizationJobError(message)

    payload = _json_mapping(response)
    if not payload:
        logger.error("harmonize job status response was not JSON: file=%s", csv_path)
        raise HarmonizationJobError("harmonization job status response was not JSON")
    return payload


def _job_state(payload: Mapping[str, JSONValue]) -> str:
    status_value = (_string_field(payload, "status") or "").upper()
    if status_value == "SUCCEEDED":
        return "SUCCEEDED"
    if status_value == "FAILED":
        return "FAILED"
    return "PENDING"


def _require_final_url(
    payload: Mapping[str, JSONValue],
    csv_path: Path,
    logger: logging.Logger,
) -> str:
    final_url = _string_field(payload, "finalUrl")
    if final_url:
        return final_url
    logger.error("harmonize job missing finalUrl: file=%s payload=%s", csv_path, payload)
    raise HarmonizationJobError("harmonization job completed without a download URL")


async def _download_final(
    final_url: str,
    dest: Path,
    timeout: float,
    csv_path: Path,
    logger: logging.Logger,
) -> HarmonizationResult:
    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(timeout)) as client:
            async with client.stream("GET", final_url) as response:
                if 200 <= response.status_code < 300:
                    _ = await stream_download_to_file(response, dest)
                    logger.info("harmonize complete: file=%s -> %s", csv_path, dest)
                    return HarmonizationResult(file_path=dest, status="succeeded", description="harmonization succeeded")

                body_bytes = await response.aread()
                description = _download_error_message(response.status_code, body_bytes)
                logger.error(
                    "harmonize download failed: file=%s status=%s body=%s",
                    csv_path,
                    response.status_code,
                    _formatted_body(_payload_for_logging(body_bytes)),
                )
                return HarmonizationResult(file_path=dest, status="failed", description=description)
    except httpx.TimeoutException as exc:
        logger.error("harmonize download timeout: file=%s err=%s", csv_path, exc)
        return HarmonizationResult(file_path=dest, status="timeout", description="download timed out")
    except httpx.HTTPError as exc:
        logger.error("harmonize download transport error: file=%s err=%s", csv_path, exc)
        raise NetriasAPIUnavailable(f"transport error: {exc}") from exc


def _error_description(status: int, body_text: str, default: str) -> tuple[str, JSONValue | str]:
    parsed = _try_parse_json(body_text)
    message = _message_from_mapping(parsed if isinstance(parsed, Mapping) else None)
    if not message:
        hint = _failure_hint(status)
        if hint:
            message = hint
    description = message or default
    payload_for_log: JSONValue | str = parsed if parsed is not None else body_text
    return description, payload_for_log


def _json_mapping(response: httpx.Response) -> Mapping[str, JSONValue]:
    try:
        data = cast(object, response.json())
    except (json.JSONDecodeError, ValueError):
        return {}
    if isinstance(data, Mapping):
        return cast(Mapping[str, JSONValue], data)
    return {}


def _string_field(payload: Mapping[str, JSONValue], key: str) -> str | None:
    value = payload.get(key)
    if isinstance(value, str):
        stripped = value.strip()
        if stripped:
            return stripped
    return None


def _job_failure_message(payload: Mapping[str, JSONValue]) -> str:
    direct = _message_from_mapping(payload)
    if direct:
        return direct
    for key in ("statusReason", "failureReason", "errorMessage"):
        text = _string_field(payload, key)
        if text:
            return text
    return "harmonization job failed"


def _download_error_message(status: int, body: bytes) -> str:
    payload = _payload_for_logging(body)
    message = _message_from_mapping(payload if isinstance(payload, Mapping) else None)
    if message:
        return message
    hint = _failure_hint(status)
    if hint:
        return hint
    return f"harmonization download failed (HTTP {status})"


def _message_from_mapping(payload: Mapping[str, JSONValue] | None) -> str | None:
    direct = _direct_message(payload)
    if direct:
        return direct
    return _message_from_body_field(payload)


def _direct_message(payload: Mapping[str, JSONValue] | None) -> str | None:
    if payload is None:
        return None
    for key in _MESSAGE_KEYS:
        text = _coerce_message(payload.get(key))
        if text:
            return text
    return None


def _message_from_body_field(payload: Mapping[str, JSONValue] | None) -> str | None:
    body_mapping = _body_mapping(payload)
    if body_mapping is None:
        return None
    return _message_from_mapping(body_mapping)


def _coerce_message(value: JSONValue | None) -> str | None:
    if isinstance(value, str):
        stripped = value.strip()
        if stripped:
            return stripped
    return None


def _body_mapping(payload: Mapping[str, JSONValue] | None) -> Mapping[str, JSONValue] | None:
    if payload is None:
        return None
    body = payload.get("body")
    if isinstance(body, str):
        parsed = _try_parse_json(body)
        return parsed if isinstance(parsed, Mapping) else None
    if isinstance(body, Mapping):
        return cast(Mapping[str, JSONValue], body)
    return None


def _payload_for_logging(body: bytes) -> JSONValue | str:
    text = body.decode("utf-8", errors="replace")
    parsed = _try_parse_json(text)
    return parsed if parsed is not None else text


def _failure_hint(status: int) -> str | None:
    if status in {401, 403}:
        return "harmonization request was rejected (check API credentials and permissions)"
    if status == 404:
        return "harmonization endpoint not found (confirm base URL/path)"
    if 500 <= status < 600:
        return "harmonization service encountered an internal error"
    return None


def _formatted_body(payload: JSONValue | str) -> str:
    if isinstance(payload, str):
        return _formatted_string_body(payload)
    if isinstance(payload, (dict, list)):
        return _render_json(payload)
    return _truncate(str(payload))


def _formatted_string_body(raw: str) -> str:
    parsed = _try_parse_json(raw)
    if isinstance(parsed, (dict, list)):
        return _render_json(parsed)
    return _truncate(raw)


def _try_parse_json(raw: str) -> JSONValue | None:
    try:
        return cast(JSONValue, json.loads(raw))
    except Exception:
        return None


def _render_json(data: Mapping[str, JSONValue] | Sequence[JSONValue]) -> str:
    return _truncate(json.dumps(data, indent=2, sort_keys=True))


def _truncate(text: str, limit: int = 2000) -> str:
    if len(text) <= limit:
        return text
    return f"{text[: limit - 1]}…"
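For orientation, a minimal usage sketch of the `harmonize` and `harmonize_async` entry points defined above. It is illustrative only: the `Settings` constructor arguments, the example URL and file names, and the direct imports from the private `_core` and `_models` modules are assumptions (this diff does not show what `netrias_client/__init__.py` re-exports).

```python
# Hypothetical usage sketch; the field names mirror the attributes read by
# _harmonize_async (settings.api_key, settings.harmonization_url,
# settings.timeout), but the Settings constructor itself is assumed.
import asyncio
from pathlib import Path

from netrias_client._core import harmonize, harmonize_async
from netrias_client._models import Settings

settings = Settings(
    api_key="YOUR_API_KEY",
    harmonization_url="https://harmonization.example.invalid",  # placeholder
    timeout=300.0,  # seconds; bounds both HTTP calls and the job-polling loop
)

# Synchronous path: submits the job, polls until SUCCEEDED/FAILED or the
# deadline passes, then streams the result file to the output path.
result = harmonize(
    settings,
    source_path=Path("samples.csv"),
    manifest=Path("manifest.json"),  # or a JSON-serializable mapping
    output_path=Path("samples.harmonized.csv"),
)
print(result.status, result.file_path, result.description)


# Asynchronous path with identical validation and result semantics.
async def main() -> None:
    outcome = await harmonize_async(
        settings,
        source_path=Path("samples.csv"),
        manifest={"target_schema": "example"},  # hypothetical manifest mapping
    )
    print(outcome.status)


asyncio.run(main())
```

Note that `harmonize` itself calls `asyncio.run`, so the synchronous wrapper cannot be used from inside an already-running event loop; that is presumably why `harmonize_async` is exposed alongside it.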