ddharmon 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddharmon/__init__.py +54 -0
- ddharmon/client.py +264 -0
- ddharmon/exceptions.py +39 -0
- ddharmon/extras/__init__.py +1 -0
- ddharmon/extras/metabolon/__init__.py +29 -0
- ddharmon/extras/metabolon/export.py +107 -0
- ddharmon/extras/metabolon/preprocessing.py +239 -0
- ddharmon/mapper.py +117 -0
- ddharmon/models.py +218 -0
- ddharmon-0.1.0.dist-info/LICENSE +21 -0
- ddharmon-0.1.0.dist-info/METADATA +316 -0
- ddharmon-0.1.0.dist-info/RECORD +13 -0
- ddharmon-0.1.0.dist-info/WHEEL +4 -0
ddharmon/__init__.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""ddharmon — Python client for the BioMapper2 API.
|
|
2
|
+
|
|
3
|
+
Quick start::
|
|
4
|
+
|
|
5
|
+
from ddharmon import map_entity, map_entities, BioMapperClient
|
|
6
|
+
|
|
7
|
+
# Single lookup (synchronous)
|
|
8
|
+
result = map_entity("L-Histidine")
|
|
9
|
+
print(result.primary_curie) # RM:0129894
|
|
10
|
+
print(result.confidence_tier) # high
|
|
11
|
+
|
|
12
|
+
# Batch (synchronous, with progress bar)
|
|
13
|
+
results = map_entities(
|
|
14
|
+
[{"name": "L-Histidine"}, {"name": "Glucose"}],
|
|
15
|
+
progress=True,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Async (in an async context)
|
|
19
|
+
async with BioMapperClient() as client:
|
|
20
|
+
result = await client.map_entity("L-Histidine")
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from ddharmon.client import BioMapperClient
|
|
24
|
+
from ddharmon.exceptions import (
|
|
25
|
+
BioMapperAuthError,
|
|
26
|
+
BioMapperConfigError,
|
|
27
|
+
BioMapperError,
|
|
28
|
+
BioMapperRateLimitError,
|
|
29
|
+
BioMapperServerError,
|
|
30
|
+
BioMapperTimeoutError,
|
|
31
|
+
)
|
|
32
|
+
from ddharmon.mapper import map_entities, map_entity, summarize
|
|
33
|
+
from ddharmon.models import MappingResult, MappingSummary
|
|
34
|
+
|
|
35
|
+
__version__ = "0.1.0"
|
|
36
|
+
|
|
37
|
+
__all__ = [
|
|
38
|
+
# Client
|
|
39
|
+
"BioMapperClient",
|
|
40
|
+
# Sync helpers
|
|
41
|
+
"map_entity",
|
|
42
|
+
"map_entities",
|
|
43
|
+
"summarize",
|
|
44
|
+
# Models
|
|
45
|
+
"MappingResult",
|
|
46
|
+
"MappingSummary",
|
|
47
|
+
# Exceptions
|
|
48
|
+
"BioMapperError",
|
|
49
|
+
"BioMapperAuthError",
|
|
50
|
+
"BioMapperConfigError",
|
|
51
|
+
"BioMapperRateLimitError",
|
|
52
|
+
"BioMapperServerError",
|
|
53
|
+
"BioMapperTimeoutError",
|
|
54
|
+
]
|
ddharmon/client.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""Async HTTP client for the BioMapper2 API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import os
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
from ddharmon.exceptions import (
|
|
12
|
+
BioMapperAuthError,
|
|
13
|
+
BioMapperConfigError,
|
|
14
|
+
BioMapperRateLimitError,
|
|
15
|
+
BioMapperServerError,
|
|
16
|
+
BioMapperTimeoutError,
|
|
17
|
+
)
|
|
18
|
+
from ddharmon.models import MapEntityRequest, MappingResult
|
|
19
|
+
|
|
20
|
+
DEFAULT_BASE_URL = "https://biomapper.expertintheloop.io/api/v1"
DEFAULT_TIMEOUT = 30.0
DEFAULT_RATE_LIMIT_DELAY = 0.3  # seconds between calls in batch mode


class BioMapperClient:
    """Async client for the BioMapper2 API.

    Handles authentication, request serialization, error mapping, and optional
    rate-limited batch processing.

    Usage (minimal)::

        async with BioMapperClient() as client:
            result = await client.map_entity("L-Histidine")
            print(result.primary_curie)  # "RM:0129894"

    Usage (with explicit key and hint)::

        async with BioMapperClient(api_key="sk-...") as client:
            result = await client.map_entity(
                name="4,6-DIOXOHEPTANOIC ACID",
                identifiers={"HMDB": "HMDB03349"},
            )

    Args:
        api_key: BioMapper API key. Defaults to ``BIOMAPPER_API_KEY`` env var.
        base_url: API root URL. Override for staging/local instances.
        timeout: Per-request timeout in seconds.
        httpx_kwargs: Extra kwargs forwarded to :class:`httpx.AsyncClient`.
            A ``headers`` entry is merged with the ``X-API-Key`` header that
            the client always sends.

    Raises:
        BioMapperConfigError: If no API key is passed and the
            ``BIOMAPPER_API_KEY`` environment variable is unset or empty.
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str = DEFAULT_BASE_URL,
        timeout: float = DEFAULT_TIMEOUT,
        **httpx_kwargs: Any,
    ) -> None:
        resolved_key = api_key or os.getenv("BIOMAPPER_API_KEY")
        if not resolved_key:
            raise BioMapperConfigError(
                "No API key provided. Pass api_key= or set BIOMAPPER_API_KEY env var."
            )
        self._api_key = resolved_key
        # Endpoints are appended as "/health", "/map/entity", so drop any
        # trailing slash to avoid "//" in the final URL.
        self._base_url = base_url.rstrip("/")
        self._timeout = timeout
        self._httpx_kwargs = httpx_kwargs
        # Created in __aenter__ and torn down in __aexit__.
        self._client: httpx.AsyncClient | None = None

    # ------------------------------------------------------------------
    # Context manager
    # ------------------------------------------------------------------

    async def __aenter__(self) -> BioMapperClient:
        """Open the underlying :class:`httpx.AsyncClient` and return ``self``."""
        self._client = httpx.AsyncClient(**self._client_kwargs())
        return self

    async def __aexit__(self, *args: Any) -> None:
        """Close the underlying HTTP client, if one is open."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _client_kwargs(self) -> dict[str, Any]:
        """Build the keyword arguments used to construct the HTTP client.

        Caller-supplied ``headers`` (received through ``httpx_kwargs``) are
        merged with the ``X-API-Key`` header rather than being passed next to
        an explicit ``headers=`` argument, which would raise ``TypeError``
        for a duplicated keyword. The stored ``httpx_kwargs`` are not mutated.

        Returns:
            A new dict containing the forwarded kwargs plus ``headers`` and
            ``timeout``.
        """
        kwargs = dict(self._httpx_kwargs)
        extra_headers = kwargs.pop("headers", None) or {}
        # Drop any caller-provided API-key header (any letter case) so the
        # key resolved in __init__ is the only one sent.
        headers = {
            key: value
            for key, value in dict(extra_headers).items()
            if str(key).lower() != "x-api-key"
        }
        headers["X-API-Key"] = self._api_key
        kwargs["headers"] = headers
        kwargs["timeout"] = self._timeout
        return kwargs

    @property
    def _http(self) -> httpx.AsyncClient:
        """Return the open HTTP client.

        Raises:
            RuntimeError: If the client is used outside ``async with``.
        """
        if self._client is None:
            raise RuntimeError(
                "BioMapperClient must be used as an async context manager. "
                "Use `async with BioMapperClient() as client:`"
            )
        return self._client

    def _raise_for_status(self, response: httpx.Response) -> None:
        """Map HTTP status codes to typed exceptions.

        401/403 raise :class:`BioMapperAuthError`, 429 raises
        :class:`BioMapperRateLimitError` (carrying a numeric ``Retry-After``
        when the header parses as a float), and 5xx raises
        :class:`BioMapperServerError`. Any other status is delegated to
        ``response.raise_for_status()``.
        """
        code = response.status_code
        if code in (401, 403):
            raise BioMapperAuthError(
                f"Authentication failed (HTTP {code}). Check your API key."
            )
        if code == 429:
            retry_after: float | None = None
            if ra := response.headers.get("Retry-After"):
                try:
                    retry_after = float(ra)
                except ValueError:
                    # Non-numeric values (e.g. an HTTP date) are ignored;
                    # the error is still raised, just without a hint.
                    pass
            raise BioMapperRateLimitError(
                "Rate limit exceeded (HTTP 429).", retry_after=retry_after
            )
        if code >= 500:
            raise BioMapperServerError(
                # Cap the echoed body so a large error page does not flood
                # the exception message.
                f"Server error (HTTP {code}): {response.text[:200]}",
                status_code=code,
            )
        response.raise_for_status()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def health_check(self) -> dict[str, Any]:
        """Verify connectivity and API readiness.

        Returns:
            The parsed health JSON, e.g.
            ``{"status": "healthy", "version": "0.1.0", "mapper_initialized": True}``.

        Raises:
            BioMapperAuthError: If the key is rejected.
            BioMapperServerError: If the service is not healthy.
            BioMapperTimeoutError: If the request times out.
        """
        try:
            response = await self._http.get(f"{self._base_url}/health")
        except httpx.TimeoutException as exc:
            raise BioMapperTimeoutError("Health check timed out") from exc
        self._raise_for_status(response)
        return dict(response.json())

    async def map_entity(
        self,
        name: str,
        entity_type: str = "biolink:SmallMolecule",
        identifiers: dict[str, str] | None = None,
        annotation_mode: str = "missing",
    ) -> MappingResult:
        """Map a single entity name to standardized knowledge-graph identifiers.

        Args:
            name: Compound or entity name to resolve.
            entity_type: Biolink entity type. Use ``"biolink:SmallMolecule"``
                for metabolites.
            identifiers: Optional pre-existing IDs used as resolver hints,
                e.g. ``{"HMDB": "HMDB00177"}``.
            annotation_mode: ``"missing"`` (default), ``"all"``, or ``"none"``.

        Returns:
            A :class:`~ddharmon.models.MappingResult` with resolved identifiers.

        Raises:
            BioMapperAuthError: If the API key is rejected.
            BioMapperRateLimitError: If the API signals throttling.
            BioMapperServerError: For unrecoverable 5xx errors.
            BioMapperTimeoutError: If the request times out.
        """
        hints = identifiers or {}
        payload = MapEntityRequest(
            name=name,
            entity_type=entity_type,
            identifiers=hints,
            options={"annotation_mode": annotation_mode},
        )

        # Carried onto the result alongside the query name.
        hmdb_hint: str | None = hints.get("HMDB")

        try:
            response = await self._http.post(
                f"{self._base_url}/map/entity",
                json=payload.model_dump(exclude_none=False),
            )
        except httpx.TimeoutException as exc:
            raise BioMapperTimeoutError(f"Request timed out for '{name}'") from exc

        self._raise_for_status(response)
        data: dict[str, Any] = dict(response.json())
        return MappingResult.from_api_response(data, query_name=name, hmdb_hint=hmdb_hint)

    async def map_entities(
        self,
        records: list[dict[str, Any]],
        rate_limit_delay: float = DEFAULT_RATE_LIMIT_DELAY,
        entity_type: str = "biolink:SmallMolecule",
        annotation_mode: str = "missing",
        progress: bool = False,
    ) -> list[MappingResult]:
        """Map a batch of entity records with rate limiting.

        Each record is a dict with at least a ``"name"`` key, and optionally
        ``"identifiers"`` (``{"HMDB": "HMDB00177"}``). A missing or ``None``
        name is sent as the empty string.

        Args:
            records: List of ``{"name": str, "identifiers": dict}`` dicts.
            rate_limit_delay: Seconds to sleep between API calls. Default 0.3.
            entity_type: Biolink entity type for all records.
            annotation_mode: Annotation mode for all records.
            progress: Show a tqdm progress bar (requires ``ddharmon[notebook]``).
                If tqdm is not installed the batch runs without a bar.

        Returns:
            List of :class:`~ddharmon.models.MappingResult`, one per input record,
            in the same order. Failed records return a result with ``error`` set
            rather than raising.

        Example::

            async with BioMapperClient() as client:
                results = await client.map_entities(
                    [
                        {"name": "L-Histidine"},
                        {"name": "Glucose", "identifiers": {"HMDB": "HMDB00122"}},
                    ],
                    progress=True,
                )
        """
        iter_records: Any = records

        if progress:
            try:
                from tqdm.auto import tqdm

                iter_records = tqdm(records, desc="Mapping entities")
            except ImportError:
                pass  # silently degrade if tqdm not installed

        results: list[MappingResult] = []

        for i, record in enumerate(iter_records):
            # Sleep between calls only, never before the first one.
            if i > 0:
                await asyncio.sleep(rate_limit_delay)

            # An explicit None must not become the literal query "None".
            raw_name = record.get("name")
            name: str = "" if raw_name is None else str(raw_name)
            identifiers: dict[str, str] = dict(record.get("identifiers") or {})

            try:
                result = await self.map_entity(
                    name=name,
                    entity_type=entity_type,
                    identifiers=identifiers or None,
                    annotation_mode=annotation_mode,
                )
            except Exception as exc:  # noqa: BLE001
                # Deliberate best-effort: one failing record is reported in
                # place so the rest of the batch still runs.
                result = MappingResult(
                    query_name=name,
                    hmdb_hint=identifiers.get("HMDB"),
                    error=str(exc),
                )

            results.append(result)

        return results
|
ddharmon/exceptions.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Typed exception hierarchy for ddharmon."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BioMapperError(Exception):
    """Base exception for all ddharmon errors."""


class BioMapperAuthError(BioMapperError):
    """Raised when the API key is missing or rejected (HTTP 401/403)."""


class BioMapperRateLimitError(BioMapperError):
    """Raised when the API signals rate limiting (HTTP 429).

    Attributes:
        retry_after: Suggested wait in seconds, if provided by the server.
    """

    def __init__(self, message: str, retry_after: float | None = None) -> None:
        super().__init__(message)
        self.retry_after = retry_after


class BioMapperServerError(BioMapperError):
    """Raised for unrecoverable 5xx responses from the API.

    Attributes:
        status_code: The HTTP status code of the failing response.
    """

    def __init__(self, message: str, status_code: int) -> None:
        super().__init__(message)
        self.status_code = status_code

    def __reduce__(self) -> tuple[object, ...]:
        """Support ``copy`` and ``pickle`` despite the required ``status_code``.

        The default exception reduction rebuilds the instance from
        ``self.args`` alone, i.e. ``(message,)``, which fails because
        ``status_code`` has no default. Supplying both constructor arguments
        here keeps the error copyable and picklable.
        """
        message = self.args[0] if self.args else ""
        return (type(self), (message, self.status_code), dict(self.__dict__))


class BioMapperTimeoutError(BioMapperError):
    """Raised when a request exceeds the configured timeout."""


class BioMapperConfigError(BioMapperError):
    """Raised for invalid client configuration (missing API key, bad URL, etc.)."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Optional extras for ddharmon."""
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Metabolon-specific utilities for ddharmon.
|
|
2
|
+
|
|
3
|
+
Provides preprocessing and export helpers for Metabolon metabolomics data.
|
|
4
|
+
Requires ``ddharmon[metabolon]`` (pandas, openpyxl).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from ddharmon.extras.metabolon.export import (
|
|
8
|
+
flatten_results,
|
|
9
|
+
results_to_dataframe,
|
|
10
|
+
save_results,
|
|
11
|
+
)
|
|
12
|
+
from ddharmon.extras.metabolon.preprocessing import (
|
|
13
|
+
MetabolonRecord,
|
|
14
|
+
build_mapping_queue,
|
|
15
|
+
clean_compound_name,
|
|
16
|
+
extract_hmdb_id,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
# Preprocessing
|
|
21
|
+
"clean_compound_name",
|
|
22
|
+
"extract_hmdb_id",
|
|
23
|
+
"MetabolonRecord",
|
|
24
|
+
"build_mapping_queue",
|
|
25
|
+
# Export
|
|
26
|
+
"flatten_results",
|
|
27
|
+
"results_to_dataframe",
|
|
28
|
+
"save_results",
|
|
29
|
+
]
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""Export helpers for Metabolon mapping results.
|
|
2
|
+
|
|
3
|
+
Requires ``ddharmon[metabolon]`` (pandas, openpyxl).
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from ddharmon.models import MappingResult, MappingSummary
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def flatten_results(results: list[MappingResult]) -> list[dict[str, Any]]:
    """Flatten mapping results into plain row dicts for DataFrame / CSV use.

    Every row carries these columns, in this order::

        query_name, hmdb_hint, resolved, primary_curie, chosen_kg_id,
        confidence_score, confidence_tier,
        hmdb_ids, pubchem_ids, chebi_ids, refmet_ids, error

    Missing optional values become empty strings, and identifier lists are
    joined with ``";"``.

    Args:
        results: Output of :func:`ddharmon.map_entities` or equivalent.

    Returns:
        One flat dict per input result, in input order.
    """

    def _row(item: MappingResult) -> dict[str, Any]:
        # Only None is blanked, so a real score of 0 is kept.
        score = item.confidence_score
        if score is None:
            score = ""

        # Use the PUBCHEM.COMPOUND ids when there are any, else PUBCHEM.
        pubchem = item.ids_for("PUBCHEM.COMPOUND") or item.ids_for("PUBCHEM")

        return {
            "query_name": item.query_name,
            "hmdb_hint": item.hmdb_hint or "",
            "resolved": item.resolved,
            "primary_curie": item.primary_curie or "",
            "chosen_kg_id": item.chosen_kg_id or "",
            "confidence_score": score,
            "confidence_tier": item.confidence_tier,
            "hmdb_ids": ";".join(item.ids_for("HMDB")),
            "pubchem_ids": ";".join(pubchem),
            "chebi_ids": ";".join(item.ids_for("CHEBI")),
            "refmet_ids": ";".join(item.ids_for("refmet_id")),
            "error": item.error or "",
        }

    return [_row(item) for item in results]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def results_to_dataframe(results: list[MappingResult]) -> Any:
    """Build a pandas DataFrame from flattened mapping results.

    pandas is imported lazily so the core package works without the
    ``ddharmon[metabolon]`` extra installed.

    Args:
        results: Mapping results from any ddharmon mapping function.

    Returns:
        ``pandas.DataFrame`` with one row per result.

    Raises:
        ImportError: If pandas is not installed.
    """
    try:
        import pandas as pd
    except ImportError as err:
        message = (
            "pandas is required for results_to_dataframe. "
            "Install with: pip install 'ddharmon[metabolon]'"
        )
        raise ImportError(message) from err

    rows = flatten_results(results)
    return pd.DataFrame(rows)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def save_results(
    results: list[MappingResult],
    summary: MappingSummary | None = None,
    json_path: str | Path | None = None,
    tsv_path: str | Path | None = None,
) -> None:
    """Write mapping results to a JSON file, a TSV file, or both.

    Parent directories of either target are created when missing. Each
    output is written only if its path is given.

    Args:
        results: Mapping results to export.
        summary: Optional :class:`~ddharmon.models.MappingSummary` to embed
            in the JSON output.
        json_path: If provided, write the summary plus each result (without
            its ``raw_response`` field) here.
        tsv_path: If provided, write flat TSV suitable for spreadsheet review.

    Raises:
        ImportError: If tsv_path is given but pandas is not installed.
    """
    if json_path is not None:
        # Serialize everything before touching the filesystem.
        if summary:
            summary_payload = summary.model_dump()
        else:
            summary_payload = None
        mappings = [item.model_dump(exclude={"raw_response"}) for item in results]
        document: dict[str, Any] = {"summary": summary_payload, "mappings": mappings}

        json_target = Path(json_path)
        json_target.parent.mkdir(parents=True, exist_ok=True)
        with json_target.open("w") as handle:
            # default=str stringifies any value json cannot encode natively.
            json.dump(document, handle, indent=2, default=str)

    if tsv_path is not None:
        frame = results_to_dataframe(results)
        tsv_target = Path(tsv_path)
        tsv_target.parent.mkdir(parents=True, exist_ok=True)
        frame.to_csv(tsv_path, sep="\t", index=False)
|