ddharmon 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddharmon/__init__.py ADDED
@@ -0,0 +1,54 @@
1
+ """ddharmon — Python client for the BioMapper2 API.
2
+
3
+ Quick start::
4
+
5
+ from ddharmon import map_entity, map_entities, BioMapperClient
6
+
7
+ # Single lookup (synchronous)
8
+ result = map_entity("L-Histidine")
9
+ print(result.primary_curie) # RM:0129894
10
+ print(result.confidence_tier) # high
11
+
12
+ # Batch (synchronous, with progress bar)
13
+ results = map_entities(
14
+ [{"name": "L-Histidine"}, {"name": "Glucose"}],
15
+ progress=True,
16
+ )
17
+
18
+ # Async (in an async context)
19
+ async with BioMapperClient() as client:
20
+ result = await client.map_entity("L-Histidine")
21
+ """
22
+
23
+ from ddharmon.client import BioMapperClient
24
+ from ddharmon.exceptions import (
25
+ BioMapperAuthError,
26
+ BioMapperConfigError,
27
+ BioMapperError,
28
+ BioMapperRateLimitError,
29
+ BioMapperServerError,
30
+ BioMapperTimeoutError,
31
+ )
32
+ from ddharmon.mapper import map_entities, map_entity, summarize
33
+ from ddharmon.models import MappingResult, MappingSummary
34
+
35
+ __version__ = "0.1.0"
36
+
37
+ __all__ = [
38
+ # Client
39
+ "BioMapperClient",
40
+ # Sync helpers
41
+ "map_entity",
42
+ "map_entities",
43
+ "summarize",
44
+ # Models
45
+ "MappingResult",
46
+ "MappingSummary",
47
+ # Exceptions
48
+ "BioMapperError",
49
+ "BioMapperAuthError",
50
+ "BioMapperConfigError",
51
+ "BioMapperRateLimitError",
52
+ "BioMapperServerError",
53
+ "BioMapperTimeoutError",
54
+ ]
ddharmon/client.py ADDED
@@ -0,0 +1,264 @@
1
+ """Async HTTP client for the BioMapper2 API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import os
7
+ from typing import Any
8
+
9
+ import httpx
10
+
11
+ from ddharmon.exceptions import (
12
+ BioMapperAuthError,
13
+ BioMapperConfigError,
14
+ BioMapperRateLimitError,
15
+ BioMapperServerError,
16
+ BioMapperTimeoutError,
17
+ )
18
+ from ddharmon.models import MapEntityRequest, MappingResult
19
+
20
DEFAULT_BASE_URL = "https://biomapper.expertintheloop.io/api/v1"
DEFAULT_TIMEOUT = 30.0  # per-request timeout, in seconds
DEFAULT_RATE_LIMIT_DELAY = 0.3  # seconds between calls in batch mode


class BioMapperClient:
    """Async client for the BioMapper2 API.

    Handles authentication, request serialization, error mapping, and optional
    rate-limited batch processing. The underlying :class:`httpx.AsyncClient`
    is created when the context manager is entered and closed when it exits,
    so every request method must be called inside ``async with``.

    Usage (minimal)::

        async with BioMapperClient() as client:
            result = await client.map_entity("L-Histidine")
            print(result.primary_curie)  # "RM:0129894"

    Usage (with explicit key and hint)::

        async with BioMapperClient(api_key="sk-...") as client:
            result = await client.map_entity(
                name="4,6-DIOXOHEPTANOIC ACID",
                identifiers={"HMDB": "HMDB03349"},
            )

    Args:
        api_key: BioMapper API key. Defaults to ``BIOMAPPER_API_KEY`` env var.
        base_url: API root URL. Override for staging/local instances.
        timeout: Per-request timeout in seconds.
        httpx_kwargs: Extra kwargs forwarded to :class:`httpx.AsyncClient`.
            A ``headers`` entry is merged with the ``X-API-Key`` header that
            this client sets; the client's own API key always wins.

    Raises:
        BioMapperConfigError: If no API key is passed and the
            ``BIOMAPPER_API_KEY`` environment variable is unset or empty.
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str = DEFAULT_BASE_URL,
        timeout: float = DEFAULT_TIMEOUT,
        **httpx_kwargs: Any,
    ) -> None:
        resolved_key = api_key or os.getenv("BIOMAPPER_API_KEY")
        if not resolved_key:
            raise BioMapperConfigError(
                "No API key provided. Pass api_key= or set BIOMAPPER_API_KEY env var."
            )
        self._api_key = resolved_key
        # Endpoints are appended as "/<path>", so drop any trailing slash here.
        self._base_url = base_url.rstrip("/")
        self._timeout = timeout
        self._httpx_kwargs = httpx_kwargs
        # Created lazily in __aenter__; None means "not inside `async with`".
        self._client: httpx.AsyncClient | None = None

    # ------------------------------------------------------------------
    # Context manager
    # ------------------------------------------------------------------

    async def __aenter__(self) -> BioMapperClient:
        """Open the underlying HTTP client and return ``self``."""
        self._client = httpx.AsyncClient(**self._client_kwargs())
        return self

    async def __aexit__(self, *args: Any) -> None:
        """Close the underlying HTTP client, if one is open."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _client_kwargs(self) -> dict[str, Any]:
        """Build the keyword arguments for :class:`httpx.AsyncClient`.

        Caller-supplied ``headers`` are merged with the authentication header
        instead of being passed alongside it, which would raise ``TypeError``
        for a duplicated keyword argument.
        """
        kwargs: dict[str, Any] = dict(self._httpx_kwargs)
        caller_headers = kwargs.pop("headers", None)
        # Header names are case-insensitive: drop any caller-provided variant
        # of the auth header so exactly one X-API-Key value is sent.
        headers: dict[Any, Any] = {
            key: value
            for key, value in dict(caller_headers or {}).items()
            if str(key).lower() != "x-api-key"
        }
        headers["X-API-Key"] = self._api_key
        kwargs["headers"] = headers
        kwargs["timeout"] = self._timeout
        return kwargs

    @staticmethod
    def _split_record(record: dict[str, Any]) -> tuple[str, dict[str, str]]:
        """Extract ``(name, identifiers)`` from one batch record.

        A missing or ``None`` name becomes ``""`` rather than the literal
        string ``"None"``; a missing or ``None`` identifiers entry becomes an
        empty dict.
        """
        raw_name = record.get("name")
        name = "" if raw_name is None else str(raw_name)
        identifiers: dict[str, str] = dict(record.get("identifiers") or {})
        return name, identifiers

    @property
    def _http(self) -> httpx.AsyncClient:
        """Return the open HTTP client, or raise if used outside ``async with``."""
        if self._client is None:
            raise RuntimeError(
                "BioMapperClient must be used as an async context manager. "
                "Use `async with BioMapperClient() as client:`"
            )
        return self._client

    def _raise_for_status(self, response: httpx.Response) -> None:
        """Map HTTP status codes to typed exceptions.

        401/403 raise :class:`BioMapperAuthError`, 429 raises
        :class:`BioMapperRateLimitError`, and 5xx raises
        :class:`BioMapperServerError`. Any other status is delegated to
        ``response.raise_for_status()``.
        """
        code = response.status_code
        if code in (401, 403):
            raise BioMapperAuthError(
                f"Authentication failed (HTTP {code}). Check your API key."
            )
        if code == 429:
            retry_after: float | None = None
            if ra := response.headers.get("Retry-After"):
                try:
                    retry_after = float(ra)
                except ValueError:
                    # Non-numeric values are deliberately ignored; the error
                    # is still raised, just without a suggested wait.
                    pass
            raise BioMapperRateLimitError(
                "Rate limit exceeded (HTTP 429).", retry_after=retry_after
            )
        if code >= 500:
            # Only the first 200 characters of the body go into the message.
            raise BioMapperServerError(
                f"Server error (HTTP {code}): {response.text[:200]}",
                status_code=code,
            )
        response.raise_for_status()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def health_check(self) -> dict[str, Any]:
        """Verify connectivity and API readiness.

        Returns:
            The parsed health JSON, e.g.
            ``{"status": "healthy", "version": "0.1.0", "mapper_initialized": True}``.

        Raises:
            BioMapperAuthError: If the key is rejected.
            BioMapperServerError: If the service is not healthy.
            BioMapperTimeoutError: If the request times out.
        """
        try:
            response = await self._http.get(f"{self._base_url}/health")
        except httpx.TimeoutException as exc:
            raise BioMapperTimeoutError("Health check timed out") from exc
        self._raise_for_status(response)
        return dict(response.json())

    async def map_entity(
        self,
        name: str,
        entity_type: str = "biolink:SmallMolecule",
        identifiers: dict[str, str] | None = None,
        annotation_mode: str = "missing",
    ) -> MappingResult:
        """Map a single entity name to standardized knowledge-graph identifiers.

        Args:
            name: Compound or entity name to resolve.
            entity_type: Biolink entity type. Use ``"biolink:SmallMolecule"``
                for metabolites.
            identifiers: Optional pre-existing IDs used as resolver hints,
                e.g. ``{"HMDB": "HMDB00177"}``.
            annotation_mode: ``"missing"`` (default), ``"all"``, or ``"none"``.

        Returns:
            A :class:`~ddharmon.models.MappingResult` with resolved identifiers.

        Raises:
            BioMapperAuthError: If the API key is rejected.
            BioMapperRateLimitError: If the API signals throttling.
            BioMapperServerError: For unrecoverable 5xx errors.
            BioMapperTimeoutError: If the request times out.
        """
        hints = identifiers or {}
        payload = MapEntityRequest(
            name=name,
            entity_type=entity_type,
            identifiers=hints,
            options={"annotation_mode": annotation_mode},
        )

        # The HMDB hint is carried through to the result alongside the query.
        hmdb_hint: str | None = hints.get("HMDB")

        try:
            response = await self._http.post(
                f"{self._base_url}/map/entity",
                json=payload.model_dump(exclude_none=False),
            )
        except httpx.TimeoutException as exc:
            raise BioMapperTimeoutError(f"Request timed out for '{name}'") from exc

        self._raise_for_status(response)
        data: dict[str, Any] = dict(response.json())
        return MappingResult.from_api_response(data, query_name=name, hmdb_hint=hmdb_hint)

    async def map_entities(
        self,
        records: list[dict[str, Any]],
        rate_limit_delay: float = DEFAULT_RATE_LIMIT_DELAY,
        entity_type: str = "biolink:SmallMolecule",
        annotation_mode: str = "missing",
        progress: bool = False,
    ) -> list[MappingResult]:
        """Map a batch of entity records with rate limiting.

        Each record is a dict with at least a ``"name"`` key, and optionally
        ``"identifiers"`` (``{"HMDB": "HMDB00177"}``). Records are sent one at
        a time, in order, with a pause between consecutive calls.

        Args:
            records: List of ``{"name": str, "identifiers": dict}`` dicts.
            rate_limit_delay: Seconds to sleep between API calls. Default 0.3.
            entity_type: Biolink entity type for all records.
            annotation_mode: Annotation mode for all records.
            progress: Show a tqdm progress bar (requires ``ddharmon[notebook]``).
                If tqdm is not installed the batch runs without a bar.

        Returns:
            List of :class:`~ddharmon.models.MappingResult`, one per input record,
            in the same order. Failed records return a result with ``error`` set
            rather than raising.

        Example::

            async with BioMapperClient() as client:
                results = await client.map_entities(
                    [
                        {"name": "L-Histidine"},
                        {"name": "Glucose", "identifiers": {"HMDB": "HMDB00122"}},
                    ],
                    progress=True,
                )
        """
        iter_records: Any = records

        if progress:
            try:
                from tqdm.auto import tqdm

                iter_records = tqdm(records, desc="Mapping entities")
            except ImportError:
                pass  # silently degrade if tqdm not installed

        results: list[MappingResult] = []

        for index, record in enumerate(iter_records):
            # Sleep between calls only, never before the first one.
            if index > 0:
                await asyncio.sleep(rate_limit_delay)

            name, identifiers = self._split_record(record)

            try:
                result = await self.map_entity(
                    name=name,
                    entity_type=entity_type,
                    identifiers=identifiers or None,
                    annotation_mode=annotation_mode,
                )
            except Exception as exc:  # noqa: BLE001
                # Best-effort batch: record the failure and keep going so the
                # output stays aligned one-to-one with the input.
                result = MappingResult(
                    query_name=name,
                    hmdb_hint=identifiers.get("HMDB"),
                    error=str(exc),
                )

            results.append(result)

        return results
ddharmon/exceptions.py ADDED
@@ -0,0 +1,39 @@
1
+ """Typed exception hierarchy for ddharmon."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
class BioMapperError(Exception):
    """Base exception for all ddharmon errors."""


class BioMapperAuthError(BioMapperError):
    """Raised when the API rejects the API key (HTTP 401/403).

    A key that is missing altogether is reported by the client as
    :class:`BioMapperConfigError` instead.
    """


class BioMapperRateLimitError(BioMapperError):
    """Raised when the API signals rate limiting (HTTP 429).

    Attributes:
        retry_after: Suggested wait in seconds, if provided by the server.
    """

    def __init__(self, message: str, retry_after: float | None = None) -> None:
        super().__init__(message)
        self.retry_after = retry_after


class BioMapperServerError(BioMapperError):
    """Raised for unrecoverable 5xx responses from the API.

    Attributes:
        status_code: The HTTP status code returned by the server.
    """

    def __init__(self, message: str, status_code: int) -> None:
        super().__init__(message)
        self.status_code = status_code

    def __reduce__(self) -> tuple[object, ...]:
        """Support ``copy`` and ``pickle`` despite the required ``status_code``.

        The default exception reduction rebuilds the object from ``args``
        alone, i.e. ``cls(message)``, which fails because ``status_code`` has
        no default. Supplying both constructor arguments keeps instances
        transferable (for example across process boundaries).
        """
        message = self.args[0] if self.args else ""
        return (type(self), (message, self.status_code), dict(self.__dict__))


class BioMapperTimeoutError(BioMapperError):
    """Raised when a request exceeds the configured timeout."""


class BioMapperConfigError(BioMapperError):
    """Raised for invalid client configuration (missing API key, bad URL, etc.)."""
@@ -0,0 +1 @@
1
+ """Optional extras for ddharmon."""
@@ -0,0 +1,29 @@
1
+ """Metabolon-specific utilities for ddharmon.
2
+
3
+ Provides preprocessing and export helpers for Metabolon metabolomics data.
4
+ Requires ``ddharmon[metabolon]`` (pandas, openpyxl).
5
+ """
6
+
7
+ from ddharmon.extras.metabolon.export import (
8
+ flatten_results,
9
+ results_to_dataframe,
10
+ save_results,
11
+ )
12
+ from ddharmon.extras.metabolon.preprocessing import (
13
+ MetabolonRecord,
14
+ build_mapping_queue,
15
+ clean_compound_name,
16
+ extract_hmdb_id,
17
+ )
18
+
19
+ __all__ = [
20
+ # Preprocessing
21
+ "clean_compound_name",
22
+ "extract_hmdb_id",
23
+ "MetabolonRecord",
24
+ "build_mapping_queue",
25
+ # Export
26
+ "flatten_results",
27
+ "results_to_dataframe",
28
+ "save_results",
29
+ ]
@@ -0,0 +1,107 @@
1
+ """Export helpers for Metabolon mapping results.
2
+
3
+ Requires ``ddharmon[metabolon]`` (pandas, openpyxl).
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from ddharmon.models import MappingResult, MappingSummary
13
+
14
+
15
def flatten_results(results: list[MappingResult]) -> list[dict[str, Any]]:
    """Turn mapping results into flat row dicts ready for a DataFrame or CSV.

    Every row carries these columns, in this order::

        query_name, hmdb_hint, resolved, primary_curie, chosen_kg_id,
        confidence_score, confidence_tier,
        hmdb_ids, pubchem_ids, chebi_ids, refmet_ids, error

    Missing optional values are written as empty strings, and each ``*_ids``
    column is a ``;``-joined string of the identifiers returned by
    ``ids_for`` for that vocabulary.

    Args:
        results: Output of :func:`ddharmon.map_entities` or equivalent.

    Returns:
        List of flat dicts, one per input result.
    """

    def _row(item: MappingResult) -> dict[str, Any]:
        # One output row; a score of 0 is kept, only None turns into "".
        return {
            "query_name": item.query_name,
            "hmdb_hint": item.hmdb_hint or "",
            "resolved": item.resolved,
            "primary_curie": item.primary_curie or "",
            "chosen_kg_id": item.chosen_kg_id or "",
            "confidence_score": (
                "" if (score := item.confidence_score) is None else score
            ),
            "confidence_tier": item.confidence_tier,
            "hmdb_ids": ";".join(item.ids_for("HMDB")),
            # Prefer the PUBCHEM.COMPOUND vocabulary; fall back to PUBCHEM
            # only when the former yields nothing.
            "pubchem_ids": ";".join(
                item.ids_for("PUBCHEM.COMPOUND") or item.ids_for("PUBCHEM")
            ),
            "chebi_ids": ";".join(item.ids_for("CHEBI")),
            "refmet_ids": ";".join(item.ids_for("refmet_id")),
            "error": item.error or "",
        }

    return [_row(item) for item in results]
53
+
54
+
55
def results_to_dataframe(results: list[MappingResult]) -> Any:
    """Build a pandas DataFrame with one flattened row per mapping result.

    pandas is imported lazily so the rest of the package works without the
    optional ``ddharmon[metabolon]`` extra.

    Args:
        results: Mapping results from any ddharmon mapping function.

    Returns:
        ``pandas.DataFrame`` with one row per result.

    Raises:
        ImportError: If pandas is not installed.
    """
    try:
        import pandas
    except ImportError as missing:
        install_hint = (
            "pandas is required for results_to_dataframe. "
            "Install with: pip install 'ddharmon[metabolon]'"
        )
        raise ImportError(install_hint) from missing

    rows = flatten_results(results)
    return pandas.DataFrame(rows)
75
+
76
+
77
def save_results(
    results: list[MappingResult],
    summary: MappingSummary | None = None,
    json_path: str | Path | None = None,
    tsv_path: str | Path | None = None,
) -> None:
    """Save mapping results to JSON and / or TSV.

    Parent directories of either target are created as needed. When neither
    path is given, nothing is written.

    Args:
        results: Mapping results to export.
        summary: Optional :class:`~ddharmon.models.MappingSummary` to embed
            in the JSON output; ``None`` is written as JSON ``null``.
        json_path: If provided, write full detail (summary + results without
            their ``raw_response`` field) here, UTF-8 encoded.
        tsv_path: If provided, write flat TSV suitable for spreadsheet review.

    Raises:
        ImportError: If tsv_path is given but pandas is not installed.
    """
    if json_path is not None:
        json_file = Path(json_path)
        payload: dict[str, Any] = {
            # Test against None explicitly so a summary object that happens
            # to be falsy is still exported.
            "summary": summary.model_dump() if summary is not None else None,
            "mappings": [r.model_dump(exclude={"raw_response"}) for r in results],
        }
        json_file.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding keeps the file independent of the platform locale.
        with json_file.open("w", encoding="utf-8") as handle:
            # default=str stringifies any value json cannot encode natively.
            json.dump(payload, handle, indent=2, default=str)

    if tsv_path is not None:
        # Build the frame first so a missing pandas raises before any
        # directory is created.
        frame = results_to_dataframe(results)
        Path(tsv_path).parent.mkdir(parents=True, exist_ok=True)
        frame.to_csv(tsv_path, sep="\t", index=False)