hponorm 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hponorm-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) Tim Hearn 2026
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
hponorm-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.4
2
+ Name: hponorm
3
+ Version: 1.0.0
4
+ Summary: Interactively validate and normalise GA4GH phenopacket phenotypic-feature terms against the HPO ontology
5
+ Requires-Python: >=3.9
6
+ License-File: LICENSE
7
+ Provides-Extra: recommended
8
+ Requires-Dist: rapidfuzz>=3.0; extra == "recommended"
9
+ Requires-Dist: rich>=13.0; extra == "recommended"
10
+ Dynamic: license-file
@@ -0,0 +1,156 @@
1
+ # hponorm
2
+
3
+ Interactively **validate and normalise the phenotypic-feature terms** in GA4GH
4
+ phenopacket / family JSON files against the **HPO** (Human Phenotype Ontology).
5
+
6
+ It's the sibling of `mondonorm` (which does disease terms → MONDO) and shares the
7
+ same look and feel. This one targets `phenotypicFeatures[].type` and HPO.
8
+
9
+ Your generator currently emits phenotype terms with an empty `id` and a
10
+ free-text label:
11
+
12
+ ```json
13
+ "phenotypicFeatures": [ { "type": { "id": "", "label": "long qt interval" } } ]
14
+ ```
15
+
16
+ `hponorm` walks the file, finds every phenotypic feature, suggests the most
17
+ likely HPO terms for each label, lets you pick one (or type an HP id yourself),
18
+ and writes a normalised copy:
19
+
20
+ ```json
21
+ "phenotypicFeatures": [ { "type": { "id": "HP:0001657", "label": "Prolonged QT interval" } } ]
22
+ ```
23
+
24
+ ## How suggestions are sourced
25
+
26
+ Two interchangeable backends, plus an `auto` mode that chooses between them; pick with `--backend`:
27
+
28
+ | Backend | What it uses | Network |
29
+ |---------|--------------|---------|
30
+ | `ols` | EBI Ontology Lookup Service v4 (live HPO) | required |
31
+ | `local` | A local `hp.obo` / `hp.json`, downloaded + cached on first use | only for the one-time download |
32
+ | `auto` | OLS if reachable, otherwise local (the default) | preferred |
33
+
34
+ The official ontology file can be fetched automatically (from
35
+ `purl.obolibrary.org/obo/hp.obo`) or supplied with `--hpo-file`.
36
+ A tiny **illustrative** offline sample (`hponorm/data/hpo-sample.obo`, 8 terms)
37
+ ships with the package so you can try the tool with no network — it is **not** a
38
+ substitute for the real ontology.
39
+
40
+ ## Install
41
+
42
+ ```bash
43
+ pip install -e . # installs the `hponorm` command
44
+ pip install -e ".[recommended]" # + rapidfuzz (better matching) and rich (nicer UI)
45
+ ```
46
+
47
+ Both extras are optional: without `rapidfuzz` it falls back to stdlib `difflib`;
48
+ without `rich` it prints plain text.
49
+
50
+ ## Quick start
51
+
52
+ ```bash
53
+ # Online (auto-detects OLS):
54
+ hponorm test3_phenopackets.json
55
+
56
+ # Fully offline against a downloaded ontology:
57
+ hponorm myfile.json --backend local --hpo-file /path/to/hp.obo
58
+
59
+ # Try it offline with the bundled sample (diabetes / long QT / a few others):
60
+ hponorm myfile.json --backend local --hpo-file hponorm/data/hpo-sample.obo
61
+
62
+ # Equivalent without installing:
63
+ python -m hponorm myfile.json
64
+ ```
65
+
66
+ ## The interactive review
67
+
68
+ Each **distinct** label is reviewed once and the decision is applied to every
69
+ occurrence (so "diabetes" appearing 8 times is one question, not eight). For
70
+ each label you see a ranked table and a prompt:
71
+
72
+ ```
73
+ Select [#, s words, h HP:id, r, k, x, ?, q]:
74
+ <number> select that suggestion
75
+ s <words> search again with different words
76
+ h HP:id enter an HPO id manually (validated, canonical label fetched)
77
+ r reuse a decision remembered from earlier in this session
78
+ k keep the current term unchanged
79
+ x skip this label (leave it unmapped)
80
+ ? help
81
+ q finish now: apply decisions made so far and save
82
+ ```
83
+
84
+ If a feature already has an `id`, it is validated: the tool reports whether the
85
+ id resolves in HPO and whether its canonical label matches the existing label.
86
+ A feature's `excluded` flag (a negated phenotype) is preserved untouched — only
87
+ `type.id` / `type.label` are changed.
88
+
89
+ ## Output
90
+
91
+ For `input.json` (unless you pass `--out` or `--in-place`):
92
+
93
+ * `input.normalized.json` — the normalised phenopacket (only `type.id` /
94
+ `type.label` are changed; everything else, including the pedigree and any
95
+ `diseases` terms, is left exactly as-is).
96
+ * `input.normalized.json.mapping.json` — a decision log (label → HP id, count).
97
+
98
+ Because `hponorm` only touches `phenotypicFeatures`, you can run it alongside
99
+ `mondonorm` (which only touches `diseases`) on the same file without conflict.
100
+
101
+ ## Useful options
102
+
103
+ ```
104
+ --out PATH output path (single input file only)
105
+ --in-place overwrite the input file(s)
106
+ --backend {auto,ols,local}
107
+ --hpo-file PATH local hp.obo or hp.json
108
+ --cache-dir DIR where the downloaded HPO + parsed index are cached
109
+ --update re-download / re-parse HPO
110
+ --no-download never download (local backend must find a file)
111
+ --no-online never use OLS, even in auto mode
112
+ --limit N suggestions shown per label (default 8)
113
+ --mapping FILE load/save remembered label->term decisions across runs
114
+ --auto-remembered auto-apply remembered labels without prompting
115
+ --no-color plain text output
116
+ ```
117
+
118
+ Process several files in one go; remembered decisions carry across them:
119
+
120
+ ```bash
121
+ hponorm *.json --mapping team-hpo-map.json
122
+ # next time, reuse without re-typing:
123
+ hponorm new/*.json --mapping team-hpo-map.json --auto-remembered
124
+ ```
125
+
126
+ ## Use as a library
127
+
128
+ ```python
129
+ from hponorm import HpoIndex, OlsClient, Suggester
130
+ from hponorm import phenopackets as pp
131
+
132
+ sug = Suggester(OlsClient(), name="ols") # or Suggester(HpoIndex.load(...))
133
+ for c in sug.suggest("long qt interval", limit=5):
134
+ print(c.id, c.label, c.score)
135
+
136
+ data = pp.load("myfile.json")
137
+ for ref in pp.find_phenotypic_features(data):
138
+ print(ref.path, ref.label, ref.id, "excluded" if ref.excluded else "")
139
+ ```
140
+
141
+ ## Getting the full HPO ontology
142
+
143
+ * Browser / search: https://hpo.jax.org
144
+ * Direct files: `http://purl.obolibrary.org/obo/hp.obo` or `.../hp.json`
145
+ * Releases: https://github.com/obophenotype/human-phenotype-ontology/releases
146
+
147
+ Phenopackets reference: https://phenopacket-schema.readthedocs.io
148
+ (phenotypic features use HPO `OntologyClass` values in `PhenotypicFeature.type`).
149
+
150
+ ## A note on the bundled sample's ids
151
+
152
+ The HP ids in `hpo-sample.obo` were checked against HPO browsers (e.g. Diabetes
153
+ mellitus HP:0000819, Prolonged QT interval HP:0001657, Seizure HP:0001250,
154
+ Microcephaly HP:0000252, Short stature HP:0004322, Global developmental delay
155
+ HP:0001263). It is still only a tiny demo fixture — verify against the live HPO
156
+ for real curation work.
@@ -0,0 +1,390 @@
1
+ """Interactive command-line tool to validate and normalise the phenotypic
2
+ feature terms in GA4GH phenopacket / family JSON files against the HPO ontology.
3
+
4
+ Run: python -m hponorm FILE.json [FILE2.json ...]
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import argparse
9
+ import json
10
+ from pathlib import Path
11
+
12
+ from . import phenopackets as pp
13
+ from .hpo import HpoIndex, normalise_curie
14
+ from .ols import OlsClient
15
+ from .suggest import Candidate, Suggester
16
+
17
+ # --------------------------------------------------------------------- pretty
18
+ try:
19
+ from rich.console import Console
20
+ from rich.table import Table
21
+
22
+ _console = Console()
23
+ _HAVE_RICH = True
24
+ except Exception: # pragma: no cover
25
+ _console = None
26
+ _HAVE_RICH = False
27
+
28
+
29
class UI:
    """Small output facade: rich markup when available and wanted, plain text otherwise."""

    def __init__(self, color: bool = True):
        # Rich rendering needs both the optional dependency and the caller's consent.
        self.rich = _HAVE_RICH and color

    def rule(self, text: str = "") -> None:
        """Print a section divider, titled with *text* when given."""
        if self.rich:
            _console.rule(f"[bold]{text}")
            return
        print("\n" + "=" * 70)
        if text:
            print(text)
            print("-" * 70)

    def say(self, text: str = "") -> None:
        """Emit one line through the active renderer."""
        if self.rich:
            _console.print(text)
        else:
            print(text)

    def warn(self, text: str) -> None:
        """Emit a warning line."""
        message = f"[yellow]! {text}[/yellow]" if self.rich else f"! {text}"
        self.say(message)

    def ok(self, text: str) -> None:
        """Emit a success line."""
        message = f"[green]\u2713 {text}[/green]" if self.rich else f"\u2713 {text}"
        self.say(message)

    def err(self, text: str) -> None:
        """Emit an error line."""
        message = f"[red]\u2717 {text}[/red]" if self.rich else f"\u2717 {text}"
        self.say(message)

    def candidates(self, cands: list[Candidate]) -> None:
        """Show the ranked suggestions, numbered from 1."""
        if not cands:
            self.warn("No suggestions found. Try 's <better search words>' or 'h <HP:id>'.")
            return

        if not self.rich:
            for number, cand in enumerate(cands, 1):
                print(f" {number:>2}. {cand.id} {cand.label} (score {cand.score:g}, {cand.matched_on})")
                if cand.short_def(80):
                    print(f" {cand.short_def(80)}")
            return

        table = Table(show_header=True, header_style="bold cyan", box=None, pad_edge=False)
        column_specs = (
            ("#", {"justify": "right"}),
            ("HPO id", {"style": "magenta", "no_wrap": True}),
            ("Label", {"style": "white"}),
            ("Score", {"justify": "right"}),
            ("Matched on", {"style": "dim"}),
            ("Definition", {"style": "dim"}),
        )
        for heading, options in column_specs:
            table.add_column(heading, **options)
        for number, cand in enumerate(cands, 1):
            table.add_row(
                str(number),
                cand.id,
                cand.label,
                f"{cand.score:g}",
                cand.matched_on,
                cand.short_def(70),
            )
        _console.print(table)

    def prompt(self, text: str) -> str:
        """Read one line of input; end-of-input is treated as the 'q' (quit) command."""
        try:
            answer = input(text)
        except EOFError:
            answer = "q"
        return answer
80
+
81
+
82
# Help text printed for '?' / 'help'. It must list every command that the
# selection prompt advertises ("#, s words, h HP:id, r, k, x, ?, q").
HELP = """\
Choices:
  <number>   select that suggestion
  s <words>  search again with different words
  h <HP:id>  enter an HPO id manually (e.g. h HP:0001657) - it is validated
  r          reuse the decision remembered for this label (if there is one)
  k          keep the current term unchanged
  x          skip this label (leave it unmapped)
  ?          show this help
  q          finish now: apply decisions made so far and save
"""
92
+
93
+
94
def build_suggester(args, ui: UI) -> Suggester:
    """Create the suggestion backend selected by ``args.backend``.

    ``ols`` always uses the online client, ``local`` always uses the local
    index, and any other value ("auto") uses OLS when it is allowed and
    reachable, falling back to the local index otherwise.
    """

    def local_suggester() -> Suggester:
        # Load (and possibly download) the local ontology, reporting progress via the UI.
        index = HpoIndex.load(
            args.hpo_file,
            allow_download=not args.no_download,
            update=args.update,
            cache_dir=args.cache_dir,
            log=ui.say,
        )
        ui.say(f"Backend: local HPO ({len(index)} terms from {index.source}).")
        return Suggester(index, name="hpo-local")

    mode = args.backend
    if mode == "ols":
        ui.say("Backend: EBI OLS (online).")
        return Suggester(OlsClient(), name="ols")
    if mode == "local":
        return local_suggester()

    # auto
    client = OlsClient()
    if args.no_online or not client.available():
        ui.warn("OLS not reachable; using local HPO.")
        return local_suggester()

    ui.say("Backend: EBI OLS (online).")
    # Local is used as a silent fallback only if it is already cheap to load.
    fallback = None
    try:
        local_ready = bool(args.hpo_file)
        if not local_ready:
            cached_obo = Path(args.cache_dir or _default_cache()) / "hp.obo"
            local_ready = cached_obo.exists()
        if local_ready:
            fallback = HpoIndex.load(
                args.hpo_file,
                allow_download=False,
                cache_dir=args.cache_dir,
                log=lambda *a, **k: None,
            )
    except Exception:
        # Best effort: a broken local copy must not prevent online use.
        fallback = None
    return Suggester(client, fallback=fallback, name="ols")
134
+
135
+
136
def _default_cache():
    """Return the default cache directory as computed by the ``hpo`` module."""
    # Imported lazily, inside the function, exactly as the original did.
    from . import hpo as _hpo

    return _hpo.default_cache_dir()
140
+
141
+
142
def review_group(
    ui: UI,
    suggester: Suggester,
    label: str,
    existing_id: str,
    count: int,
    index: int,
    total: int,
    remembered: dict[str, dict],
    limit: int,
) -> dict | None:
    """Drive the interactive review of one label group.

    Args:
        ui: Output/prompt facade.
        suggester: Backend providing ``suggest(query, limit=...)`` and ``resolve(curie)``.
        label: The free-text phenotype label under review.
        existing_id: The id currently stored with the label ("" when absent).
        count: Number of occurrences of this label (display only).
        index: 1-based position of this group (display only).
        total: Total number of groups (display only).
        remembered: Earlier decisions keyed by lower-cased label; read only.
        limit: Maximum number of suggestions requested per search.

    Returns:
        A decision dict whose ``action`` is one of ``"map"`` (with ``id`` and
        ``label``), ``"keep"``, ``"skip"`` or ``"quit"``. ``"quit"`` tells the
        caller to stop and save. This function always returns a dict; the
        ``None`` in the annotation is retained only for interface compatibility.
    """
    ui.rule(f"[{index}/{total}] phenotype label: {label!r} ({count} occurrence(s))")

    # Report on an id that is already present before offering alternatives.
    if existing_id:
        existing = suggester.resolve(existing_id)
        if existing and existing.label.lower() == label.lower():
            ui.ok(f"Already a valid HPO term: {existing.id} {existing.label}")
        elif existing:
            ui.warn(
                f"Existing id {existing_id} resolves to {existing.id} '{existing.label}', "
                f"which differs from the label '{label}'."
            )
        else:
            ui.warn(f"Existing id {existing_id!r} could not be validated against HPO.")

    cands = suggester.suggest(label, limit=limit)

    # Surface a remembered decision for this label before the suggestions.
    mem = remembered.get(label.lower())
    if mem:
        ui.ok(f"Remembered from earlier: {mem['id']} {mem['label']} (enter 'r' to reuse)")

    while True:
        ui.candidates(cands)
        raw = ui.prompt("Select [#, s words, h HP:id, r, k, x, ?, q]: ").strip()

        if raw == "":
            continue
        if raw in ("?", "help"):
            ui.say(HELP)
            continue
        if raw in ("q", "quit"):
            return {"action": "quit"}
        if raw in ("k", "keep"):
            return {"action": "keep"}
        if raw in ("x", "skip"):
            return {"action": "skip"}
        if raw in ("r", "reuse"):
            if mem:
                return {"action": "map", "id": mem["id"], "label": mem["label"]}
            # 'r' is advertised in the prompt, so explain why it does nothing
            # here instead of reporting it as unrecognised input.
            ui.err(f"Nothing remembered for {label!r} yet.")
            continue

        # search again
        if raw.startswith("s ") or raw == "s":
            query = raw[2:].strip() or ui.prompt("Search words: ").strip()
            if query:
                cands = suggester.suggest(query, limit=limit)
            continue

        # explicit manual id (h HP:0001657) or a bare curie
        manual = None
        if raw.startswith("h "):
            manual = raw[2:].strip()
        elif normalise_curie(raw):
            # Only treat as a curie if it is NOT a small selection index.
            if not (raw.isdigit() and 1 <= int(raw) <= len(cands)):
                manual = raw
        if manual is not None:
            curie = normalise_curie(manual)
            if not curie:
                ui.err(f"{manual!r} is not a valid HPO id (expected HP:0000000).")
                continue
            resolved = suggester.resolve(curie)
            if resolved:
                ui.ok(f"{resolved.id} {resolved.label}")
                if _confirm(ui, "Use this term?"):
                    return {"action": "map", "id": resolved.id, "label": resolved.label}
                continue
            ui.warn(f"Could not validate {curie} against HPO.")
            if _confirm(ui, f"Use {curie} anyway with label {label!r}?"):
                return {"action": "map", "id": curie, "label": label}
            continue

        # numeric selection
        if raw.isdigit():
            choice = int(raw)
            if 1 <= choice <= len(cands):
                chosen = cands[choice - 1]
                return {"action": "map", "id": chosen.id, "label": chosen.label}
            if cands:
                ui.err(f"Pick a number between 1 and {len(cands)}.")
            else:
                # Avoid the nonsensical "between 1 and 0" message.
                ui.err("There are no suggestions to pick from; try 's <words>' or 'h <HP:id>'.")
            continue

        ui.err("Unrecognised input. Type ? for help.")
239
+
240
+
241
+ def _confirm(ui: UI, text: str) -> bool:
242
+ ans = ui.prompt(f"{text} [Y/n]: ").strip().lower()
243
+ return ans in ("", "y", "yes")
244
+
245
+
246
def process_file(path: Path, args, ui: UI, suggester: Suggester, remembered: dict) -> dict:
    """Review every distinct phenotype label in one file and write the results.

    Args:
        path: Input JSON file.
        args: Parsed CLI namespace; reads ``auto_remembered``, ``limit``,
            ``out``, ``in_place`` and ``_files``.
        ui: Output/prompt facade.
        suggester: Suggestion backend handed to ``review_group``.
        remembered: Decisions keyed by lower-cased label. Updated in place
            whenever a label is mapped interactively.

    Returns:
        A summary dict with ``file``, ``mapped``, ``kept``, ``skipped`` and
        ``total`` (occurrence counts).
    """
    ui.rule(f"FILE: {path.name}")
    data = pp.load(path)
    refs = pp.find_phenotypic_features(data)
    if not refs:
        ui.warn("No phenotypic features found in this file.")
        return {"file": str(path), "mapped": 0, "kept": 0, "skipped": 0, "total": 0}

    groups = pp.group_by_label(refs)
    ui.say(f"Found {len(refs)} phenotypic feature(s) across {len(groups)} distinct label(s).")

    decisions: list[dict] = []
    items = list(groups.items())
    for index, ((label, existing_id), group_refs) in enumerate(items, 1):
        if args.auto_remembered and label.lower() in remembered:
            mem = remembered[label.lower()]
            for r in group_refs:
                r.apply(mem["id"], mem["label"])
            ui.ok(f"[auto] {label!r} -> {mem['id']} {mem['label']}")
            # Log with the same shape as an interactive mapping. Spreading
            # ``mem`` here would overwrite the source label with the HPO label.
            decisions.append({"label": label, "id": mem["id"], "label_hpo": mem["label"],
                              "action": "map", "count": len(group_refs)})
            continue

        decision = review_group(
            ui, suggester, label, existing_id, len(group_refs),
            index, len(items), remembered, args.limit,
        )
        # review_group is annotated as possibly returning None; treat that as quit.
        action = decision.get("action") if decision else "quit"
        if action == "quit":
            ui.warn("Finishing early - remaining labels left unchanged.")
            break
        if action == "keep":
            ui.say(f"Kept {label!r} unchanged.")
            decisions.append({"label": label, "action": "keep", "count": len(group_refs)})
            continue
        if action == "skip":
            ui.say(f"Skipped {label!r}.")
            decisions.append({"label": label, "action": "skip", "count": len(group_refs)})
            continue
        # map
        cid, clabel = decision["id"], decision["label"]
        for r in group_refs:
            r.apply(cid, clabel)
        remembered[label.lower()] = {"id": cid, "label": clabel}
        ui.ok(f"Mapped {label!r} -> {cid} {clabel} ({len(group_refs)} occurrence(s)).")
        decisions.append({"label": label, "id": cid, "label_hpo": clabel,
                          "action": "map", "count": len(group_refs)})

    # ---- write output
    if args.out and len(args._files) == 1:
        out_path = Path(args.out)
    elif args.in_place:
        out_path = path
    else:
        out_path = path.with_name(path.stem + ".normalized.json")
    pp.save(data, out_path)
    ui.ok(f"Wrote normalised file: {out_path}")

    # ---- write a mapping sidecar
    sidecar = out_path.with_suffix(out_path.suffix + ".mapping.json")
    pp.save({"file": str(path), "decisions": decisions}, sidecar)
    ui.say(f"Wrote decision log: {sidecar}")

    # Tally occurrences (not distinct labels) per action for the summary.
    totals = {"map": 0, "keep": 0, "skip": 0}
    for d in decisions:
        totals[d["action"]] += d["count"]
    return {"file": str(path), "mapped": totals["map"], "kept": totals["keep"],
            "skipped": totals["skip"], "total": len(refs)}
317
+
318
+
319
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit status: 0 on completion, 1 when no HPO backend could be
        initialised, 2 when ``--out`` is combined with several input files.
    """
    p = argparse.ArgumentParser(
        prog="hponorm",
        description="Interactively map free-text phenopacket phenotype labels to HPO terms.",
    )
    p.add_argument("files", nargs="+", help="Phenopacket / family JSON file(s).")
    p.add_argument("-o", "--out", help="Output path (only valid with a single input file).")
    p.add_argument("--in-place", action="store_true", help="Overwrite the input file(s).")
    p.add_argument("--backend", choices=["auto", "ols", "local"], default="auto",
                   help="Suggestion source (default: auto -> OLS online, else local).")
    p.add_argument("--hpo-file", help="Path to a local hp.obo or hp.json.")
    p.add_argument("--cache-dir", help="Where to cache the downloaded HPO file.")
    p.add_argument("--update", action="store_true", help="Re-download / re-parse HPO.")
    p.add_argument("--no-download", action="store_true",
                   help="Never download HPO (local backend must find a file).")
    p.add_argument("--no-online", action="store_true",
                   help="Do not use OLS even in auto mode.")
    p.add_argument("--limit", type=int, default=8, help="Suggestions shown per label.")
    p.add_argument("--mapping", help="JSON file of remembered label->term decisions to "
                                     "load and update across runs.")
    p.add_argument("--auto-remembered", action="store_true",
                   help="Auto-apply remembered labels without prompting.")
    p.add_argument("--no-color", action="store_true", help="Disable rich/colour output.")
    args = p.parse_args(argv)
    # process_file() reads the file list through this private alias.
    args._files = args.files

    ui = UI(color=not args.no_color)

    if args.out and len(args.files) > 1:
        ui.err("--out can only be used with a single input file.")
        return 2

    # Load remembered decisions, if any.
    remembered: dict[str, dict] = {}
    if args.mapping and Path(args.mapping).exists():
        try:
            # The file is written as UTF-8 with ensure_ascii=False, so it must
            # be read as UTF-8 regardless of the platform's default encoding.
            loaded = json.loads(Path(args.mapping).read_text(encoding="utf-8"))
            if not isinstance(loaded, dict):
                raise ValueError("mapping file must contain a JSON object")
            # Keep only well-formed entries and key them the way lookups are
            # done (lower-cased label), so hand-edited files still match.
            remembered = {
                str(key).lower(): {"id": value["id"], "label": value["label"]}
                for key, value in loaded.items()
                if isinstance(value, dict) and "id" in value and "label" in value
            }
            ui.say(f"Loaded {len(remembered)} remembered mapping(s) from {args.mapping}.")
        except (OSError, ValueError):
            # ValueError covers both JSON and Unicode decoding errors.
            remembered = {}
            ui.warn(f"Could not read mapping file {args.mapping}; starting fresh.")

    try:
        suggester = build_suggester(args, ui)
    except Exception as exc:  # top-level boundary: report and exit cleanly
        ui.err(f"Could not initialise an HPO backend: {exc}")
        return 1

    summaries = []
    for f in args.files:
        path = Path(f)
        if not path.exists():
            ui.err(f"File not found: {path}")
            continue
        summaries.append(process_file(path, args, ui, suggester, remembered))

    # Persist remembered decisions for next time.
    if args.mapping:
        try:
            Path(args.mapping).write_text(
                json.dumps(remembered, indent=2, ensure_ascii=False),
                encoding="utf-8",
            )
            ui.say(f"Saved {len(remembered)} remembered mapping(s) to {args.mapping}.")
        except OSError as exc:
            ui.warn(f"Could not write mapping file: {exc}")

    ui.rule("Summary")
    for s in summaries:
        ui.say(f"{Path(s['file']).name}: {s['mapped']} mapped, {s['kept']} kept, "
               f"{s['skipped']} skipped (of {s['total']} feature(s)).")
    return 0
387
+
388
+
389
# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
@@ -0,0 +1,317 @@
1
+ """Local HPO (Human Phenotype Ontology) index.
2
+
3
+ Loads HPO from a local ``.obo`` or obographs ``.json`` file (downloading and
4
+ caching the official release on first use if permitted), builds an in-memory
5
+ search index over term labels + synonyms, and answers two questions:
6
+
7
+ * ``search(query)`` -> ranked candidate terms for a free-text phenotype label
8
+ * ``get(curie)`` -> the canonical term for a specific ``HP:xxxxxxx`` id
9
+
10
+ Fuzzy matching uses ``rapidfuzz`` when available and falls back to the stdlib
11
+ ``difflib`` otherwise, so the tool runs even in a minimal environment.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import os
17
+ import pickle
18
+ import re
19
+ import urllib.request
20
+ from dataclasses import dataclass, field
21
+ from pathlib import Path
22
+
23
+ from .suggest import Candidate
24
+
25
+ # Official, version-pinned-to-"latest" PURLs maintained by the Monarch Initiative.
26
+ HPO_OBO_URL = "http://purl.obolibrary.org/obo/hp.obo"
27
+ HPO_JSON_URL = "http://purl.obolibrary.org/obo/hp.json"
28
+
29
+ _CURIE_RE = re.compile(r"^HP:\d{7}$")
30
+ _IRI_RE = re.compile(r"HP_(\d{7})")
31
+
32
+ try: # Optional, much faster + better quality.
33
+ from rapidfuzz import fuzz, process
34
+
35
+ _HAVE_RAPIDFUZZ = True
36
+ except Exception: # pragma: no cover - exercised only when dep is absent
37
+ import difflib
38
+
39
+ _HAVE_RAPIDFUZZ = False
40
+
41
+
42
@dataclass
class HpoTerm:
    """A single ontology term as held by the local index."""

    # Identifier of the term; the OBO parser only stores ids shaped ``HP:0000000``.
    id: str
    # Primary (canonical) name of the term.
    label: str
    # Alternative names, searched alongside the label.
    synonyms: list[str] = field(default_factory=list)
    # Free-text definition, when the source file provides one.
    definition: str | None = None
    # Obsolete terms are kept in the term table but excluded from search.
    obsolete: bool = False
49
+
50
+
51
def default_cache_dir() -> Path:
    """Return ``$XDG_CACHE_HOME/hponorm``, or ``~/.cache/hponorm`` when the variable is unset or empty."""
    xdg_home = os.environ.get("XDG_CACHE_HOME")
    if xdg_home:
        root = Path(xdg_home)
    else:
        root = Path(str(Path.home() / ".cache"))
    return root / "hponorm"
54
+
55
+
56
def normalise_curie(raw: str) -> str | None:
    """Coerce user input / IRIs to a canonical ``HP:xxxxxxx`` curie, or None.

    Accepted forms (case-insensitive prefix, surrounding whitespace ignored):

    * anything containing an ``HP_0001657`` IRI fragment, e.g.
      ``http://purl.obolibrary.org/obo/HP_0001657``
    * ``HP:0001657``, ``HP_0001657``, ``HP 0001657``, ``HP0001657``, ``HPO:0001657``
    * a bare seven-digit id such as ``0001657``

    Everything else returns ``None``. In particular, ids from other
    ontologies (``MONDO:0001657``) and ids with the wrong number of digits are
    rejected rather than silently re-labelled as HPO terms.

    Args:
        raw: Text typed by the user or read from a file; ``None`` is tolerated.

    Returns:
        The canonical curie, or ``None`` when *raw* is not an HPO id.
    """
    s = (raw or "").strip()
    if not s:
        return None
    # IRI fragment anywhere in the text; the lookahead stops an 8-digit id
    # from being truncated to its first seven digits.
    iri = re.search(r"HP_([0-9]{7})(?![0-9])", s)
    if iri:
        return f"HP:{iri.group(1)}"
    # Otherwise the whole input must be an (optionally prefixed) 7-digit id.
    bare = re.fullmatch(r"(?:HPO?)?[\s:_-]*([0-9]{7})", s, flags=re.IGNORECASE)
    if bare:
        return f"HP:{bare.group(1)}"
    return None
75
+
76
+
77
class HpoIndex:
    """An in-memory, searchable index of HPO phenotype terms."""

    name = "hpo-local"

    def __init__(self, terms: dict[str, HpoTerm], source: str):
        """Index the labels and synonyms of every non-obsolete term.

        Args:
            terms: Mapping of curie -> term (obsolete terms included).
            source: Description of where the terms came from (shown to the user).
        """
        self.source = source
        self.terms: dict[str, HpoTerm] = terms
        # Parallel arrays for fast fuzzy scanning: searchable string -> term id.
        self._names: list[str] = []
        self._owner: list[str] = []  # term id for _names[i]
        self._kind: list[str] = []  # "label" or "synonym"
        self._exact: dict[str, list[tuple[str, str]]] = {}  # lower text -> [(id, kind)]
        for t in terms.values():
            if t.obsolete:
                continue
            self._add_name(t.id, t.label, "label")
            for syn in t.synonyms:
                self._add_name(t.id, syn, "synonym")

    def _add_name(self, term_id: str, text: str, kind: str) -> None:
        """Register one searchable string for *term_id*; blank text is ignored."""
        text = (text or "").strip()
        if not text:
            return
        self._names.append(text)
        self._owner.append(term_id)
        self._kind.append(kind)
        self._exact.setdefault(text.lower(), []).append((term_id, kind))

    # ------------------------------------------------------------------ lookup
    def get(self, curie: str) -> HpoTerm | None:
        """Return the term for *curie* (normalised first), or None."""
        c = normalise_curie(curie)
        return self.terms.get(c) if c else None

    def __len__(self) -> int:
        # Counts every stored term, obsolete ones included.
        return len(self.terms)

    # ------------------------------------------------------------------ search
    def search(self, query: str, limit: int = 8) -> list[Candidate]:
        """Return up to *limit* candidates for a free-text *query*, best first.

        Exact (case-insensitive) label hits score 100 and exact synonym hits
        98; everything else is scored by fuzzy similarity on a 0-100 scale.
        Each term appears at most once, with its best-scoring match. A blank
        query or a non-positive *limit* yields an empty list.
        """
        query = (query or "").strip()
        if not query or limit <= 0:
            return []
        scored: dict[str, Candidate] = {}

        def consider(term_id: str, score: float, matched_on: str, kind: str) -> None:
            # Keep only the best-scoring match per term.
            t = self.terms.get(term_id)
            if t is None or t.obsolete:
                return
            tag = "label" if kind == "label" else f"synonym: {matched_on}"
            prev = scored.get(term_id)
            if prev is None or score > prev.score:
                scored[term_id] = Candidate(
                    id=t.id,
                    label=t.label,
                    score=round(float(score), 1),
                    matched_on=tag,
                    definition=t.definition,
                    source="hpo-local",
                )

        # 1) Exact (case-insensitive) label / synonym hits get a strong score.
        for term_id, kind in self._exact.get(query.lower(), []):
            consider(term_id, 100.0 if kind == "label" else 98.0, query, kind)

        # 2) Fuzzy scan across all label + synonym strings.
        if _HAVE_RAPIDFUZZ:
            # rapidfuzz >= 3 applies no preprocessing by default, so lower-case
            # both sides explicitly; otherwise "long qt" and "Long QT" score
            # differently here than in the difflib fallback below.
            matches = process.extract(
                query,
                self._names,
                scorer=fuzz.WRatio,
                processor=str.lower,
                limit=limit * 6,
            )
            for matched_text, score, idx in matches:
                consider(self._owner[idx], score, matched_text, self._kind[idx])
        else:  # difflib fallback
            lowered = query.lower()
            ratios = (
                (difflib.SequenceMatcher(None, lowered, n.lower()).ratio(), i)
                for i, n in enumerate(self._names)
            )
            top = sorted(ratios, reverse=True)[: limit * 6]
            for ratio, idx in top:
                consider(
                    self._owner[idx], ratio * 100.0, self._names[idx], self._kind[idx]
                )

        ranked = sorted(scored.values(), key=lambda c: c.score, reverse=True)
        return ranked[:limit]

    # ----------------------------------------------------------- construction
    @classmethod
    def load(
        cls,
        path: str | os.PathLike | None = None,
        *,
        allow_download: bool = True,
        update: bool = False,
        cache_dir: str | os.PathLike | None = None,
        log=lambda *a, **k: None,
    ) -> "HpoIndex":
        """Load HPO from *path*, the cache, or by downloading the release.

        Args:
            path: Explicit ``.obo`` / obographs ``.json`` file. When omitted,
                ``hp.obo`` inside the cache directory is used.
            allow_download: Permit fetching ``hp.obo`` when it is missing or
                *update* is set.
            update: Re-download (when no *path* is given) and re-parse,
                ignoring any parsed-index cache.
            cache_dir: Cache directory; defaults to ``default_cache_dir()``.
            log: Callable receiving progress messages.

        Raises:
            FileNotFoundError: *path* does not exist, or no cached file exists
                and downloading is disabled.
        """
        cache = Path(cache_dir) if cache_dir else default_cache_dir()
        cache.mkdir(parents=True, exist_ok=True)

        src_path: Path
        if path is not None:
            src_path = Path(path)
            if not src_path.exists():
                raise FileNotFoundError(f"HPO file not found: {src_path}")
        else:
            src_path = cache / "hp.obo"
            if update or not src_path.exists():
                if not allow_download:
                    raise FileNotFoundError(
                        "No local HPO file and downloading is disabled. "
                        "Pass --hpo-file or allow downloading."
                    )
                log(f"Downloading HPO from {HPO_OBO_URL} (one-time) ...")
                _download(HPO_OBO_URL, src_path)
                log(f"Saved HPO to {src_path}")

        # Use a parsed-index pickle cache keyed on source size+mtime.
        stat = src_path.stat()
        key = f"{src_path.name}-{stat.st_size}-{int(stat.st_mtime)}.idx.pkl"
        idx_cache = cache / key
        if idx_cache.exists() and not update:
            # NOTE(security): unpickling executes code embedded in the file.
            # This is only safe while the cache directory is writable solely
            # by the current user; never point --cache-dir at a shared or
            # untrusted location.
            try:
                with idx_cache.open("rb") as fh:
                    terms = pickle.load(fh)
                log(f"Loaded {len(terms)} HPO terms from cache.")
                return cls(terms, source=str(src_path))
            except Exception:
                # Deliberately broad: a corrupt, truncated or incompatible
                # cache must never be fatal - fall through and re-parse.
                pass

        log(f"Parsing HPO from {src_path} ...")
        terms = _parse_file(src_path)
        log(f"Indexed {len(terms)} HPO terms.")
        try:
            with idx_cache.open("wb") as fh:
                pickle.dump(terms, fh, protocol=pickle.HIGHEST_PROTOCOL)
        except Exception:
            # Caching is best-effort; the freshly parsed terms are still usable.
            pass
        return cls(terms, source=str(src_path))
216
+
217
+
218
+ # --------------------------------------------------------------------- parsing
219
+ def _download(url: str, dest: Path, timeout: int = 120) -> None:
220
+ dest.parent.mkdir(parents=True, exist_ok=True)
221
+ tmp = dest.with_suffix(dest.suffix + ".part")
222
+ req = urllib.request.Request(url, headers={"User-Agent": "hponorm/1.0"})
223
+ with urllib.request.urlopen(req, timeout=timeout) as resp, tmp.open("wb") as out:
224
+ while True:
225
+ chunk = resp.read(1 << 16)
226
+ if not chunk:
227
+ break
228
+ out.write(chunk)
229
+ tmp.replace(dest)
230
+
231
+
232
def _parse_file(path: Path) -> dict[str, HpoTerm]:
    """Parse *path* as obographs JSON or OBO, chosen by sniffing its first bytes.

    A file whose first non-whitespace byte (within the first 256 bytes) is
    ``{`` is treated as obographs JSON; anything else is parsed as OBO.
    """
    # Use a context manager so the sniffing handle is closed deterministically
    # instead of being left for the garbage collector.
    with path.open("rb") as fh:
        head = fh.read(256).lstrip()
    if head[:1] == b"{":
        return _parse_obographs(path)
    return _parse_obo(path)
237
+
238
+
239
def _parse_obo(path: Path) -> dict[str, HpoTerm]:
    """Read the ``[Term]`` stanzas of an OBO file into a curie -> term mapping.

    Stanzas of any other type are ignored, and so are terms whose id does not
    match ``_CURIE_RE``.
    """
    parsed: dict[str, HpoTerm] = {}
    synonym_pattern = re.compile(r'synonym:\s*"((?:[^"\\]|\\.)*)"')
    definition_pattern = re.compile(r'def:\s*"((?:[^"\\]|\\.)*)"')

    def commit(stanza: dict | None) -> None:
        # Store a finished stanza, provided it carries a usable HP id.
        if not stanza:
            return
        term_id = stanza.get("id")
        if not term_id or not _CURIE_RE.match(term_id):
            return
        parsed[term_id] = HpoTerm(
            id=term_id,
            label=stanza.get("name", term_id),
            synonyms=stanza.get("synonyms", []),
            definition=stanza.get("def"),
            obsolete=stanza.get("obsolete", False),
        )

    # ``stanza`` is a dict while inside a [Term] block and None everywhere else.
    stanza: dict | None = None
    with path.open("r", encoding="utf-8", errors="replace") as fh:
        for raw_line in fh:
            line = raw_line.rstrip("\n")

            if line.startswith("["):
                opens_term = line.startswith("[Term]")
                if opens_term or line.endswith("]"):
                    # Any stanza header closes the previous stanza.
                    commit(stanza)
                    stanza = {"synonyms": []} if opens_term else None
                    continue

            if stanza is None:
                continue

            tag, _, value = line.partition(":")
            if tag == "id":
                stanza["id"] = value.strip()
            elif tag == "name":
                stanza["name"] = value.strip()
            elif tag == "def":
                found = definition_pattern.match(line)
                if found:
                    stanza["def"] = _unescape(found.group(1))
            elif tag == "synonym":
                found = synonym_pattern.match(line)
                if found:
                    stanza["synonyms"].append(_unescape(found.group(1)))
            elif tag == "is_obsolete" and value.strip() == "true":
                stanza["obsolete"] = True

    # The final stanza has no following header to close it.
    commit(stanza)
    return parsed
291
+
292
+
293
def _parse_obographs(path: Path) -> dict[str, HpoTerm]:
    """Read an obographs JSON document into a curie -> term mapping.

    Only nodes whose ``id`` contains an ``HP_<7 digits>`` fragment are kept.
    """
    with path.open("r", encoding="utf-8", errors="replace") as fh:
        document = json.load(fh)

    # Flatten the nodes of every graph lazily, preserving document order.
    all_nodes = (
        node
        for graph in document.get("graphs", [])
        for node in graph.get("nodes", [])
    )

    result: dict[str, HpoTerm] = {}
    for node in all_nodes:
        hit = _IRI_RE.search(node.get("id", ""))
        if hit is None:
            continue
        curie = f"HP:{hit.group(1)}"
        meta = node.get("meta", {}) or {}
        definition_block = meta.get("definition") or {}
        result[curie] = HpoTerm(
            id=curie,
            label=node.get("lbl") or curie,
            # Synonym entries without a value are dropped.
            synonyms=[syn["val"] for syn in meta.get("synonyms", []) if syn.get("val")],
            definition=definition_block.get("val"),
            obsolete=bool(meta.get("deprecated", False)),
        )
    return result
314
+
315
+
316
+ def _unescape(s: str) -> str:
317
+ return s.replace('\\"', '"').replace("\\n", " ").replace("\\\\", "\\").strip()
@@ -0,0 +1,112 @@
1
+ """EBI Ontology Lookup Service (OLS4) backend for HPO.
2
+
3
+ Provides high-quality ranked search over the live HPO ontology without any
4
+ local download. Used when the machine is online; otherwise the CLI falls back
5
+ to the local :class:`~hponorm.hpo.HpoIndex`.
6
+
7
+ OLS4 API docs: https://www.ebi.ac.uk/ols4/help
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import urllib.parse
13
+ import urllib.request
14
+
15
+ from .suggest import Candidate
16
+
17
OLS_BASE = "https://www.ebi.ac.uk/ols4/api"


class OlsClient:
    """Small client for HPO term search and lookup over the OLS4 REST API.

    Attributes:
        name: Backend identifier (``"ols"``).
        base: API root URL without a trailing slash.
        timeout: Timeout handed to ``urllib.request.urlopen``.
    """

    name = "ols"

    def __init__(self, base: str = OLS_BASE, timeout: int = 15):
        """Remember the API root (trailing slashes stripped) and the timeout."""
        self.base = base.rstrip("/")
        self.timeout = timeout

    # ------------------------------------------------------------------ utils
    def _get_json(self, url: str) -> dict:
        """GET *url* and return the UTF-8 response body parsed as JSON."""
        request = urllib.request.Request(url, headers={"User-Agent": "hponorm/1.0"})
        with urllib.request.urlopen(request, timeout=self.timeout) as response:
            return json.loads(response.read().decode("utf-8"))

    @staticmethod
    def _first_description(value):
        """Return the first item when *value* is a list (``None`` if the list
        is empty); return any other value unchanged."""
        if not isinstance(value, list):
            return value
        return value[0] if value else None

    def available(self) -> bool:
        """Report whether the ``hp`` ontology endpoint answers with valid JSON.

        Any exception while fetching or parsing counts as "not available".
        """
        try:
            self._get_json(f"{self.base}/ontologies/hp")
        except Exception:
            return False
        return True

    # ----------------------------------------------------------------- search
    def search(self, query: str, limit: int = 8) -> list[Candidate]:
        """Search HPO classes for *query* and return at most *limit* candidates.

        A blank or ``None`` query yields an empty list without any request.
        Documents are skipped when their ``obo_id`` is missing or does not
        start with ``HP:``, when they are flagged obsolete, or when the id has
        already been emitted.  Errors from the HTTP call are not caught here.
        """
        text = (query or "").strip()
        if not text:
            return []

        fields = {
            "q": text,
            "ontology": "hp",
            "type": "class",
            # Always ask for at least ten rows -- presumably headroom for the
            # documents that get filtered out below.
            "rows": max(limit, 10),
            "fieldList": "obo_id,label,description,synonym,is_obsolete",
        }
        payload = self._get_json(f"{self.base}/search?{urllib.parse.urlencode(fields)}")
        documents = (payload.get("response") or {}).get("docs", [])

        hits: list[Candidate] = []
        emitted: set[str] = set()
        # OLS returns docs in relevance order; the position in that list
        # (counting skipped docs too) is mapped to a descending score.
        for position, doc in enumerate(documents):
            term_id = doc.get("obo_id")
            if not (term_id and term_id.startswith("HP:")):
                continue
            if doc.get("is_obsolete") or term_id in emitted:
                continue
            emitted.add(term_id)
            hits.append(
                Candidate(
                    id=term_id,
                    label=doc.get("label") or term_id,
                    score=round(max(1.0, 100.0 - position * 4), 1),
                    matched_on="OLS relevance",
                    definition=self._first_description(doc.get("description")),
                    source="ols",
                )
            )
            if len(hits) >= limit:
                break
        return hits

    # ------------------------------------------------------------------- term
    def get(self, curie: str) -> Candidate | None:
        """Fetch the term identified by *curie*.

        Returns ``None`` when ``normalise_curie`` rejects the input, when the
        request fails for any reason, or when the response holds no terms.
        Otherwise the first returned term is wrapped in a ``Candidate`` with a
        score of 100.
        """
        from .hpo import normalise_curie

        canonical = normalise_curie(curie)
        if not canonical:
            return None

        iri = f"http://purl.obolibrary.org/obo/{canonical.replace(':', '_')}"
        query_string = urllib.parse.urlencode({"iri": iri})
        try:
            payload = self._get_json(f"{self.base}/ontologies/hp/terms?{query_string}")
        except Exception:
            return None

        matches = (payload.get("_embedded") or {}).get("terms", [])
        if not matches:
            return None

        first = matches[0]
        return Candidate(
            id=canonical,
            label=first.get("label") or canonical,
            score=100.0,
            matched_on="exact id",
            definition=self._first_description(first.get("description")),
            source="ols",
        )
@@ -0,0 +1,86 @@
1
+ """Read a phenopacket / family JSON, locate every phenotypic-feature ``type``,
2
+ apply chosen HPO mappings in place, and write the normalised file back out.
3
+
4
+ The finder is deliberately schema-tolerant: it recursively walks the document
5
+ and collects the ``type`` object of every entry under any ``phenotypicFeatures``
6
+ array. That covers the GA4GH ``Phenopacket.phenotypicFeatures`` and
7
+ ``Family.proband`` / ``Family.relatives`` layouts (and any nesting of them)
8
+ without hard-coding paths. An entry's ``excluded`` flag is left untouched -- it
9
+ qualifies the feature, not the term, so the ``type`` is mapped either way.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ from dataclasses import dataclass
15
+ from pathlib import Path
16
+
17
+
18
@dataclass
class FeatureRef:
    """Handle on one phenotypic-feature ``type`` object inside a loaded document.

    ``type`` is the very dict held by the parsed JSON (not a copy), so
    :meth:`apply` edits the document in place.
    """

    # Location of the ``type`` object, e.g. "$.phenotypicFeatures[0].type".
    path: str
    # The actual {"id": ..., "label": ...} dict inside the loaded JSON.
    type: dict
    # GA4GH PhenotypicFeature.excluded (informational).
    excluded: bool = False

    @property
    def label(self) -> str:
        """Current label with surrounding whitespace removed ("" if unset)."""
        raw = self.type.get("label")
        return raw.strip() if raw else ""

    @property
    def id(self) -> str:
        """Current id with surrounding whitespace removed ("" if unset)."""
        raw = self.type.get("id")
        return raw.strip() if raw else ""

    def apply(self, curie: str, label: str) -> None:
        """Overwrite the referenced dict's ``id`` and ``label`` in place."""
        self.type.update(id=curie, label=label)
37
+
38
+
39
def load(path: str | Path) -> dict:
    """Parse the UTF-8 JSON file at *path* and return the decoded document."""
    raw_text = Path(path).read_text(encoding="utf-8")
    return json.loads(raw_text)
42
+
43
+
44
def find_phenotypic_features(obj, path: str = "$") -> list[FeatureRef]:
    """Collect a :class:`FeatureRef` for every phenotypic-feature ``type`` dict.

    The whole structure is traversed depth-first.  Whenever a dict holds a
    list under the key ``phenotypicFeatures``, each dict entry of that list
    whose ``type`` is itself a dict contributes one reference.  Every entry is
    then traversed as well, so nested ``phenotypicFeatures`` arrays are found.

    Args:
        obj: Parsed JSON value (dict, list or scalar).
        path: Path prefix used for the root when building each reference's
            ``path`` string.

    Returns:
        References in traversal order.  Each one holds the original ``type``
        dict, not a copy.
    """
    collected: list[FeatureRef] = []

    def visit_feature_list(entries, parent):
        for index, entry in enumerate(entries):
            here = f"{parent}.phenotypicFeatures[{index}]"
            if isinstance(entry, dict):
                term = entry.get("type")
                if isinstance(term, dict):
                    collected.append(
                        FeatureRef(
                            f"{here}.type",
                            term,
                            bool(entry.get("excluded", False)),
                        )
                    )
            # Descend regardless of whether the entry produced a reference.
            visit(entry, here)

    def visit(node, where):
        if isinstance(node, list):
            for index, item in enumerate(node):
                visit(item, f"{where}[{index}]")
            return
        if not isinstance(node, dict):
            # Scalars cannot contain features.
            return
        for key, value in node.items():
            if key == "phenotypicFeatures" and isinstance(value, list):
                visit_feature_list(value, where)
            else:
                visit(value, f"{where}.{key}")

    visit(obj, path)
    return collected
72
+
73
+
74
def group_by_label(refs: list[FeatureRef]) -> "dict[tuple[str, str], list[FeatureRef]]":
    """Bucket feature references by their current ``(label, id)`` pair.

    References sharing the same pair land in one list, in their original
    order, so a decision made once can be applied to every occurrence.  Keys
    appear in order of first occurrence.
    """
    buckets: dict[tuple[str, str], list[FeatureRef]] = {}
    for ref in refs:
        key = (ref.label, ref.id)
        members = buckets.get(key)
        if members is None:
            buckets[key] = [ref]
        else:
            members.append(ref)
    return buckets
81
+
82
+
83
def save(data: dict, path: str | Path) -> None:
    """Write *data* to *path* as indented UTF-8 JSON with a trailing newline.

    Non-ASCII characters are written literally (``ensure_ascii=False``).

    The document is serialised to a string *before* the destination is
    opened.  Opening with ``"w"`` truncates the file immediately, so encoding
    straight into the handle would leave an empty or partial file behind if
    serialisation raised.  With this ordering a serialisation error leaves any
    existing file untouched.  The write itself is not atomic.

    Args:
        data: JSON-serialisable document.
        path: Destination file; created or overwritten.

    Raises:
        TypeError, ValueError: Propagated from ``json.dumps`` when *data*
            cannot be serialised; nothing is written in that case.
        OSError: Propagated when the file cannot be opened or written.
    """
    text = json.dumps(data, indent=2, ensure_ascii=False)
    with Path(path).open("w", encoding="utf-8") as fh:
        fh.write(text)
        fh.write("\n")
@@ -0,0 +1,75 @@
1
+ """Unified suggestion layer.
2
+
3
+ A ``Suggester`` wraps one of two interchangeable backends:
4
+
5
+ * ``hpo`` - a local :class:`~hponorm.hpo.HpoIndex` (offline capable)
6
+ * ``ols`` - the EBI Ontology Lookup Service v4 (online, best ranking)
7
+
8
+ and exposes a backend-agnostic API used by the CLI:
9
+
10
+ * ``suggest(label)`` -> ranked list of :class:`Candidate`
11
+ * ``resolve(curie)`` -> validate a specific HPO id and return its canonical term
12
+ """
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass
16
+
17
+
18
@dataclass
class Candidate:
    """One ontology term offered as a possible match."""

    # CURIE of the term, e.g. "HP:0001657".
    id: str
    # Canonical HPO label.
    label: str
    # Match score on a 0-100 scale; higher is better.
    score: float
    # What produced the match, e.g. "label", 'synonym: "..."',
    # "OLS relevance" or "exact id".
    matched_on: str
    # Free-text definition, when the backend supplied one.
    definition: str | None = None
    # Name of the backend that produced the candidate.
    source: str = ""

    def short_def(self, n: int = 90) -> str:
        """Return the definition on one line, cut to at most *n* characters.

        Runs of whitespace are collapsed to single spaces.  Text longer than
        *n* is shortened to ``n - 1`` characters plus an ellipsis.  A missing
        or empty definition gives an empty string.
        """
        if not self.definition:
            return ""
        collapsed = " ".join(self.definition.split())
        if len(collapsed) > n:
            return collapsed[: n - 1] + "\u2026"
        return collapsed
34
+
35
+
36
class Suggester:
    """Backend-agnostic front end over a primary and an optional fallback backend.

    A backend is any object offering ``search(label, limit=...)`` and
    ``get(curie)``.
    """

    def __init__(self, backend, *, fallback=None, name: str = ""):
        """Store the backends; *name* defaults to the backend's own ``name``
        attribute, or ``"backend"`` when it has none."""
        self.backend = backend
        self.fallback = fallback
        self.name = name or getattr(backend, "name", "backend")

    @staticmethod
    def _search_or_empty(provider, label, limit):
        """Run ``provider.search``; any exception is treated as "no results"."""
        try:
            return provider.search(label, limit=limit)
        except Exception:
            return []

    def suggest(self, label: str, limit: int = 8) -> list[Candidate]:
        """Return ranked candidates for *label*.

        The primary backend is asked first.  The fallback is consulted only
        when the primary raised or returned nothing.  Backend errors are
        swallowed, so the worst case is an empty list.
        """
        primary_hits = self._search_or_empty(self.backend, label, limit)
        if primary_hits or self.fallback is None:
            return primary_hits
        return self._search_or_empty(self.fallback, label, limit)

    def resolve(self, curie: str) -> Candidate | None:
        """Look up a specific HPO id to validate it and fetch its canonical label.

        Backends are tried in order (primary, then fallback).  A backend that
        raises or returns ``None`` is skipped.  ``None`` is returned when no
        backend knows the id.
        """
        providers = [p for p in (self.backend, self.fallback) if p is not None]
        for provider in providers:
            try:
                term = provider.get(curie)
            except Exception:
                continue
            if term is None:
                continue
            # ``HpoIndex.get`` returns an HpoTerm; OLS returns a Candidate.
            if isinstance(term, Candidate):
                return term
            return Candidate(
                id=term.id,
                label=term.label,
                score=100.0,
                matched_on="exact id",
                definition=getattr(term, "definition", None),
                source=getattr(provider, "name", self.name),
            )
        return None
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.4
2
+ Name: hponorm
3
+ Version: 1.0.0
4
+ Summary: Interactively validate and normalise GA4GH phenopacket phenotypic-feature terms against the HPO ontology
5
+ Requires-Python: >=3.9
6
+ License-File: LICENSE
7
+ Provides-Extra: recommended
8
+ Requires-Dist: rapidfuzz>=3.0; extra == "recommended"
9
+ Requires-Dist: rich>=13.0; extra == "recommended"
10
+ Dynamic: license-file
@@ -0,0 +1,14 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ hponorm/cli.py
5
+ hponorm/hpo.py
6
+ hponorm/ols.py
7
+ hponorm/phenopackets.py
8
+ hponorm/suggest.py
9
+ hponorm.egg-info/PKG-INFO
10
+ hponorm.egg-info/SOURCES.txt
11
+ hponorm.egg-info/dependency_links.txt
12
+ hponorm.egg-info/entry_points.txt
13
+ hponorm.egg-info/requires.txt
14
+ hponorm.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ hponorm = hponorm.cli:main
@@ -0,0 +1,4 @@
1
+
2
+ [recommended]
3
+ rapidfuzz>=3.0
4
+ rich>=13.0
@@ -0,0 +1 @@
1
+ hponorm
@@ -0,0 +1,22 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "hponorm"
7
+ version = "1.0.0"
8
+ description = "Interactively validate and normalise GA4GH phenopacket phenotypic-feature terms against the HPO ontology"
9
+ requires-python = ">=3.9"
10
+ dependencies = [] # stdlib-only by default
11
+
12
+ [project.optional-dependencies]
13
+ recommended = ["rapidfuzz>=3.0", "rich>=13.0"]
14
+
15
+ [project.scripts]
16
+ hponorm = "hponorm.cli:main"
17
+
18
+ [tool.setuptools]
19
+ packages = ["hponorm"]
20
+
21
+ [tool.setuptools.package-data]
22
+ hponorm = ["data/*.obo"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+