dna-decode 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. dna_decode/__init__.py +7 -0
  2. dna_decode/amr/__init__.py +1 -0
  3. dna_decode/amr/cli.py +505 -0
  4. dna_decode/cli.py +169 -0
  5. dna_decode/colocalization/__init__.py +8 -0
  6. dna_decode/colocalization/__main__.py +4 -0
  7. dna_decode/colocalization/cli.py +122 -0
  8. dna_decode/colocalization/core.py +53 -0
  9. dna_decode/concordance/__init__.py +7 -0
  10. dna_decode/concordance/__main__.py +4 -0
  11. dna_decode/concordance/cli.py +104 -0
  12. dna_decode/concordance/core.py +85 -0
  13. dna_decode/data/__init__.py +10 -0
  14. dna_decode/data/annotations.py +258 -0
  15. dna_decode/data/antimalarial_amr.py +132 -0
  16. dna_decode/data/antiviral_amr.py +129 -0
  17. dna_decode/data/ast_data.py +176 -0
  18. dna_decode/data/bacdive.py +235 -0
  19. dna_decode/data/bvbrc_genome.py +170 -0
  20. dna_decode/data/calibrated_amr_rules.json +99 -0
  21. dna_decode/data/cell_key.py +23 -0
  22. dna_decode/data/cohort.py +497 -0
  23. dna_decode/data/experimental_drug_rules.py +80 -0
  24. dna_decode/data/external_cohort_genomes.py +103 -0
  25. dna_decode/data/external_crosswalk.py +86 -0
  26. dna_decode/data/external_mic_ingest.py +65 -0
  27. dna_decode/data/external_mic_labels.py +223 -0
  28. dna_decode/data/fungal_amr.py +152 -0
  29. dna_decode/data/hiv_amr.py +400 -0
  30. dna_decode/data/mic_tiers.py +350 -0
  31. dna_decode/data/pilot.py +302 -0
  32. dna_decode/data/refseq.py +236 -0
  33. dna_decode/data/resistance_db.py +159 -0
  34. dna_decode/data/sarscov2_amr.py +138 -0
  35. dna_decode/data/shipped_decoder_surface.py +73 -0
  36. dna_decode/data/tb_lineage_barcode.py +69 -0
  37. dna_decode/data/tb_who_catalogue.py +149 -0
  38. dna_decode/data/trust_surface.py +258 -0
  39. dna_decode/disinfinder/__init__.py +7 -0
  40. dna_decode/disinfinder/__main__.py +4 -0
  41. dna_decode/disinfinder/cli.py +83 -0
  42. dna_decode/disinfinder/runner.py +38 -0
  43. dna_decode/eval/__init__.py +9 -0
  44. dna_decode/eval/amr_rules.py +386 -0
  45. dna_decode/eval/biosample_resolver.py +302 -0
  46. dna_decode/eval/calibrate_organism.py +298 -0
  47. dna_decode/eval/clade_baseline.py +109 -0
  48. dna_decode/eval/clonality.py +250 -0
  49. dna_decode/eval/cohort_deconfound.py +158 -0
  50. dna_decode/eval/cohort_manifest.py +138 -0
  51. dna_decode/eval/cv.py +287 -0
  52. dna_decode/eval/expression_context.py +146 -0
  53. dna_decode/eval/loso_kmer.py +199 -0
  54. dna_decode/eval/metrics.py +219 -0
  55. dna_decode/eval/phylogeny.py +268 -0
  56. dna_decode/eval/point_baseline.py +92 -0
  57. dna_decode/eval/prospective_lock.py +168 -0
  58. dna_decode/genome_map/__init__.py +64 -0
  59. dna_decode/genome_map/amrfinder.py +68 -0
  60. dna_decode/genome_map/annotate.py +84 -0
  61. dna_decode/genome_map/build_map.py +396 -0
  62. dna_decode/genome_map/gate.py +199 -0
  63. dna_decode/genome_map/ingest.py +74 -0
  64. dna_decode/genome_map/phenotype_overlay.py +334 -0
  65. dna_decode/genome_map/tier_vocab.py +45 -0
  66. dna_decode/genome_map/tiers.py +69 -0
  67. dna_decode/genome_map/virulence_overlay.py +201 -0
  68. dna_decode/interp/__init__.py +8 -0
  69. dna_decode/interp/mutagenesis.py +407 -0
  70. dna_decode/ktype/__init__.py +1 -0
  71. dna_decode/ktype/__main__.py +4 -0
  72. dna_decode/ktype/cli.py +86 -0
  73. dna_decode/ktype/runner.py +74 -0
  74. dna_decode/mlst/__init__.py +7 -0
  75. dna_decode/mlst/__main__.py +4 -0
  76. dna_decode/mlst/cli.py +110 -0
  77. dna_decode/mlst/core.py +57 -0
  78. dna_decode/mlst/runner.py +58 -0
  79. dna_decode/models/__init__.py +7 -0
  80. dna_decode/models/cache.py +434 -0
  81. dna_decode/models/classical_baselines.py +355 -0
  82. dna_decode/models/classifiers.py +233 -0
  83. dna_decode/models/foundation.py +404 -0
  84. dna_decode/organism_rules/__init__.py +9 -0
  85. dna_decode/organism_rules/tb_amr.py +97 -0
  86. dna_decode/organism_rules/tb_goldset.py +107 -0
  87. dna_decode/organism_rules/tb_lineage.py +56 -0
  88. dna_decode/organism_rules/tb_vcf.py +197 -0
  89. dna_decode/pathotype/__init__.py +12 -0
  90. dna_decode/pathotype/__main__.py +4 -0
  91. dna_decode/pathotype/cli.py +134 -0
  92. dna_decode/pathotype/detect.py +167 -0
  93. dna_decode/pathotype/expec_score.py +59 -0
  94. dna_decode/pathotype/markers.py +81 -0
  95. dna_decode/pathotype/resolve.py +197 -0
  96. dna_decode/pathotype/vf_runner.py +357 -0
  97. dna_decode/plasmid/__init__.py +8 -0
  98. dna_decode/plasmid/__main__.py +4 -0
  99. dna_decode/plasmid/cli.py +90 -0
  100. dna_decode/plasmid/runner.py +65 -0
  101. dna_decode/pointfinder/__init__.py +8 -0
  102. dna_decode/pointfinder/__main__.py +4 -0
  103. dna_decode/pointfinder/cli.py +99 -0
  104. dna_decode/pointfinder/runner.py +120 -0
  105. dna_decode/profile/__init__.py +7 -0
  106. dna_decode/profile/__main__.py +4 -0
  107. dna_decode/profile/cli.py +218 -0
  108. dna_decode/report_cards/amr_portal_independent_report_card.json +500 -0
  109. dna_decode/report_cards/decoder_validation_report_card.json +737 -0
  110. dna_decode/report_cards/hiv_decoder_report_card.json +318 -0
  111. dna_decode/report_cards/tb_report_card.json +72 -0
  112. dna_decode/resfinder/__init__.py +7 -0
  113. dna_decode/resfinder/__main__.py +4 -0
  114. dna_decode/resfinder/cli.py +105 -0
  115. dna_decode/resfinder/runner.py +60 -0
  116. dna_decode/serotype/__init__.py +6 -0
  117. dna_decode/serotype/__main__.py +4 -0
  118. dna_decode/serotype/cli.py +82 -0
  119. dna_decode/serotype/runner.py +70 -0
  120. dna_decode/typing/__init__.py +4 -0
  121. dna_decode/typing/blast_caller.py +96 -0
  122. dna_decode/typing/codon_map.py +54 -0
  123. dna_decode/viz/__init__.py +5 -0
  124. dna_decode/viz/browser.py +138 -0
  125. dna_decode-0.5.1.dist-info/METADATA +457 -0
  126. dna_decode-0.5.1.dist-info/RECORD +128 -0
  127. dna_decode-0.5.1.dist-info/WHEEL +4 -0
  128. dna_decode-0.5.1.dist-info/entry_points.txt +14 -0
dna_decode/__init__.py ADDED
@@ -0,0 +1,7 @@
"""DNA → trait prediction platform.

Phase 1: E. coli antibiotic resistance prediction with biologically
interpretable attribution. See plans/Ecoli_G2P_Platform_Technical_Plan.md.
"""

from importlib.metadata import PackageNotFoundError as _PackageNotFoundError
from importlib.metadata import version as _dist_version

try:
    # Read the version from the installed distribution so this attribute cannot drift from the
    # released wheel (the hard-coded value had fallen behind the published package version).
    __version__ = _dist_version("dna-decode")
except _PackageNotFoundError:
    # Imported from a source tree that was never installed: no distribution metadata to consult.
    __version__ = "0.0.0+unknown"
@@ -0,0 +1 @@
1
+ """In-package AMR mechanism decoder (console entry dna-amr)."""
dna_decode/amr/cli.py ADDED
@@ -0,0 +1,505 @@
1
+ """Deterministic AMR mechanism decoder — in-package CLI (console entry `dna-amr`).
2
+
3
+ Genome FASTA (or a cached AMRFinder run) -> R/S call per drug + the curated resistance determinants that
4
+ drove it + provenance. Mechanism-feature decoding, NOT embeddings (per
5
+ `plans/AMR_embedding_niche_decision_2026-06-05.md`). Sibling of `dna_decode.pathotype.cli` (dna-pathotype).
6
+
7
+ In-package so it ships in the wheel. Cached-run mode is pure (reads main.tsv via amr_rules — no Docker).
8
+ Genome mode lazily imports the AMRFinder Docker runner from `scripts/` (repo-only; needs Docker + a
9
+ Docker-readable DB) and errors cleanly if unavailable — so the console entry installs + imports without
10
+ the scripts/ dir.
11
+
12
+ dna-amr --drug ciprofloxacin --amrfinder-run data/amrfinder_runs/GCA_xxx.x
13
+ dna-amr --drug ciprofloxacin --genome-fasta X.fna --sample-id X # needs Docker + data/amrfinder_db
14
+
15
+ NOT a clinical decision tool. cipro N=147 op-chars (threshold=2): acc 0.939 / sens 0.931 / spec 0.947.
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import argparse
20
+ import datetime
21
+ import json
22
+ import sys
23
+ from pathlib import Path
24
+
25
+ from dna_decode.data.antimalarial_amr import (
26
+ call_from_observed_substitutions as antimalarial_call_from_observed,
27
+ gene_for_drug,
28
+ supported_antimalarial_drugs,
29
+ )
30
+ from dna_decode.data.antiviral_amr import (
31
+ call_from_observed_substitutions as antiviral_call_from_observed,
32
+ supported_antiviral_drugs,
33
+ )
34
+ from dna_decode.data.fungal_amr import (
35
+ call_from_observed_substitutions,
36
+ supported_fungal_drugs,
37
+ )
38
+ from dna_decode.data.sarscov2_amr import (
39
+ all_supported_sarscov2_drugs,
40
+ call_sarscov2_observed,
41
+ gene_for_sarscov2_drug,
42
+ )
43
+ from dna_decode.data.hiv_amr import (
44
+ all_supported_hiv_drugs,
45
+ call_hiv_observed,
46
+ gene_for_hiv_drug,
47
+ )
48
+ from dna_decode.data.mic_tiers import supported_drugs
49
+ from dna_decode.data.trust_surface import one_line, trust_block
50
+ from dna_decode.eval.amr_rules import AMRFINDER_IMAGE_PINNED, call_resistance
51
+
52
# All default references sit under a `data/` directory located three parents above this module file
# (i.e. next to the `dna_decode` package directory). Each engine below picks its committed CDS from there.
_REF_DATA_ROOT = Path(__file__).resolve().parent.parent.parent / "data"

# Fungal engine: BLAST ERG11/FKS1 against a catalog; AMRFinder is not used (there is no AMRFinder-for-fungi).
# Drugs routed here: fluconazole / voriconazole / caspofungin / micafungin. G1-validated on C. auris
# (2026-06-08, wiki/fungal_ep7_g1_closeout): method transfers, sens 1.0 across clades; label-limited spec.
_DEFAULT_ERG11_REF = _REF_DATA_ROOT / "fungal_ref" / "Cauris_ERG11_cds.fna"

# Antimalarial engine (3rd kingdom, protozoan): BLAST Pfkelch13 against the WHO-validated marker catalog.
# Drugs routed here: artemisinin / artesunate / dihydroartemisinin -> K13 engine.
_ANTIMALARIAL_REF_DIR = _REF_DATA_ROOT / "antimalarial_ref"
_DEFAULT_K13_REF = _ANTIMALARIAL_REF_DIR / "Pf3D7_K13_cds.fna"
_DEFAULT_PFCRT_REF = _ANTIMALARIAL_REF_DIR / "Pf3D7_pfcrt_cds.fna"

# Antiviral engine (4th kingdom, viral): BLAST influenza NA against the CDC/WHO-recognized NAI marker
# catalog. Drugs routed here: oseltamivir / peramivir / zanamivir. The reference is N1 (WT His275).
_DEFAULT_NA_REF = _REF_DATA_ROOT / "antiviral_ref" / "N1_NA_NC026434_cds.fna"

# HIV-1 engine: BLAST the drug class's gene CDS against the Stanford/HIVDB-sourced major-DRM catalog.
# Routing is drug -> gene (RT NNRTI/NRTI, PR PI, IN INSTI, CA CAI). References are HXB2 (K03455.1)
# in-frame CDS, consensus-B WT at every catalogued DRM position.
_HIV_REF_DIR = _REF_DATA_ROOT / "hiv_ref"

# SARS-CoV-2 Mpro (3CLpro) engine — the next free-independent-label viral cell (validated vs the Stanford
# CoV-RDB measured fold-change). Committed Wuhan-Hu-1 NC_045512.2 nsp5 reference, WT at every catalogued
# position (catalytic dyad H41/C145 + E166 verified).
_DEFAULT_SARSCOV2_MPRO_REF = _REF_DATA_ROOT / "sarscov2_ref" / "SARSCoV2_Mpro_NC045512_cds.fna"

_DEFAULT_HIV_RT_REF = _HIV_REF_DIR / "HIV1_RT_HXB2_cds.fna"
_DEFAULT_HIV_PR_REF = _HIV_REF_DIR / "HIV1_PR_HXB2_cds.fna"
_DEFAULT_HIV_IN_REF = _HIV_REF_DIR / "HIV1_IN_HXB2_cds.fna"
_DEFAULT_HIV_CA_REF = _HIV_REF_DIR / "HIV1_CA_HXB2_cds.fna"
77
+
78
+
79
def _parse_observed(observed: str) -> dict[str, set[str]]:
    """Parse a comma-separated ``GENE:SUBSTITUTION`` list into ``{gene: {substitution, ...}}``.

    'ERG11:Y132F,ERG11:K143R,FKS1:S639F' -> {'ERG11': {'Y132F','K143R'}, 'FKS1': {'S639F'}}.

    Whitespace around tokens, genes and substitutions is ignored, and empty tokens (stray commas) are
    skipped.

    Raises:
        ValueError: if a token lacks a gene or a substitution (e.g. ``ERG11``, ``ERG11:``, ``:Y132F``).
    """
    out: dict[str, set[str]] = {}
    for tok in (t.strip() for t in observed.split(",") if t.strip()):
        gene, _, sub = tok.partition(":")
        gene, sub = gene.strip(), sub.strip()
        # Both halves are mandatory: an empty gene would be stored under the key "" and could never
        # match a catalogued gene, so the typo would be silently ignored instead of reported.
        if not gene or not sub:
            raise ValueError(f"bad --observed token {tok!r}; expected GENE:SUBSTITUTION (e.g. ERG11:Y132F)")
        out.setdefault(gene, set()).add(sub)
    return out
88
+
89
+
90
def _target_site_record(call, sample_id: str, drug: str, organism: str, provenance: dict, *,
                        caller_name: str, source: str) -> dict:
    """Build the uniform ``amr-mechanism-call-v1`` record from a target-site call object.

    ``call`` must expose ``prediction``, ``determinants`` (strings, optionally ``SYMBOL:SUBCLASS``),
    ``undetectable_mechanisms``, ``rule`` and ``caveat``. The returned dict uses the same keys as the
    record assembled on the bacterial path, with ``organism`` merged into ``provenance``.
    """
    determinant_rows = []
    for raw in call.determinants:
        # Split on the first ':' only; a determinant without ':' gets an empty subclass.
        symbol, _, subclass = raw.partition(":")
        determinant_rows.append({"symbol": symbol, "subclass": subclass,
                                 "class": "TARGET_SITE_MUTATION", "pct_identity": None})

    # R -> "high", INDETERMINATE -> "n/a", any other prediction -> "screen".
    confidence = {"R": "high", "INDETERMINATE": "n/a"}.get(call.prediction, "screen")

    caller_info = {"name": caller_name, "rule": call.rule, "source": source,
                   "caller_is_independent_baseline": False}
    record = {
        "sample_id": sample_id,
        "drug": drug,
        "analysis_date": datetime.date.today().isoformat(),
        "schema": "amr-mechanism-call-v1",
        "prediction": call.prediction,
        "confidence": confidence,
        "n_determinants": len(call.determinants),
        "determinants": determinant_rows,
        "resistance_threshold": 1,
        "undetectable_mechanisms": list(call.undetectable_mechanisms),
        "caller": caller_info,
        "caveat": call.caveat,
        "validation": trust_block(drug, organism),
        "provenance": {**provenance, "organism": organism},
    }
    return record
110
+
111
+
112
def _fungal_main(args) -> int:
    """Fungal target-site decoder branch (routed when --drug is a fungal drug).

    Modes:
      * ``--observed GENE:SUB[,...]``: call from the supplied substitutions.
      * ``--genome-fasta``: lazily import ``scripts.fungal_erg11_caller`` and call it with the genome
        and ``--erg11-ref``.

    Returns the process exit code: 2 for bad/missing inputs, 3 when genome mode is unavailable
    (missing reference or caller), otherwise whatever `_emit_target_site` returns.
    """
    # the --organism default 'Escherichia' is the bacterial AMRFinder default; on the fungal path, relabel
    # to the validated fungal organism unless the user explicitly set one.
    if args.organism == "Escherichia":
        args.organism = "Candida_auris"

    if args.observed is not None:
        try:
            obs = _parse_observed(args.observed)
        except ValueError as e:
            # Malformed --observed is a usage error: report it cleanly instead of a traceback.
            print(f"ERROR: {e}", file=sys.stderr)
            return 2
        call = call_from_observed_substitutions(args.drug, obs)
        sample_id = args.sample_id or "observed"
        prov = {"mode": "observed-substitutions", "observed": args.observed}
    elif args.genome_fasta is not None:
        if not args.genome_fasta.exists():
            print(f"ERROR: genome FASTA not found: {args.genome_fasta}", file=sys.stderr)
            return 2
        # Same pre-flight check as the other target-site branches: fail with a clear message when
        # the reference CDS is absent rather than handing a non-existent path to the caller.
        if not Path(args.erg11_ref).exists():
            print(f"ERROR: genome mode for {args.drug} needs an ERG11 CDS reference at "
                  f"{args.erg11_ref}. Use --observed GENE:SUB for a wheel-only call.", file=sys.stderr)
            return 3
        sample_id = args.sample_id or args.genome_fasta.stem
        try:
            from scripts.fungal_erg11_caller import call_erg11  # repo-only; needs BLAST+
        except ImportError as e:
            print(f"ERROR: fungal genome mode needs scripts/fungal_erg11_caller + BLAST+ ({e}). "
                  "Use --observed with known substitutions for a wheel-only call.", file=sys.stderr)
            return 3
        call = call_erg11(str(args.genome_fasta), str(args.erg11_ref), args.drug)
        prov = {"mode": "blast-erg11", "erg11_ref": str(args.erg11_ref)}
    else:
        print("ERROR: fungal drug needs --genome-fasta OR --observed GENE:SUB[,...]", file=sys.stderr)
        return 2

    rec = _target_site_record(call, sample_id, args.drug, args.organism, prov,
                              caller_name="dna_decode-fungal-target-mutation-v0",
                              source="hand-curated fungal catalog (no AMRFinder-for-fungi)")
    return _emit_target_site(rec, call, sample_id, args)
144
+
145
+
146
def _emit_target_site(rec: dict, call, sample_id: str, args) -> int:
    """Write/print a target-site record and return the exit code (shared by the target-site branches).

    When ``args.out`` is set the record is written there as indented JSON. With ``args.json_only`` the
    JSON is printed to stdout; otherwise a human-readable summary is printed. Returns 4 for an
    INDETERMINATE prediction and 0 for anything else.
    """
    rendered = json.dumps(rec, indent=2)
    if args.out:
        Path(args.out).write_text(rendered, encoding="utf-8")

    exit_code = 4 if call.prediction == "INDETERMINATE" else 0

    if args.json_only:
        print(rendered)
        return exit_code

    print(f"sample: {sample_id} drug: {args.drug} organism: {args.organism}")
    print(f"CALL: {call.prediction} [{rec['confidence']} | {len(call.determinants)} determinant(s)]")
    if call.determinants:
        for determinant in call.determinants:
            print(f" driven by: {determinant}")
    else:
        print(" driven by: (no catalogued target-site resistance mutation)")
    if call.undetectable_mechanisms:
        blind_spots = ", ".join(call.undetectable_mechanisms)
        print(f" blind spots (an S call can't rule out): {blind_spots}")
    print(f" {call.caveat}")
    print(f" {one_line(rec['validation'])}")
    if args.out:
        print(f"\n[provenance JSON -> {args.out}]")
    return exit_code
166
+
167
+
168
def _antimalarial_main(args) -> int:
    """Antimalarial target-site decoder branch (routed when --drug is an antimalarial drug).

    Modes:
      * ``--observed GENE:SUB[,...]``: call from the supplied substitutions.
      * ``--genome-fasta``: pick the CDS reference for the drug's target gene (K13 or pfcrt), lazily
        import ``scripts.pf_kelch13_caller`` and call it.

    Returns the process exit code: 2 for bad/missing inputs, 3 when genome mode is unavailable
    (missing reference or caller), otherwise whatever `_emit_target_site` returns.
    """
    if args.organism == "Escherichia":  # relabel the bacterial default on this path
        args.organism = "Plasmodium_falciparum"

    if args.observed is not None:
        try:
            obs = _parse_observed(args.observed)
        except ValueError as e:
            # Malformed --observed is a usage error: report it cleanly instead of a traceback.
            print(f"ERROR: {e}", file=sys.stderr)
            return 2
        call = antimalarial_call_from_observed(args.drug, obs)
        sample_id = args.sample_id or "observed"
        prov = {"mode": "observed-substitutions", "observed": args.observed}
    elif args.genome_fasta is not None:
        if not args.genome_fasta.exists():
            print(f"ERROR: genome FASTA not found: {args.genome_fasta}", file=sys.stderr)
            return 2
        # Genome mode uses the BLAST codon-mapper (intron-aware / multi-HSP as of 2026-06-10), so both the
        # intronless K13 and the 13-exon pfcrt work. Pick the committed CDS reference by target gene.
        gene = gene_for_drug(args.drug)
        ref_by_gene = {"K13": args.k13_ref, "pfcrt": args.pfcrt_ref}
        ref = ref_by_gene.get(gene)
        if ref is None or not Path(ref).exists():
            where = f" at {ref}" if ref else " (none configured)"
            print(f"ERROR: genome mode for {args.drug} (gene {gene}) needs a committed {gene} CDS reference"
                  f"{where}. Use --observed {gene}:SUB for a "
                  f"wheel-only call.", file=sys.stderr)
            return 3
        sample_id = args.sample_id or args.genome_fasta.stem
        try:
            from scripts.pf_kelch13_caller import call_kelch13  # repo-only; needs BLAST+
        except ImportError as e:
            print(f"ERROR: antimalarial genome mode needs scripts/pf_kelch13_caller + BLAST+ ({e}). "
                  "Use --observed GENE:SUB for a wheel-only call.", file=sys.stderr)
            return 3
        call = call_kelch13(str(args.genome_fasta), str(ref), args.drug, gene=gene)
        prov = {"mode": f"blast-{gene.lower()}", f"{gene.lower()}_ref": str(ref)}
    else:
        print("ERROR: antimalarial drug needs --genome-fasta OR --observed K13:SUB[,...]", file=sys.stderr)
        return 2

    # NOTE(review): caller name/source are K13-specific even when the drug's gene is pfcrt — confirm
    # whether downstream consumers rely on these exact strings before making them gene-aware.
    rec = _target_site_record(call, sample_id, args.drug, args.organism, prov,
                              caller_name="dna_decode-antimalarial-k13-target-mutation-v0",
                              source="hand-curated WHO-validated Pfkelch13 catalog (no AMRFinder-for-Plasmodium)")
    return _emit_target_site(rec, call, sample_id, args)
206
+
207
+
208
def _hiv_main(args) -> int:
    """HIV-1 target-site decoder branch (5 classes / 4 genes; validated vs HIVDB PhenoSense).

    Routed by drug -> gene: NNRTI/NRTI->RT, PI->PR (protease), INSTI->IN (integrase), CAI->CA (capsid).
    Wheel-only `--observed GENE:SUB[,...]` (e.g. RT:K103N, PR:V82A, IN:Q148H, CA:M66I), OR genome-FASTA mode
    (`--genome-fasta X.fna`) which BLASTs the committed HXB2 CDS reference for that gene vs the assembly and
    codon-maps the substitutions (scripts/hiv_rt_caller; needs BLAST+), mirroring the influenza NA path.

    Returns the process exit code: 2 for bad/missing inputs, 3 when genome mode is unavailable
    (missing reference or caller), otherwise whatever `_emit_target_site` returns.
    """
    if args.organism == "Escherichia":  # relabel the bacterial default on this path
        args.organism = "HIV-1"
    gene = gene_for_hiv_drug(args.drug)  # RT / PR / IN / CA
    ref_by_gene = {"RT": args.hiv_rt_ref, "PR": args.hiv_pr_ref,
                   "IN": args.hiv_in_ref, "CA": args.hiv_ca_ref}

    if args.observed is not None:
        try:
            obs = _parse_observed(args.observed)
        except ValueError as e:
            # Malformed --observed is a usage error: report it cleanly instead of a traceback.
            print(f"ERROR: {e}", file=sys.stderr)
            return 2
        call = call_hiv_observed(args.drug, obs)
        sample_id = args.sample_id or "observed"
        prov = {"mode": "observed-substitutions", "observed": args.observed, "gene": gene}
    elif args.genome_fasta is not None:
        if not args.genome_fasta.exists():
            print(f"ERROR: genome FASTA not found: {args.genome_fasta}", file=sys.stderr)
            return 2
        ref = ref_by_gene.get(gene)
        if ref is None or not Path(ref).exists():
            print(f"ERROR: genome mode for {args.drug} (gene {gene}) needs a committed HIV-1 {gene} CDS "
                  f"reference at {ref}. Use --observed {gene}:SUB for a wheel-only call.", file=sys.stderr)
            return 3
        sample_id = args.sample_id or args.genome_fasta.stem
        try:
            from scripts.hiv_rt_caller import call_hiv_target  # repo-only; needs BLAST+
        except ImportError as e:
            print(f"ERROR: HIV genome mode needs scripts/hiv_rt_caller + BLAST+ ({e}). "
                  f"Use --observed {gene}:SUB for a wheel-only call.", file=sys.stderr)
            return 3
        call = call_hiv_target(str(args.genome_fasta), str(ref), args.drug, gene)
        prov = {"mode": "blast-hiv-target", "gene": gene, "hiv_ref": str(ref)}
    else:
        print(f"ERROR: HIV drug needs --observed {gene}:SUB[,...] (e.g. {gene}:K103N) OR --genome-fasta X.fna.",
              file=sys.stderr)
        return 2

    # The caller name is derived from the rule that produced the call (underscores -> dashes).
    rec = _target_site_record(call, sample_id, args.drug, args.organism, prov,
                              caller_name="dna_decode-" + call.rule.replace("_", "-"),
                              source="HIVDB-PhenoSense-validated (in-distribution; see hiv_decoder_report_card)")
    return _emit_target_site(rec, call, sample_id, args)
250
+
251
+
252
def _antiviral_main(args) -> int:
    """Antiviral NA target-site decoder branch (routed when --drug is an NA-inhibitor drug). 4th kingdom.

    Modes:
      * ``--observed NA:SUB[,...]``: call from the supplied substitutions.
      * ``--genome-fasta``: lazily import ``scripts.flu_na_caller`` and call it with the genome and
        ``--na-ref``.

    Returns the process exit code: 2 for bad/missing inputs, 3 when genome mode is unavailable
    (missing reference or caller), otherwise whatever `_emit_target_site` returns.
    """
    if args.organism == "Escherichia":  # relabel the bacterial default on this path
        args.organism = "Influenza_A_virus"

    if args.observed is not None:
        try:
            obs = _parse_observed(args.observed)
        except ValueError as e:
            # Malformed --observed is a usage error: report it cleanly instead of a traceback.
            print(f"ERROR: {e}", file=sys.stderr)
            return 2
        call = antiviral_call_from_observed(args.drug, obs)
        sample_id = args.sample_id or "observed"
        prov = {"mode": "observed-substitutions", "observed": args.observed}
    elif args.genome_fasta is not None:
        if not args.genome_fasta.exists():
            print(f"ERROR: genome FASTA not found: {args.genome_fasta}", file=sys.stderr)
            return 2
        if not Path(args.na_ref).exists():
            print(f"ERROR: genome mode for {args.drug} (gene NA) needs a committed N1 NA CDS reference at "
                  f"{args.na_ref}. Use --observed NA:SUB for a wheel-only call.", file=sys.stderr)
            return 3
        sample_id = args.sample_id or args.genome_fasta.stem
        try:
            from scripts.flu_na_caller import call_neuraminidase  # repo-only; needs BLAST+
        except ImportError as e:
            print(f"ERROR: antiviral genome mode needs scripts/flu_na_caller + BLAST+ ({e}). "
                  "Use --observed NA:SUB for a wheel-only call.", file=sys.stderr)
            return 3
        call = call_neuraminidase(str(args.genome_fasta), str(args.na_ref), args.drug, gene="NA")
        prov = {"mode": "blast-na", "na_ref": str(args.na_ref)}
    else:
        print("ERROR: antiviral drug needs --genome-fasta OR --observed NA:SUB[,...]", file=sys.stderr)
        return 2

    rec = _target_site_record(call, sample_id, args.drug, args.organism, prov,
                              caller_name="dna_decode-antiviral-na-target-mutation-v0",
                              source="hand-curated CDC/WHO-recognized influenza NA marker catalog (no AMRFinder-for-influenza)")
    return _emit_target_site(rec, call, sample_id, args)
284
+
285
+
286
def _sarscov2_main(args) -> int:
    """SARS-CoV-2 Mpro target-site decoder branch (nirmatrelvir/ensitrelvir/lufotrelvir; validated vs CoV-RDB).

    Wheel-only `--observed Mpro:E166V[,...]`, OR genome-FASTA mode (`--genome-fasta X.fna`) which BLASTs the
    committed Wuhan-Hu-1 Mpro CDS reference vs the assembly and codon-maps the substitutions
    (scripts/sarscov2_caller; needs BLAST+), mirroring the HIV / influenza-NA paths.

    Returns the process exit code: 2 for bad/missing inputs, 3 when genome mode is unavailable
    (missing reference or caller), otherwise whatever `_emit_target_site` returns.
    """
    if args.organism == "Escherichia":  # relabel the bacterial default on this path
        args.organism = "SARS-CoV-2"
    gene = gene_for_sarscov2_drug(args.drug)  # Mpro

    if args.observed is not None:
        try:
            obs = _parse_observed(args.observed)
        except ValueError as e:
            # Malformed --observed is a usage error: report it cleanly instead of a traceback.
            print(f"ERROR: {e}", file=sys.stderr)
            return 2
        call = call_sarscov2_observed(args.drug, obs)
        sample_id = args.sample_id or "observed"
        prov = {"mode": "observed-substitutions", "observed": args.observed}
    elif args.genome_fasta is not None:
        if not args.genome_fasta.exists():
            print(f"ERROR: genome FASTA not found: {args.genome_fasta}", file=sys.stderr)
            return 2
        ref = args.sarscov2_mpro_ref
        if not Path(ref).exists():
            print(f"ERROR: genome mode for {args.drug} (gene {gene}) needs a committed SARS-CoV-2 Mpro CDS "
                  f"reference at {ref}. Use --observed {gene}:SUB for a wheel-only call.", file=sys.stderr)
            return 3
        sample_id = args.sample_id or args.genome_fasta.stem
        try:
            from scripts.sarscov2_caller import call_sarscov2_target  # repo-only; needs BLAST+
        except ImportError as e:
            print(f"ERROR: SARS-CoV-2 genome mode needs scripts/sarscov2_caller + BLAST+ ({e}). "
                  f"Use --observed {gene}:SUB for a wheel-only call.", file=sys.stderr)
            return 3
        call = call_sarscov2_target(str(args.genome_fasta), str(ref), args.drug, gene)
        prov = {"mode": "blast-sarscov2-mpro", "gene": gene, "sarscov2_mpro_ref": str(ref)}
    else:
        print(f"ERROR: SARS-CoV-2 drug needs --observed {gene}:SUB[,...] (e.g. {gene}:E166V) OR "
              "--genome-fasta X.fna.", file=sys.stderr)
        return 2

    rec = _target_site_record(call, sample_id, args.drug, args.organism, prov,
                              caller_name="dna_decode-sarscov2-mpro-target-mutation-v0",
                              source="Stanford CoV-RDB selection-derived Mpro catalog (validate vs measured fold-change)")
    return _emit_target_site(rec, call, sample_id, args)
325
+
326
+
327
def _run_amrfinder_for_genome(fasta: Path, sample_id: str, out_root: Path, db: Path,
                              organism: str = "Escherichia") -> Path:
    """Run AMRFinder on a genome via the repo-only runner and return the run directory.

    The runner lives in `scripts/drug_mechanism_audit` (not shipped in the wheel), so it is imported
    lazily here. `organism` is forwarded to the runner and selects AMRFinder's `-O` organism-specific
    point-mutation detection (gyrA/parC QRDR calls are organism-specific, so a Klebsiella genome MUST
    use 'Klebsiella_pneumoniae' or its QRDR is missed).

    Raises:
        RuntimeError: if the repo runner cannot be imported (e.g. a wheel-only install).
    """
    try:
        import scripts.drug_mechanism_audit as audit_module  # repo-only; needs Docker + DB
        from scripts.drug_mechanism_audit import _run_amrfinder as run_amrfinder
    except ImportError as e:
        message = (
            "genome mode needs the repo's AMRFinder runner (scripts/drug_mechanism_audit) + Docker + a "
            "Docker-readable DB at --amrfinder-db; not available in a wheel install. Use --amrfinder-run "
            f"with a precomputed run instead. ({e})"
        )
        raise RuntimeError(message) from e

    # One run directory per sample; fall back to the FASTA stem when no sample id was given.
    run_name = sample_id or fasta.stem
    run_dir = out_root / run_name
    run_dir.mkdir(parents=True, exist_ok=True)

    # Point the runner module at the requested DB before invoking it.
    if db:
        audit_module.AMRFINDER_DB = str(db)
    run_amrfinder(fasta, run_dir, organism=organism)
    return run_dir
348
+
349
+
350
def _build_parser() -> argparse.ArgumentParser:
    """Build the `dna-amr` argument parser; --drug choices are the union of every engine's drugs."""
    drug_choices = sorted(set(supported_drugs()) | set(supported_fungal_drugs())
                          | set(supported_antimalarial_drugs()) | set(supported_antiviral_drugs())
                          | set(all_supported_hiv_drugs()) | set(all_supported_sarscov2_drugs()))
    ap = argparse.ArgumentParser(prog="dna-amr",
                                 description="Deterministic AMR R/S decoder from AMRFinder curated determinants")
    ap.add_argument("--drug", required=True, choices=drug_choices, metavar="DRUG",
                    help="bacterial (AMRFinder engine), fungal (BLAST-ERG11 engine), "
                         "antimalarial (BLAST-Pfkelch13 engine), antiviral (BLAST-influenza-NA engine), "
                         "HIV-1 (BLAST RT/PR/IN/CA engine), or SARS-CoV-2 (BLAST-Mpro engine) drug")

    # Exactly one input source must be given.
    src = ap.add_mutually_exclusive_group(required=True)
    src.add_argument("--amrfinder-run", type=Path, help="[bacterial] existing AMRFinder run dir (main.tsv)")
    src.add_argument("--genome-fasta", type=Path,
                     help="genome FASTA (bacterial: AMRFinder via Docker; target-site drugs: BLAST the "
                          "gene reference via the repo's scripts/ callers)")
    src.add_argument("--observed", default=None,
                     help="[target-site drugs: fungal/antimalarial/antiviral/HIV/SARS-CoV-2] known "
                          "substitutions GENE:SUB[,...] (e.g. ERG11:Y132F) — pure, no BLAST")

    ap.add_argument("--erg11-ref", type=Path, default=_DEFAULT_ERG11_REF,
                    help="[fungal] in-frame ERG11 CDS reference FASTA (default: committed C. auris allele)")
    ap.add_argument("--k13-ref", type=Path, default=_DEFAULT_K13_REF,
                    help="[antimalarial] in-frame Pfkelch13 CDS reference FASTA (default: committed 3D7 allele)")
    ap.add_argument("--pfcrt-ref", type=Path, default=_DEFAULT_PFCRT_REF,
                    help="[antimalarial] in-frame pfcrt CDS reference FASTA (default: committed 3D7 allele; "
                         "genome mode is intron-aware so a genomic pfcrt allele works)")
    ap.add_argument("--na-ref", type=Path, default=_DEFAULT_NA_REF,
                    help="[antiviral] in-frame influenza N1 NA CDS reference FASTA (default: committed "
                         "NC_026434.1 A/California/07/2009 allele, WT His275)")
    ap.add_argument("--hiv-rt-ref", type=Path, default=_DEFAULT_HIV_RT_REF,
                    help="[HIV] in-frame HIV-1 RT CDS reference FASTA (default: committed HXB2 "
                         "K03455.1:2550-4229 allele, consensus-B WT at every DRM position)")
    ap.add_argument("--hiv-pr-ref", type=Path, default=_DEFAULT_HIV_PR_REF,
                    help="[HIV] in-frame protease CDS reference FASTA (default: committed HXB2 K03455.1:2253-2549)")
    ap.add_argument("--hiv-in-ref", type=Path, default=_DEFAULT_HIV_IN_REF,
                    help="[HIV] in-frame integrase CDS reference FASTA (default: committed HXB2 K03455.1:4230-5093)")
    ap.add_argument("--hiv-ca-ref", type=Path, default=_DEFAULT_HIV_CA_REF,
                    help="[HIV] in-frame capsid CDS reference FASTA (default: committed HXB2 K03455.1:1186-1878)")
    ap.add_argument("--sarscov2-mpro-ref", type=Path, default=_DEFAULT_SARSCOV2_MPRO_REF,
                    help="[SARS-CoV-2] in-frame Mpro (3CLpro/nsp5) CDS reference FASTA (default: committed "
                         "Wuhan-Hu-1 NC_045512.2:10055-10972 allele, WT at every catalogued position)")
    ap.add_argument("--sample-id", default=None)
    ap.add_argument("--organism", default="Escherichia",
                    help="AMRFinder -O organism for genome mode (organism-specific QRDR point-mutation "
                         "detection). E.g. Escherichia (default), Klebsiella_pneumoniae. Validated "
                         "cross-organism for E. coli + K. pneumoniae.")
    ap.add_argument("--amrfinder-db", type=Path, default=Path("data/amrfinder_db"),
                    help="AMRFinder DB root (Docker-readable; default data/amrfinder_db)")
    ap.add_argument("--out-root", type=Path, default=Path("data/amrfinder_runs"))
    ap.add_argument("--resistance-threshold", type=int, default=None,
                    help="min #curated determinants for an R call. Default: per-drug validated config "
                         "(cipro=2 QRDR; cef=1 + extended-spectrum refinement; tet/gent=1). Pass an int to override.")
    ap.add_argument("--out", type=Path, default=None, help="write provenance JSON here")
    ap.add_argument("--json-only", action="store_true")
    return ap


def main(argv=None) -> int:
    """Entry point for the `dna-amr` console script.

    Routes by --drug: a drug claimed by one of the target-site engines (fungal, antimalarial, antiviral,
    HIV, SARS-CoV-2 — checked in that order) is handed to that engine's branch; any other drug takes the
    bacterial AMRFinder path (--amrfinder-run or --genome-fasta).

    Args:
        argv: argument list for argparse; None lets argparse read sys.argv.

    Returns:
        Process exit code: 0 for a call, 2 for bad/missing inputs, 3 when the AMRFinder run fails,
        4 for an INDETERMINATE prediction, 5 for ABSTAIN (target-site branches return their own codes).
    """
    args = _build_parser().parse_args(argv)

    # Ordered route table: (label used in messages, supported-drug lookup, branch entry point).
    target_site_routes = (
        ("fungal", supported_fungal_drugs, _fungal_main),
        ("antimalarial", supported_antimalarial_drugs, _antimalarial_main),
        ("antiviral", supported_antiviral_drugs, _antiviral_main),
        ("HIV", all_supported_hiv_drugs, _hiv_main),
        ("SARS-CoV-2", all_supported_sarscov2_drugs, _sarscov2_main),
    )
    for label, engine_drugs, branch in target_site_routes:
        if args.drug not in engine_drugs():
            continue
        if args.amrfinder_run is not None:
            # Every target-site branch accepts both --genome-fasta and --observed.
            print(f"ERROR: --amrfinder-run is bacterial-only; {label} drugs use --genome-fasta or --observed",
                  file=sys.stderr)
            return 2
        return branch(args)

    # Bacterial (AMRFinder) path from here on.
    if args.observed is not None:
        print("ERROR: --observed is for target-site drugs only (fungal/antimalarial/antiviral/HIV/SARS-CoV-2); "
              "bacterial drugs use --amrfinder-run or --genome-fasta", file=sys.stderr)
        return 2

    if args.amrfinder_run is not None:
        run_dir = args.amrfinder_run
        sample_id = args.sample_id or run_dir.name
    else:
        if not args.genome_fasta.exists():
            print(f"ERROR: genome FASTA not found: {args.genome_fasta}", file=sys.stderr)
            return 2
        sample_id = args.sample_id or args.genome_fasta.stem
        try:
            run_dir = _run_amrfinder_for_genome(args.genome_fasta, sample_id, args.out_root,
                                                args.amrfinder_db, organism=args.organism)
        except Exception as e:  # CLI boundary: report any runner failure and exit non-zero
            print(f"ERROR: AMRFinder run failed ({type(e).__name__}: {e}).", file=sys.stderr)
            return 3

    # Forward --organism so a calibrated registry entry (opt-in) is used when the organism is known. The
    # default 'Escherichia' has no registry entry -> DRUG_RULE default (unchanged); an explicit
    # Campylobacter/Klebsiella/Salmonella resolves its independent-cohort-validated config, and an
    # EXPRESSION_FLOOR organism (Acinetobacter/Pseudomonas carbapenem) returns prediction 'ABSTAIN'.
    # Pass the genome FASTA (genome mode only; None for --amrfinder-run) so the EXPRESSION_FLOOR
    # expression-context override can read the assembly when its registry block is enabled (opt-in).
    call = call_resistance(run_dir / "main.tsv", args.drug, args.resistance_threshold,
                           organism=args.organism, genome_fasta=args.genome_fasta)
    rec = {
        "sample_id": sample_id, "drug": args.drug,
        "analysis_date": datetime.date.today().isoformat(), "schema": "amr-mechanism-call-v1",
        "prediction": call["prediction"], "confidence": call["confidence"],
        "n_determinants": call["n_determinants"], "determinants": call["determinants"],
        "resistance_threshold": call.get("resistance_threshold"),
        "undetectable_mechanisms": call.get("undetectable_mechanisms", []),
        "caller": {"name": "dna_decode-amr-rules-v1", "rule": call["rule"],
                   "source": "AMRFinderPlus curated main.tsv", "caller_is_independent_baseline": False},
        "caveat": call["caveat"],
        "validation": trust_block(args.drug, args.organism),
        "provenance": {"amrfinder_run": str(run_dir), "amrfinder_image": AMRFINDER_IMAGE_PINNED,
                       "amrfinder_organism": args.organism},
    }
    rendered = json.dumps(rec, indent=2)
    if args.out:
        Path(args.out).write_text(rendered, encoding="utf-8")
    if args.json_only:
        print(rendered)
    else:
        print(f"sample: {sample_id} drug: {args.drug} organism: {args.organism}")
        if call["prediction"] == "ABSTAIN":
            print("CALL: ABSTAIN [gene-presence cannot decode this organism×drug]")
            print(f" {call['caveat']}")
        else:
            nd = call["n_determinants"]
            print(f"CALL: {call['prediction']} [{call['confidence']} | {nd} determinant(s)]")
            for x in call["determinants"]:
                print(f" driven by: {x['symbol']} ({x['subclass'] or x['class']}, {x['pct_identity']}% id)")
            if not call["determinants"]:
                print(" driven by: (no curated resistance determinants for this drug)")
            print(f" {call['caveat']}")
        print(f" {one_line(rec['validation'])}")
        if args.out:
            print(f"\n[provenance JSON -> {args.out}]")
    # Distinct non-zero codes let scripts tell "no usable call" apart from a normal R/S call.
    return {"INDETERMINATE": 4, "ABSTAIN": 5}.get(call["prediction"], 0)


if __name__ == "__main__":
    raise SystemExit(main())