aadr-subset 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aadr_subset/__init__.py +5 -0
- aadr_subset/__main__.py +6 -0
- aadr_subset/cli.py +359 -0
- aadr_subset/commands/__init__.py +1 -0
- aadr_subset/commands/inspect_cmd.py +129 -0
- aadr_subset/commands/report_cmd.py +168 -0
- aadr_subset/commands/select_cmd.py +366 -0
- aadr_subset/commands/template_cmd.py +58 -0
- aadr_subset/commands/validate_cmd.py +40 -0
- aadr_subset/engine.py +568 -0
- aadr_subset/errors.py +99 -0
- aadr_subset/formats.py +301 -0
- aadr_subset/py.typed +0 -0
- aadr_subset/reporting.py +423 -0
- aadr_subset/schemas/selector.schema.json +168 -0
- aadr_subset/selector.py +819 -0
- aadr_subset/templates/bronze_age_europe.yaml +52 -0
- aadr_subset/templates/iron_age_britain.yaml +33 -0
- aadr_subset/templates/modern_european.yaml +55 -0
- aadr_subset/templates/neolithic_anatolia.yaml +44 -0
- aadr_subset/templates/viking_period_scandinavian.yaml +54 -0
- aadr_subset/templates/wsh_steppe_pool.yaml +57 -0
- aadr_subset/templates.py +76 -0
- aadr_subset/types.py +166 -0
- aadr_subset-0.1.0.dist-info/METADATA +291 -0
- aadr_subset-0.1.0.dist-info/RECORD +30 -0
- aadr_subset-0.1.0.dist-info/WHEEL +5 -0
- aadr_subset-0.1.0.dist-info/entry_points.txt +2 -0
- aadr_subset-0.1.0.dist-info/licenses/LICENSE +21 -0
- aadr_subset-0.1.0.dist-info/top_level.txt +1 -0
aadr_subset/__init__.py
ADDED
aadr_subset/__main__.py
ADDED
aadr_subset/cli.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
"""click entry point + subcommand routing.
|
|
2
|
+
|
|
3
|
+
Day 1: `validate` subcommand wired end-to-end. `select` / `inspect` /
|
|
4
|
+
`report` / `template` land on Day 3+ per HLD project plan.
|
|
5
|
+
|
|
6
|
+
Top-level exception handler maps AadrSubsetError subclasses → exit codes
|
|
7
|
+
per LLD §3.8 pin. standalone_mode=False prevents click from intercepting
|
|
8
|
+
exceptions before our handler runs.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import sys
|
|
14
|
+
|
|
15
|
+
import click
|
|
16
|
+
|
|
17
|
+
from . import __version__
|
|
18
|
+
from .commands.inspect_cmd import run_inspect
|
|
19
|
+
from .commands.report_cmd import run_report
|
|
20
|
+
from .commands.select_cmd import run_select
|
|
21
|
+
from .commands.template_cmd import run_template
|
|
22
|
+
from .commands.validate_cmd import run_validate
|
|
23
|
+
from .errors import EXIT_UNEXPECTED, AadrSubsetError, UsageError
|
|
24
|
+
from .selector import format_validation_errors
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _version_message() -> str:
    """Return the two-line text printed by ``--version``.

    The first line reports this package's ``__version__``. The second line
    reports ``aadr_resolve.__version__`` when the module can be imported,
    ``<unknown>`` when it imports but exposes no ``__version__`` attribute,
    and ``<not installed>`` when the import raises ``ImportError``.
    """
    try:
        # Imported lazily so a missing aadr-resolve degrades to a label
        # instead of breaking this function.
        import aadr_resolve

        resolve_version = getattr(aadr_resolve, "__version__", "<unknown>")
    except ImportError:
        resolve_version = "<not installed>"
    return f"aadr-subset {__version__}\naadr-resolve {resolve_version}"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# NOTE: `message=_version_message()` is evaluated once, when this module is
# imported, not each time `--version` is requested.
@click.group(invoke_without_command=False)
@click.version_option(
    version=__version__,
    prog_name="aadr-subset",
    message=_version_message(),
)
@click.option(
    "--quiet",
    is_flag=True,
    help="Suppress stdout summary on success; warnings to stderr; errors to stderr.",
)
@click.pass_context
def cli(ctx: click.Context, quiet: bool) -> None:
    """aadr-subset: declarative AADR panel subsetting from YAML selectors."""
    # The docstring above is click's help text for the group; keep it terse.
    # Guarantee ctx.obj is a dict before storing shared state on it.
    ctx.ensure_object(dict)
    # Every subcommand reads the global flag back via ctx.obj["quiet"].
    ctx.obj["quiet"] = quiet
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@cli.command("validate")
@click.argument("selector_path", type=click.STRING)
@click.pass_context
def validate_command(ctx: click.Context, selector_path: str) -> None:
    """JSON-schema + semantic-constraint check on a selector YAML. No .anno
    loaded. Useful in CI as a fast gate before any .anno is available."""
    # Thin click wrapper: the orchestrator returns an int exit code, which is
    # handed straight to sys.exit so it becomes the process status.
    quiet = ctx.obj["quiet"]
    sys.exit(run_validate(selector_path=selector_path, quiet=quiet))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@cli.command("select")
@click.argument("selector_path", type=click.STRING)
@click.argument("anno_path", type=click.Path(exists=True, dir_okay=False))
@click.option(
    "-o",
    "--out",
    type=click.Path(dir_okay=False),
    default=None,
    help="Output file path (default: stdout).",
)
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["ids", "tsv", "json"]),
    default="ids",
    show_default=True,
    help=(
        "Output format. `ids`=newline-delimited GeneticIDs; `tsv`=TSV with "
        "genetic_id/individual_id/group_id/date_calbp/coverage/matched_criteria; "
        "`json`=structured SubsetResult."
    ),
)
@click.option(
    "--schema-override",
    type=click.Choice(["A", "B", "C", "D", "E"]),
    default=None,
    help=(
        "Force AnnoFrame schema class (A-E). Use when .anno is renamed but "
        "matches an existing class signature."
    ),
)
@click.option(
    "--allow-empty",
    is_flag=True,
    help="Downgrade zero-match exit 1 to exit 0 (write an empty output file).",
)
@click.option(
    "--allow-empty-source",
    is_flag=True,
    help="Allow individual_ids_source to be empty (exits 0 instead of 1).",
)
@click.option(
    "--include-matched-criteria",
    is_flag=True,
    help="Include per-sample matched_criteria in JSON output (off by default).",
)
@click.option(
    "--source-anno",
    type=click.Path(exists=True, dir_okay=False),
    default=None,
    help="Source .anno for cross-version IID lift. Required when selector sets resolve_to_version.",
)
@click.option(
    "--mid-bridge",
    type=click.Path(exists=True, dir_okay=False),
    default=None,
    help=(
        "Optional MID-rename bridge TSV (4 cols: v_old_label, mid_old, "
        "v_new_label, mid_new). Layers on top of aadr-resolve's GID-stable "
        "auto-detection."
    ),
)
@click.option(
    "--strict-resolve",
    is_flag=True,
    help=(
        "On cross-version resolution, fail exit 1 if any source Individual_ID "
        "fails to resolve. Default: warn to stderr and proceed with the resolvable "
        "subset."
    ),
)
@click.option(
    "--coverage-column",
    default=None,
    metavar="NAME",
    help=(
        "Canonical coverage field for min_coverage filters. Routed through "
        "AnnoFrame.coverage_via(NAME). Useful for v62.0 (class D, no native "
        "coverage column) — pass e.g. 'snps_hit_1240k' for a derived proxy. "
        "Selector's coverage_column: takes precedence when both are set."
    ),
)
@click.option(
    "--coverage-derive",
    default=None,
    metavar="NAME",
    help=(
        "Alias for --coverage-column (only one of the two may be set). "
        "Mnemonic for the v62-class-D derived-proxy use case."
    ),
)
@click.pass_context
def select_command(
    ctx: click.Context,
    selector_path: str,
    anno_path: str,
    out: str | None,
    fmt: str,
    schema_override: str | None,
    allow_empty: bool,
    allow_empty_source: bool,
    include_matched_criteria: bool,
    source_anno: str | None,
    mid_bridge: str | None,
    strict_resolve: bool,
    coverage_column: str | None,
    coverage_derive: str | None,
) -> None:
    """Materialize a selector against a target AADR .anno; emit sample IDs / TSV / JSON.

    Full HLD select surface: populations + individual_ids + date +
    modern_only + min_coverage + any:/exclude: combinators against a
    single .anno; ids / tsv / json output; cross-version IID lift via
    --source-anno + selector.resolve_to_version.
    """
    # Pure pass-through: every parsed option is forwarded by keyword to the
    # orchestrator, whose int return value becomes the process exit status.
    # No option is validated here (e.g. the --coverage-column /
    # --coverage-derive exclusivity stated in the help text).
    sys.exit(
        run_select(
            selector_path=selector_path,
            anno_path=anno_path,
            out=out,
            fmt=fmt,
            schema_override=schema_override,
            allow_empty=allow_empty,
            allow_empty_source=allow_empty_source,
            include_matched_criteria=include_matched_criteria,
            source_anno=source_anno,
            mid_bridge=mid_bridge,
            strict_resolve=strict_resolve,
            coverage_column=coverage_column,
            coverage_derive=coverage_derive,
            quiet=ctx.obj["quiet"],
        )
    )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@cli.command("inspect")
@click.argument("selector_path", type=click.STRING)
@click.argument("anno_path", type=click.Path(exists=True, dir_okay=False))
@click.option(
    "--schema-override",
    type=click.Choice(["A", "B", "C", "D", "E"]),
    default=None,
    help="Force AnnoFrame schema class (A-E).",
)
@click.option(
    "--allow-empty-source",
    is_flag=True,
    help="Allow individual_ids_source to be empty.",
)
@click.option(
    "--strict-resolve",
    is_flag=True,
    help=(
        "Show STRICT-RESOLVE diagnostic in the summary when missing-after-"
        "resolve IDs are present. Per HLD §Inspect mode, --strict-resolve is "
        "accepted for diagnostic display but never changes inspect's exit code "
        "(inspect always exits 0)."
    ),
)
@click.pass_context
def inspect_command(
    ctx: click.Context,
    selector_path: str,
    anno_path: str,
    schema_override: str | None,
    allow_empty_source: bool,
    strict_resolve: bool,
) -> None:
    """Diagnostic dry-run: shows what a selector matches against a target
    .anno without writing any output. Always exits 0 (informational)."""
    # Forward the parsed options unchanged; the orchestrator's return value
    # is used as the process exit status.
    sys.exit(
        run_inspect(
            selector_path=selector_path,
            anno_path=anno_path,
            schema_override=schema_override,
            allow_empty_source=allow_empty_source,
            strict_resolve=strict_resolve,
            quiet=ctx.obj["quiet"],
        )
    )
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
@cli.command("report")
@click.argument("selector_path", type=click.STRING)
@click.argument("anno_path", type=click.Path(exists=True, dir_okay=False))
@click.option(
    "-o",
    "--out",
    type=click.Path(dir_okay=False),
    default=None,
    help="Output file path (default: stdout).",
)
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["tsv", "json"]),
    default="tsv",
    show_default=True,
    help="Report format. `tsv`=per-group columns; `json`=structured object.",
)
@click.option(
    "--schema-override",
    type=click.Choice(["A", "B", "C", "D", "E"]),
    default=None,
    help="Force AnnoFrame schema class (A-E).",
)
@click.option(
    "--allow-empty",
    is_flag=True,
    help="Downgrade zero-match exit 1 to exit 0 (write a header-only report).",
)
@click.option(
    "--allow-empty-source",
    is_flag=True,
    help="Allow individual_ids_source to be empty.",
)
@click.option(
    "--include-empty-groups",
    is_flag=True,
    help=(
        "Include rows for .anno groups with zero matches (n_matched=0). "
        "Useful for population-survey workflows."
    ),
)
@click.pass_context
def report_command(
    ctx: click.Context,
    selector_path: str,
    anno_path: str,
    out: str | None,
    fmt: str,
    schema_override: str | None,
    allow_empty: bool,
    allow_empty_source: bool,
    include_empty_groups: bool,
) -> None:
    """Per-population aggregate output: group_id, n_matched, n_in_anno,
    pct_matched, date_min/max_calbp, coverage_median (+ JSON adds
    coverage_min/max). Atomic write."""
    # Forward the parsed options unchanged; the orchestrator's return value
    # is used as the process exit status.
    sys.exit(
        run_report(
            selector_path=selector_path,
            anno_path=anno_path,
            out=out,
            fmt=fmt,
            schema_override=schema_override,
            allow_empty=allow_empty,
            allow_empty_source=allow_empty_source,
            include_empty_groups=include_empty_groups,
            quiet=ctx.obj["quiet"],
        )
    )
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
@cli.command("template")
@click.argument("name", required=False, default=None, type=click.STRING)
@click.option(
    "-o",
    "--out",
    type=click.Path(dir_okay=False),
    default=None,
    help="Output file path for emit mode (default: stdout).",
)
@click.pass_context
def template_command(ctx: click.Context, name: str | None, out: str | None) -> None:
    """Discover or emit a shipped selector template.

    No-argument form: prints the sorted list of shipped templates to
    stdout. Argument form: emits `<name>.yaml`'s verbatim content
    (including its metadata block and comments) to stdout or --out PATH.
    Unknown names exit 2 with a discovery hint."""
    # `name` is None when the positional argument is omitted; run_template
    # receives it as-is and its int result becomes the exit status.
    quiet = ctx.obj["quiet"]
    sys.exit(run_template(name=name, out=out, quiet=quiet))
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def main() -> None:
    """Run the click group and translate failures into process exit codes.

    The group is invoked with ``standalone_mode=False`` so that exceptions
    reach the handlers below instead of being handled inside click.

    Mapping, in handler order:
      * ``click.UsageError``  -> "Usage error: ..." on stderr, exit 4.
      * ``click.exceptions.Abort`` -> exit 130.
      * project ``UsageError`` -> formatted validation errors (or the plain
        message, when non-empty) on stderr, exit ``e.exit_code``.
      * any other ``AadrSubsetError`` -> message (when non-empty) on stderr,
        exit ``e.exit_code``.
      * ``SystemExit`` -> re-raised untouched.
      * anything else -> banner plus traceback on stderr, exit
        ``EXIT_UNEXPECTED`` (documented by the original author as 70,
        BSD EX_SOFTWARE).
    """
    try:
        cli(standalone_mode=False)
    except click.UsageError as exc:
        # click would exit 2 for its own usage errors (bad arg counts, etc.);
        # remapped to 4 to align with HLD §Exit codes (usage error = 4).
        sys.stderr.write(f"Usage error: {exc.format_message()}\n")
        sys.exit(4)
    except click.exceptions.Abort:
        # ctrl-C and friends: 130 is the conventional SIGINT exit code.
        sys.exit(130)
    except UsageError as exc:
        # The project's UsageError carries either a list[ValidationError]
        # payload (selector load) or just a message (engine feature-gate).
        if exc.errors:
            sys.stderr.write(format_validation_errors(exc.errors) + "\n")
        else:
            text = str(exc)
            if text:
                sys.stderr.write(f"{text}\n")
        sys.exit(exc.exit_code)
    except AadrSubsetError as exc:
        # Remaining tool-internal errors: print the message if there is one
        # and exit with the code the exception carries.
        text = str(exc)
        if text:
            sys.stderr.write(f"{text}\n")
        sys.exit(exc.exit_code)
    except SystemExit:
        # Subcommand wrappers finish via sys.exit(); let that status through.
        raise
    except Exception:
        # Last-resort boundary: surface the full traceback for bug reports.
        import traceback

        sys.stderr.write("INTERNAL ERROR: uncaught exception (please report):\n")
        traceback.print_exc(file=sys.stderr)
        sys.exit(EXIT_UNEXPECTED)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Per-subcommand orchestrators. Each `run_<verb>` returns an int exit code."""
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""inspect subcommand orchestrator.
|
|
2
|
+
|
|
3
|
+
Diagnostic dry-run: shows what a selector matches against a target .anno
|
|
4
|
+
without writing any file. Always exits 0 — inspect is informational; a
|
|
5
|
+
non-zero exit on zero-match would defeat the purpose.
|
|
6
|
+
|
|
7
|
+
Per LLD §3.10 / §4.3. Day 4 ships the single-version path; cross-version
|
|
8
|
+
diagnostics land Day 6 alongside select's cross-version flow.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import sys
|
|
14
|
+
from dataclasses import replace
|
|
15
|
+
|
|
16
|
+
import aadr_resolve
|
|
17
|
+
|
|
18
|
+
from ..engine import select_samples
|
|
19
|
+
from ..errors import EXIT_SUCCESS, IOFailure, UsageError, ValidationError
|
|
20
|
+
from ..reporting import format_inspect_summary
|
|
21
|
+
from ..selector import compute_signature, load_selector
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def run_inspect(
    *,
    selector_path: str,
    anno_path: str,
    schema_override: str | None,
    allow_empty_source: bool,
    strict_resolve: bool,
    quiet: bool,
) -> int:
    """Orchestrate `aadr-subset inspect`; the only return value is EXIT_SUCCESS.

    Steps, in order:
      1. Load the selector via ``load_selector``.
      2. Translate ``schema_override`` and load the AnnoFrame from
         ``anno_path``.
      3. Evaluate the selector with ``include_matched_criteria=True``.
      4. Compute the selector signature and attach run metadata (anno file,
         anno version, schema class, selector file, signature) to the result.
      5. Build the inspect summary; when ``strict_resolve`` is set and the
         result lists missing-after-resolve IDs, append a STRICT-RESOLVE note
         showing at most the first ten.
      6. Write the summary to stdout unless ``quiet``.

    Raises:
        IOFailure: when the AnnoFrame load raises
            ``aadr_resolve.SchemaDetectionError``, ``OSError`` or
            ``aadr_resolve.IOFailure``.
        UsageError: from ``_parse_schema_override`` for an unknown class.
        Errors raised by the selector loader or the engine propagate as-is.
    """
    # 1. Selector first, so selector problems are reported before .anno ones.
    _, selector = load_selector(selector_path, allow_empty_source=allow_empty_source)

    # 2. Target AnnoFrame.
    override = _parse_schema_override(schema_override)
    try:
        anno = aadr_resolve.AnnoFrame.from_path(anno_path, schema_override=override)
    except aadr_resolve.SchemaDetectionError as exc:
        raise IOFailure(f"AADR .anno schema unrecognized: {exc}") from exc
    except (OSError, aadr_resolve.IOFailure) as exc:
        raise IOFailure(f"cannot load .anno at {anno_path}: {exc}") from exc

    # 3. Matched criteria are requested so the summary can attribute matches.
    evaluated = select_samples(anno, selector, include_matched_criteria=True)

    # 4. This command passes no CLI coverage column into the signature.
    signature = compute_signature(selector, cli_coverage_column=None)
    enriched = replace(
        evaluated,
        anno_file=str(anno_path),
        anno_version=anno.version,
        schema_class=anno.schema_class.value,
        selector_file=selector_path,
        selector_signature=signature,
    )

    # 5. Summary text, plus the informational strict-resolve note.
    summary = format_inspect_summary(enriched, anno)
    if strict_resolve:
        unresolved = enriched.warnings.missing_after_resolve
        if unresolved:
            preview = unresolved[:10]
            summary = summary + (
                f"\n\n[STRICT-RESOLVE would fail: {len(unresolved)} Individual_ID(s) "
                f"failed to resolve. First 10: {preview}]"
            )

    # 6. stdout, not stderr; --quiet suppresses the output entirely.
    if not quiet:
        sys.stdout.write(summary + "\n")

    return EXIT_SUCCESS
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _parse_schema_override(value: str | None):  # type: ignore[no-untyped-def]
    """Translate a ``--schema-override`` letter into ``aadr_resolve``'s SchemaClass.

    Returns ``None`` when ``value`` is ``None``; otherwise the ``SchemaClass``
    member whose name equals ``value``.

    Raises:
        UsageError: carrying one ``ValidationError`` that lists the valid
            member names, when ``value`` names no ``SchemaClass`` member.
    """
    if value is None:
        return None

    # Deferred import: only needed once an override was actually supplied.
    from aadr_resolve.types import SchemaClass

    try:
        chosen = SchemaClass[value]
    except KeyError as exc:
        known = [member.name for member in SchemaClass]
        # file/line/col are placeholders; the value came from the command
        # line rather than a selector file.
        problem = ValidationError(
            file="<cli>",
            line=1,
            col=1,
            pointer="/--schema-override",
            message=f"unknown schema class '{value}'; expected one of {known}",
        )
        raise UsageError(errors=[problem]) from exc
    return chosen
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""report subcommand orchestrator.
|
|
2
|
+
|
|
3
|
+
Per-population aggregate output. Same selector + AnnoFrame loading as
|
|
4
|
+
`select`, then `reporting.write_report_tsv` / `write_report_json` instead
|
|
5
|
+
of formats.py writers.
|
|
6
|
+
|
|
7
|
+
Per LLD §3.13 / §4.5. Day 5 ships the single-version path; cross-version
|
|
8
|
+
lands Day 6 alongside select's cross-version flow.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import sys
|
|
14
|
+
import time
|
|
15
|
+
from dataclasses import replace
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
import aadr_resolve
|
|
19
|
+
|
|
20
|
+
from ..engine import select_samples
|
|
21
|
+
from ..errors import (
|
|
22
|
+
EXIT_SUCCESS,
|
|
23
|
+
IOFailure,
|
|
24
|
+
SoftValidationFailure,
|
|
25
|
+
UsageError,
|
|
26
|
+
ValidationError,
|
|
27
|
+
)
|
|
28
|
+
from ..reporting import write_report_json, write_report_tsv
|
|
29
|
+
from ..selector import compute_signature, load_selector
|
|
30
|
+
from ..types import ReportFormat
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def run_report(
    *,
    selector_path: str,
    anno_path: str,
    out: str | None,
    fmt: str,
    schema_override: str | None,
    allow_empty: bool,
    allow_empty_source: bool,
    include_empty_groups: bool,
    quiet: bool,
) -> int:
    """Orchestrate `aadr-subset report` and return the exit code.

    Steps, in order:
      1. Load the selector via ``load_selector``.
      2. Translate ``schema_override`` and load the AnnoFrame from
         ``anno_path``.
      3. Compute the selector signature.
      4. Evaluate the selector with ``include_matched_criteria=False``.
      5. If nothing matched and ``allow_empty`` is false, raise
         ``SoftValidationFailure``.
      6. Attach run metadata (anno file, anno version, schema class, selector
         file, signature) to the result.
      7. Write the report with ``write_report_tsv`` when ``fmt`` maps to
         ``ReportFormat.TSV``, otherwise with ``write_report_json``. A falsy
         ``out`` is passed on as ``out_path=None``.
      8. Unless ``quiet``, write a one-line "Wrote ..." summary to stderr
         (the code targets stderr, not stdout).
      9. Return ``EXIT_SUCCESS``.

    Raises:
        IOFailure: when the AnnoFrame load raises
            ``aadr_resolve.SchemaDetectionError``, ``OSError`` or
            ``aadr_resolve.IOFailure``.
        SoftValidationFailure: zero matches without ``allow_empty``.
        UsageError: from ``_parse_schema_override`` for an unknown class.
    """
    parse_started = time.monotonic()

    # 1. Selector first, so selector problems are reported before .anno ones.
    _, selector = load_selector(selector_path, allow_empty_source=allow_empty_source)

    # 2. Target AnnoFrame.
    override = _parse_schema_override(schema_override)
    try:
        anno = aadr_resolve.AnnoFrame.from_path(anno_path, schema_override=override)
    except aadr_resolve.SchemaDetectionError as exc:
        raise IOFailure(f"AADR .anno schema unrecognized: {exc}") from exc
    except (OSError, aadr_resolve.IOFailure) as exc:
        raise IOFailure(f"cannot load .anno at {anno_path}: {exc}") from exc

    # 3. This command passes no CLI coverage column into the signature.
    signature = compute_signature(selector, cli_coverage_column=None)
    parse_elapsed = time.monotonic() - parse_started

    # 4. Only group aggregates are reported, so per-row criteria are skipped.
    eval_started = time.monotonic()
    evaluated = select_samples(anno, selector, include_matched_criteria=False)
    eval_elapsed = time.monotonic() - eval_started

    # 5. Zero-match gate.
    if not allow_empty and evaluated.n_matched == 0:
        raise SoftValidationFailure(
            "selector matched 0 samples — report not written. "
            "Pass --allow-empty for a header-only report."
        )

    # 6. Run metadata.
    enriched = replace(
        evaluated,
        anno_file=str(anno_path),
        anno_version=anno.version,
        schema_class=anno.schema_class.value,
        selector_file=selector_path,
        selector_signature=signature,
    )

    # 7. Pick the writer from the requested format, then write.
    report_format = ReportFormat(fmt)
    write_started = time.monotonic()
    destination = Path(out) if out else None
    writer = write_report_tsv if report_format == ReportFormat.TSV else write_report_json
    writer(
        enriched,
        anno,
        include_empty_groups=include_empty_groups,
        out_path=destination,
    )
    write_elapsed = time.monotonic() - write_started

    # 8. One-line summary on stderr, with the combined wall time of all
    # three phases.
    if not quiet:
        population_count = len(enriched.per_population_counts)
        noun = "population" if population_count == 1 else "populations"
        target = str(destination) if destination else "stdout"
        elapsed = parse_elapsed + eval_elapsed + write_elapsed
        sys.stderr.write(
            f"Wrote {target} ({population_count} {noun}, "
            f"{enriched.n_matched} samples) in {elapsed:.2f}s.\n"
        )

    return EXIT_SUCCESS
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _parse_schema_override(value: str | None):  # type: ignore[no-untyped-def]
    """Translate a ``--schema-override`` letter into ``aadr_resolve``'s SchemaClass.

    Returns ``None`` when ``value`` is ``None``; otherwise the ``SchemaClass``
    member whose name equals ``value``.

    Raises:
        UsageError: carrying one ``ValidationError`` that lists the valid
            member names, when ``value`` names no ``SchemaClass`` member.
    """
    if value is None:
        return None

    # Deferred import: only needed once an override was actually supplied.
    from aadr_resolve.types import SchemaClass

    try:
        chosen = SchemaClass[value]
    except KeyError as exc:
        known = [member.name for member in SchemaClass]
        # file/line/col are placeholders; the value came from the command
        # line rather than a selector file.
        problem = ValidationError(
            file="<cli>",
            line=1,
            col=1,
            pointer="/--schema-override",
            message=f"unknown schema class '{value}'; expected one of {known}",
        )
        raise UsageError(errors=[problem]) from exc
    return chosen
|