adaptergate 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adaptergate/__init__.py +5 -0
- adaptergate/cli.py +588 -0
- adaptergate/core/__init__.py +0 -0
- adaptergate/core/procl.py +479 -0
- adaptergate/data/seed_recipes.jsonl +7 -0
- adaptergate/eval/__init__.py +0 -0
- adaptergate/eval/bird_eval.py +292 -0
- adaptergate/examples/__init__.py +0 -0
- adaptergate/examples/mock_scorer.py +98 -0
- adaptergate/gating/__init__.py +40 -0
- adaptergate/gating/cluster.py +119 -0
- adaptergate/gating/holdout_eval.py +153 -0
- adaptergate/gating/regression_gate.py +474 -0
- adaptergate/gating/replay_buffer.py +105 -0
- adaptergate/observability/__init__.py +0 -0
- adaptergate/observability/silent_collapse.py +334 -0
- adaptergate/recipes/__init__.py +33 -0
- adaptergate/recipes/models.py +151 -0
- adaptergate/recipes/recommend.py +142 -0
- adaptergate/recipes/store.py +100 -0
- adaptergate/serving/__init__.py +0 -0
- adaptergate/serving/qwen_backend.py +148 -0
- adaptergate-0.5.1.dist-info/METADATA +437 -0
- adaptergate-0.5.1.dist-info/RECORD +29 -0
- adaptergate-0.5.1.dist-info/WHEEL +5 -0
- adaptergate-0.5.1.dist-info/entry_points.txt +2 -0
- adaptergate-0.5.1.dist-info/licenses/LICENSE +189 -0
- adaptergate-0.5.1.dist-info/licenses/NOTICE +53 -0
- adaptergate-0.5.1.dist-info/top_level.txt +1 -0
adaptergate/__init__.py
ADDED
adaptergate/cli.py
ADDED
|
@@ -0,0 +1,588 @@
|
|
|
1
|
+
"""adaptergate CLI — run the regression gate from the command line.
|
|
2
|
+
|
|
3
|
+
The CLI is intentionally serving-agnostic. Users supply a `scorer` callable
|
|
4
|
+
via Python module:function syntax (e.g. ``my_eval:score``). The scorer takes
|
|
5
|
+
``(adapter_id, query)`` and returns a float in ``[0.0, 1.0]``. adaptergate
|
|
6
|
+
handles the held-out set management, gate decision, audit log, and replay
|
|
7
|
+
buffer.
|
|
8
|
+
|
|
9
|
+
Exits with status 0 if the candidate is accepted, 1 if rejected, 2 on usage
|
|
10
|
+
errors. This makes ``adaptergate gate`` plug into CI/CD as a pre-deploy check.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import importlib
|
|
16
|
+
import json
|
|
17
|
+
import sys
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
import typer
|
|
22
|
+
from rich.console import Console
|
|
23
|
+
|
|
24
|
+
from adaptergate import __version__
|
|
25
|
+
from adaptergate.gating import (
|
|
26
|
+
GateConfig,
|
|
27
|
+
GateDecision,
|
|
28
|
+
HoldoutSet,
|
|
29
|
+
RegressionGate,
|
|
30
|
+
ReplayBuffer,
|
|
31
|
+
SliceAttribution,
|
|
32
|
+
append_audit,
|
|
33
|
+
)
|
|
34
|
+
from adaptergate.recipes import RecipeStore, recommend
|
|
35
|
+
|
|
36
|
+
# Shared rich consoles: regular output goes to stdout, diagnostics to stderr.
console = Console()
err_console = Console(stderr=True)

# Root application. Every Typer app below shows its help when invoked with no
# arguments and opts out of shell-completion installation.
app = typer.Typer(
    add_completion=False,
    no_args_is_help=True,
    help="adaptergate — CI gate for per-tenant LoRA adapters that update online.",
)

# Sub-applications, one per command group.
holdout_app = typer.Typer(
    add_completion=False,
    no_args_is_help=True,
    help="Manage per-tenant held-out eval sets.",
)
replay_app = typer.Typer(
    add_completion=False,
    no_args_is_help=True,
    help="Inspect rejected update history.",
)
recipes_app = typer.Typer(
    add_completion=False,
    no_args_is_help=True,
    help="Manage the recipe library (paper-derived CL interventions).",
)

# Mount the groups on the root app under their command names.
app.add_typer(holdout_app, name="holdout")
app.add_typer(replay_app, name="replay")
app.add_typer(recipes_app, name="recipes")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _import_scorer(spec: str):
|
|
65
|
+
"""Import a scorer from 'module:function' syntax.
|
|
66
|
+
|
|
67
|
+
Adds the current working directory to sys.path so users can point at a
|
|
68
|
+
local scorer module (e.g. ``my_eval:score``) without packaging.
|
|
69
|
+
"""
|
|
70
|
+
if ":" not in spec:
|
|
71
|
+
raise typer.BadParameter(
|
|
72
|
+
f"Scorer spec must be 'module:function', got {spec!r}."
|
|
73
|
+
)
|
|
74
|
+
cwd = str(Path.cwd())
|
|
75
|
+
if cwd not in sys.path:
|
|
76
|
+
sys.path.insert(0, cwd)
|
|
77
|
+
module_name, func_name = spec.split(":", 1)
|
|
78
|
+
try:
|
|
79
|
+
module = importlib.import_module(module_name)
|
|
80
|
+
except ImportError as e:
|
|
81
|
+
raise typer.BadParameter(f"Could not import {module_name!r}: {e}")
|
|
82
|
+
try:
|
|
83
|
+
return getattr(module, func_name)
|
|
84
|
+
except AttributeError:
|
|
85
|
+
raise typer.BadParameter(
|
|
86
|
+
f"Module {module_name!r} has no attribute {func_name!r}."
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@app.command()
def gate(
    tenant: str = typer.Option(..., help="Tenant / workspace identifier."),
    candidate: str = typer.Option(..., help="Candidate adapter ID being evaluated."),
    baseline: Optional[str] = typer.Option(
        None, help="Baseline adapter ID. Omit for first-adapter promotion."
    ),
    holdout_path: Path = typer.Option(..., "--holdout", help="Path to JSONL held-out set."),
    scorer_spec: str = typer.Option(
        ..., "--scorer", help="Scorer in 'module:function' syntax."
    ),
    epsilon: float = typer.Option(0.02, help="Max acceptable score drop."),
    sample_n: Optional[int] = typer.Option(
        None, "--sample", help="Sample N queries from held-out (default: all)."
    ),
    strict: bool = typer.Option(
        False, "--strict", help="Strict per-query mode: reject any clean regression."
    ),
    require_calibration: bool = typer.Option(
        True,
        "--require-calibration/--no-require-calibration",
        help="If true, refuse to gate without a baseline.",
    ),
    audit_log: Optional[Path] = typer.Option(
        None, help="Append decision to this JSONL audit log."
    ),
    replay_path: Optional[Path] = typer.Option(
        None, help="If decision is rejected, append to this replay buffer."
    ),
    output_format: str = typer.Option(
        "human", "--format", "-f",
        help="Output format: human (rich CLI, default), json (JSON to stdout), pr-comment (GitHub-flavored Markdown for PR comments).",
    ),
    show_failures: int = typer.Option(
        5, "--show-failures",
        help="How many failing query IDs to preview under the driver slice (default 5).",
    ),
    staleness_threshold_days: int = typer.Option(
        30, "--staleness-threshold-days",
        help="Warn if the held-out set has not been updated in this many days.",
    ),
    quiet: bool = typer.Option(False, help="Alias for --format json (kept for backwards compatibility)."),
):
    """Evaluate a candidate adapter against a baseline. Exit 0 if accepted, 1 if rejected."""
    # NOTE: the docstring above is the command's --help text; keep it stable.
    holdout = HoldoutSet(tenant_id=tenant, path=holdout_path)
    if len(holdout) == 0:
        # Nothing to evaluate against: report a usage error (exit status 2).
        err_console.print(
            f"[red]Held-out set at {holdout_path} is empty. Add queries first via"
            f" 'adaptergate holdout add'.[/red]"
        )
        raise typer.Exit(2)

    # The sample is seeded with the candidate ID — presumably so repeated runs
    # for the same candidate draw the same queries (confirm in HoldoutSet).
    queries = holdout.sample(n=sample_n, seed=candidate)
    scorer = _import_scorer(scorer_spec)

    gate_inst = RegressionGate(
        GateConfig(
            epsilon=epsilon,
            strict_per_query=strict,
            require_calibration=require_calibration,
        )
    )
    decision = gate_inst.evaluate(
        tenant_id=tenant,
        candidate_id=candidate,
        baseline_id=baseline,
        holdout=queries,
        scorer=scorer,
    )

    # Persist the decision before rendering anything.
    if audit_log is not None:
        append_audit(decision, audit_log)

    # Only rejected decisions are added to the replay buffer.
    if not decision.accepted and replay_path is not None:
        buf = ReplayBuffer(tenant_id=tenant, path=replay_path)
        buf.add(decision)

    # Compute holdout staleness once so any output format can surface it.
    staleness_days = holdout.staleness_days()

    if quiet:
        output_format = "json"

    if output_format == "json":
        typer.echo(decision.to_json())
    elif output_format == "pr-comment":
        # Forward the user's thresholds so the PR comment agrees with the
        # human renderer instead of silently using built-in defaults.
        typer.echo(
            _render_pr_comment(
                decision,
                staleness_days=staleness_days,
                staleness_threshold_days=staleness_threshold_days,
                show_failures=show_failures,
            )
        )
    else:
        _render_human(
            decision,
            staleness_days=staleness_days,
            staleness_threshold_days=staleness_threshold_days,
            show_failures=show_failures,
        )

    # The exit status carries the verdict for CI: 0 accepted, 1 rejected.
    raise typer.Exit(0 if decision.accepted else 1)


def _split_preview(ids: list, limit: int) -> tuple:
    """Return the first ``limit`` IDs (never fewer than zero) and the count left out."""
    shown = ids[: max(limit, 0)]
    return shown, len(ids) - len(shown)


def _render_human(
    decision,
    *,
    staleness_days: Optional[int],
    staleness_threshold_days: int,
    show_failures: int,
) -> None:
    """Render the gate decision to a human-friendly terminal.

    Args:
        decision: The gate decision to display.
        staleness_days: Age of the held-out set in days, or ``None`` to skip
            the staleness warning.
        staleness_threshold_days: Warn only when ``staleness_days`` exceeds
            this value.
        show_failures: Maximum number of failing query IDs to list under the
            driver slice; negative values are treated as zero.

    The summary is printed to stdout; warnings are printed to stderr.
    """
    verdict_style = "green" if decision.accepted else "red"
    verdict = "ACCEPTED" if decision.accepted else "REJECTED"
    console.rule(f"[{verdict_style}]{verdict}[/{verdict_style}]")
    baseline_display = decision.baseline_id or "(none)"
    console.print(f"Tenant: [cyan]{decision.tenant_id}[/cyan]")
    console.print(f"Candidate: [cyan]{decision.candidate_id}[/cyan]")
    console.print(f"Baseline: [cyan]{baseline_display}[/cyan]")
    console.print(
        f"Score: {decision.score_baseline:.3f} → {decision.score_candidate:.3f}"
        f" (Δ={decision.delta:+.3f}, ε={decision.epsilon})"
    )
    console.print(f"Held-out: n={decision.holdout_size}")
    console.print(f"Reason: {decision.reason}")

    driver = decision.driver_slice
    if driver is not None:
        console.print(
            f"\n[bold red]DRIVER SLICE:[/bold red] [magenta]{driver.slice_tag}[/magenta]"
            f" {driver.score_baseline:.3f} → {driver.score_candidate:.3f}"
            f" (Δ={driver.delta:+.3f}, {driver.n_regressed}/{driver.n_total} regressed)"
        )
        if driver.pattern:
            console.print(f" [bold]Pattern:[/bold] {driver.pattern}")
        # Falsy IDs (e.g. unnamed queries) are not worth previewing.
        named_ids = [qid for qid in driver.regressed_query_ids if qid]
        if named_ids:
            shown, hidden = _split_preview(named_ids, show_failures)
            preview = ", ".join(str(x) for x in shown)
            more = f" + {hidden} more" if hidden else ""
            console.print(f" Failing query IDs: {preview}{more}")

    # A per-slice table only adds information when there are several slices.
    has_breakdown = bool(decision.slice_attributions) and len(decision.slice_attributions) > 1
    if has_breakdown:
        console.print("\n[bold]Slice breakdown[/bold] (most-regressed first):")
        for s in decision.slice_attributions:
            colour = "red" if s.delta < 0 else "green"
            console.print(
                f" [{colour}]{s.delta:+.3f}[/{colour}] "
                f"{s.n_regressed}/{s.n_total} regressed "
                f"[magenta]{s.slice_tag}[/magenta]"
            )

    if decision.regressions:
        note = ""
        if has_breakdown:
            note = " (slice n_regressed values may sum higher when queries belong to multiple slices)"
        console.print(
            f"\n[yellow]{len(decision.regressions)} unique queries regressed[/yellow]"
            f"{note}"
        )
    if decision.improvements:
        console.print(f"[green]{len(decision.improvements)} unique queries improved[/green]")

    if decision.malformed_slice_queries > 0:
        err_console.print(
            f"\n[yellow]Warning:[/yellow] {decision.malformed_slice_queries}"
            f" held-out queries had a malformed 'slices' field"
            f" (expected list, got non-list). Those queries contributed to"
            f" aggregate scoring but were skipped for slice attribution."
        )

    if decision.suspected_duplicate_slices:
        err_console.print(
            "\n[yellow]Warning:[/yellow] suspected duplicate slice tags"
            " (similarity ≥ 0.85). Consider normalising your held-out set:"
        )
        for a, b in decision.suspected_duplicate_slices:
            err_console.print(f" • [magenta]{a}[/magenta] ↔ [magenta]{b}[/magenta]")

    if staleness_days is not None and staleness_days > staleness_threshold_days:
        err_console.print(
            f"\n[yellow]Warning:[/yellow] held-out set has not been refreshed"
            f" in {staleness_days} days (threshold: {staleness_threshold_days})."
            f" Stale held-out sets may fail to reflect current traffic — consider"
            f" adding fresh queries to avoid eval-set drift being misread as adapter drift."
        )


def _render_pr_comment(
    decision,
    *,
    staleness_days: Optional[int],
    staleness_threshold_days: int = 30,
    show_failures: int = 5,
) -> str:
    """Render the gate decision as GitHub-flavored Markdown for a PR comment.

    Args:
        decision: The gate decision to display.
        staleness_days: Age of the held-out set in days, or ``None`` to skip
            the staleness warning.
        staleness_threshold_days: Warn only when ``staleness_days`` exceeds
            this value. Defaults to 30.
        show_failures: Maximum number of failing query IDs to list under the
            driver slice; negative values are treated as zero. Defaults to 5.

    Returns:
        The Markdown document as a single newline-joined string.
    """
    icon = "✅" if decision.accepted else "🚫"
    verdict = "ACCEPTED" if decision.accepted else "REJECTED"
    lines: list[str] = [f"## {icon} adaptergate gate: **{verdict}**", ""]
    lines.append(f"- **Tenant**: `{decision.tenant_id}`")
    lines.append(
        f"- **Candidate**: `{decision.candidate_id}`"
        f" vs baseline `{decision.baseline_id or '(none)'}`"
    )
    lines.append(
        f"- **Score**: {decision.score_baseline:.3f} → {decision.score_candidate:.3f}"
        f" (Δ={decision.delta:+.3f}, ε={decision.epsilon})"
    )
    lines.append(f"- **Held-out**: n={decision.holdout_size}")
    lines.append(f"- **Reason**: {decision.reason}")

    driver = decision.driver_slice
    if driver is not None:
        lines.append("")
        lines.append(f"### 🎯 Driver slice: `{driver.slice_tag}`")
        lines.append(
            f"- {driver.score_baseline:.3f} → {driver.score_candidate:.3f}"
            f" (Δ={driver.delta:+.3f}, {driver.n_regressed}/{driver.n_total} regressed)"
        )
        if driver.pattern:
            lines.append(f"- **Pattern**: {driver.pattern}")
        named_ids = [str(x) for x in driver.regressed_query_ids if x]
        if named_ids:
            shown, hidden = _split_preview(named_ids, show_failures)
            preview = ", ".join(f"`{x}`" for x in shown)
            more = f" + {hidden} more" if hidden else ""
            lines.append(f"- **Failing IDs**: {preview}{more}")

    if decision.slice_attributions and len(decision.slice_attributions) > 1:
        lines.append("")
        lines.append("### Slice breakdown")
        lines.append("| Δ | regressed | slice |")
        lines.append("|---|---|---|")
        for s in decision.slice_attributions:
            # A raw "|" would end the table cell early, even inside a code
            # span, so escape it.
            tag = str(s.slice_tag).replace("|", "\\|")
            lines.append(
                f"| {s.delta:+.3f} | {s.n_regressed}/{s.n_total} | `{tag}` |"
            )

    warnings: list[str] = []
    if decision.malformed_slice_queries > 0:
        warnings.append(
            f"{decision.malformed_slice_queries} held-out queries had a malformed"
            " `slices` field — skipped for slice attribution."
        )
    if decision.suspected_duplicate_slices:
        pairs = ", ".join(
            f"`{a}` ↔ `{b}`" for a, b in decision.suspected_duplicate_slices
        )
        warnings.append(f"Suspected duplicate slice tags: {pairs}")
    if staleness_days is not None and staleness_days > staleness_threshold_days:
        warnings.append(f"Held-out set has not been refreshed in {staleness_days} days.")
    if warnings:
        lines.append("")
        lines.append("### ⚠️ Warnings")
        for w in warnings:
            lines.append(f"- {w}")

    lines.append("")
    lines.append("_Generated by [adaptergate](https://github.com/OriginalKazdov/adaptergate)._")
    return "\n".join(lines)
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
@holdout_app.command("add")
def holdout_add(
    tenant: str = typer.Option(...),
    holdout_path: Path = typer.Option(..., "--holdout"),
    query_json: str = typer.Argument(..., help="Query payload as JSON string."),
    accepted_by: Optional[str] = typer.Option(None, help="Adapter version this query was OK with."),
):
    """Add a query to the per-tenant held-out set."""
    # Parse the payload first so malformed input is reported as a usage error
    # before the held-out set is opened.
    try:
        payload = json.loads(query_json)
    except json.JSONDecodeError as exc:
        problem = f"Query is not valid JSON: {exc}"
        raise typer.BadParameter(problem)

    store = HoldoutSet(tenant_id=tenant, path=holdout_path)
    added = store.add(payload, accepted_by=accepted_by)

    # Emit a one-line JSON receipt: the new query's ID and the resulting size.
    receipt = {"added": added.query_id, "size": len(store)}
    typer.echo(json.dumps(receipt))
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
@holdout_app.command("size")
def holdout_size(
    tenant: str = typer.Option(...),
    holdout_path: Path = typer.Option(..., "--holdout"),
):
    """Print the number of queries in the held-out set."""
    # Open the tenant's held-out set and print its length as a bare integer.
    store = HoldoutSet(path=holdout_path, tenant_id=tenant)
    count = len(store)
    typer.echo(f"{count}")
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
@holdout_app.command("list")
def holdout_list(
    tenant: str = typer.Option(...),
    holdout_path: Path = typer.Option(..., "--holdout"),
):
    """List queries in the held-out set, one JSON per line."""
    store = HoldoutSet(tenant_id=tenant, path=holdout_path)
    for entry in store:
        # Only identifying metadata is printed: ID, accepting adapter and
        # the time the query was added.
        row = {
            "query_id": entry.query_id,
            "accepted_by": entry.accepted_by_adapter,
            "added_at": entry.added_at,
        }
        typer.echo(json.dumps(row))
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
@replay_app.command("list")
def replay_list(
    tenant: str = typer.Option(...),
    replay_path: Path = typer.Option(..., "--replay"),
    n: int = typer.Option(10, help="Show the most recent N rejected updates."),
):
    """Show recent rejected updates."""
    history = ReplayBuffer(tenant_id=tenant, path=replay_path)
    for rejected in history.recent(n=n):
        # One JSON object per rejected update; delta is rounded to 4 decimals.
        row = {
            "candidate": rejected.candidate_id,
            "baseline": rejected.baseline_id,
            "rejected_at": rejected.rejected_at,
            "delta": round(rejected.delta, 4),
            "reason": rejected.reason,
        }
        typer.echo(json.dumps(row))
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
@app.command()
def version():
    """Print adaptergate version."""
    # The value is the package-level ``__version__`` imported by this module.
    typer.echo(message=__version__)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
# ---------- recipes subcommands ----------
|
|
417
|
+
|
|
418
|
+
def _bundled_seed_path() -> Path:
    """Return the path of the ``seed_recipes.jsonl`` file shipped in the package.

    The path is derived from the installed ``adaptergate`` package's own
    location (``<package dir>/data/seed_recipes.jsonl``). Existence is not
    checked here; callers decide how to handle a missing file.
    """
    # Imported lazily, under an alias, only to discover the install location.
    import adaptergate as _pkg

    package_dir = Path(_pkg.__file__).parent
    return package_dir.joinpath("data", "seed_recipes.jsonl")
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
@recipes_app.command("list")
def recipes_list(
    recipes_path: Path = typer.Option(..., "--recipes", help="Path to recipes JSONL."),
):
    """List recipes in the library."""
    # The applications log is looked up next to the recipes file.
    applications_path = recipes_path.with_name("applications.jsonl")
    store = RecipeStore(recipes_path=recipes_path, applications_path=applications_path)

    if not len(store):
        # Empty library: point the user at the seeding command and exit 1.
        hint = (
            f"[yellow]No recipes in {recipes_path}.[/yellow]"
            f" Run [bold]adaptergate recipes seed --recipes {recipes_path}[/bold]"
            f" to populate from the bundled seed library."
        )
        err_console.print(hint)
        raise typer.Exit(1)

    for recipe in store.list_recipes():
        # One compact JSON summary per recipe.
        summary = {
            "recipe_id": recipe.recipe_id,
            "name": recipe.name,
            "intervention_type": recipe.intervention_type,
            "source_paper": recipe.source_paper_arxiv,
        }
        typer.echo(json.dumps(summary))
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
@recipes_app.command("show")
def recipes_show(
    recipe_id: str = typer.Argument(..., help="ID of the recipe to display."),
    recipes_path: Path = typer.Option(..., "--recipes"),
):
    """Show full detail for one recipe."""
    # The applications log is looked up next to the recipes file.
    applications_path = recipes_path.with_name("applications.jsonl")
    store = RecipeStore(recipes_path=recipes_path, applications_path=applications_path)

    found = store.get_recipe(recipe_id)
    if found is not None:
        typer.echo(found.to_json())
        return

    # Unknown ID: explain on stderr and exit with status 1.
    err_console.print(f"[red]No recipe with id {recipe_id!r} in {recipes_path}.[/red]")
    raise typer.Exit(1)
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
@recipes_app.command("seed")
def recipes_seed(
    recipes_path: Path = typer.Option(..., "--recipes", help="Where to write the seed recipes."),
    overwrite: bool = typer.Option(
        False, "--overwrite", help="Overwrite any existing recipes file at this path."
    ),
):
    """Populate a recipes file from the package-bundled seed library."""
    seed = _bundled_seed_path()
    if not seed.exists():
        # The bundled data file is missing from the installation.
        err_console.print(f"[red]Seed file not found at {seed}.[/red]")
        raise typer.Exit(2)
    if recipes_path.exists() and not overwrite:
        # Never clobber an existing library unless explicitly asked to.
        err_console.print(
            f"[red]{recipes_path} already exists. Pass --overwrite to replace.[/red]"
        )
        raise typer.Exit(1)

    # Read and write explicitly as UTF-8 so the copy does not depend on the
    # platform's default encoding.
    seed_text = seed.read_text(encoding="utf-8")
    recipes_path.parent.mkdir(parents=True, exist_ok=True)
    recipes_path.write_text(seed_text, encoding="utf-8")

    # Count non-blank lines from the text already in memory rather than
    # re-reading the file that was just written.
    n = sum(1 for line in seed_text.splitlines() if line.strip())
    typer.echo(json.dumps({"seeded": str(recipes_path), "n_recipes": n}))
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
@app.command()
def recommend_cmd(
    decision_path: Path = typer.Option(
        ..., "--decision", help="Path to a GateDecision JSON or JSONL audit log."
    ),
    recipes_path: Path = typer.Option(..., "--recipes", help="Path to recipes JSONL."),
    line: int = typer.Option(
        -1, "--line", help="Which line of the audit log to consume (default: last)."
    ),
    top_k: int = typer.Option(5, "--top-k", help="Maximum recommendations to return."),
    output_format: str = typer.Option(
        "human", "--format", "-f", help="Output format: human (default) or json."
    ),
):
    """Recommend repair recipes for a rejected gate decision."""
    # NOTE: the docstring above is the command's --help text; keep it stable.
    decision = _load_decision(decision_path, line=line)
    store = RecipeStore(
        recipes_path=recipes_path,
        # The applications log is looked up next to the recipes file.
        applications_path=recipes_path.with_name("applications.jsonl"),
    )
    recs = recommend(decision, store, top_k=top_k)

    if output_format == "json":
        typer.echo(json.dumps([r.to_dict() for r in recs], indent=2))
        return

    if not recs:
        # Explain why there is nothing to recommend.
        if decision.accepted:
            console.print("[green]Decision was ACCEPTED — no repair recipe needed.[/green]")
        elif decision.driver_slice is None:
            console.print(
                "[yellow]No driver slice on this decision — add slice tags to your"
                " held-out queries to enable recipe matching.[/yellow]"
            )
        else:
            console.print(
                f"[yellow]No recipes in the library match driver slice"
                f" `{decision.driver_slice.slice_tag}`.[/yellow]"
            )
        return

    driver = decision.driver_slice
    if driver is not None:
        console.rule(f"[bold magenta]Recipes for driver slice: {driver.slice_tag}[/bold magenta]")

    if all(r.n_uses == 0 for r in recs):
        err_console.print(
            "[yellow]Note:[/yellow] no prior applications have been logged for any"
            " matching recipe. The ranking below reflects [bold]slice matching only[/bold],"
            " not observed efficacy. To make the ranking empirical, log recipe outcomes"
            " via [bold]RecipeStore.add_application(...)[/bold] after running a recipe."
        )
    for i, r in enumerate(recs, 1):
        eff = (
            f"avg Δ={r.expected_efficacy:+.3f} over n={r.n_uses}"
            if r.expected_efficacy is not None
            else "no prior applications"
        )
        # The opening bracket is backslash-escaped: an unescaped "[avg ...]"
        # or "[no prior ...]" would be parsed by rich as a markup tag and
        # dropped from the output.
        console.print(
            f"\n[bold cyan]{i}. {r.recipe.name}[/bold cyan]"
            f" \\[{eff}]"
        )
        console.print(f" id: [magenta]{r.recipe.recipe_id}[/magenta]")
        console.print(f" intervention: {r.recipe.intervention_type}")
        if r.recipe.source_paper_arxiv:
            console.print(f" source: arXiv {r.recipe.source_paper_arxiv}")
        console.print(f" {r.recipe.description}")
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
def _load_decision(path: Path, *, line: int) -> GateDecision:
    """Load a GateDecision from a JSON or JSONL file at ``path``.

    Robust to both single-JSON files (older audit logs) and JSONL (current).
    The whole file is first tried as one JSON document, so a pretty-printed
    (multi-line) single decision is accepted. Only if that fails is the file
    treated as JSONL, where ``line`` selects which non-blank entry to use
    (default ``-1`` = last; other negative values count from the end).

    Args:
        path: File to read, decoded as UTF-8.
        line: Entry index for JSONL input. Ignored when the file parses as a
            single JSON document.

    Returns:
        The reconstructed ``GateDecision``; its ``slice_attributions`` are
        rebuilt as ``SliceAttribution`` objects.

    Raises:
        typer.BadParameter: if the file cannot be read, is empty, ``line`` is
            out of range, the selected entry is not valid JSON or not a JSON
            object, or its fields do not match ``GateDecision``.
    """
    try:
        raw = path.read_text(encoding="utf-8").strip()
    except OSError as e:
        raise typer.BadParameter(f"Could not read {path}: {e}") from e
    if not raw:
        raise typer.BadParameter(f"{path} is empty.")

    try:
        # Single-document case, including JSON spread over several lines.
        data = json.loads(raw)
    except json.JSONDecodeError:
        # Several concatenated documents: treat as JSONL and pick one entry.
        lines = [s for s in raw.splitlines() if s.strip()]
        idx = line if line >= 0 else len(lines) + line
        if not (0 <= idx < len(lines)):
            raise typer.BadParameter(
                f"line index {line} out of range (file has {len(lines)} lines)"
            ) from None
        try:
            data = json.loads(lines[idx])
        except json.JSONDecodeError as e:
            raise typer.BadParameter(
                f"Entry {idx} of {path} is not valid JSON: {e}"
            ) from e

    if not isinstance(data, dict):
        raise typer.BadParameter(
            f"Expected a JSON object in {path}, got {type(data).__name__}."
        )

    # Reconstruct SliceAttribution and GateDecision from the dict. A missing
    # or null "slice_attributions" value is treated as an empty list.
    try:
        data["slice_attributions"] = [
            SliceAttribution(**s) for s in data.get("slice_attributions") or []
        ]
        return GateDecision(**data)
    except TypeError as e:
        # Unexpected or missing fields: the entry is not a serialized decision.
        raise typer.BadParameter(
            f"{path} does not contain a valid gate decision: {e}"
        ) from e
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def main() -> None:
    """Run the root Typer application (also used by the ``__main__`` guard below)."""
    app()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
|
File without changes
|