sindri-forge 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sindri/__init__.py +11 -0
- sindri/__main__.py +7 -0
- sindri/cli.py +630 -0
- sindri/core/__init__.py +0 -0
- sindri/core/git_ops.py +86 -0
- sindri/core/metric.py +53 -0
- sindri/core/modes.py +86 -0
- sindri/core/noise.py +67 -0
- sindri/core/pool.py +72 -0
- sindri/core/pr_body.py +119 -0
- sindri/core/state.py +140 -0
- sindri/core/termination.py +96 -0
- sindri/core/validators.py +224 -0
- sindri_forge-0.3.2.dist-info/METADATA +110 -0
- sindri_forge-0.3.2.dist-info/RECORD +16 -0
- sindri_forge-0.3.2.dist-info/WHEEL +4 -0
sindri/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""sindri — Claude Code plugin core for bounded optimization loops."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
5
|
+
|
|
6
|
+
# Resolve the version from the installed "sindri-forge" distribution metadata.
try:
    __version__ = version("sindri-forge")
except PackageNotFoundError:
    # No metadata for the distribution could be found (for example when the
    # package is imported without being installed); use a placeholder.
    __version__ = "0.0.0+unknown"
|
|
10
|
+
|
|
11
|
+
__all__ = ["__version__"]
|
sindri/__main__.py
ADDED
sindri/cli.py
ADDED
|
@@ -0,0 +1,630 @@
|
|
|
1
|
+
"""CLI entry point — argparse dispatcher for sindri subcommands.
|
|
2
|
+
|
|
3
|
+
Each subcommand is registered once here and dispatched to a handler function.
|
|
4
|
+
Handlers return an exit code (0 on success, non-zero on failure).
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import argparse
|
|
9
|
+
import re
|
|
10
|
+
import sys
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import TYPE_CHECKING, Callable
|
|
13
|
+
|
|
14
|
+
from sindri import __version__
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from sindri.core.validators import SindriState
|
|
18
|
+
|
|
19
|
+
_HANDLERS: dict[str, Callable[[argparse.Namespace], int]] = {}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _register(
|
|
23
|
+
name: str,
|
|
24
|
+
) -> Callable[[Callable[[argparse.Namespace], int]], Callable[[argparse.Namespace], int]]:
|
|
25
|
+
"""Decorator: register a subcommand handler."""
|
|
26
|
+
def decorator(fn: Callable[[argparse.Namespace], int]) -> Callable[[argparse.Namespace], int]:
|
|
27
|
+
_HANDLERS[name] = fn
|
|
28
|
+
return fn
|
|
29
|
+
return decorator
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _load_state_or_exit(**kwargs: Path) -> SindriState | None:
    """Load the run state, reporting a ``StateIOError`` instead of raising it.

    Keyword arguments are forwarded untouched to ``read_state``. When reading
    fails with ``StateIOError`` the message is printed to stderr as
    ``error: ...`` and ``None`` is returned, so the caller can return exit
    code 1.

    Handlers that prefer a friendly "no active run" message (e.g. ``status``)
    should call ``read_state`` and catch ``StateIOError`` themselves.
    """
    from sindri.core.state import StateIOError, read_state

    try:
        loaded = read_state(**kwargs)
    except StateIOError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return None
    return loaded
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Assemble the top-level ``sindri`` parser with every subcommand attached."""
    parser = argparse.ArgumentParser(
        prog="sindri",
        description="Sindri: bounded optimization loop core",
    )
    parser.add_argument("--version", action="version", version=f"sindri {__version__}")
    # The subcommand is optional so a bare `sindri` parses successfully.
    subparsers = parser.add_subparsers(dest="subcommand", required=False)

    # Subparsers are attached in this fixed order.
    registrars = (
        _add_validate_benchmark,
        _add_detect_mode,
        _add_read_state,
        _add_pick_next,
        _add_record_result,
        _add_check_termination,
        _add_generate_pr_body,
        _add_archive,
        _add_status,
        _add_init,
    )
    for attach in registrars:
        attach(subparsers)
    return parser
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _add_validate_benchmark(sub: argparse._SubParsersAction) -> None:
|
|
68
|
+
vp = sub.add_parser(
|
|
69
|
+
"validate-benchmark",
|
|
70
|
+
help="run benchmark.py, check METRIC output",
|
|
71
|
+
)
|
|
72
|
+
vp.add_argument(
|
|
73
|
+
"--expected",
|
|
74
|
+
help="expected metric name; error if script outputs a different one",
|
|
75
|
+
)
|
|
76
|
+
vp.add_argument(
|
|
77
|
+
"--script",
|
|
78
|
+
default=".claude/scripts/sindri/benchmark.py",
|
|
79
|
+
help="path to benchmark script (default: .claude/scripts/sindri/benchmark.py)",
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@_register("validate-benchmark")
def _handle_validate_benchmark(args: argparse.Namespace) -> int:
    """Run the benchmark script once and check that it emits a parseable metric.

    Uses ``args.script`` as the script path and ``args.expected`` as the
    metric name passed to ``parse_metric_line``.

    Returns:
        0 after printing ``{"ok": true, "metric_name": ..., "metric_value": ...}``
        to stdout; 1 (with ``error: ...`` on stderr) when the script is
        missing, times out after 300 seconds, exits non-zero, or its output is
        rejected by ``parse_metric_line``.
    """
    import json
    import subprocess
    from pathlib import Path

    from sindri.core.metric import MetricParseError, parse_metric_line

    benchmark = Path(args.script)
    if not benchmark.exists():
        print(f"error: benchmark.py not found at {benchmark}", file=sys.stderr)
        return 1

    # Run with the same interpreter that is running sindri.
    command = [sys.executable, str(benchmark)]
    try:
        completed = subprocess.run(
            command, capture_output=True, text=True, check=False, timeout=300
        )
    except subprocess.TimeoutExpired:
        print("error: benchmark timed out during validation (>5min)", file=sys.stderr)
        return 1

    if completed.returncode != 0:
        print(
            f"error: benchmark exited non-zero: {completed.stderr.strip()}",
            file=sys.stderr,
        )
        return 1

    try:
        metric_name, metric_value = parse_metric_line(
            completed.stdout, expected_name=args.expected
        )
    except MetricParseError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1

    report = {"ok": True, "metric_name": metric_name, "metric_value": metric_value}
    print(json.dumps(report))
    return 0
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _add_detect_mode(sub: argparse._SubParsersAction) -> None:
|
|
123
|
+
p = sub.add_parser(
|
|
124
|
+
"detect-mode",
|
|
125
|
+
help="auto-detect local vs remote benchmark mode; reads JSON from stdin",
|
|
126
|
+
)
|
|
127
|
+
p.add_argument(
|
|
128
|
+
"--script",
|
|
129
|
+
default=".claude/scripts/sindri/benchmark.py",
|
|
130
|
+
help="path to benchmark script",
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@_register("detect-mode")
def _handle_detect_mode(args: argparse.Namespace) -> int:
    """Detect the benchmark mode from baseline samples supplied on stdin.

    Stdin must hold a JSON object with a ``baseline_samples`` key; its value
    and ``args.script`` are passed to ``detect_mode``.

    Returns:
        0 after printing ``{"mode": ...}`` to stdout; 1 (with ``error: ...``
        on stderr) when stdin is not the expected JSON object or the script
        does not exist.
    """
    import json
    from pathlib import Path

    from sindri.core.modes import detect_mode

    try:
        payload = json.loads(sys.stdin.read())
        samples = payload["baseline_samples"]
    # TypeError covers valid JSON that is not an object (a list, number,
    # string or null), which cannot be indexed by a string key.
    except (json.JSONDecodeError, KeyError, TypeError) as e:
        print(
            f"error: malformed stdin (need {{'baseline_samples': [...]}}): {e}",
            file=sys.stderr,
        )
        return 1

    script = Path(args.script)
    if not script.exists():
        print(f"error: script not found: {script}", file=sys.stderr)
        return 1

    mode = detect_mode(script, samples)
    print(json.dumps({"mode": mode}))
    return 0
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _add_read_state(sub: argparse._SubParsersAction) -> None:
|
|
162
|
+
sub.add_parser("read-state", help="read .sindri/current/sindri.md; emit JSON")
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@_register("read-state")
def _handle_read_state(args: argparse.Namespace) -> int:
    """Print the current run state as JSON; return 1 if it cannot be loaded."""
    state = _load_state_or_exit()
    if state is not None:
        print(state.model_dump_json())
        return 0
    # _load_state_or_exit has already reported the error on stderr.
    return 1
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _add_pick_next(sub: argparse._SubParsersAction) -> None:
|
|
175
|
+
sub.add_parser("pick-next", help="pick next pending candidate from state")
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
@_register("pick-next")
def _handle_pick_next(args: argparse.Namespace) -> int:
    """Print the candidate chosen by ``pick_next`` as JSON, or ``null`` if none.

    Returns 1 when the state cannot be loaded, otherwise 0.
    """
    from sindri.core.pool import pick_next

    state = _load_state_or_exit()
    if state is None:
        return 1

    chosen = pick_next(state.pool)
    # A literal JSON null tells the caller that nothing was picked.
    print("null" if chosen is None else chosen.model_dump_json())
    return 0
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _add_record_result(sub: argparse._SubParsersAction) -> None:
|
|
194
|
+
sub.add_parser(
|
|
195
|
+
"record-result",
|
|
196
|
+
help="record an experiment result; reads JSON from stdin",
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
@_register("record-result")
def _handle_record_result(args: argparse.Namespace) -> int:
    """Record one experiment outcome in the state file and the JSONL log.

    Stdin must hold a JSON document matching ``RecordPayload``. The matching
    candidate's pool status is updated, ``current_best`` is replaced when the
    result status is ``"improved"``, the state is written, and a
    ``JsonlExperiment`` record is appended.

    Returns:
        0 after printing ``{"ok": true, "new_status": ..., "current_best": ...}``;
        1 (with ``error: ...`` on stderr) for an invalid payload, unreadable
        state, or an unknown candidate id.
    """
    import json
    from datetime import datetime, timezone

    from pydantic import BaseModel, ValidationError

    from sindri.core.state import append_jsonl, write_state
    from sindri.core.validators import JsonlExperiment, SubagentResult

    class RecordPayload(BaseModel):
        candidate_id: int
        metric_before: float
        subagent_result: SubagentResult
        commit_sha: str | None = None

    try:
        payload = RecordPayload.model_validate_json(sys.stdin.read())
    except ValidationError as exc:
        print(f"error: invalid payload: {exc}", file=sys.stderr)
        return 1
    except json.JSONDecodeError as exc:
        print(f"error: payload is not JSON: {exc}", file=sys.stderr)
        return 1

    state = _load_state_or_exit()
    if state is None:
        return 1

    # Find the first pool entry carrying the requested id.
    for cand in state.pool:
        if cand.id == payload.candidate_id:
            break
    else:
        print(f"error: no candidate with id {payload.candidate_id}", file=sys.stderr)
        return 1

    outcome = payload.subagent_result
    delta = outcome.metric_value - payload.metric_before

    # Result status reported by the subagent -> status stored on the pool entry.
    status_for_pool = {
        "improved": "kept",
        "regressed": "reverted",
        "inconclusive": "reverted",
        "check_failed": "check_failed",
        "errored": "errored",
        "timeout": "errored",
    }
    new_status = status_for_pool[outcome.status]

    # Copy the pool, replacing only the entries whose id matches.
    updated_pool = [
        entry.model_copy(update={"status": new_status})
        if entry.id == payload.candidate_id
        else entry
        for entry in state.pool
    ]
    changes: dict[str, object] = {"pool": updated_pool}
    if outcome.status == "improved":
        changes["current_best"] = outcome.metric_value
    new_state = state.model_copy(update=changes)
    write_state(new_state)

    append_jsonl(
        JsonlExperiment(
            ts=datetime.now(tz=timezone.utc),
            id=payload.candidate_id,
            candidate=cand.name,
            reps_used=outcome.reps_used,
            metric_before=payload.metric_before,
            metric_after=outcome.metric_value,
            delta=delta,
            confidence_ratio=outcome.confidence_ratio,
            status=outcome.status,
            commit_sha=payload.commit_sha,
            files_modified=outcome.files_modified,
        )
    )

    summary = {
        "ok": True,
        "new_status": new_status,
        "current_best": new_state.current_best,
    }
    print(json.dumps(summary))
    return 0
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def _add_check_termination(sub: argparse._SubParsersAction) -> None:
|
|
287
|
+
sub.add_parser(
|
|
288
|
+
"check-termination",
|
|
289
|
+
help="check whether the loop should terminate; "
|
|
290
|
+
"JSON {experiments_run, consecutive_reverts} on stdin",
|
|
291
|
+
)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
@_register("check-termination")
def _handle_check_termination(args: argparse.Namespace) -> int:
    """Evaluate ``check_termination`` for the current state and print the result as JSON.

    Stdin must hold a JSON object whose ``experiments_run`` and
    ``consecutive_reverts`` values are convertible with ``int()``.

    Returns:
        0 after printing the result; 1 (with ``error: ...`` on stderr) when
        the payload is invalid or the state cannot be loaded.
    """
    import json

    from sindri.core.termination import check_termination

    try:
        payload = json.loads(sys.stdin.read())
        counters = {
            key: int(payload[key])
            for key in ("experiments_run", "consecutive_reverts")
        }
    except (json.JSONDecodeError, KeyError, ValueError, TypeError) as exc:
        print(f"error: invalid stdin payload: {exc}", file=sys.stderr)
        return 1

    state = _load_state_or_exit()
    if state is None:
        return 1

    verdict = check_termination(state, **counters)
    print(verdict.model_dump_json())
    return 0
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _add_generate_pr_body(sub: argparse._SubParsersAction) -> None:
|
|
321
|
+
sub.add_parser("generate-pr-body", help="print PR body markdown to stdout")
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
@_register("generate-pr-body")
def _handle_generate_pr_body(args: argparse.Namespace) -> int:
    """Render the PR body from the current state and JSONL log and print it.

    Returns 1 when the state cannot be loaded, otherwise 0.
    """
    from sindri.core.pr_body import render_pr_body
    from sindri.core.state import read_jsonl

    state = _load_state_or_exit()
    if state is None:
        return 1

    log_path = Path(".sindri/current/sindri.jsonl")
    body = render_pr_body(state, read_jsonl(log_path))
    print(body)
    return 0
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _add_archive(sub: argparse._SubParsersAction) -> None:
|
|
338
|
+
sub.add_parser(
|
|
339
|
+
"archive",
|
|
340
|
+
help="move .sindri/current/ to .sindri/archive/<date>-<slug>/",
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
@_register("archive")
def _handle_archive(args: argparse.Namespace) -> int:
    """Move ``.sindri/current/`` to ``.sindri/archive/<date>-<slug>/``.

    The slug is the state's branch name without its ``sindri/`` prefix, with
    path separators replaced by ``-``; the date is today's ISO date.

    Returns:
        0 after printing ``{"ok": true, "archived_to": ...}``; 1 (with an
        ``error: ...`` line on stderr) when there is no current run, the state
        cannot be read, the slug is unsafe, or the destination already exists.
    """
    import json
    import shutil
    from datetime import date

    current = Path(".sindri/current")
    if not current.exists():
        print("error: .sindri/current/ does not exist", file=sys.stderr)
        return 1

    state = _load_state_or_exit(dir=current)
    if state is None:
        return 1

    # Slug is the branch name minus the 'sindri/' prefix. Sanitize it: a
    # tampered sindri.md could carry a branch like "sindri/../../etc/whatever"
    # and we must not let shutil.move follow that out of .sindri/archive/.
    branch = state.branch
    raw_slug = branch[len("sindri/"):] if branch.startswith("sindri/") else branch
    slug = raw_slug.replace("/", "-").replace("\\", "-")
    if slug in {"", ".", ".."} or slug.startswith("."):
        print(
            f"error: refusing to archive with unsafe slug {raw_slug!r}",
            file=sys.stderr,
        )
        return 1

    archive_name = f"{date.today().isoformat()}-{slug}"
    archive_dir = Path(".sindri") / "archive" / archive_name
    # shutil.move() into an existing directory would place `current` *inside*
    # it (…/<date>-<slug>/current) and mix two runs; refuse instead.
    if archive_dir.exists():
        print(
            f"error: archive destination {str(archive_dir)!r} already exists",
            file=sys.stderr,
        )
        return 1
    archive_dir.parent.mkdir(parents=True, exist_ok=True)
    shutil.move(str(current), str(archive_dir))

    print(json.dumps({"ok": True, "archived_to": str(archive_dir)}))
    return 0
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _add_status(sub: argparse._SubParsersAction) -> None:
|
|
381
|
+
sub.add_parser("status", help="human-readable summary of current run")
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
@_register("status")
def _handle_status(args: argparse.Namespace) -> int:
    """Print a human-readable summary of the current run.

    Always returns 0: when ``read_state`` raises ``StateIOError`` a
    "no active run" line is printed instead of an error.
    """
    from sindri.core.state import StateIOError, read_state

    try:
        state = read_state()
    except StateIOError:
        print("sindri: no active run (.sindri/current/ not found)")
        return 0

    kept = sum(1 for c in state.pool if c.status == "kept")
    reverted = sum(1 for c in state.pool if c.status == "reverted")
    pending = sum(1 for c in state.pool if c.status == "pending")
    dead = sum(1 for c in state.pool if c.status in {"errored", "check_failed"})

    baseline_value = state.baseline.value
    # Until an experiment improves on it, the baseline is the current value.
    current = state.current_best if state.current_best is not None else baseline_value
    # A relative change is undefined for a zero baseline; show "n/a" rather
    # than crashing with ZeroDivisionError.
    if baseline_value:
        delta_text = f"{((current - baseline_value) / baseline_value) * 100:+.2f}%"
    else:
        delta_text = "n/a"

    print(
        f"sindri: {state.goal.direction} {state.goal.metric_name} by "
        f"{state.goal.target_pct}%"
    )
    print(f" branch: {state.branch}")
    print(f" mode: {state.mode}")
    print(
        f" baseline: {state.baseline.value:,.0f} "
        f"(σ {state.baseline.noise_floor:,.2f})"
    )
    print(f" current: {current:,.0f} ({delta_text})")
    print(
        f" pool: {len(state.pool)} total · {kept} kept · {reverted} reverted "
        f"· {dead} dead · {pending} pending"
    )
    return 0
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
# Goal grammar: "<reduce|increase> <metric> by <N>%", matched case-insensitively.
# Whitespace around the goal is tolerated, <metric> starts with a letter and
# continues with letters, digits or underscores, and <N> is an integer or a
# decimal number. Named groups: direction, metric, pct.
_GOAL_RE = re.compile(
    r"\s*(?P<direction>reduce|increase)\s+(?P<metric>[a-z][a-z0-9_]*)"
    r"\s+by\s+(?P<pct>\d+(?:\.\d+)?)\s*%\s*",
    re.IGNORECASE,
)
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def _add_init(sub: argparse._SubParsersAction) -> None:
|
|
428
|
+
p = sub.add_parser(
|
|
429
|
+
"init",
|
|
430
|
+
help="initialize a new sindri run from a goal and candidate pool",
|
|
431
|
+
)
|
|
432
|
+
p.add_argument(
|
|
433
|
+
"--goal",
|
|
434
|
+
required=True,
|
|
435
|
+
help="goal string, e.g. 'reduce bundle_bytes by 15%%'",
|
|
436
|
+
)
|
|
437
|
+
p.add_argument(
|
|
438
|
+
"--pool-json",
|
|
439
|
+
required=True,
|
|
440
|
+
help="JSON array of candidate dicts "
|
|
441
|
+
"(id, name, expected_impact_pct, optional files)",
|
|
442
|
+
)
|
|
443
|
+
p.add_argument(
|
|
444
|
+
"--script",
|
|
445
|
+
default=".claude/scripts/sindri/benchmark.py",
|
|
446
|
+
help="path to benchmark script",
|
|
447
|
+
)
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def _collect_baseline_samples(script: Path, metric_name: str) -> list[float] | None:
    """Run the benchmark three times and return the metric parsed from each run.

    Prints ``error: ...`` to stderr and returns ``None`` as soon as a run times
    out (1800 seconds), exits non-zero, or has its stdout rejected by
    ``parse_metric_line``.
    """
    import subprocess as sp

    from sindri.core.metric import MetricParseError, parse_metric_line

    values: list[float] = []
    for run_idx in (1, 2, 3):
        try:
            proc = sp.run(
                [sys.executable, str(script)],
                capture_output=True,
                text=True,
                check=False,
                timeout=1800,
            )
        except sp.TimeoutExpired:
            print(
                f"error: benchmark timed out during baseline run {run_idx}",
                file=sys.stderr,
            )
            return None
        if proc.returncode != 0:
            print(
                f"error: benchmark failed on baseline run {run_idx}: {proc.stderr.strip()}",
                file=sys.stderr,
            )
            return None
        try:
            _, value = parse_metric_line(proc.stdout, expected_name=metric_name)
        except MetricParseError as exc:
            print(f"error: baseline run {run_idx}: {exc}", file=sys.stderr)
            return None
        values.append(value)
    return values


@_register("init")
def _handle_init(args: argparse.Namespace) -> int:
    """Initialize a new run from ``--goal``, ``--pool-json`` and ``--script``.

    Steps, in order: refuse if ``.sindri/current/`` exists; parse and validate
    the goal and the candidate pool; check the script exists; refuse if the
    target branch already exists; take three baseline samples; compute the
    noise floor, the post-warmup mean and the mode; create the branch; write
    the state; append the session-start and baseline JSONL records.

    Returns:
        0 after printing ``{"ok": true, "branch": ..., "baseline": ...,
        "noise_floor": ..., "mode": ...}``; 1 (with ``error: ...`` on stderr)
        on any failure.
    """
    import json
    from datetime import datetime, timezone
    from pathlib import Path

    from sindri.core.git_ops import GitError, branch_exists, create_branch
    from sindri.core.modes import detect_mode
    from sindri.core.noise import noise_floor
    from sindri.core.state import append_jsonl, write_state
    from sindri.core.validators import (
        Baseline,
        Candidate,
        Goal,
        Guardrails,
        JsonlBaseline,
        JsonlBaselineComplete,
        JsonlSessionStart,
        SindriState,
    )

    if Path(".sindri/current").exists():
        print(
            "error: .sindri/current/ already exists — another run is active",
            file=sys.stderr,
        )
        return 1

    # fullmatch (not search) so that trailing text such as
    # "reduce x by 15% and make coffee" is rejected instead of being dropped.
    parsed = _GOAL_RE.fullmatch(args.goal)
    if parsed is None:
        print(
            "error: malformed goal — expected 'reduce|increase <metric> by <N>%'",
            file=sys.stderr,
        )
        return 1
    direction = parsed.group("direction").lower()
    metric_name = parsed.group("metric")
    target_pct = float(parsed.group("pct"))

    try:
        goal = Goal(
            metric_name=metric_name,
            direction=direction,  # type: ignore[arg-type]
            target_pct=target_pct,
        )
    except Exception as exc:
        print(f"error: invalid goal: {exc}", file=sys.stderr)
        return 1

    try:
        pool = [Candidate(**entry) for entry in json.loads(args.pool_json)]
    except Exception as exc:
        print(f"error: invalid pool-json: {exc}", file=sys.stderr)
        return 1

    script = Path(args.script)
    if not script.exists():
        print(f"error: benchmark.py not found at {script}", file=sys.stderr)
        return 1

    # Fail fast on a taken branch name BEFORE spending three baseline runs
    # — reusing it would silently clobber a prior aborted run's history.
    slug = "-".join(
        (direction, metric_name.replace("_", "-"), f"{int(target_pct)}pct")
    )
    branch_name = "sindri/" + slug
    if branch_exists(branch_name):
        print(
            f"error: branch {branch_name!r} already exists — "
            f"delete it or archive the prior run first",
            file=sys.stderr,
        )
        return 1

    samples = _collect_baseline_samples(script, metric_name)
    if samples is None:
        return 1

    try:
        sigma = noise_floor(samples)
    except Exception as exc:
        print(f"error: noise floor computation failed: {exc}", file=sys.stderr)
        return 1

    # Post-warmup mean: the first sample (JIT / cold cache) is left out.
    steady = samples[1:]
    mean_val = sum(steady) / len(steady)
    mode = detect_mode(script, samples)

    try:
        create_branch(branch_name)
    except GitError as exc:
        print(f"error: could not create branch: {exc}", file=sys.stderr)
        return 1

    now = datetime.now(tz=timezone.utc)
    write_state(
        SindriState(
            goal=goal,
            baseline=Baseline(value=mean_val, noise_floor=sigma, samples=samples),
            pool=pool,
            branch=branch_name,
            started_at=now,
            guardrails=Guardrails(mode=mode),  # type: ignore[arg-type]
            mode=mode,
        )
    )

    # The session record stores a signed target: negative for "reduce".
    signed_target = -target_pct if direction == "reduce" else target_pct
    append_jsonl(
        JsonlSessionStart(ts=now, goal=args.goal, target_pct=signed_target, mode=mode)
    )
    for run_index, sample in enumerate(samples, start=1):
        append_jsonl(
            JsonlBaseline(
                ts=now, run_index=run_index, value=sample, is_warmup=(run_index == 1)
            )
        )
    append_jsonl(JsonlBaselineComplete(ts=now, mean=mean_val, noise_floor=sigma))

    summary = {
        "ok": True,
        "branch": branch_name,
        "baseline": mean_val,
        "noise_floor": sigma,
        "mode": mode,
    }
    print(json.dumps(summary))
    return 0
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and dispatch to the registered subcommand handler.

    Returns:
        The handler's exit code; 0 after printing help when no subcommand was
        given; 2 when the parsed subcommand has no registered handler.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if args.subcommand is None:
        parser.print_help()
        return 0

    try:
        handler = _HANDLERS[args.subcommand]
    except KeyError:
        # Parser knows the subcommand but no handler was registered for it.
        print(
            f"error: subcommand {args.subcommand!r} not yet implemented",
            file=sys.stderr,
        )
        return 2
    return handler(args)
|
sindri/core/__init__.py
ADDED
|
File without changes
|