pi-autoresearch-vkf 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.7.0
4
+
5
+ Turn the live widget into a tabular experiment view and add a one-key browser open.
6
+
7
+ - The above-editor widget now shows a **table of the recent runs** — truncated
8
+ 7-char commit, a column per recorded metric, status (keep/discard/outcome), and
9
+ a short description — below the run/kept/discarded counts and the memory tally.
10
+ Columns are data-driven from each run's `METRIC name=value` lines (no metric
11
+ names are hardcoded; the session's configured metric is pinned first), capped at
12
+ 5 columns for readability — the browser page still has every metric.
13
+ - Experiments now persist **all** parsed metrics and the capturing commit (the
14
+ working dir's `HEAD` by default), not just the primary metric value.
15
+ - **Open in browser**: press **Ctrl+O** (configurable via
16
+ `PI_AUTORESEARCH_OPEN_SHORTCUT`) or run **`/research-open`** to launch the live
17
+ `progress.html` in your default browser. The widget footer advertises the keys.
18
+
3
19
  ## 0.6.0
4
20
 
5
21
  Add keyless web access so the gather step works on stock pi.
package/README.md CHANGED
@@ -218,12 +218,14 @@ novelty), then synthesis unlocks the combo.
218
218
 
219
219
  Three live views, in increasing detail:
220
220
 
221
- - **Widget** (always on, above the editor) — win/loss counts, best metric, memory
222
- state tally; refreshes after every tool call.
221
+ - **Widget** (always on, above the editor) — run/kept/discarded counts, best
222
+ metric, memory tally, and a table of the recent runs (commit · every metric ·
223
+ status · change); refreshes after every tool call.
223
224
  - **Fullscreen overlay** — press **Ctrl+G** (or call `research_status`) for the
224
225
  full experiment list, memory lifecycle, and verified claims.
225
- - **Browser dashboards** — `export_dashboard` writes two self-contained pages to
226
- `.autoresearch-vkf/session/`:
226
+ - **Browser dashboards** — press **Ctrl+O** (or run `/research-open`) to open the
227
+ live progress page in your default browser. `export_dashboard` writes two
228
+ self-contained pages to `.autoresearch-vkf/session/`:
227
229
  - `progress.html` — metric-over-time chart, experiment timeline, and memory
228
230
  lifecycle; auto-refreshes so an open tab tracks the run live.
229
231
  - `dashboard.html` — the interactive **idea-lineage graph** (paper → claim →
@@ -242,6 +244,8 @@ Three live views, in increasing detail:
242
244
  (default `~`, i.e. the bundle lives at `~/.autoresearch-vkf/memory/`).
243
245
  - `PI_AUTORESEARCH_SHORTCUT` — key for the fullscreen dashboard (default `ctrl+g`;
244
246
  set to `none` to disable).
247
+ - `PI_AUTORESEARCH_OPEN_SHORTCUT` — key to open the progress page in the browser
248
+ (default `ctrl+o`; set to `none` to disable — `/research-open` still works).
245
249
 
246
250
  ## Development
247
251
 
@@ -15,11 +15,26 @@ import {
15
15
  type MemoryState,
16
16
  } from "./cards.ts";
17
17
  import {
18
+ experimentMetrics,
18
19
  OUTCOME_GLYPH,
19
20
  readExperiments,
20
21
  summarize,
22
+ type Experiment,
21
23
  } from "./experiments.ts";
22
24
  import { hasMemory, sessionPaths } from "./paths.ts";
25
+ import { loadShortcuts } from "./shortcuts.ts";
26
+
27
+ /** How many recent runs the live widget table shows. */
28
+ const WIDGET_ROWS = 7;
29
+
30
/**
 * Builds the widget footer that lists the currently enabled key bindings.
 *
 * @returns The labels joined with " · ", or `undefined` when no shortcut is
 *   enabled (so the caller can omit the footer).
 */
function shortcutHint(): string | undefined {
  const { openBrowser, fullscreenDashboard } = loadShortcuts();
  const labels = [
    openBrowser ? `${openBrowser} open in browser` : undefined,
    fullscreenDashboard ? `${fullscreenDashboard} dashboard` : undefined,
  ].filter((label): label is string => label !== undefined);
  if (labels.length === 0) return undefined;
  return labels.join(" · ");
}
23
38
 
24
39
  const trimNum = (v: number): string =>
25
40
  Number.isInteger(v) ? String(v) : String(Number(v.toFixed(4)));
@@ -52,11 +67,74 @@ function memoryLine(root: string): string {
52
67
  return `memory: ${c.candidate} candidate · ${verified} verified · ${c.contradicted} contradicted`;
53
68
  }
54
69
 
70
/**
 * Status cell for a run: the `OUTCOME_GLYPH` entry for its outcome followed by
 * "keep" / "discard" once a decision was recorded, or the raw outcome otherwise.
 */
function statusLabel(e: Experiment): string {
  let word: string = e.outcome;
  if (e.kept === true) {
    word = "keep";
  } else if (e.kept === false) {
    word = "discard";
  }
  return `${OUTCOME_GLYPH[e.outcome]} ${word}`;
}
75
+
76
/**
 * Lays out a plain-text table with fixed-width columns.
 *
 * @param headers Column titles; their count defines how many column widths are computed.
 * @param rows Body rows, one string per cell. A missing cell counts as empty when sizing.
 * @param align Per-column justification: "r" pads on the left (right-justified),
 *   anything else pads on the right.
 * @returns The header line, a rule of "─" characters, then one line per row.
 *   Header and body lines have trailing whitespace removed.
 */
function renderTable(headers: string[], rows: string[][], align: ("l" | "r")[]): string[] {
  // Each column is as wide as its longest cell, header included.
  const widths: number[] = [];
  headers.forEach((header, col) => {
    let widest = header.length;
    for (const row of rows) {
      const len = (row[col] ?? "").length;
      if (len > widest) widest = len;
    }
    widths.push(widest);
  });

  const formatRow = (cells: string[]): string => {
    const padded = cells.map((cell, col) => {
      const width = widths[col]!;
      return align[col] === "r" ? cell.padStart(width) : cell.padEnd(width);
    });
    return padded.join(" ").trimEnd();
  };

  const rule = widths.map((w) => "─".repeat(w)).join(" ");
  const lines = [formatRow(headers), rule];
  for (const row of rows) lines.push(formatRow(row));
  return lines;
}
91
+
92
/**
 * Builds the recent-runs table for the widget: commit · each metric · status · change.
 *
 * Reads the session's experiments under `root`, keeps the last `WIDGET_ROWS`
 * of them (newest first) and renders one row per run.
 *
 * @param root Project root whose session experiments are read.
 * @param metricName The session's primary metric; always the first metric column.
 * @returns The table lines, or a single placeholder line when no run exists yet.
 */
function runsTable(root: string, metricName: string): string[] {
  const experiments = readExperiments(sessionPaths(root).experiments);
  if (experiments.length === 0) return ["(no experiments yet)"];

  const recent = experiments.slice(-WIDGET_ROWS).reverse(); // newest first
  const perRow = recent.map((e) => experimentMetrics(e, metricName));

  // Metric columns: the primary metric first, then any others seen in the
  // visible rows, sorted. At most 5 metric columns are shown; the commit,
  // status and change columns come on top of that.
  const others = new Set<string>();
  for (const m of perRow) {
    for (const k of Object.keys(m)) {
      if (k !== metricName) others.add(k);
    }
  }
  const metricCols = [metricName, ...[...others].sort()].slice(0, 5);

  // A description is free text: collapse newlines/tabs/runs of spaces so a cell
  // can never spill onto a second line, then cap it at 40 characters.
  const changeCell = (text: string): string => {
    const flat = text.replace(/\s+/g, " ").trim();
    return flat.length > 40 ? flat.slice(0, 39) + "…" : flat;
  };

  const headers = ["commit", ...metricCols, "status", "change"];
  const align: ("l" | "r")[] = ["l", ...metricCols.map((): "r" => "r"), "l", "l"];
  const rows = recent.map((e, i) => {
    const m = perRow[i]!;
    return [
      e.commit ?? "—",
      // Runs that did not record a given metric show a dash in that column.
      ...metricCols.map((c) => (m[c] === undefined ? "—" : trimNum(m[c]!))),
      statusLabel(e),
      changeCell(e.description),
    ];
  });
  return renderTable(headers, rows, align);
}
119
+
55
120
  /** Compact widget shown above the editor. Returns `[]` when there is no session. */
56
121
  export function buildWidgetLines(root: string): string[] {
57
122
  const config = readConfig(sessionPaths(root).config);
58
123
  if (!config) return [];
59
- return [`pi-autoresearch-vkf · ${config.name}`, experimentLine(root), memoryLine(root)];
124
+ const s = summarize(readExperiments(sessionPaths(root).experiments), config.direction);
125
+ const best = s.best === undefined ? "—" : trimNum(s.best);
126
+ const runsLine =
127
+ `runs: ${s.total} · kept: ${s.kept} · discarded: ${s.discarded} · ` +
128
+ `inconclusive: ${s.inconclusive} · best ${config.metricName}: ${best}`;
129
+ const hint = shortcutHint();
130
+ return [
131
+ `pi-autoresearch-vkf · ${config.name}`,
132
+ runsLine,
133
+ memoryLine(root),
134
+ "",
135
+ ...runsTable(root, config.metricName),
136
+ ...(hint ? ["", hint] : []),
137
+ ];
60
138
  }
61
139
 
62
140
  /** Full status for the fullscreen overlay. */
@@ -23,8 +23,12 @@ export interface Experiment {
23
23
  description: string;
24
24
  /** Claim/idea this experiment tested (a VKF id, e.g. "claim:adagc"). */
25
25
  claim_id?: string;
26
- /** Metric value obtained. */
26
+ /** Primary metric value obtained (the session's configured metric). */
27
27
  value?: number;
28
+ /** All `METRIC name=value` pairs recorded for this run, primary metric included. */
29
+ metrics?: Record<string, number>;
30
+ /** Short (7-char) commit hash capturing the change, if known. */
31
+ commit?: string;
28
32
  /** Baseline this run was compared against. */
29
33
  baseline?: number;
30
34
  /** Outcome relative to the baseline and metric direction. */
@@ -92,6 +96,10 @@ export interface ExperimentSummary {
92
96
  loss: number;
93
97
  inconclusive: number;
94
98
  pending: number;
99
+ /** Runs whose change was kept (`kept === true`). */
100
+ kept: number;
101
+ /** Runs whose change was reverted (`kept === false`). */
102
+ discarded: number;
95
103
  /** Best metric value seen, respecting direction. */
96
104
  best?: number;
97
105
  }
@@ -100,9 +108,19 @@ export function summarize(
100
108
  experiments: Experiment[],
101
109
  direction: MetricDirection,
102
110
  ): ExperimentSummary {
103
- const s: ExperimentSummary = { total: experiments.length, win: 0, loss: 0, inconclusive: 0, pending: 0 };
111
+ const s: ExperimentSummary = {
112
+ total: experiments.length,
113
+ win: 0,
114
+ loss: 0,
115
+ inconclusive: 0,
116
+ pending: 0,
117
+ kept: 0,
118
+ discarded: 0,
119
+ };
104
120
  for (const e of experiments) {
105
121
  s[e.outcome] += 1;
122
+ if (e.kept === true) s.kept += 1;
123
+ else if (e.kept === false) s.discarded += 1;
106
124
  if (e.value !== undefined) {
107
125
  if (s.best === undefined) s.best = e.value;
108
126
  else s.best = direction === "higher" ? Math.max(s.best, e.value) : Math.min(s.best, e.value);
@@ -110,3 +128,9 @@ export function summarize(
110
128
  }
111
129
  return s;
112
130
  }
131
+
132
/**
 * Resolves the metric map to display for a run.
 *
 * A non-empty `metrics` record is returned as-is (the same object, not a copy).
 * Otherwise — e.g. for records that only carry `value` — the value is exposed
 * under `primaryMetric`, or an empty map is returned when there is no value.
 */
export function experimentMetrics(e: Experiment, primaryMetric: string): Record<string, number> {
  const recorded = e.metrics;
  if (recorded && Object.keys(recorded).length > 0) {
    return recorded;
  }
  if (e.value === undefined) {
    return {};
  }
  return { [primaryMetric]: e.value };
}
@@ -15,9 +15,10 @@
15
15
  * as a VKF *candidate* with a transaction record; promotion to a trusted state is
16
16
  * an explicit, audited step — never silent.
17
17
  */
18
+ import { execFileSync } from "node:child_process";
18
19
  import { existsSync, readFileSync, writeFileSync } from "node:fs";
19
20
  import { basename } from "node:path";
20
- import type { AgentToolResult, ExtensionAPI } from "@earendil-works/pi-coding-agent";
21
+ import type { AgentToolResult, ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
21
22
  import { Type, type Static } from "typebox";
22
23
 
23
24
  import {
@@ -682,7 +683,8 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
682
683
  kept: Type.Optional(Type.Boolean({ description: "Whether the change was kept (vs reverted)." })),
683
684
  conditions: Type.Optional(Type.String({ description: "Conditions under which this holds (model size, dataset, etc.) — recorded on the memory card." })),
684
685
  notes: Type.Optional(Type.String({ description: "Deviations, surprises, next tests." })),
685
- commit: Type.Optional(Type.String({ description: "Git commit capturing the change, if any." })),
686
+ commit: Type.Optional(Type.String({ description: "Git commit capturing the change, if any. Defaults to the current HEAD of the working dir." })),
687
+ metrics: Type.Optional(Type.Record(Type.String(), Type.Number(), { description: "All `METRIC name=value` pairs from the run (from vkf_run_experiment), so the dashboard can show every metric — not just the primary one." })),
686
688
  });
687
689
 
688
690
  pi.registerTool({
@@ -700,6 +702,11 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
700
702
  const baseline = params.baseline ?? config.baseline;
701
703
  const outcome: Outcome = params.outcome ?? deriveOutcome(baseline, params.value, config.direction);
702
704
 
705
+ // Record every metric (primary included), and the commit that captured the change.
706
+ const metrics = { ...(params.metrics ?? {}) };
707
+ if (metrics[config.metricName] === undefined) metrics[config.metricName] = params.value;
708
+ const commit = shortCommit(params.commit, config.workingDir ?? root);
709
+
703
710
  const experiments = readExperiments(sp.experiments);
704
711
  const seq = String(experiments.length + 1).padStart(3, "0");
705
712
  const expEntry: Experiment = {
@@ -707,6 +714,8 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
707
714
  description: params.description,
708
715
  claim_id: params.claim_id,
709
716
  value: params.value,
717
+ metrics,
718
+ commit,
710
719
  baseline,
711
720
  outcome,
712
721
  kept: params.kept,
@@ -1007,6 +1016,42 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
1007
1016
  });
1008
1017
  }
1009
1018
 
1019
+ // ── open the live progress page in the default browser ───────────────────────
1020
/**
 * Regenerates the progress page for the current session and hands it to the
 * platform's "open" command. Outcomes are reported through `ctx.ui.notify`
 * when a UI is available; nothing is thrown to the caller.
 */
const openProgress = async (ctx: ExtensionContext): Promise<void> => {
  const root = resolveRoot(ctx);
  if (!hasSession(root)) {
    if (ctx.hasUI) ctx.ui.notify("No pi-autoresearch-vkf session in this directory yet.", "warning");
    return;
  }
  // Make sure the file exists/is current before handing it to the browser.
  const file = writeProgressDashboard(root);
  if (!file) {
    if (ctx.hasUI) ctx.ui.notify("Could not generate the progress page.", "error");
    return;
  }
  const [cmd, args] = browserOpenCommand(file);
  try {
    // NOTE(review): only a rejected `pi.exec` is treated as failure here —
    // confirm whether a non-zero exit status rejects or is reported on the
    // resolved result, and handle it if it is the latter.
    await pi.exec(cmd, args, { timeout: 10_000 });
    if (ctx.hasUI) ctx.ui.notify(`Opened progress page in your browser (${cmd}).`, "info");
  } catch (e: unknown) {
    // A thrown value is not guaranteed to be an Error; avoid printing "undefined".
    const reason = e instanceof Error ? e.message : String(e);
    if (ctx.hasUI) ctx.ui.notify(`Couldn't launch a browser — open ${file} manually. (${reason})`, "error");
  }
};
1040
+
1041
+ if (shortcuts.openBrowser) {
1042
+ pi.registerShortcut(shortcuts.openBrowser, {
1043
+ description: "Open the pi-autoresearch-vkf progress page in the browser",
1044
+ handler: openProgress,
1045
+ });
1046
+ }
1047
+
1048
+ pi.registerCommand("research-open", {
1049
+ description: "Open the pi-autoresearch-vkf progress page in your browser",
1050
+ handler: async (_args, ctx) => {
1051
+ await openProgress(ctx);
1052
+ },
1053
+ });
1054
+
1010
1055
  // ── lifecycle ────────────────────────────────────────────────────────────────
1011
1056
  pi.on("session_start", async (_event, ctx) => {
1012
1057
  refreshWidget(ctx, resolveRoot(ctx));
@@ -1152,6 +1197,37 @@ function writeProgressDashboard(root: string, refreshSeconds?: number): string |
1152
1197
  return sp.progressHtml;
1153
1198
  }
1154
1199
 
1200
/**
 * Normalize a commit reference to a 7-char short hash.
 *
 * - A non-blank `explicit` value wins: leading non-hex characters are stripped
 *   and, if what remains is a hex string of 7+ characters, its first 7 are
 *   returned. Anything else (e.g. a ref name) is kept as its first 7 trimmed
 *   characters.
 * - A missing, empty or whitespace-only `explicit` falls back to a best-effort
 *   `git rev-parse --short=7 HEAD` run in `cwd`.
 *
 * @param explicit Commit reference supplied by the caller, if any.
 * @param cwd Directory in which git is run for the HEAD fallback.
 * @returns The short commit, or `undefined` when nothing resolvable exists
 *   (not a repo, git missing, unexpected git output, …).
 */
function shortCommit(explicit: string | undefined, cwd: string): string | undefined {
  const toShortHash = (s: string): string | undefined => {
    const h = s.trim().replace(/^[^0-9a-f]*/i, "");
    return /^[0-9a-f]{7,}$/i.test(h) ? h.slice(0, 7) : undefined;
  };

  // Treat a blank string like "not given" so it still picks up HEAD.
  const given = explicit?.trim();
  if (given) return toShortHash(given) ?? given.slice(0, 7);

  try {
    const head = execFileSync("git", ["rev-parse", "--short=7", "HEAD"], {
      cwd,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "ignore"],
    });
    return toShortHash(head);
  } catch {
    // Deliberate best-effort: no repo / no git simply means "no commit".
    return undefined;
  }
}
1217
+
1218
/**
 * Picks the command and argument list used to open `target` (a file or URL)
 * with the user's default handler, based on `process.platform`.
 *
 * @returns A `[command, args]` pair: `open` on macOS, `cmd /c start` on
 *   Windows, and `xdg-open` everywhere else.
 */
function browserOpenCommand(target: string): [string, string[]] {
  const platform = process.platform;
  if (platform === "darwin") {
    return ["open", [target]];
  }
  if (platform === "win32") {
    // `start` is a cmd builtin; the empty "" is the window-title placeholder.
    return ["cmd", ["/c", "start", "", target]];
  }
  return ["xdg-open", [target]];
}
1230
+
1155
1231
  function writeFileIfAbsent(path: string, contents: string): void {
1156
1232
  if (!existsSync(path)) writeFileSync(path, contents, "utf8");
1157
1233
  }
@@ -10,13 +10,23 @@ import type { KeyId } from "@earendil-works/pi-tui";
10
10
  export interface ShortcutConfig {
11
11
  /** Open the fullscreen research dashboard. `undefined` disables it. */
12
12
  fullscreenDashboard?: KeyId;
13
+ /** Open the live progress HTML in the default browser. `undefined` disables it. */
14
+ openBrowser?: KeyId;
13
15
  }
14
16
 
15
17
  const DEFAULT_FULLSCREEN: KeyId = "ctrl+g";
18
+ const DEFAULT_OPEN_BROWSER: KeyId = "ctrl+o";
19
+
20
/**
 * Reads a shortcut override from the environment.
 *
 * @param envVar Name of the environment variable holding the override.
 * @param fallback Key used when the variable is not set at all.
 * @returns `fallback` when unset; `undefined` (shortcut disabled) when the
 *   trimmed value is empty or "none" in any letter case; otherwise the trimmed value.
 */
function resolve(envVar: string, fallback: KeyId): KeyId | undefined {
  const raw = process.env[envVar];
  if (raw === undefined) return fallback;

  const value = raw.trim();
  const disabled = value === "" || value.toLowerCase() === "none";
  return disabled ? undefined : (value as KeyId);
}
16
26
 
17
27
  export function loadShortcuts(): ShortcutConfig {
18
- const override = process.env.PI_AUTORESEARCH_SHORTCUT?.trim();
19
- if (override === undefined) return { fullscreenDashboard: DEFAULT_FULLSCREEN };
20
- if (override === "" || override.toLowerCase() === "none") return {};
21
- return { fullscreenDashboard: override as KeyId };
28
+ return {
29
+ fullscreenDashboard: resolve("PI_AUTORESEARCH_SHORTCUT", DEFAULT_FULLSCREEN),
30
+ openBrowser: resolve("PI_AUTORESEARCH_OPEN_SHORTCUT", DEFAULT_OPEN_BROWSER),
31
+ };
22
32
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-autoresearch-vkf",
3
- "version": "0.6.0",
3
+ "version": "0.7.0",
4
4
  "type": "module",
5
5
  "description": "Autoresearch with verifiable long-term scientific memory. A pi extension that gathers literature, stores it as VKF claims, runs experiments, and writes verified results back to a git-native knowledge bundle so future runs build on what was learned instead of rediscovering it.",
6
6
  "keywords": [