pi-autoresearch-vkf 0.5.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/README.md +8 -4
- package/extensions/pi-autoresearch-vkf/dashboard.ts +79 -1
- package/extensions/pi-autoresearch-vkf/experiments.ts +26 -2
- package/extensions/pi-autoresearch-vkf/index.ts +291 -39
- package/extensions/pi-autoresearch-vkf/shortcuts.ts +14 -4
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,47 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.7.0
|
|
4
|
+
|
|
5
|
+
Turn the live widget into a tabular experiment view and add a one-key browser open.
|
|
6
|
+
|
|
7
|
+
- The above-editor widget now shows a **table of the recent runs** — truncated
|
|
8
|
+
7-char commit, a column per recorded metric, status (keep/discard/outcome), and
|
|
9
|
+
a short description — above run/kept/discarded counts and the memory tally.
|
|
10
|
+
Columns are data-driven from each run's `METRIC name=value` lines (no metric
|
|
11
|
+
names are hardcoded; the session's configured metric is pinned first), capped at
|
|
12
|
+
5 columns for readability — the browser page still has every metric.
|
|
13
|
+
- Experiments now persist **all** parsed metrics and the capturing commit (the
|
|
14
|
+
working dir's `HEAD` by default), not just the primary metric value.
|
|
15
|
+
- **Open in browser**: press **Ctrl+O** (configurable via
|
|
16
|
+
`PI_AUTORESEARCH_OPEN_SHORTCUT`) or run **`/research-open`** to launch the live
|
|
17
|
+
`progress.html` in your default browser. The widget footer advertises the keys.
|
|
18
|
+
|
|
19
|
+
## 0.6.0
|
|
20
|
+
|
|
21
|
+
Add keyless web access so the gather step works on stock pi.
|
|
22
|
+
|
|
23
|
+
- New tools **`WebSearch`** (DuckDuckGo HTML, no API key) and **`WebFetch`**
|
|
24
|
+
(JSON/text verbatim, HTML reduced to readable text). The pi host ships no web
|
|
25
|
+
tools, but `autoresearch-vkf-knowledge-gather` assumed they existed — the agent
|
|
26
|
+
reported it had no web search. These supply the named tools the skill already
|
|
27
|
+
references, against the free literature APIs (arXiv, OpenAlex, Crossref,
|
|
28
|
+
Semantic Scholar). No session required; no setup or keys.
|
|
29
|
+
- Tool names match Claude-Code casing so `pi-ai`'s tool-name table keeps prompt
|
|
30
|
+
caching aligned. `WebSearch` degrades gracefully to a WebFetch-an-API hint when
|
|
31
|
+
the search backend rate-limits or changes layout.
|
|
32
|
+
|
|
33
|
+
Make the browser progress dashboard automatic and live.
|
|
34
|
+
|
|
35
|
+
- `progress.html` is now written on `init_research` (so it exists from iteration
|
|
36
|
+
zero) and refreshed after every `remember_claim`, `verify_claim`, and
|
|
37
|
+
`vkf_log_experiment`. Previously it was only written when the agent explicitly
|
|
38
|
+
called `export_dashboard`, which most runs never did — so the dashboard was
|
|
39
|
+
effectively never created and never updated during a run.
|
|
40
|
+
- The page already meta-refreshes itself, so an open browser tab now tracks the
|
|
41
|
+
loop live with no manual step. `export_dashboard` is now for the heavier
|
|
42
|
+
vkf-CLI idea-lineage graph (`dashboard.html`), a custom refresh interval, or
|
|
43
|
+
opening the page in a browser.
|
|
44
|
+
|
|
3
45
|
## 0.5.2
|
|
4
46
|
|
|
5
47
|
Prefixed all skill names with `autoresearch-vkf-` to avoid namespace conflicts
|
package/README.md
CHANGED
|
@@ -218,12 +218,14 @@ novelty), then synthesis unlocks the combo.
|
|
|
218
218
|
|
|
219
219
|
Three live views, in increasing detail:
|
|
220
220
|
|
|
221
|
-
- **Widget** (always on, above the editor) —
|
|
222
|
-
|
|
221
|
+
- **Widget** (always on, above the editor) — run/kept/discarded counts, best
|
|
222
|
+
metric, memory tally, and a table of the recent runs (commit · every metric ·
|
|
223
|
+
status · change); refreshes after every tool call.
|
|
223
224
|
- **Fullscreen overlay** — press **Ctrl+G** (or call `research_status`) for the
|
|
224
225
|
full experiment list, memory lifecycle, and verified claims.
|
|
225
|
-
- **Browser dashboards** —
|
|
226
|
-
|
|
226
|
+
- **Browser dashboards** — press **Ctrl+O** (or run `/research-open`) to open the
|
|
227
|
+
live progress page in your default browser. `export_dashboard` writes two
|
|
228
|
+
self-contained pages to `.autoresearch-vkf/session/`:
|
|
227
229
|
- `progress.html` — metric-over-time chart, experiment timeline, and memory
|
|
228
230
|
lifecycle; auto-refreshes so an open tab tracks the run live.
|
|
229
231
|
- `dashboard.html` — the interactive **idea-lineage graph** (paper → claim →
|
|
@@ -242,6 +244,8 @@ Three live views, in increasing detail:
|
|
|
242
244
|
(default `~`, i.e. the bundle lives at `~/.autoresearch-vkf/memory/`).
|
|
243
245
|
- `PI_AUTORESEARCH_SHORTCUT` — key for the fullscreen dashboard (default `ctrl+g`;
|
|
244
246
|
set to `none` to disable).
|
|
247
|
+
- `PI_AUTORESEARCH_OPEN_SHORTCUT` — key to open the progress page in the browser
|
|
248
|
+
(default `ctrl+o`; set to `none` to disable — `/research-open` still works).
|
|
245
249
|
|
|
246
250
|
## Development
|
|
247
251
|
|
|
@@ -15,11 +15,26 @@ import {
|
|
|
15
15
|
type MemoryState,
|
|
16
16
|
} from "./cards.ts";
|
|
17
17
|
import {
|
|
18
|
+
experimentMetrics,
|
|
18
19
|
OUTCOME_GLYPH,
|
|
19
20
|
readExperiments,
|
|
20
21
|
summarize,
|
|
22
|
+
type Experiment,
|
|
21
23
|
} from "./experiments.ts";
|
|
22
24
|
import { hasMemory, sessionPaths } from "./paths.ts";
|
|
25
|
+
import { loadShortcuts } from "./shortcuts.ts";
|
|
26
|
+
|
|
27
|
+
/** How many recent runs the live widget table shows. */
|
|
28
|
+
const WIDGET_ROWS = 7;
|
|
29
|
+
|
|
30
|
+
/** A one-line footer advertising the configured shortcuts (the "buttons"). */
|
|
31
|
+
function shortcutHint(): string | undefined {
|
|
32
|
+
const s = loadShortcuts();
|
|
33
|
+
const parts: string[] = [];
|
|
34
|
+
if (s.openBrowser) parts.push(`${s.openBrowser} open in browser`);
|
|
35
|
+
if (s.fullscreenDashboard) parts.push(`${s.fullscreenDashboard} dashboard`);
|
|
36
|
+
return parts.length ? parts.join(" · ") : undefined;
|
|
37
|
+
}
|
|
23
38
|
|
|
24
39
|
const trimNum = (v: number): string =>
|
|
25
40
|
Number.isInteger(v) ? String(v) : String(Number(v.toFixed(4)));
|
|
@@ -52,11 +67,74 @@ function memoryLine(root: string): string {
|
|
|
52
67
|
return `memory: ${c.candidate} candidate · ${verified} verified · ${c.contradicted} contradicted`;
|
|
53
68
|
}
|
|
54
69
|
|
|
70
|
+
/** keep / discard if decided, otherwise the raw outcome (win/loss/…). */
|
|
71
|
+
function statusLabel(e: Experiment): string {
|
|
72
|
+
const word = e.kept === true ? "keep" : e.kept === false ? "discard" : e.outcome;
|
|
73
|
+
return `${OUTCOME_GLYPH[e.outcome]} ${word}`;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Render a fixed-width text table: header row, a rule, then the body rows.
|
|
78
|
+
* `align[i]` controls per-column justification ("r" right-justifies numbers).
|
|
79
|
+
*/
|
|
80
|
+
function renderTable(headers: string[], rows: string[][], align: ("l" | "r")[]): string[] {
|
|
81
|
+
const widths = headers.map((h, i) =>
|
|
82
|
+
Math.max(h.length, ...rows.map((r) => (r[i] ?? "").length)),
|
|
83
|
+
);
|
|
84
|
+
const fmt = (cells: string[]): string =>
|
|
85
|
+
cells
|
|
86
|
+
.map((c, i) => (align[i] === "r" ? c.padStart(widths[i]!) : c.padEnd(widths[i]!)))
|
|
87
|
+
.join(" ")
|
|
88
|
+
.trimEnd();
|
|
89
|
+
return [fmt(headers), widths.map((w) => "─".repeat(w)).join(" "), ...rows.map(fmt)];
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/** The recent-runs table: commit · each metric · status · description. */
|
|
93
|
+
function runsTable(root: string, metricName: string): string[] {
|
|
94
|
+
const experiments = readExperiments(sessionPaths(root).experiments);
|
|
95
|
+
if (experiments.length === 0) return ["(no experiments yet)"];
|
|
96
|
+
|
|
97
|
+
const recent = experiments.slice(-WIDGET_ROWS).reverse(); // newest first
|
|
98
|
+
const perRow = recent.map((e) => experimentMetrics(e, metricName));
|
|
99
|
+
|
|
100
|
+
// Metric columns: the primary metric first, then any others seen, sorted.
|
|
101
|
+
// Cap at 5 columns total to keep the widget readable (the web page has them all).
|
|
102
|
+
const others = new Set<string>();
|
|
103
|
+
for (const m of perRow) for (const k of Object.keys(m)) if (k !== metricName) others.add(k);
|
|
104
|
+
const metricCols = [metricName, ...[...others].sort()].slice(0, 5);
|
|
105
|
+
|
|
106
|
+
const headers = ["commit", ...metricCols, "status", "change"];
|
|
107
|
+
const align: ("l" | "r")[] = ["l", ...metricCols.map((): "r" => "r"), "l", "l"];
|
|
108
|
+
const rows = recent.map((e, i) => {
|
|
109
|
+
const m = perRow[i]!;
|
|
110
|
+
return [
|
|
111
|
+
e.commit ?? "—",
|
|
112
|
+
...metricCols.map((c) => (m[c] === undefined ? "—" : trimNum(m[c]!))),
|
|
113
|
+
statusLabel(e),
|
|
114
|
+
e.description.length > 40 ? e.description.slice(0, 39) + "…" : e.description,
|
|
115
|
+
];
|
|
116
|
+
});
|
|
117
|
+
return renderTable(headers, rows, align);
|
|
118
|
+
}
|
|
119
|
+
|
|
55
120
|
/** Compact widget shown above the editor. Returns `[]` when there is no session. */
|
|
56
121
|
export function buildWidgetLines(root: string): string[] {
|
|
57
122
|
const config = readConfig(sessionPaths(root).config);
|
|
58
123
|
if (!config) return [];
|
|
59
|
-
|
|
124
|
+
const s = summarize(readExperiments(sessionPaths(root).experiments), config.direction);
|
|
125
|
+
const best = s.best === undefined ? "—" : trimNum(s.best);
|
|
126
|
+
const runsLine =
|
|
127
|
+
`runs: ${s.total} · kept: ${s.kept} · discarded: ${s.discarded} · ` +
|
|
128
|
+
`inconclusive: ${s.inconclusive} · best ${config.metricName}: ${best}`;
|
|
129
|
+
const hint = shortcutHint();
|
|
130
|
+
return [
|
|
131
|
+
`pi-autoresearch-vkf · ${config.name}`,
|
|
132
|
+
runsLine,
|
|
133
|
+
memoryLine(root),
|
|
134
|
+
"",
|
|
135
|
+
...runsTable(root, config.metricName),
|
|
136
|
+
...(hint ? ["", hint] : []),
|
|
137
|
+
];
|
|
60
138
|
}
|
|
61
139
|
|
|
62
140
|
/** Full status for the fullscreen overlay. */
|
|
@@ -23,8 +23,12 @@ export interface Experiment {
|
|
|
23
23
|
description: string;
|
|
24
24
|
/** Claim/idea this experiment tested (a VKF id, e.g. "claim:adagc"). */
|
|
25
25
|
claim_id?: string;
|
|
26
|
-
/**
|
|
26
|
+
/** Primary metric value obtained (the session's configured metric). */
|
|
27
27
|
value?: number;
|
|
28
|
+
/** All `METRIC name=value` pairs recorded for this run, primary metric included. */
|
|
29
|
+
metrics?: Record<string, number>;
|
|
30
|
+
/** Short (7-char) commit hash capturing the change, if known. */
|
|
31
|
+
commit?: string;
|
|
28
32
|
/** Baseline this run was compared against. */
|
|
29
33
|
baseline?: number;
|
|
30
34
|
/** Outcome relative to the baseline and metric direction. */
|
|
@@ -92,6 +96,10 @@ export interface ExperimentSummary {
|
|
|
92
96
|
loss: number;
|
|
93
97
|
inconclusive: number;
|
|
94
98
|
pending: number;
|
|
99
|
+
/** Runs whose change was kept (`kept === true`). */
|
|
100
|
+
kept: number;
|
|
101
|
+
/** Runs whose change was reverted (`kept === false`). */
|
|
102
|
+
discarded: number;
|
|
95
103
|
/** Best metric value seen, respecting direction. */
|
|
96
104
|
best?: number;
|
|
97
105
|
}
|
|
@@ -100,9 +108,19 @@ export function summarize(
|
|
|
100
108
|
experiments: Experiment[],
|
|
101
109
|
direction: MetricDirection,
|
|
102
110
|
): ExperimentSummary {
|
|
103
|
-
const s: ExperimentSummary = {
|
|
111
|
+
const s: ExperimentSummary = {
|
|
112
|
+
total: experiments.length,
|
|
113
|
+
win: 0,
|
|
114
|
+
loss: 0,
|
|
115
|
+
inconclusive: 0,
|
|
116
|
+
pending: 0,
|
|
117
|
+
kept: 0,
|
|
118
|
+
discarded: 0,
|
|
119
|
+
};
|
|
104
120
|
for (const e of experiments) {
|
|
105
121
|
s[e.outcome] += 1;
|
|
122
|
+
if (e.kept === true) s.kept += 1;
|
|
123
|
+
else if (e.kept === false) s.discarded += 1;
|
|
106
124
|
if (e.value !== undefined) {
|
|
107
125
|
if (s.best === undefined) s.best = e.value;
|
|
108
126
|
else s.best = direction === "higher" ? Math.max(s.best, e.value) : Math.min(s.best, e.value);
|
|
@@ -110,3 +128,9 @@ export function summarize(
|
|
|
110
128
|
}
|
|
111
129
|
return s;
|
|
112
130
|
}
|
|
131
|
+
|
|
132
|
+
/** The metrics map for a run, falling back to the primary value for older records. */
|
|
133
|
+
export function experimentMetrics(e: Experiment, primaryMetric: string): Record<string, number> {
|
|
134
|
+
if (e.metrics && Object.keys(e.metrics).length > 0) return e.metrics;
|
|
135
|
+
return e.value === undefined ? {} : { [primaryMetric]: e.value };
|
|
136
|
+
}
|
|
@@ -15,9 +15,10 @@
|
|
|
15
15
|
* as a VKF *candidate* with a transaction record; promotion to a trusted state is
|
|
16
16
|
* an explicit, audited step — never silent.
|
|
17
17
|
*/
|
|
18
|
+
import { execFileSync } from "node:child_process";
|
|
18
19
|
import { existsSync, readFileSync, writeFileSync } from "node:fs";
|
|
19
20
|
import { basename } from "node:path";
|
|
20
|
-
import type { AgentToolResult, ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
21
|
+
import type { AgentToolResult, ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
21
22
|
import { Type, type Static } from "typebox";
|
|
22
23
|
|
|
23
24
|
import {
|
|
@@ -136,6 +137,9 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
136
137
|
const fresh = scaffoldMemoryBundle(root, params.name, config.memoryProfile);
|
|
137
138
|
appendLog(sp.log, { event: "init", name: config.name, goal: config.goal });
|
|
138
139
|
|
|
140
|
+
// Create the progress dashboard up front so it exists from iteration zero;
|
|
141
|
+
// it then refreshes automatically as experiments are logged.
|
|
142
|
+
writeProgressDashboard(root);
|
|
139
143
|
refreshWidget(ctx, root);
|
|
140
144
|
return textResult(
|
|
141
145
|
[
|
|
@@ -249,6 +253,7 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
249
253
|
});
|
|
250
254
|
appendLog(sp.log, { event: "remember", claim_id: claim.id, paper_id: paperId });
|
|
251
255
|
|
|
256
|
+
writeProgressDashboard(root);
|
|
252
257
|
refreshWidget(ctx, root);
|
|
253
258
|
return textResult(
|
|
254
259
|
[
|
|
@@ -319,6 +324,7 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
319
324
|
});
|
|
320
325
|
appendLog(sp.log, { event: "verify", claim_id: params.id, decision: params.decision });
|
|
321
326
|
|
|
327
|
+
writeProgressDashboard(root);
|
|
322
328
|
refreshWidget(ctx, root);
|
|
323
329
|
return textResult(
|
|
324
330
|
[
|
|
@@ -677,7 +683,8 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
677
683
|
kept: Type.Optional(Type.Boolean({ description: "Whether the change was kept (vs reverted)." })),
|
|
678
684
|
conditions: Type.Optional(Type.String({ description: "Conditions under which this holds (model size, dataset, etc.) — recorded on the memory card." })),
|
|
679
685
|
notes: Type.Optional(Type.String({ description: "Deviations, surprises, next tests." })),
|
|
680
|
-
commit: Type.Optional(Type.String({ description: "Git commit capturing the change, if any." })),
|
|
686
|
+
commit: Type.Optional(Type.String({ description: "Git commit capturing the change, if any. Defaults to the current HEAD of the working dir." })),
|
|
687
|
+
metrics: Type.Optional(Type.Record(Type.String(), Type.Number(), { description: "All `METRIC name=value` pairs from the run (from vkf_run_experiment), so the dashboard can show every metric — not just the primary one." })),
|
|
681
688
|
});
|
|
682
689
|
|
|
683
690
|
pi.registerTool({
|
|
@@ -695,6 +702,11 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
695
702
|
const baseline = params.baseline ?? config.baseline;
|
|
696
703
|
const outcome: Outcome = params.outcome ?? deriveOutcome(baseline, params.value, config.direction);
|
|
697
704
|
|
|
705
|
+
// Record every metric (primary included), and the commit that captured the change.
|
|
706
|
+
const metrics = { ...(params.metrics ?? {}) };
|
|
707
|
+
if (metrics[config.metricName] === undefined) metrics[config.metricName] = params.value;
|
|
708
|
+
const commit = shortCommit(params.commit, config.workingDir ?? root);
|
|
709
|
+
|
|
698
710
|
const experiments = readExperiments(sp.experiments);
|
|
699
711
|
const seq = String(experiments.length + 1).padStart(3, "0");
|
|
700
712
|
const expEntry: Experiment = {
|
|
@@ -702,6 +714,8 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
702
714
|
description: params.description,
|
|
703
715
|
claim_id: params.claim_id,
|
|
704
716
|
value: params.value,
|
|
717
|
+
metrics,
|
|
718
|
+
commit,
|
|
705
719
|
baseline,
|
|
706
720
|
outcome,
|
|
707
721
|
kept: params.kept,
|
|
@@ -774,6 +788,8 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
774
788
|
writeConfig(sp.config, config);
|
|
775
789
|
}
|
|
776
790
|
|
|
791
|
+
// Refresh the browser progress page so an open tab tracks the loop live.
|
|
792
|
+
writeProgressDashboard(root);
|
|
777
793
|
refreshWidget(ctx, root);
|
|
778
794
|
const summary = summarize(readExperiments(sp.experiments), config.direction);
|
|
779
795
|
return textResult(
|
|
@@ -853,7 +869,7 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
853
869
|
name: "export_dashboard",
|
|
854
870
|
label: "Export dashboard",
|
|
855
871
|
description:
|
|
856
|
-
"
|
|
872
|
+
"Build the interactive idea-lineage graph (.autoresearch-vkf/session/dashboard.html — paper → claim → experiment, via the vkf CLI) and refresh the progress page. The progress page (progress.html — metric-over-time chart, experiment timeline, memory lifecycle) is also written automatically on init and after each remember/verify/experiment, and meta-refreshes itself, so an open browser tab tracks the loop live without re-running this. Use this tool for the lineage graph, a custom refresh interval, or to open the page in a browser.",
|
|
857
873
|
parameters: ExportParams,
|
|
858
874
|
async execute(_id, params: Static<typeof ExportParams>, _signal, _onUpdate, ctx): Promise<AgentToolResult<{ progress: string; lineage?: string }>> {
|
|
859
875
|
const root = resolveRoot(ctx);
|
|
@@ -861,42 +877,9 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
861
877
|
requireSession(root);
|
|
862
878
|
const config = readConfig(sp.config)!;
|
|
863
879
|
|
|
864
|
-
// Progress page (self-contained, no CLI needed).
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
description: e.description,
|
|
868
|
-
value: e.value,
|
|
869
|
-
outcome: e.outcome,
|
|
870
|
-
kept: e.kept,
|
|
871
|
-
claim_id: e.claim_id,
|
|
872
|
-
ts: e.ts,
|
|
873
|
-
}));
|
|
874
|
-
const memory: Record<string, number> = Object.fromEntries(MEMORY_STATES.map((s) => [s, 0]));
|
|
875
|
-
for (const c of listCards(root, { type: "claim" })) {
|
|
876
|
-
const st = c.meta["memory_state"] as MemoryState | undefined;
|
|
877
|
-
if (st && st in memory) memory[st]! += 1;
|
|
878
|
-
}
|
|
879
|
-
const claims = listCards(root, { bucket: "verified", type: "claim" })
|
|
880
|
-
.slice(0, 12)
|
|
881
|
-
.map((c) => ({
|
|
882
|
-
title: String(c.meta["title"] ?? c.meta["id"]),
|
|
883
|
-
confidence: String(c.meta["confidence"] ?? "—"),
|
|
884
|
-
state: String(c.meta["memory_state"] ?? "—"),
|
|
885
|
-
}));
|
|
886
|
-
|
|
887
|
-
const progressHtml = renderProgressHtml({
|
|
888
|
-
name: config.name,
|
|
889
|
-
goal: config.goal,
|
|
890
|
-
metricName: config.metricName,
|
|
891
|
-
direction: config.direction,
|
|
892
|
-
baseline: config.baseline,
|
|
893
|
-
experiments,
|
|
894
|
-
memory,
|
|
895
|
-
claims,
|
|
896
|
-
generatedAt: new Date().toISOString(),
|
|
897
|
-
refreshSeconds: params.refresh_seconds,
|
|
898
|
-
});
|
|
899
|
-
writeFileSync(sp.progressHtml, progressHtml, "utf8");
|
|
880
|
+
// Progress page (self-contained, no CLI needed). Same generator the loop
|
|
881
|
+
// calls automatically on init and after each experiment.
|
|
882
|
+
writeProgressDashboard(root, params.refresh_seconds);
|
|
900
883
|
|
|
901
884
|
// Lineage graph via the vkf CLI (best-effort).
|
|
902
885
|
const lineage = vkf.html(memoryPaths(root).dir, sp.dashboardHtml, `Research memory — ${config.name}`);
|
|
@@ -942,6 +925,75 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
942
925
|
},
|
|
943
926
|
});
|
|
944
927
|
|
|
928
|
+
// ── WebSearch ────────────────────────────────────────────────────────────────
|
|
929
|
+
// The pi host ships no web tools, but the gather skill needs them. These two
|
|
930
|
+
// tools give the agent keyless web access: WebSearch (DuckDuckGo HTML) to
|
|
931
|
+
// discover sources, WebFetch to read pages and hit free APIs (arXiv, OpenAlex,
|
|
932
|
+
// Crossref, Semantic Scholar). Named to match the skill text and pi-ai's
|
|
933
|
+
// Claude-Code tool-name table so prompt caching stays aligned.
|
|
934
|
+
const WebSearchParams = Type.Object({
|
|
935
|
+
query: Type.String({ description: "Search query. Prefer the mechanism of the problem over bare keywords." }),
|
|
936
|
+
max_results: Type.Optional(Type.Number({ description: "Max results to return (default 8, capped at 25)." })),
|
|
937
|
+
});
|
|
938
|
+
|
|
939
|
+
pi.registerTool({
|
|
940
|
+
name: "WebSearch",
|
|
941
|
+
label: "Web search",
|
|
942
|
+
description:
|
|
943
|
+
"Search the web with no API key (via DuckDuckGo) and return result titles, URLs, and snippets. Discovery step for the autoresearch gather skill — then read the hits with WebFetch.",
|
|
944
|
+
parameters: WebSearchParams,
|
|
945
|
+
async execute(_id, params: Static<typeof WebSearchParams>, signal): Promise<AgentToolResult<{ results: WebSearchHit[] }>> {
|
|
946
|
+
const limit = Math.max(1, Math.min(params.max_results ?? 8, 25));
|
|
947
|
+
const endpoint = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(params.query)}`;
|
|
948
|
+
let fetched: FetchedText;
|
|
949
|
+
try {
|
|
950
|
+
fetched = await fetchText(endpoint, signal ?? undefined, 20_000);
|
|
951
|
+
} catch (e) {
|
|
952
|
+
return textResult(`Web search failed: ${(e as Error).message}`, { results: [] });
|
|
953
|
+
}
|
|
954
|
+
const results = parseDdgResults(fetched.body, limit);
|
|
955
|
+
if (results.length === 0) {
|
|
956
|
+
return textResult(
|
|
957
|
+
`No results parsed for "${params.query}" (HTTP ${fetched.status}). The search backend may be rate-limiting; fall back to WebFetch against a known API (arXiv, OpenAlex, Crossref, Semantic Scholar).`,
|
|
958
|
+
{ results: [] },
|
|
959
|
+
);
|
|
960
|
+
}
|
|
961
|
+
const body = results.map((r, i) => `${i + 1}. ${r.title}\n ${r.url}\n ${r.snippet}`).join("\n\n");
|
|
962
|
+
return textResult(truncate(body), { results });
|
|
963
|
+
},
|
|
964
|
+
});
|
|
965
|
+
|
|
966
|
+
// ── WebFetch ─────────────────────────────────────────────────────────────────
|
|
967
|
+
const WebFetchParams = Type.Object({
|
|
968
|
+
url: Type.String({ description: "http(s) URL to fetch. JSON/text is returned verbatim; HTML is reduced to readable text." }),
|
|
969
|
+
max_chars: Type.Optional(Type.Number({ description: `Max characters of content to return (default ${MAX_OUTPUT_CHARS}).` })),
|
|
970
|
+
});
|
|
971
|
+
|
|
972
|
+
pi.registerTool({
|
|
973
|
+
name: "WebFetch",
|
|
974
|
+
label: "Web fetch",
|
|
975
|
+
description:
|
|
976
|
+
"Fetch a URL with no API key — JSON/text verbatim, HTML reduced to readable text. Use for the free literature APIs (arXiv, OpenAlex, Crossref, Semantic Scholar) and for reading pages found via WebSearch.",
|
|
977
|
+
parameters: WebFetchParams,
|
|
978
|
+
async execute(_id, params: Static<typeof WebFetchParams>, signal): Promise<AgentToolResult<{ status: number; url: string; content_type: string }>> {
|
|
979
|
+
if (!/^https?:\/\//i.test(params.url)) {
|
|
980
|
+
return textResult(`Refusing to fetch non-http(s) URL: ${params.url}`, { status: 0, url: params.url, content_type: "" });
|
|
981
|
+
}
|
|
982
|
+
let fetched: FetchedText;
|
|
983
|
+
try {
|
|
984
|
+
fetched = await fetchText(params.url, signal ?? undefined, 30_000);
|
|
985
|
+
} catch (e) {
|
|
986
|
+
return textResult(`Fetch failed for ${params.url}: ${(e as Error).message}`, { status: 0, url: params.url, content_type: "" });
|
|
987
|
+
}
|
|
988
|
+
const isHtml = /text\/html|application\/xhtml/i.test(fetched.contentType);
|
|
989
|
+
const text = isHtml ? htmlToText(fetched.body) : fetched.body;
|
|
990
|
+
const cap = Math.max(500, params.max_chars ?? MAX_OUTPUT_CHARS);
|
|
991
|
+
const capped = text.length <= cap ? text : text.slice(0, cap) + `\n…[truncated ${text.length - cap} chars]`;
|
|
992
|
+
const header = `GET ${fetched.finalUrl}\n(HTTP ${fetched.status} · ${fetched.contentType || "unknown"} · ${isHtml ? "html→text" : "raw"})\n\n`;
|
|
993
|
+
return textResult(header + capped, { status: fetched.status, url: fetched.finalUrl, content_type: fetched.contentType });
|
|
994
|
+
},
|
|
995
|
+
});
|
|
996
|
+
|
|
945
997
|
// ── shortcut: fullscreen dashboard ───────────────────────────────────────────
|
|
946
998
|
const shortcuts = loadShortcuts();
|
|
947
999
|
if (shortcuts.fullscreenDashboard) {
|
|
@@ -964,6 +1016,42 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
964
1016
|
});
|
|
965
1017
|
}
|
|
966
1018
|
|
|
1019
|
+
// ── open the live progress page in the default browser ───────────────────────
|
|
1020
|
+
const openProgress = async (ctx: ExtensionContext): Promise<void> => {
|
|
1021
|
+
const root = resolveRoot(ctx);
|
|
1022
|
+
if (!hasSession(root)) {
|
|
1023
|
+
if (ctx.hasUI) ctx.ui.notify("No pi-autoresearch-vkf session in this directory yet.", "warning");
|
|
1024
|
+
return;
|
|
1025
|
+
}
|
|
1026
|
+
// Make sure the file exists/is current before handing it to the browser.
|
|
1027
|
+
const file = writeProgressDashboard(root);
|
|
1028
|
+
if (!file) {
|
|
1029
|
+
if (ctx.hasUI) ctx.ui.notify("Could not generate the progress page.", "error");
|
|
1030
|
+
return;
|
|
1031
|
+
}
|
|
1032
|
+
const [cmd, args] = browserOpenCommand(file);
|
|
1033
|
+
try {
|
|
1034
|
+
await pi.exec(cmd, args, { timeout: 10_000 });
|
|
1035
|
+
if (ctx.hasUI) ctx.ui.notify(`Opened progress page in your browser (${cmd}).`, "info");
|
|
1036
|
+
} catch (e) {
|
|
1037
|
+
if (ctx.hasUI) ctx.ui.notify(`Couldn't launch a browser — open ${file} manually. (${(e as Error).message})`, "error");
|
|
1038
|
+
}
|
|
1039
|
+
};
|
|
1040
|
+
|
|
1041
|
+
if (shortcuts.openBrowser) {
|
|
1042
|
+
pi.registerShortcut(shortcuts.openBrowser, {
|
|
1043
|
+
description: "Open the pi-autoresearch-vkf progress page in the browser",
|
|
1044
|
+
handler: openProgress,
|
|
1045
|
+
});
|
|
1046
|
+
}
|
|
1047
|
+
|
|
1048
|
+
pi.registerCommand("research-open", {
|
|
1049
|
+
description: "Open the pi-autoresearch-vkf progress page in your browser",
|
|
1050
|
+
handler: async (_args, ctx) => {
|
|
1051
|
+
await openProgress(ctx);
|
|
1052
|
+
},
|
|
1053
|
+
});
|
|
1054
|
+
|
|
967
1055
|
// ── lifecycle ────────────────────────────────────────────────────────────────
|
|
968
1056
|
pi.on("session_start", async (_event, ctx) => {
|
|
969
1057
|
refreshWidget(ctx, resolveRoot(ctx));
|
|
@@ -974,8 +1062,172 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
974
1062
|
});
|
|
975
1063
|
}
|
|
976
1064
|
|
|
1065
|
+
// ── web helpers ───────────────────────────────────────────────────────────────
|
|
1066
|
+
|
|
1067
|
+
const WEB_USER_AGENT = "pi-autoresearch-vkf (+https://github.com/EricJahns/pi-autoresearch-vkf)";
|
|
1068
|
+
|
|
1069
|
+
interface FetchedText {
|
|
1070
|
+
status: number;
|
|
1071
|
+
contentType: string;
|
|
1072
|
+
body: string;
|
|
1073
|
+
finalUrl: string;
|
|
1074
|
+
}
|
|
1075
|
+
|
|
1076
|
+
interface WebSearchHit {
|
|
1077
|
+
title: string;
|
|
1078
|
+
url: string;
|
|
1079
|
+
snippet: string;
|
|
1080
|
+
}
|
|
1081
|
+
|
|
1082
|
+
/** Fetch a URL as text, following redirects, aborting on the tool signal or timeout. */
|
|
1083
|
+
async function fetchText(url: string, signal: AbortSignal | undefined, timeoutMs: number): Promise<FetchedText> {
|
|
1084
|
+
const timeout = AbortSignal.timeout(timeoutMs);
|
|
1085
|
+
const composed = signal ? AbortSignal.any([signal, timeout]) : timeout;
|
|
1086
|
+
const res = await fetch(url, {
|
|
1087
|
+
redirect: "follow",
|
|
1088
|
+
signal: composed,
|
|
1089
|
+
headers: { "user-agent": WEB_USER_AGENT, accept: "*/*" },
|
|
1090
|
+
});
|
|
1091
|
+
const body = await res.text();
|
|
1092
|
+
return { status: res.status, contentType: res.headers.get("content-type") ?? "", body, finalUrl: res.url || url };
|
|
1093
|
+
}
|
|
1094
|
+
|
|
1095
|
+
/** Reduce an HTML document to readable plain text (best-effort, no DOM). */
|
|
1096
|
+
function htmlToText(html: string): string {
|
|
1097
|
+
return html
|
|
1098
|
+
.replace(/<script[\s\S]*?<\/script>/gi, " ")
|
|
1099
|
+
.replace(/<style[\s\S]*?<\/style>/gi, " ")
|
|
1100
|
+
.replace(/<!--[\s\S]*?-->/g, " ")
|
|
1101
|
+
.replace(/<br\s*\/?>/gi, "\n")
|
|
1102
|
+
.replace(/<\/(p|div|li|tr|h[1-6]|section|article)\s*>/gi, "\n")
|
|
1103
|
+
.replace(/<[^>]+>/g, " ")
|
|
1104
|
+
.replace(/ /g, " ")
|
|
1105
|
+
.replace(/&/g, "&")
|
|
1106
|
+
.replace(/</g, "<")
|
|
1107
|
+
.replace(/>/g, ">")
|
|
1108
|
+
.replace(/"/g, '"')
|
|
1109
|
+
.replace(/�*39;|'/gi, "'")
|
|
1110
|
+
.replace(/&#(\d+);/g, (_m, n) => String.fromCodePoint(Number(n)))
|
|
1111
|
+
.replace(/[ \t]+\n/g, "\n")
|
|
1112
|
+
.replace(/\n{3,}/g, "\n\n")
|
|
1113
|
+
.replace(/[ \t]{2,}/g, " ")
|
|
1114
|
+
.trim();
|
|
1115
|
+
}
|
|
1116
|
+
|
|
1117
|
+
/** DuckDuckGo HTML wraps result links as //duckduckgo.com/l/?uddg=<encoded>. Unwrap them. */
|
|
1118
|
+
function decodeDdgHref(href: string): string {
|
|
1119
|
+
const m = href.match(/[?&]uddg=([^&]+)/);
|
|
1120
|
+
if (m && m[1]) {
|
|
1121
|
+
try {
|
|
1122
|
+
return decodeURIComponent(m[1]);
|
|
1123
|
+
} catch {
|
|
1124
|
+
/* fall through to raw href */
|
|
1125
|
+
}
|
|
1126
|
+
}
|
|
1127
|
+
return href.startsWith("//") ? "https:" + href : href;
|
|
1128
|
+
}
|
|
1129
|
+
|
|
1130
|
+
/** Parse titles/urls/snippets out of a DuckDuckGo HTML results page. */
|
|
1131
|
+
function parseDdgResults(html: string, limit: number): WebSearchHit[] {
|
|
1132
|
+
const snippets: string[] = [];
|
|
1133
|
+
const snippetRe = /<a[^>]+class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/a>/gi;
|
|
1134
|
+
let s: RegExpExecArray | null;
|
|
1135
|
+
while ((s = snippetRe.exec(html)) !== null) snippets.push(htmlToText(s[1] ?? ""));
|
|
1136
|
+
|
|
1137
|
+
const hits: WebSearchHit[] = [];
|
|
1138
|
+
const linkRe = /<a[^>]+class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/gi;
|
|
1139
|
+
let m: RegExpExecArray | null;
|
|
1140
|
+
let i = 0;
|
|
1141
|
+
while ((m = linkRe.exec(html)) !== null && hits.length < limit) {
|
|
1142
|
+
hits.push({ title: htmlToText(m[2] ?? ""), url: decodeDdgHref(m[1] ?? ""), snippet: snippets[i] ?? "" });
|
|
1143
|
+
i++;
|
|
1144
|
+
}
|
|
1145
|
+
return hits;
|
|
1146
|
+
}
|
|
1147
|
+
|
|
977
1148
|
// ── helpers ───────────────────────────────────────────────────────────────────
|
|
978
1149
|
|
|
1150
|
+
/**
 * Rebuild the self-contained progress page (progress.html) from whatever the
 * session and memory currently hold, and write it to the session's
 * `progressHtml` path.
 *
 * The work is plain in-process rendering (no CLI call), so callers may invoke
 * it after every state change. The heavier interactive idea-lineage graph
 * (dashboard.html) is not produced here; it stays in export_dashboard.
 *
 * @param root           Root directory the session paths are derived from.
 * @param refreshSeconds Optional refresh interval, forwarded unchanged to the
 *   HTML renderer.
 * @returns The path of the written file, or `undefined` (nothing written) when
 *   no session config can be read yet.
 */
function writeProgressDashboard(root: string, refreshSeconds?: number): string | undefined {
  const paths = sessionPaths(root);
  const config = readConfig(paths.config);
  // No config means there is no session to report on.
  if (!config) return undefined;

  // Project each stored experiment down to the fields the page renders.
  const experiments: ProgressExperiment[] = readExperiments(paths.experiments).map(
    ({ id, description, value, outcome, kept, claim_id, ts }) => ({
      id,
      description,
      value,
      outcome,
      kept,
      claim_id,
      ts,
    }),
  );

  // Tally claims per memory state, starting every known state at zero.
  const memory: Record<string, number> = Object.fromEntries(MEMORY_STATES.map((state) => [state, 0]));
  for (const card of listCards(root, { type: "claim" })) {
    const state = card.meta["memory_state"] as MemoryState | undefined;
    if (!state || !(state in memory)) continue;
    memory[state] = (memory[state] ?? 0) + 1;
  }

  // Only the first dozen verified claims are listed on the page.
  const verifiedCards = listCards(root, { bucket: "verified", type: "claim" }).slice(0, 12);
  const claims = verifiedCards.map((card) => {
    const title = String(card.meta["title"] ?? card.meta["id"]);
    const confidence = String(card.meta["confidence"] ?? "—");
    const state = String(card.meta["memory_state"] ?? "—");
    return { title, confidence, state };
  });

  const { name, goal, metricName, direction, baseline } = config;
  const html = renderProgressHtml({
    name,
    goal,
    metricName,
    direction,
    baseline,
    experiments,
    memory,
    claims,
    generatedAt: new Date().toISOString(),
    refreshSeconds,
  });

  const outputPath = paths.progressHtml;
  writeFileSync(outputPath, html, "utf8");
  return outputPath;
}
|
|
1199
|
+
|
|
1200
|
+
/**
 * Produce a 7-character commit label.
 *
 * With an explicit reference: leading non-hex characters are stripped and, if
 * what remains is a hex string of at least 7 characters, its first 7 are
 * returned; otherwise the trimmed reference is simply cut to 7 characters
 * (or `undefined` if that leaves nothing).
 *
 * Without one: asks git for the short hash of `HEAD` in `cwd`, best-effort.
 *
 * @param explicit Caller-supplied commit reference, if any.
 * @param cwd      Working directory in which git is invoked.
 * @returns The short label, or `undefined` when no commit can be resolved
 *   (git call fails, or its output is not a hash).
 */
function shortCommit(explicit: string | undefined, cwd: string): string | undefined {
  const toShortHash = (text: string): string | undefined => {
    const candidate = text.trim().replace(/^[^0-9a-f]*/i, "");
    if (!/^[0-9a-f]{7,}$/i.test(candidate)) return undefined;
    return candidate.slice(0, 7);
  };

  if (explicit) {
    const normalized = toShortHash(explicit);
    if (normalized !== undefined) return normalized;
    // Not hash-shaped: fall back to the first 7 characters of the reference.
    const clipped = explicit.trim().slice(0, 7);
    return clipped === "" ? undefined : clipped;
  }

  try {
    const stdout = execFileSync("git", ["rev-parse", "--short=7", "HEAD"], {
      cwd,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "ignore"],
    });
    return toShortHash(stdout);
  } catch {
    // Any failure to run git here means there is no commit to report.
    return undefined;
  }
}
|
|
1217
|
+
|
|
1218
|
+
/**
 * Pick the command line that opens a file/URL in the user's default browser on
 * the current platform.
 *
 * @param target File path or URL to open.
 * @returns A `[command, args]` pair: `open` on macOS, `cmd /c start "" …` on
 *   Windows, and `xdg-open` everywhere else.
 */
function browserOpenCommand(target: string): [string, string[]] {
  const platform = process.platform;
  if (platform === "darwin") {
    return ["open", [target]];
  }
  if (platform === "win32") {
    // `start` is a cmd builtin; the empty "" is the window-title placeholder.
    return ["cmd", ["/c", "start", "", target]];
  }
  return ["xdg-open", [target]];
}
|
|
1230
|
+
|
|
979
1231
|
/**
 * Write `contents` (UTF-8) to `path` only when nothing exists at that path yet;
 * an existing file is left untouched.
 */
function writeFileIfAbsent(path: string, contents: string): void {
  if (existsSync(path)) return;
  writeFileSync(path, contents, "utf8");
}
|
|
@@ -10,13 +10,23 @@ import type { KeyId } from "@earendil-works/pi-tui";
|
|
|
10
10
|
/** Keyboard shortcuts used by the extension; every entry is optional. */
export interface ShortcutConfig {
  /** Key that opens the fullscreen research dashboard. Leave `undefined` to disable it. */
  fullscreenDashboard?: KeyId;
  /** Key that opens the live progress HTML in the default browser. Leave `undefined` to disable it. */
  openBrowser?: KeyId;
}
|
|
14
16
|
|
|
15
17
|
/** Built-in key for the fullscreen dashboard shortcut. */
const DEFAULT_FULLSCREEN: KeyId = "ctrl+g";
/** Built-in key for the open-in-browser shortcut. */
const DEFAULT_OPEN_BROWSER: KeyId = "ctrl+o";
|
|
19
|
+
|
|
20
|
+
/**
 * Work out the effective key for one shortcut from an environment variable.
 *
 * @param envVar   Name of the environment variable that may override the key.
 * @param fallback Key to use when the variable is not set at all.
 * @returns `fallback` when the variable is unset; `undefined` (shortcut
 *   disabled) when it is set to an empty/whitespace value or to "none" in any
 *   letter case; otherwise the trimmed value of the variable.
 */
function resolve(envVar: string, fallback: KeyId): KeyId | undefined {
  const raw = process.env[envVar];
  if (raw === undefined) return fallback;

  const value = raw.trim();
  const disabled = value === "" || value.toLowerCase() === "none";
  return disabled ? undefined : (value as KeyId);
}
|
|
16
26
|
|
|
17
27
|
/**
 * Build the shortcut configuration, resolving each key from its environment
 * variable (`PI_AUTORESEARCH_SHORTCUT` for the fullscreen dashboard,
 * `PI_AUTORESEARCH_OPEN_SHORTCUT` for open-in-browser) with the built-in
 * default as fallback.
 */
export function loadShortcuts(): ShortcutConfig {
  const fullscreenDashboard = resolve("PI_AUTORESEARCH_SHORTCUT", DEFAULT_FULLSCREEN);
  const openBrowser = resolve("PI_AUTORESEARCH_OPEN_SHORTCUT", DEFAULT_OPEN_BROWSER);
  return { fullscreenDashboard, openBrowser };
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-autoresearch-vkf",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Autoresearch with verifiable long-term scientific memory. A pi extension that gathers literature, stores it as VKF claims, runs experiments, and writes verified results back to a git-native knowledge bundle so future runs build on what was learned instead of rediscovering it.",
|
|
6
6
|
"keywords": [
|