pi-autoresearch-vkf 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/extensions/pi-autoresearch-vkf/index.ts +213 -37
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,31 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.6.0
|
|
4
|
+
|
|
5
|
+
Add keyless web access so the gather step works on stock pi.
|
|
6
|
+
|
|
7
|
+
- New tools **`WebSearch`** (DuckDuckGo HTML, no API key) and **`WebFetch`**
|
|
8
|
+
(JSON/text verbatim, HTML reduced to readable text). The pi host ships no web
|
|
9
|
+
tools, but `autoresearch-vkf-knowledge-gather` assumed they existed — the agent
|
|
10
|
+
reported it had no web search. These supply the named tools the skill already
|
|
11
|
+
references, against the free literature APIs (arXiv, OpenAlex, Crossref,
|
|
12
|
+
Semantic Scholar). No session required; no setup or keys.
|
|
13
|
+
- Tool names match Claude-Code casing so `pi-ai`'s tool-name table keeps prompt
|
|
14
|
+
caching aligned. `WebSearch` degrades gracefully to a WebFetch-an-API hint when
|
|
15
|
+
the search backend rate-limits or changes layout.
|
|
16
|
+
|
|
17
|
+
Make the browser progress dashboard automatic and live.
|
|
18
|
+
|
|
19
|
+
- `progress.html` is now written on `init_research` (so it exists from iteration
|
|
20
|
+
zero) and refreshed after every `remember_claim`, `verify_claim`, and
|
|
21
|
+
`vkf_log_experiment`. Previously it was only written when the agent explicitly
|
|
22
|
+
called `export_dashboard`, which most runs never did — so the dashboard was
|
|
23
|
+
effectively never created and never updated during a run.
|
|
24
|
+
- The page already meta-refreshes itself, so an open browser tab now tracks the
|
|
25
|
+
loop live with no manual step. `export_dashboard` is now for the heavier
|
|
26
|
+
vkf-CLI idea-lineage graph (`dashboard.html`), a custom refresh interval, or
|
|
27
|
+
opening the page in a browser.
|
|
28
|
+
|
|
3
29
|
## 0.5.2
|
|
4
30
|
|
|
5
31
|
Prefixed all skill names with `autoresearch-vkf-` to avoid namespace conflicts
|
|
@@ -136,6 +136,9 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
136
136
|
const fresh = scaffoldMemoryBundle(root, params.name, config.memoryProfile);
|
|
137
137
|
appendLog(sp.log, { event: "init", name: config.name, goal: config.goal });
|
|
138
138
|
|
|
139
|
+
// Create the progress dashboard up front so it exists from iteration zero;
|
|
140
|
+
// it then refreshes automatically as experiments are logged.
|
|
141
|
+
writeProgressDashboard(root);
|
|
139
142
|
refreshWidget(ctx, root);
|
|
140
143
|
return textResult(
|
|
141
144
|
[
|
|
@@ -249,6 +252,7 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
249
252
|
});
|
|
250
253
|
appendLog(sp.log, { event: "remember", claim_id: claim.id, paper_id: paperId });
|
|
251
254
|
|
|
255
|
+
writeProgressDashboard(root);
|
|
252
256
|
refreshWidget(ctx, root);
|
|
253
257
|
return textResult(
|
|
254
258
|
[
|
|
@@ -319,6 +323,7 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
319
323
|
});
|
|
320
324
|
appendLog(sp.log, { event: "verify", claim_id: params.id, decision: params.decision });
|
|
321
325
|
|
|
326
|
+
writeProgressDashboard(root);
|
|
322
327
|
refreshWidget(ctx, root);
|
|
323
328
|
return textResult(
|
|
324
329
|
[
|
|
@@ -774,6 +779,8 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
774
779
|
writeConfig(sp.config, config);
|
|
775
780
|
}
|
|
776
781
|
|
|
782
|
+
// Refresh the browser progress page so an open tab tracks the loop live.
|
|
783
|
+
writeProgressDashboard(root);
|
|
777
784
|
refreshWidget(ctx, root);
|
|
778
785
|
const summary = summarize(readExperiments(sp.experiments), config.direction);
|
|
779
786
|
return textResult(
|
|
@@ -853,7 +860,7 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
853
860
|
name: "export_dashboard",
|
|
854
861
|
label: "Export dashboard",
|
|
855
862
|
description:
|
|
856
|
-
"
|
|
863
|
+
"Build the interactive idea-lineage graph (.autoresearch-vkf/session/dashboard.html — paper → claim → experiment, via the vkf CLI) and refresh the progress page. The progress page (progress.html — metric-over-time chart, experiment timeline, memory lifecycle) is also written automatically on init and after each remember/verify/experiment, and meta-refreshes itself, so an open browser tab tracks the loop live without re-running this. Use this tool for the lineage graph, a custom refresh interval, or to open the page in a browser.",
|
|
857
864
|
parameters: ExportParams,
|
|
858
865
|
async execute(_id, params: Static<typeof ExportParams>, _signal, _onUpdate, ctx): Promise<AgentToolResult<{ progress: string; lineage?: string }>> {
|
|
859
866
|
const root = resolveRoot(ctx);
|
|
@@ -861,42 +868,9 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
861
868
|
requireSession(root);
|
|
862
869
|
const config = readConfig(sp.config)!;
|
|
863
870
|
|
|
864
|
-
// Progress page (self-contained, no CLI needed).
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
description: e.description,
|
|
868
|
-
value: e.value,
|
|
869
|
-
outcome: e.outcome,
|
|
870
|
-
kept: e.kept,
|
|
871
|
-
claim_id: e.claim_id,
|
|
872
|
-
ts: e.ts,
|
|
873
|
-
}));
|
|
874
|
-
const memory: Record<string, number> = Object.fromEntries(MEMORY_STATES.map((s) => [s, 0]));
|
|
875
|
-
for (const c of listCards(root, { type: "claim" })) {
|
|
876
|
-
const st = c.meta["memory_state"] as MemoryState | undefined;
|
|
877
|
-
if (st && st in memory) memory[st]! += 1;
|
|
878
|
-
}
|
|
879
|
-
const claims = listCards(root, { bucket: "verified", type: "claim" })
|
|
880
|
-
.slice(0, 12)
|
|
881
|
-
.map((c) => ({
|
|
882
|
-
title: String(c.meta["title"] ?? c.meta["id"]),
|
|
883
|
-
confidence: String(c.meta["confidence"] ?? "—"),
|
|
884
|
-
state: String(c.meta["memory_state"] ?? "—"),
|
|
885
|
-
}));
|
|
886
|
-
|
|
887
|
-
const progressHtml = renderProgressHtml({
|
|
888
|
-
name: config.name,
|
|
889
|
-
goal: config.goal,
|
|
890
|
-
metricName: config.metricName,
|
|
891
|
-
direction: config.direction,
|
|
892
|
-
baseline: config.baseline,
|
|
893
|
-
experiments,
|
|
894
|
-
memory,
|
|
895
|
-
claims,
|
|
896
|
-
generatedAt: new Date().toISOString(),
|
|
897
|
-
refreshSeconds: params.refresh_seconds,
|
|
898
|
-
});
|
|
899
|
-
writeFileSync(sp.progressHtml, progressHtml, "utf8");
|
|
871
|
+
// Progress page (self-contained, no CLI needed). Same generator the loop
|
|
872
|
+
// calls automatically on init and after each experiment.
|
|
873
|
+
writeProgressDashboard(root, params.refresh_seconds);
|
|
900
874
|
|
|
901
875
|
// Lineage graph via the vkf CLI (best-effort).
|
|
902
876
|
const lineage = vkf.html(memoryPaths(root).dir, sp.dashboardHtml, `Research memory — ${config.name}`);
|
|
@@ -942,6 +916,75 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
942
916
|
},
|
|
943
917
|
});
|
|
944
918
|
|
|
919
|
+
// ── WebSearch ────────────────────────────────────────────────────────────────
|
|
920
|
+
// The pi host ships no web tools, but the gather skill needs them. These two
|
|
921
|
+
// tools give the agent keyless web access: WebSearch (DuckDuckGo HTML) to
|
|
922
|
+
// discover sources, WebFetch to read pages and hit free APIs (arXiv, OpenAlex,
|
|
923
|
+
// Crossref, Semantic Scholar). Named to match the skill text and pi-ai's
|
|
924
|
+
// Claude-Code tool-name table so prompt caching stays aligned.
|
|
925
|
+
const WebSearchParams = Type.Object({
|
|
926
|
+
query: Type.String({ description: "Search query. Prefer the mechanism of the problem over bare keywords." }),
|
|
927
|
+
max_results: Type.Optional(Type.Number({ description: "Max results to return (default 8, capped at 25)." })),
|
|
928
|
+
});
|
|
929
|
+
|
|
930
|
+
pi.registerTool({
|
|
931
|
+
name: "WebSearch",
|
|
932
|
+
label: "Web search",
|
|
933
|
+
description:
|
|
934
|
+
"Search the web with no API key (via DuckDuckGo) and return result titles, URLs, and snippets. Discovery step for the autoresearch gather skill — then read the hits with WebFetch.",
|
|
935
|
+
parameters: WebSearchParams,
|
|
936
|
+
async execute(_id, params: Static<typeof WebSearchParams>, signal): Promise<AgentToolResult<{ results: WebSearchHit[] }>> {
|
|
937
|
+
const limit = Math.max(1, Math.min(params.max_results ?? 8, 25));
|
|
938
|
+
const endpoint = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(params.query)}`;
|
|
939
|
+
let fetched: FetchedText;
|
|
940
|
+
try {
|
|
941
|
+
fetched = await fetchText(endpoint, signal ?? undefined, 20_000);
|
|
942
|
+
} catch (e) {
|
|
943
|
+
return textResult(`Web search failed: ${(e as Error).message}`, { results: [] });
|
|
944
|
+
}
|
|
945
|
+
const results = parseDdgResults(fetched.body, limit);
|
|
946
|
+
if (results.length === 0) {
|
|
947
|
+
return textResult(
|
|
948
|
+
`No results parsed for "${params.query}" (HTTP ${fetched.status}). The search backend may be rate-limiting; fall back to WebFetch against a known API (arXiv, OpenAlex, Crossref, Semantic Scholar).`,
|
|
949
|
+
{ results: [] },
|
|
950
|
+
);
|
|
951
|
+
}
|
|
952
|
+
const body = results.map((r, i) => `${i + 1}. ${r.title}\n ${r.url}\n ${r.snippet}`).join("\n\n");
|
|
953
|
+
return textResult(truncate(body), { results });
|
|
954
|
+
},
|
|
955
|
+
});
|
|
956
|
+
|
|
957
|
+
// ── WebFetch ─────────────────────────────────────────────────────────────────
|
|
958
|
+
const WebFetchParams = Type.Object({
|
|
959
|
+
url: Type.String({ description: "http(s) URL to fetch. JSON/text is returned verbatim; HTML is reduced to readable text." }),
|
|
960
|
+
max_chars: Type.Optional(Type.Number({ description: `Max characters of content to return (default ${MAX_OUTPUT_CHARS}).` })),
|
|
961
|
+
});
|
|
962
|
+
|
|
963
|
+
pi.registerTool({
|
|
964
|
+
name: "WebFetch",
|
|
965
|
+
label: "Web fetch",
|
|
966
|
+
description:
|
|
967
|
+
"Fetch a URL with no API key — JSON/text verbatim, HTML reduced to readable text. Use for the free literature APIs (arXiv, OpenAlex, Crossref, Semantic Scholar) and for reading pages found via WebSearch.",
|
|
968
|
+
parameters: WebFetchParams,
|
|
969
|
+
async execute(_id, params: Static<typeof WebFetchParams>, signal): Promise<AgentToolResult<{ status: number; url: string; content_type: string }>> {
|
|
970
|
+
if (!/^https?:\/\//i.test(params.url)) {
|
|
971
|
+
return textResult(`Refusing to fetch non-http(s) URL: ${params.url}`, { status: 0, url: params.url, content_type: "" });
|
|
972
|
+
}
|
|
973
|
+
let fetched: FetchedText;
|
|
974
|
+
try {
|
|
975
|
+
fetched = await fetchText(params.url, signal ?? undefined, 30_000);
|
|
976
|
+
} catch (e) {
|
|
977
|
+
return textResult(`Fetch failed for ${params.url}: ${(e as Error).message}`, { status: 0, url: params.url, content_type: "" });
|
|
978
|
+
}
|
|
979
|
+
const isHtml = /text\/html|application\/xhtml/i.test(fetched.contentType);
|
|
980
|
+
const text = isHtml ? htmlToText(fetched.body) : fetched.body;
|
|
981
|
+
const cap = Math.max(500, params.max_chars ?? MAX_OUTPUT_CHARS);
|
|
982
|
+
const capped = text.length <= cap ? text : text.slice(0, cap) + `\n…[truncated ${text.length - cap} chars]`;
|
|
983
|
+
const header = `GET ${fetched.finalUrl}\n(HTTP ${fetched.status} · ${fetched.contentType || "unknown"} · ${isHtml ? "html→text" : "raw"})\n\n`;
|
|
984
|
+
return textResult(header + capped, { status: fetched.status, url: fetched.finalUrl, content_type: fetched.contentType });
|
|
985
|
+
},
|
|
986
|
+
});
|
|
987
|
+
|
|
945
988
|
// ── shortcut: fullscreen dashboard ───────────────────────────────────────────
|
|
946
989
|
const shortcuts = loadShortcuts();
|
|
947
990
|
if (shortcuts.fullscreenDashboard) {
|
|
@@ -974,8 +1017,141 @@ export default function autoresearchExtension(pi: ExtensionAPI): void {
|
|
|
974
1017
|
});
|
|
975
1018
|
}
|
|
976
1019
|
|
|
1020
|
+
// ── web helpers ───────────────────────────────────────────────────────────────
|
|
1021
|
+
|
|
1022
|
+
const WEB_USER_AGENT = "pi-autoresearch-vkf (+https://github.com/EricJahns/pi-autoresearch-vkf)";
|
|
1023
|
+
|
|
1024
|
+
interface FetchedText {
|
|
1025
|
+
status: number;
|
|
1026
|
+
contentType: string;
|
|
1027
|
+
body: string;
|
|
1028
|
+
finalUrl: string;
|
|
1029
|
+
}
|
|
1030
|
+
|
|
1031
|
+
interface WebSearchHit {
|
|
1032
|
+
title: string;
|
|
1033
|
+
url: string;
|
|
1034
|
+
snippet: string;
|
|
1035
|
+
}
|
|
1036
|
+
|
|
1037
|
+
/**
 * Download `url` and return its body as text along with response metadata.
 *
 * Redirects are followed. The request is aborted when `timeoutMs` elapses or,
 * if a tool `signal` is supplied, when that signal aborts — whichever happens
 * first.
 *
 * @param url - Address to request.
 * @param signal - Optional caller abort signal, combined with the timeout.
 * @param timeoutMs - Time budget for the request, in milliseconds.
 * @returns HTTP status, `content-type` header ("" when absent), body text, and
 *   the response URL (falling back to `url` when the response reports none).
 */
async function fetchText(url: string, signal: AbortSignal | undefined, timeoutMs: number): Promise<FetchedText> {
  const deadline = AbortSignal.timeout(timeoutMs);
  let abortOn: AbortSignal = deadline;
  if (signal) {
    abortOn = AbortSignal.any([signal, deadline]);
  }

  const response = await fetch(url, {
    redirect: "follow",
    signal: abortOn,
    headers: { "user-agent": WEB_USER_AGENT, accept: "*/*" },
  });
  const text = await response.text();

  return {
    status: response.status,
    contentType: response.headers.get("content-type") ?? "",
    body: text,
    finalUrl: response.url || url,
  };
}
|
|
1049
|
+
|
|
1050
|
+
/**
 * Reduce an HTML document to readable plain text (best-effort, no DOM).
 *
 * Steps, in order:
 *  1. drop `<script>`, `<style>` and comment blocks entirely;
 *  2. turn `<br>` and the closing tags of common block elements into newlines;
 *  3. replace every remaining tag with a space;
 *  4. decode character references — the named entities `nbsp`, `amp`, `lt`,
 *     `gt`, `quot`, `apos`, plus decimal (`&#39;`) and hex (`&#x27;`) forms;
 *  5. collapse runs of spaces/tabs and of blank lines, then trim.
 *
 * @param html - Raw HTML (or an HTML fragment).
 * @returns The text content with light whitespace normalisation.
 */
function htmlToText(html: string): string {
  const namedEntities: Record<string, string> = {
    nbsp: " ",
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    apos: "'",
  };

  // Entities are decoded in ONE pass. Decoding `&amp;` in an earlier pass than
  // `&lt;` would unescape `&amp;lt;` twice and yield `<` instead of `&lt;`.
  const decodeEntity = (match: string, body: string): string => {
    if (body.startsWith("#")) {
      const isHex = body[1] === "x" || body[1] === "X";
      const code = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
      // String.fromCodePoint throws RangeError outside 0..0x10FFFF; leave such
      // references untouched rather than failing the whole conversion.
      if (!Number.isInteger(code) || code < 0 || code > 0x10ffff) return match;
      return String.fromCodePoint(code);
    }
    // Unknown named entities (e.g. `&copy;`) are passed through unchanged.
    return namedEntities[body.toLowerCase()] ?? match;
  };

  return html
    .replace(/<script[\s\S]*?<\/script>/gi, " ")
    .replace(/<style[\s\S]*?<\/style>/gi, " ")
    .replace(/<!--[\s\S]*?-->/g, " ")
    .replace(/<br\s*\/?>/gi, "\n")
    .replace(/<\/(p|div|li|tr|h[1-6]|section|article)\s*>/gi, "\n")
    .replace(/<[^>]+>/g, " ")
    .replace(/&(#\d+|#x[0-9a-f]+|[a-z]+);/gi, decodeEntity)
    .replace(/[ \t]+\n/g, "\n")
    .replace(/\n{3,}/g, "\n\n")
    .replace(/[ \t]{2,}/g, " ")
    .trim();
}
|
|
1071
|
+
|
|
1072
|
+
/**
 * Unwrap a DuckDuckGo HTML result link.
 *
 * DuckDuckGo HTML wraps result links as `//duckduckgo.com/l/?uddg=<encoded>`.
 * When a `uddg` query parameter is present its percent-decoded value is
 * returned; otherwise the href itself is returned, with protocol-relative
 * (`//host/…`) links upgraded to `https:`.
 *
 * The href is expected to come straight from an HTML attribute, where `&` is
 * written as `&amp;`. That escaping is undone first so that a `uddg` parameter
 * which is not the first one (`?rut=…&amp;uddg=…`) is still found, and so the
 * fallback URL does not carry literal `&amp;` sequences.
 *
 * @param href - Raw `href` attribute value from the results page.
 * @returns The destination URL, or the normalised href when it cannot be unwrapped.
 */
function decodeDdgHref(href: string): string {
  const unescaped = href.replace(/&amp;/gi, "&");
  const wrapped = unescaped.match(/[?&]uddg=([^&]+)/);
  if (wrapped && wrapped[1]) {
    try {
      return decodeURIComponent(wrapped[1]);
    } catch {
      /* malformed percent-encoding: fall through to the href itself */
    }
  }
  return unescaped.startsWith("//") ? "https:" + unescaped : unescaped;
}
|
|
1084
|
+
|
|
1085
|
+
/**
 * Parse titles, URLs and snippets out of a DuckDuckGo HTML results page.
 *
 * Anchors are scanned in document order. An anchor whose `class` contains
 * `result__a` starts a new hit; the first following anchor whose `class`
 * contains `result__snippet` (before the next hit starts) supplies that hit's
 * snippet. Pairing by position rather than by a global index keeps snippets
 * attached to the right result when some result has no snippet, and reading
 * `class` / `href` independently makes the attribute order irrelevant.
 *
 * @param html - Body of the results page.
 * @param limit - Maximum number of hits to return.
 * @returns Up to `limit` hits; `snippet` is "" when the page gives none.
 */
function parseDdgResults(html: string, limit: number): WebSearchHit[] {
  const anchorRe = /<a\b([^>]*)>([\s\S]*?)<\/a>/gi;
  // Non-global, so `exec` carries no state between anchors.
  const classRe = /(?:^|\s)class="([^"]*)"/i;
  const hrefRe = /(?:^|\s)href="([^"]*)"/i;

  const hits: WebSearchHit[] = [];
  let current: WebSearchHit | undefined;
  let anchor: RegExpExecArray | null;

  while ((anchor = anchorRe.exec(html)) !== null) {
    const attrs = anchor[1] ?? "";
    const inner = anchor[2] ?? "";
    const cls = classRe.exec(attrs)?.[1] ?? "";

    if (cls.includes("result__a")) {
      // The previous hit's snippet (if any) has already been seen, so it is
      // safe to stop as soon as the next result link would exceed the limit.
      if (hits.length >= limit) break;
      const href = hrefRe.exec(attrs)?.[1] ?? "";
      if (!href) {
        // A link without a target is not a usable hit; also make sure a
        // following snippet is not credited to the previous hit.
        current = undefined;
        continue;
      }
      current = { title: htmlToText(inner), url: decodeDdgHref(href), snippet: "" };
      hits.push(current);
    } else if (cls.includes("result__snippet") && current && current.snippet === "") {
      current.snippet = htmlToText(inner);
    }
  }
  return hits;
}
|
|
1102
|
+
|
|
977
1103
|
// ── helpers ───────────────────────────────────────────────────────────────────
|
|
978
1104
|
|
|
1105
|
+
/**
 * Regenerate the self-contained progress page (progress.html) from the current
 * session and memory state, and write it to the session's progress path.
 *
 * Returns `undefined` without writing anything when no session config can be
 * read. The idea-lineage graph (dashboard.html) is not produced here; that
 * stays in export_dashboard.
 *
 * @param root - Project root used to resolve session paths and memory cards.
 * @param refreshSeconds - Optional refresh interval forwarded to the renderer.
 * @returns Path of the written progress page, or `undefined` when there is no config.
 */
function writeProgressDashboard(root: string, refreshSeconds?: number): string | undefined {
  const paths = sessionPaths(root);
  const cfg = readConfig(paths.config);
  if (!cfg) {
    return undefined;
  }

  // Project each logged experiment onto the fields the page renders.
  const experiments: ProgressExperiment[] = [];
  for (const { id, description, value, outcome, kept, claim_id, ts } of readExperiments(paths.experiments)) {
    experiments.push({ id, description, value, outcome, kept, claim_id, ts });
  }

  // Tally claim cards per memory state, starting every known state at zero.
  const memory: Record<string, number> = Object.fromEntries(MEMORY_STATES.map((state) => [state, 0]));
  for (const card of listCards(root, { type: "claim" })) {
    const state = card.meta["memory_state"] as MemoryState | undefined;
    if (!state || !(state in memory)) continue;
    memory[state]! += 1;
  }

  // First twelve verified claims, reduced to display strings.
  const verifiedCards = listCards(root, { bucket: "verified", type: "claim" }).slice(0, 12);
  const claims = verifiedCards.map(({ meta }) => ({
    title: String(meta["title"] ?? meta["id"]),
    confidence: String(meta["confidence"] ?? "—"),
    state: String(meta["memory_state"] ?? "—"),
  }));

  const page = renderProgressHtml({
    name: cfg.name,
    goal: cfg.goal,
    metricName: cfg.metricName,
    direction: cfg.direction,
    baseline: cfg.baseline,
    experiments,
    memory,
    claims,
    generatedAt: new Date().toISOString(),
    refreshSeconds,
  });

  const target = paths.progressHtml;
  writeFileSync(target, page, "utf8");
  return target;
}
|
|
1154
|
+
|
|
979
1155
|
function writeFileIfAbsent(path: string, contents: string): void {
|
|
980
1156
|
if (!existsSync(path)) writeFileSync(path, contents, "utf8");
|
|
981
1157
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-autoresearch-vkf",
|
|
3
|
-
"version": "0.5.2",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Autoresearch with verifiable long-term scientific memory. A pi extension that gathers literature, stores it as VKF claims, runs experiments, and writes verified results back to a git-native knowledge bundle so future runs build on what was learned instead of rediscovering it.",
|
|
6
6
|
"keywords": [
|