pi-research 1.1.2 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -4
- package/bin/pi-research.js +5 -0
- package/bin/unblind-mcp.js +5 -0
- package/index.js +55 -11
- package/lib/local-logger.js +54 -0
- package/lib/web-research.js +68 -17
- package/mcp/server.js +242 -0
- package/mcp-server.js +18 -0
- package/package.json +12 -2
- package/pi-research.js +5 -0
- package/unblind-mcp.js +5 -0
package/README.md
CHANGED
|
@@ -200,23 +200,98 @@ Run `npm run eval` to execute the eval harness.
|
|
|
200
200
|
|
|
201
201
|
## Install
|
|
202
202
|
|
|
203
|
-
###
|
|
203
|
+
### Pi Coding Agent — extension
|
|
204
|
+
|
|
205
|
+
Existing Pi users should keep installing the main package:
|
|
204
206
|
|
|
205
207
|
```bash
|
|
206
208
|
pi install npm:pi-research
|
|
207
209
|
```
|
|
208
210
|
|
|
209
|
-
|
|
211
|
+
This registers the Pi extension and keeps the public tool name `pi-research`.
|
|
212
|
+
|
|
213
|
+
### npm install
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
npm i pi-research
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
This is the package install command that npm shows on the package page.
|
|
220
|
+
|
|
221
|
+
### MCP-only — any agent
|
|
222
|
+
|
|
223
|
+
Run the MCP server directly from npm:
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
npx -y pi-research
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
The MCP server identifies itself as `unblind-mcp`, but the tool it exposes is still named `pi-research`.
|
|
230
|
+
|
|
231
|
+
### Global MCP install
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
npm install -g pi-research
|
|
235
|
+
unblind-mcp
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
The global install also provides `pi-research` as a CLI alias for the same MCP server:
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
pi-research
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### Local development
|
|
245
|
+
|
|
246
|
+
```bash
|
|
247
|
+
node ./mcp/server.js
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
Convenience script:
|
|
210
251
|
|
|
211
252
|
```bash
|
|
212
|
-
npm
|
|
253
|
+
npm run --silent mcp
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
Example MCP config:
|
|
257
|
+
|
|
258
|
+
```json
|
|
259
|
+
{
|
|
260
|
+
"mcpServers": {
|
|
261
|
+
"unblind-mcp": {
|
|
262
|
+
"command": "npx",
|
|
263
|
+
"args": ["-y", "pi-research"]
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
Local path config:
|
|
270
|
+
|
|
271
|
+
```json
|
|
272
|
+
{
|
|
273
|
+
"mcpServers": {
|
|
274
|
+
"unblind-mcp": {
|
|
275
|
+
"command": "node",
|
|
276
|
+
"args": ["/path/to/pi-research/mcp/server.js"]
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
}
|
|
213
280
|
```
|
|
214
281
|
|
|
282
|
+
Compatibility note: `mcp-server.js` remains as a deprecated root-level shim for older local configs.
|
|
283
|
+
|
|
284
|
+
### Future `unblind-mcp` package
|
|
285
|
+
|
|
286
|
+
A separate npm package named `unblind-mcp` can be added later as a tiny wrapper around `pi-research`. It should depend on `pi-research` and start the same MCP server, not duplicate the engine.
|
|
287
|
+
|
|
215
288
|
## Release notes
|
|
216
289
|
|
|
217
290
|
- Package name: `pi-research`
|
|
218
|
-
- Version: `1.
|
|
291
|
+
- Version: `1.2.1`
|
|
219
292
|
- Entry point: `extensions/pi-research.ts`
|
|
293
|
+
- MCP entry point: `mcp/server.js`
|
|
294
|
+
- MCP compatibility shim: `mcp-server.js`
|
|
220
295
|
- License: MIT
|
|
221
296
|
- Third-party notices: `THIRD_PARTY_NOTICES.md`
|
|
222
297
|
- GitHub: `https://github.com/endgegnerbert-tech/pi-research`
|
package/index.js
CHANGED
|
@@ -2,6 +2,7 @@ import { Type } from "typebox";
|
|
|
2
2
|
|
|
3
3
|
import { compactResearchPayload, classifyQueryIntent, inferOfficialDocsSite } from "./lib/research.js";
|
|
4
4
|
import { clearResearchMemory, hashResearchQuery, setResearchMemory, shouldSkipResearch } from "./lib/research-memory.js";
|
|
5
|
+
import { logResearchEvent } from "./lib/local-logger.js";
|
|
5
6
|
import { runWebResearch } from "./lib/web-research.js";
|
|
6
7
|
|
|
7
8
|
const RESEARCH_STATE = new Map();
|
|
@@ -61,11 +62,17 @@ export default function webResearchExtension(pi) {
|
|
|
61
62
|
pi.on("before_agent_start", async (event) => {
|
|
62
63
|
RESEARCH_STATE.clear();
|
|
63
64
|
clearResearchMemory();
|
|
65
|
+
await logResearchEvent("agent_start", {
|
|
66
|
+
systemPrompt: event.systemPrompt,
|
|
67
|
+
guidance: buildWebResearchGuidance(),
|
|
68
|
+
});
|
|
64
69
|
return { systemPrompt: `${event.systemPrompt}\n\n${buildWebResearchGuidance()}` };
|
|
65
70
|
});
|
|
66
71
|
|
|
67
72
|
pi.on("tool_call", async (event) => {
|
|
68
73
|
if (event.toolName !== "pi-research") return;
|
|
74
|
+
event.input ||= {};
|
|
75
|
+
const originalInput = { ...event.input };
|
|
69
76
|
if (!event.input.mode) event.input.mode = defaultMode(event.input.query || "");
|
|
70
77
|
|
|
71
78
|
const queryHash = hashResearchQuery(event.input.query || "");
|
|
@@ -73,9 +80,26 @@ export default function webResearchExtension(pi) {
|
|
|
73
80
|
const mode = event.input.mode;
|
|
74
81
|
const isolate = Boolean(event.input.isolate || process.env.RESEARCH_ISOLATE === "1");
|
|
75
82
|
const force = Boolean(event.input.force);
|
|
83
|
+
let blocked = false;
|
|
84
|
+
let reason = "";
|
|
76
85
|
|
|
77
86
|
if (shouldSkipResearch({ queryHash, lastHash: state.lastHash, lastWasSufficient: state.lastSufficient, force, isolate })) {
|
|
78
|
-
|
|
87
|
+
blocked = true;
|
|
88
|
+
reason = "Recent pi-research result was already sufficient for this exact query.";
|
|
89
|
+
await logResearchEvent("tool_call", {
|
|
90
|
+
originalInput,
|
|
91
|
+
finalInput: { ...event.input },
|
|
92
|
+
queryHash,
|
|
93
|
+
blocked,
|
|
94
|
+
reason,
|
|
95
|
+
state: {
|
|
96
|
+
count: state.count,
|
|
97
|
+
lastHash: state.lastHash,
|
|
98
|
+
lastSufficient: state.lastSufficient,
|
|
99
|
+
fastRecoveryAllowed: state.fastRecoveryAllowed,
|
|
100
|
+
},
|
|
101
|
+
});
|
|
102
|
+
return { block: true, reason };
|
|
79
103
|
}
|
|
80
104
|
|
|
81
105
|
if (mode === "fast" && state.count === 1 && state.fastRecoveryAllowed && !force && !isolate) {
|
|
@@ -85,19 +109,39 @@ export default function webResearchExtension(pi) {
|
|
|
85
109
|
|
|
86
110
|
state.count += 1;
|
|
87
111
|
state.lastHash = queryHash;
|
|
112
|
+
await logResearchEvent("tool_call", {
|
|
113
|
+
originalInput,
|
|
114
|
+
finalInput: { ...event.input },
|
|
115
|
+
queryHash,
|
|
116
|
+
blocked,
|
|
117
|
+
state: {
|
|
118
|
+
count: state.count,
|
|
119
|
+
lastHash: state.lastHash,
|
|
120
|
+
lastSufficient: state.lastSufficient,
|
|
121
|
+
fastRecoveryAllowed: state.fastRecoveryAllowed,
|
|
122
|
+
},
|
|
123
|
+
});
|
|
88
124
|
});
|
|
89
125
|
|
|
90
126
|
pi.on("tool_result", async (event) => {
|
|
91
|
-
if (event.toolName === "pi-research"
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
127
|
+
if (event.toolName === "pi-research") {
|
|
128
|
+
if (!event.isError && event.details?.ok) {
|
|
129
|
+
const queryHash = hashResearchQuery(event.input?.query || "");
|
|
130
|
+
const state = getState(queryHash);
|
|
131
|
+
state.lastHash = queryHash;
|
|
132
|
+
state.lastSufficient = Boolean(event.details.sufficient);
|
|
133
|
+
const query = event.input?.query || "";
|
|
134
|
+
state.fastRecoveryAllowed = !event.details.sufficient
|
|
135
|
+
&& !event.details.authoritativeSourcesFound
|
|
136
|
+
&& ["best_practice", "temporal", "definition"].includes(classifyQueryIntent(query || ""));
|
|
137
|
+
setResearchMemory(`last:${queryHash}`, event.details);
|
|
138
|
+
}
|
|
139
|
+
await logResearchEvent("tool_result", {
|
|
140
|
+
toolName: event.toolName,
|
|
141
|
+
isError: event.isError,
|
|
142
|
+
input: event.input,
|
|
143
|
+
details: event.details,
|
|
144
|
+
});
|
|
101
145
|
}
|
|
102
146
|
return compactWebResearchToolResult(event) || undefined;
|
|
103
147
|
});
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { appendFile, mkdir } from "node:fs/promises";
|
|
2
|
+
import { dirname, join } from "node:path";
|
|
3
|
+
import { homedir } from "node:os";
|
|
4
|
+
|
|
5
|
+
const LOG_PATH = process.env.PI_RESEARCH_LOG_PATH || join(homedir(), ".pi", "logs", "pi-research.jsonl");
|
|
6
|
+
let writeChain = Promise.resolve();
|
|
7
|
+
|
|
8
|
+
/**
 * Convert an arbitrary value into a plain, JSON-serializable structure for logging.
 *
 * Primitives pass through, bigint/function/symbol/RegExp become strings, dates
 * become ISO strings, errors become `{ name, message, stack }`, and containers
 * are copied recursively.
 *
 * @param {unknown} value - Value to make loggable.
 * @param {number} [depth=0] - Current nesting depth (internal, used by recursion).
 * @param {WeakSet<object>} [seen] - Containers on the current path (internal).
 * @returns {unknown} A structure that `JSON.stringify` can serialize without throwing.
 */
function sanitize(value, depth = 0, seen = new WeakSet()) {
  const MAX_DEPTH = 6;

  if (value === null || value === undefined) return value;
  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return value;
  if (typeof value === "bigint") return value.toString();
  if (typeof value === "function") return `[Function ${value.name || "anonymous"}]`;
  // Only symbols reach this point among the non-object types.
  if (typeof value !== "object") return String(value);

  if (value instanceof Date) {
    // toISOString() throws a RangeError on an invalid date; a log line must not.
    return Number.isNaN(value.getTime()) ? "Invalid Date" : value.toISOString();
  }
  if (value instanceof RegExp) return value.toString();
  if (value instanceof Error) {
    return { name: value.name, message: value.message, stack: value.stack };
  }

  // Arrays and objects share one cycle/depth guard. Without tracking arrays, a
  // self-referential array is expanded level by level until the depth limit.
  if (seen.has(value)) return "[Circular]";
  if (depth >= MAX_DEPTH) return "[MaxDepth]";

  seen.add(value);
  try {
    if (Array.isArray(value)) {
      return value.map((item) => sanitize(item, depth + 1, seen));
    }
    const output = {};
    for (const [key, item] of Object.entries(value)) {
      output[key] = sanitize(item, depth + 1, seen);
    }
    return output;
  } finally {
    // Remove on the way out so a value referenced twice (but not cyclically)
    // is still copied both times.
    seen.delete(value);
  }
}
|
|
33
|
+
|
|
34
|
+
/**
 * Report where research events are written.
 *
 * @returns {string} The module-level log path resolved when this module loaded.
 */
export function getResearchLogPath() {
  const activeLogPath = LOG_PATH;
  return activeLogPath;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Append one JSON line describing a research event to the local log file.
 *
 * Logging is best-effort: neither building the record nor writing it may
 * reject, because callers `await` this in the middle of the research pipeline
 * (including inside their own `catch` blocks).
 *
 * @param {string} type - Event name, e.g. "tool_call" or "fetch_end".
 * @param {unknown} [data={}] - Event payload; sanitized before serialization.
 * @returns {Promise<void>} Settles once this and all earlier queued writes were attempted.
 */
export async function logResearchEvent(type, data = {}) {
  const ts = new Date().toISOString();
  let line;
  try {
    const record = {
      ts,
      pid: process.pid,
      cwd: process.cwd(),
      type,
      data: sanitize(data),
    };
    line = `${JSON.stringify(record)}\n`;
  } catch (error) {
    // process.cwd() (working directory removed) or a throwing getter inside
    // `data` can fail here. Keep a minimal record instead of rejecting.
    const logError = error instanceof Error ? error.message : String(error);
    line = `${JSON.stringify({ ts, pid: process.pid, type: String(type), logError })}\n`;
  }

  // Writes are chained so lines land in call order; failures are swallowed on purpose.
  writeChain = writeChain
    .then(async () => {
      await mkdir(dirname(LOG_PATH), { recursive: true });
      await appendFile(LOG_PATH, line, "utf8");
    })
    .catch(() => {});
  return writeChain;
}
|
package/lib/web-research.js
CHANGED
|
@@ -46,6 +46,7 @@ import {
|
|
|
46
46
|
setResearchMemory,
|
|
47
47
|
writeCachedResult,
|
|
48
48
|
} from "./research-memory.js";
|
|
49
|
+
import { logResearchEvent } from "./local-logger.js";
|
|
49
50
|
|
|
50
51
|
const USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36";
|
|
51
52
|
const MIN_PAGE_TEXT = 300;
|
|
@@ -263,7 +264,8 @@ async function searchArxiv(query, signal, config) {
|
|
|
263
264
|
const published = entry.match(/<published>([^<]+)<\/published>/)?.[1]?.slice(0, 10);
|
|
264
265
|
return sourceFromPaper(title, url, summary, published);
|
|
265
266
|
}).filter((item) => item.url && item.title);
|
|
266
|
-
} catch {
|
|
267
|
+
} catch (error) {
|
|
268
|
+
await logResearchEvent("search_error", { provider: "arxiv", query, error });
|
|
267
269
|
return [];
|
|
268
270
|
}
|
|
269
271
|
}
|
|
@@ -273,7 +275,8 @@ async function searchSemanticScholar(query, signal, config) {
|
|
|
273
275
|
const response = await fetchTextWithRetry(`https://api.semanticscholar.org/graph/v1/paper/search?query=${encodeURIComponent(query)}&limit=${config.resultsPerQuery}&fields=title,abstract,url,year`, signal, 2, {}, config.pageTimeoutMs);
|
|
274
276
|
const data = await response.json();
|
|
275
277
|
return (data?.data || []).map((item) => sourceFromPaper(item.title, item.url || `https://www.semanticscholar.org/search?q=${encodeURIComponent(item.title)}`, item.abstract || "", item.year ? `${item.year}-01-01` : null)).filter((item) => item.title);
|
|
276
|
-
} catch {
|
|
278
|
+
} catch (error) {
|
|
279
|
+
await logResearchEvent("search_error", { provider: "semanticscholar", query, error });
|
|
277
280
|
return [];
|
|
278
281
|
}
|
|
279
282
|
}
|
|
@@ -288,7 +291,8 @@ async function searchCrossref(query, signal, config) {
|
|
|
288
291
|
const publishDate = dateParts.length ? `${String(dateParts[0]).padStart(4, "0")}-${String(dateParts[1] || 1).padStart(2, "0")}-${String(dateParts[2] || 1).padStart(2, "0")}` : null;
|
|
289
292
|
return sourceFromPaper(item.title?.[0] || "", doi, String(item.abstract || "").replace(/<[^>]+>/g, " "), publishDate);
|
|
290
293
|
}).filter((item) => item.url && item.title);
|
|
291
|
-
} catch {
|
|
294
|
+
} catch (error) {
|
|
295
|
+
await logResearchEvent("search_error", { provider: "crossref", query, error });
|
|
292
296
|
return [];
|
|
293
297
|
}
|
|
294
298
|
}
|
|
@@ -382,7 +386,10 @@ function withinTimeframe(page, config) {
|
|
|
382
386
|
}
|
|
383
387
|
|
|
384
388
|
export async function fetchPageSource(url, signal, config = getResearchConfig()) {
|
|
385
|
-
if (shouldSkipUrl(url))
|
|
389
|
+
if (shouldSkipUrl(url)) {
|
|
390
|
+
await logResearchEvent("fetch_skip", { url, reason: "login_or_account_url" });
|
|
391
|
+
return null;
|
|
392
|
+
}
|
|
386
393
|
const adapter = config.fetchAdapter || pageFetchAdapter;
|
|
387
394
|
const cacheKey = `${normalizeUrl(url)}::${config.pageTextLimit}::${JSON.stringify({
|
|
388
395
|
preferRecent: config.preferRecent || false,
|
|
@@ -391,12 +398,19 @@ export async function fetchPageSource(url, signal, config = getResearchConfig())
|
|
|
391
398
|
useJinaFallback: Boolean(config.useJinaFallback),
|
|
392
399
|
})}`;
|
|
393
400
|
const cached = config.isolate ? null : getCacheValue(pageCache, cacheKey);
|
|
394
|
-
if (cached)
|
|
401
|
+
if (cached) {
|
|
402
|
+
await logResearchEvent("fetch_cache_hit", { url, cacheKey, title: cached.title, textLength: cached.text?.length || 0 });
|
|
403
|
+
return cached;
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
await logResearchEvent("fetch_start", { url, cacheKey, config: { isolate: config.isolate, useJinaFallback: Boolean(config.useJinaFallback), pageTextLimit: config.pageTextLimit } });
|
|
395
407
|
|
|
396
408
|
if (shouldUseJinaFirst(url)) {
|
|
397
409
|
const first = await fetchJinaPageSource(url, signal, config);
|
|
398
410
|
if (first && withinTimeframe(first, config)) {
|
|
399
|
-
|
|
411
|
+
const page = config.isolate ? first : setCacheValue(pageCache, cacheKey, first, PAGE_CACHE_TTL_MS);
|
|
412
|
+
await logResearchEvent("fetch_end", { url, via: "jina_first", success: Boolean(page), page: page ? { title: page.title, sourceType: page.sourceType, publishDate: page.publishDate, textLength: page.text?.length || 0 } : null });
|
|
413
|
+
return page;
|
|
400
414
|
}
|
|
401
415
|
}
|
|
402
416
|
|
|
@@ -407,7 +421,10 @@ export async function fetchPageSource(url, signal, config = getResearchConfig())
|
|
|
407
421
|
}, config.pageTimeoutMs);
|
|
408
422
|
|
|
409
423
|
const contentType = response.headers.get("content-type") || "";
|
|
410
|
-
if (!contentType.includes("text/html") && !contentType.includes("text/plain"))
|
|
424
|
+
if (!contentType.includes("text/html") && !contentType.includes("text/plain")) {
|
|
425
|
+
await logResearchEvent("fetch_end", { url, success: false, reason: "unsupported_content_type", contentType });
|
|
426
|
+
return null;
|
|
427
|
+
}
|
|
411
428
|
|
|
412
429
|
const body = await response.text();
|
|
413
430
|
const snapshot = extractPageSnapshot(body, response.url || url);
|
|
@@ -438,10 +455,14 @@ export async function fetchPageSource(url, signal, config = getResearchConfig())
|
|
|
438
455
|
|
|
439
456
|
const resolved = page || await fetchJinaPageSource(url, signal, config);
|
|
440
457
|
const finalPage = resolved && withinTimeframe(resolved, config) ? resolved : null;
|
|
441
|
-
|
|
442
|
-
|
|
458
|
+
const stored = config.isolate ? finalPage : setCacheValue(pageCache, cacheKey, finalPage, PAGE_CACHE_TTL_MS);
|
|
459
|
+
await logResearchEvent("fetch_end", { url, success: Boolean(stored), page: stored ? { title: stored.title, sourceType: stored.sourceType, publishDate: stored.publishDate, textLength: stored.text?.length || 0 } : null });
|
|
460
|
+
return stored;
|
|
461
|
+
} catch (error) {
|
|
443
462
|
const fallback = await fetchJinaPageSource(url, signal, config);
|
|
444
|
-
|
|
463
|
+
const stored = config.isolate ? fallback : setCacheValue(pageCache, cacheKey, fallback, PAGE_CACHE_TTL_MS);
|
|
464
|
+
await logResearchEvent("fetch_error", { url, error, fallback: stored ? { title: stored.title, sourceType: stored.sourceType, publishDate: stored.publishDate, textLength: stored.text?.length || 0 } : null });
|
|
465
|
+
return stored;
|
|
445
466
|
}
|
|
446
467
|
}
|
|
447
468
|
|
|
@@ -455,9 +476,10 @@ async function readLocalFiles(paths, config) {
|
|
|
455
476
|
publishDate: null,
|
|
456
477
|
local: true,
|
|
457
478
|
});
|
|
479
|
+
await logResearchEvent("local_file_read", { path, success: Boolean(page), textLength: text.length, page: page ? { title: page.title, textLength: page.text.length } : null });
|
|
458
480
|
if (page) pages.push(page);
|
|
459
|
-
} catch {
|
|
460
|
-
|
|
481
|
+
} catch (error) {
|
|
482
|
+
await logResearchEvent("local_file_error", { path, error });
|
|
461
483
|
}
|
|
462
484
|
}
|
|
463
485
|
return pages;
|
|
@@ -483,6 +505,7 @@ function fallbackSynthesis(query, pages) {
|
|
|
483
505
|
}
|
|
484
506
|
|
|
485
507
|
export async function synthesizeResearch(query, pages, ctx, signal) {
|
|
508
|
+
await logResearchEvent("synthesis_start", { query, pages: pages.map((page) => ({ title: page.title, url: page.url, sourceType: page.sourceType, textLength: page.text?.length || 0 })) });
|
|
486
509
|
const prompt = [
|
|
487
510
|
"You are a concise research synthesizer.",
|
|
488
511
|
"Answer only from the provided sources.",
|
|
@@ -517,19 +540,23 @@ export async function synthesizeResearch(query, pages, ctx, signal) {
|
|
|
517
540
|
score: typeof pages[id - 1].score === "number" ? pages[id - 1].score : scoreSourceEntry(pages[id - 1], query).total,
|
|
518
541
|
authoritative: typeof pages[id - 1].authoritative === "boolean" ? pages[id - 1].authoritative : scoreSourceEntry(pages[id - 1], query).authoritative,
|
|
519
542
|
})), query);
|
|
520
|
-
|
|
543
|
+
const result = {
|
|
521
544
|
answer: parsed.answer.trim(),
|
|
522
545
|
bullets: parsed.bullets.map((item) => String(item).trim()).filter(Boolean).slice(0, 5),
|
|
523
546
|
sources,
|
|
524
547
|
citations: Array.isArray(parsed.citations) ? parsed.citations.slice(0, 8) : sources.map((source) => ({ text: source.title, sourceIndex: source.number || 0 })),
|
|
525
548
|
};
|
|
549
|
+
await logResearchEvent("synthesis_end", { query, result });
|
|
550
|
+
return result;
|
|
526
551
|
}
|
|
527
552
|
}
|
|
528
553
|
} catch {
|
|
529
554
|
// fall through
|
|
530
555
|
}
|
|
531
556
|
|
|
532
|
-
|
|
557
|
+
const fallback = fallbackSynthesis(query, pages);
|
|
558
|
+
await logResearchEvent("synthesis_end", { query, result: fallback, fallback: true });
|
|
559
|
+
return fallback;
|
|
533
560
|
}
|
|
534
561
|
|
|
535
562
|
function planSubqueries(rootQuery, currentQuery, config, sufficiency) {
|
|
@@ -564,17 +591,28 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
|
|
|
564
591
|
const config = getResearchConfig(typeof mode === "object" ? { ...mode, domain } : { mode, domain });
|
|
565
592
|
const cacheKey = modeCacheKey(query, config);
|
|
566
593
|
|
|
594
|
+
await logResearchEvent("research_start", { query, mode: config.mode, domain, config });
|
|
595
|
+
|
|
567
596
|
if (!config.isolate && !config.force) {
|
|
568
597
|
const memoryHit = getResearchMemory(cacheKey);
|
|
569
|
-
if (memoryHit)
|
|
598
|
+
if (memoryHit) {
|
|
599
|
+
await logResearchEvent("research_cache_hit", { query, cacheKey, source: "memory" });
|
|
600
|
+
await logResearchEvent("research_end", { ...memoryHit, cacheHit: true, cacheSource: "memory" });
|
|
601
|
+
return memoryHit;
|
|
602
|
+
}
|
|
570
603
|
const persistentHit = readCachedResult(cacheKey);
|
|
571
604
|
if (persistentHit) {
|
|
572
605
|
setResearchMemory(cacheKey, persistentHit);
|
|
606
|
+
await logResearchEvent("research_cache_hit", { query, cacheKey, source: "disk" });
|
|
607
|
+
await logResearchEvent("research_end", { ...persistentHit, cacheHit: true, cacheSource: "disk" });
|
|
573
608
|
return persistentHit;
|
|
574
609
|
}
|
|
575
610
|
}
|
|
576
611
|
|
|
577
|
-
const emit = (stage, text) =>
|
|
612
|
+
const emit = (stage, text) => {
|
|
613
|
+
void logResearchEvent("pipeline_stage", { query, stage, text });
|
|
614
|
+
return onUpdate?.({ content: [{ type: "text", text: `[pipeline:${stage}] ${text}` }] });
|
|
615
|
+
};
|
|
578
616
|
const startedAt = Date.now();
|
|
579
617
|
const seenUrls = new Set();
|
|
580
618
|
const seenContentHashes = new Set();
|
|
@@ -613,6 +651,11 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
|
|
|
613
651
|
emit("search", `Searching ${queriesThisTurn.length} queries...`);
|
|
614
652
|
|
|
615
653
|
const searchGroups = await Promise.all(queriesThisTurn.map((subquery) => searchDuckDuckGo(subquery, signal, config)));
|
|
654
|
+
await logResearchEvent("search_results", {
|
|
655
|
+
query,
|
|
656
|
+
queries: queriesThisTurn,
|
|
657
|
+
results: searchGroups.flat().map((result) => ({ title: result.title, url: result.url, snippet: result.snippet, sourceType: result.sourceType, publishDate: result.publishDate })),
|
|
658
|
+
});
|
|
616
659
|
const results = rankSearchResults(searchGroups.flat(), query, config.maxPages * 2, config)
|
|
617
660
|
.filter((result) => {
|
|
618
661
|
const key = normalizeUrl(result.url);
|
|
@@ -624,6 +667,11 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
|
|
|
624
667
|
|
|
625
668
|
emit("fetch", `Reading ${results.length} sources...`);
|
|
626
669
|
const pageCandidates = await Promise.all(results.map((result) => fetchPageSource(result.url, signal, config)));
|
|
670
|
+
await logResearchEvent("page_fetch_results", {
|
|
671
|
+
query,
|
|
672
|
+
urls: results.map((result) => result.url),
|
|
673
|
+
pages: pageCandidates.filter(Boolean).map((page) => ({ title: page.title, url: page.url, sourceType: page.sourceType, publishDate: page.publishDate, textLength: page.text?.length || 0 })),
|
|
674
|
+
});
|
|
627
675
|
const rankedPages = rankFetchedPages(pageCandidates.filter(Boolean).map((page) => {
|
|
628
676
|
const scored = scoreSourceEntry(page, query);
|
|
629
677
|
return {
|
|
@@ -675,7 +723,7 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
|
|
|
675
723
|
}
|
|
676
724
|
|
|
677
725
|
if (mergedPages.length === 0) {
|
|
678
|
-
|
|
726
|
+
const emptyResult = {
|
|
679
727
|
ok: false,
|
|
680
728
|
action: "web_research",
|
|
681
729
|
query,
|
|
@@ -685,6 +733,8 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
|
|
|
685
733
|
openSubQuestions: buildFallbackQueries(query),
|
|
686
734
|
error: "No readable web sources were retrieved.",
|
|
687
735
|
};
|
|
736
|
+
await logResearchEvent("research_end", emptyResult);
|
|
737
|
+
return emptyResult;
|
|
688
738
|
}
|
|
689
739
|
|
|
690
740
|
emit("synthesis", `Synthesizing ${mergedPages.length} sources...`);
|
|
@@ -754,6 +804,7 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
|
|
|
754
804
|
|
|
755
805
|
setResearchMemory(cacheKey, result);
|
|
756
806
|
writeCachedResult(cacheKey, result, config.cacheTtlMs);
|
|
807
|
+
await logResearchEvent("research_end", result);
|
|
757
808
|
return result;
|
|
758
809
|
}
|
|
759
810
|
|
package/mcp/server.js
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { realpathSync } from "node:fs";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
|
|
6
|
+
import pkg from "../package.json" with { type: "json" };
|
|
7
|
+
import { classifyQueryIntent } from "../lib/research.js";
|
|
8
|
+
import { runWebResearch } from "../lib/web-research.js";
|
|
9
|
+
|
|
10
|
+
const SERVER_NAME = "unblind-mcp";
|
|
11
|
+
const TOOL_NAME = "pi-research";
|
|
12
|
+
|
|
13
|
+
/**
 * Produce the short usage hint sent to clients as the server's `instructions`.
 *
 * @returns {string} Guidance text telling the agent when to call the tool.
 */
function buildWebResearchGuidance() {
  const guidance = "Use pi-research for current facts, docs, best practices, comparisons, and citations. Search if unsure.";
  return guidance;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Choose a research mode for a query when the caller did not specify one.
 *
 * @param {string} query - The user's question.
 * @returns {"fast"|"deep"|"academic"} "deep" for comparison intents, "academic"
 *   for academic intent, otherwise "fast".
 */
function defaultMode(query) {
  switch (classifyQueryIntent(query)) {
    case "comparison":
    case "comparative":
      return "deep";
    case "academic":
      return "academic";
    default:
      return "fast";
  }
}
|
|
23
|
+
|
|
24
|
+
/**
 * Describe the single tool this server exposes, in MCP `tools/list` form.
 *
 * The schema is assembled from the innermost object outwards; property order
 * is kept stable so the serialized definition does not change.
 *
 * @returns {{name: string, description: string, inputSchema: object}} Tool definition.
 */
function buildToolDefinition() {
  const deepResearchConfig = {
    type: "object",
    properties: {
      depth: { type: "number", enum: [1, 2, 3] },
      breadth: { type: "number", enum: [2, 3, 4] },
      concurrency: { type: "number", enum: [1, 2, 3, 4] },
    },
    additionalProperties: false,
  };

  const options = {
    type: "object",
    properties: {
      allowedSources: { type: "array", items: { type: "string" } },
      maxTurns: { type: "number" },
      maxSites: { type: "number" },
      requireAuthoritative: { type: "boolean" },
      minYear: { type: "number" },
      maxYear: { type: "number" },
      preferRecent: { type: "boolean" },
      files: { type: "array", items: { type: "string" } },
      format: {
        type: "string",
        enum: ["markdown", "json", "table", "latex"],
      },
      deepResearchConfig,
    },
    additionalProperties: false,
  };

  const inputSchema = {
    type: "object",
    properties: {
      query: { type: "string", description: "Live web question" },
      mode: {
        type: "string",
        enum: ["fast", "deep", "code", "academic"],
        description: "Mode",
      },
      force: { type: "boolean", description: "Ignore cache" },
      isolate: { type: "boolean", description: "No cache reuse" },
      options,
    },
    required: ["query"],
    additionalProperties: false,
  };

  return {
    name: TOOL_NAME,
    description: "Live sources, ranking, and cited answers.",
    inputSchema,
  };
}
|
|
72
|
+
|
|
73
|
+
/**
 * Build the `result` payload for an MCP `initialize` request.
 *
 * @param {string} [protocolVersion] - Version requested by the client; echoed
 *   back when truthy, otherwise "2025-03-26" is reported.
 * @returns {object} Protocol version, capabilities (tools only), server info
 *   and usage instructions.
 */
function buildInitializeResult(protocolVersion) {
  const negotiatedVersion = protocolVersion || "2025-03-26";
  const capabilities = { tools: {} };
  const serverInfo = { name: SERVER_NAME, version: pkg.version };

  return {
    protocolVersion: negotiatedVersion,
    capabilities,
    serverInfo,
    instructions: buildWebResearchGuidance(),
  };
}
|
|
86
|
+
|
|
87
|
+
/**
 * Wrap a research payload in the MCP tool-result envelope.
 *
 * @param {object} payload - Result returned by the research run.
 * @returns {{content: Array<{type: string, text: string}>, structuredContent: object, isError: boolean}}
 *   Text content is `payload.contentText` for successful payloads that have one,
 *   otherwise the pretty-printed payload; `isError` is set when `payload.ok` is falsy.
 */
function buildToolResult(payload) {
  const succeeded = Boolean(payload?.ok);

  let text;
  if (succeeded && payload.contentText) {
    text = payload.contentText;
  } else {
    text = JSON.stringify(payload, null, 2);
  }

  return {
    content: [{ type: "text", text }],
    structuredContent: payload,
    isError: !succeeded,
  };
}
|
|
95
|
+
|
|
96
|
+
/**
 * Execute one `pi-research` tool call and wrap the outcome as an MCP tool result.
 *
 * @param {object} [params={}] - Tool arguments (`query`, `mode`, `force`, `isolate`, `options`).
 * @param {Function} [run=runWebResearch] - Research implementation; injectable for tests.
 * @returns {Promise<object>} The envelope produced by `buildToolResult`.
 */
async function runResearchTool(params = {}, run = runWebResearch) {
  const query = params.query || "";
  // Only a missing (null/undefined) mode triggers intent-based selection.
  const mode = params.mode ?? defaultMode(query);

  const researchConfig = {
    mode,
    force: params.force,
    isolate: params.isolate,
    // Spread last: entries in `options` take precedence over the fields above.
    ...(params.options || {}),
  };

  const payload = await run(query, undefined, undefined, undefined, researchConfig);
  return buildToolResult(payload);
}
|
|
106
|
+
|
|
107
|
+
/**
 * Build a JSON-RPC 2.0 error response object.
 *
 * @param {string|number|null} id - Id of the request being answered.
 * @param {number} code - JSON-RPC error code.
 * @param {string} message - Short error description.
 * @param {unknown} [data] - Optional extra detail; omitted from the error when undefined.
 * @returns {{jsonrpc: "2.0", id: string|number|null, error: object}} Error response.
 */
function jsonRpcError(id, code, message, data) {
  return {
    jsonrpc: "2.0",
    id,
    error: data === undefined ? { code, message } : { code, message, data },
  };
}
|
|
112
|
+
|
|
113
|
+
/**
 * Handle one decoded JSON-RPC message and produce the response to send, if any.
 *
 * Supported methods: `initialize`, `ping`, `tools/list`, `tools/call`.
 *
 * @param {object} message - Parsed JSON-RPC message.
 * @param {{runWebResearchFn?: Function}} [deps={}] - Optional research implementation override.
 * @returns {Promise<object|null>} A JSON-RPC response, or `null` when nothing
 *   must be sent back (notifications).
 */
async function handleMcpRequest(message, deps = {}) {
  const run = deps.runWebResearchFn || runWebResearch;

  if (!message || typeof message !== "object") {
    return jsonRpcError(null, -32600, "Invalid Request");
  }

  if (typeof message.method !== "string") {
    return jsonRpcError(message.id ?? null, -32600, "Invalid Request");
  }

  if (message.method === "notifications/initialized") return null;

  // A message without an `id` member is a JSON-RPC notification.
  const isNotification = message.id === undefined;

  try {
    if (message.method === "initialize") {
      return {
        jsonrpc: "2.0",
        id: message.id,
        result: buildInitializeResult(message.params?.protocolVersion),
      };
    }

    if (message.method === "ping") {
      // MCP liveness check: answered with an empty result.
      return { jsonrpc: "2.0", id: message.id, result: {} };
    }

    if (message.method === "tools/list") {
      return {
        jsonrpc: "2.0",
        id: message.id,
        result: { tools: [buildToolDefinition()] },
      };
    }

    if (message.method === "tools/call") {
      const params = message.params || {};
      if (params.name !== TOOL_NAME) {
        return jsonRpcError(message.id ?? null, -32602, `Unknown tool: ${String(params.name || "")}`);
      }
      const toolResult = await runResearchTool(params.arguments || {}, run);
      return {
        jsonrpc: "2.0",
        id: message.id,
        result: toolResult,
      };
    }

    // Notifications must never be answered, not even with an error. Replying
    // "Method not found" to e.g. `notifications/cancelled` violates JSON-RPC 2.0.
    if (isNotification) return null;

    return jsonRpcError(message.id ?? null, -32601, `Method not found: ${message.method}`);
  } catch (error) {
    // Failures while running the tool are reported as a tool result flagged
    // with `isError`, not as a protocol-level error.
    const text = error instanceof Error ? error.stack || error.message : String(error);
    return {
      jsonrpc: "2.0",
      id: message.id ?? null,
      result: {
        content: [{ type: "text", text }],
        isError: true,
      },
    };
  }
}
|
|
169
|
+
|
|
170
|
+
/**
 * Start the MCP server loop on a pair of streams (stdin/stdout by default).
 *
 * Two message framings are accepted and each reply uses the framing of the
 * request it answers:
 *  - newline-delimited JSON, the framing defined by the MCP stdio transport;
 *  - `Content-Length` headers followed by the JSON body (previous behavior).
 *
 * @param {object} [options]
 * @param {NodeJS.ReadableStream} [options.input=process.stdin] - Source of requests.
 * @param {NodeJS.WritableStream} [options.output=process.stdout] - Sink for responses.
 * @param {NodeJS.WritableStream} [options.errorOutput=process.stderr] - Sink for parse errors.
 * @param {Function} [options.runWebResearchFn=runWebResearch] - Research implementation override.
 * @returns {void}
 */
export function startMcpServer({ input = process.stdin, output = process.stdout, errorOutput = process.stderr, runWebResearchFn = runWebResearch } = {}) {
  const HEADER_SEPARATOR = "\r\n\r\n";
  const BYTE_LF = 0x0a;
  const BYTE_CR = 0x0d;
  const BYTE_SPACE = 0x20;
  const BYTE_TAB = 0x09;
  const BYTE_OPEN_BRACE = 0x7b;
  const BYTE_OPEN_BRACKET = 0x5b;

  let buffer = Buffer.alloc(0);

  /** Serialize one response using the framing the request arrived in. */
  function send(message, withHeaders) {
    const json = JSON.stringify(message);
    if (withHeaders) {
      output.write(`Content-Length: ${Buffer.byteLength(json, "utf8")}\r\n\r\n${json}`);
    } else {
      output.write(`${json}\n`);
    }
  }

  /** Parse one message body and route it; replies are written when the handler settles. */
  function dispatch(bodyText, withHeaders) {
    let message;
    try {
      message = JSON.parse(bodyText);
    } catch (error) {
      errorOutput.write(`${String(error)}\n`);
      return;
    }

    // Handlers run concurrently; each reply is written as soon as it is ready.
    void handleMcpRequest(message, { runWebResearchFn }).then((response) => {
      if (response) send(response, withHeaders);
    }).catch((error) => {
      const text = error instanceof Error ? error.stack || error.message : String(error);
      send({ jsonrpc: "2.0", id: message?.id ?? null, result: { content: [{ type: "text", text }], isError: true } }, withHeaders);
    });
  }

  /** Drop whitespace between messages so framing detection sees the first real byte. */
  function skipLeadingWhitespace() {
    let offset = 0;
    while (offset < buffer.length) {
      const byte = buffer[offset];
      if (byte !== BYTE_LF && byte !== BYTE_CR && byte !== BYTE_SPACE && byte !== BYTE_TAB) break;
      offset += 1;
    }
    if (offset > 0) buffer = buffer.subarray(offset);
  }

  /** Consume every complete message currently in the buffer. */
  function pump() {
    while (true) {
      skipLeadingWhitespace();
      if (buffer.length === 0) return;

      // A message that starts with `{` or `[` is newline-delimited JSON.
      if (buffer[0] === BYTE_OPEN_BRACE || buffer[0] === BYTE_OPEN_BRACKET) {
        const lineEnd = buffer.indexOf(BYTE_LF);
        if (lineEnd === -1) return; // wait for the rest of the line
        const line = buffer.subarray(0, lineEnd).toString("utf8").trim();
        buffer = buffer.subarray(lineEnd + 1);
        if (line) dispatch(line, false);
        continue;
      }

      // Otherwise expect a header block terminated by an empty line.
      const headerEnd = buffer.indexOf(HEADER_SEPARATOR);
      if (headerEnd === -1) return;

      const headerText = buffer.subarray(0, headerEnd).toString("utf8");
      const match = headerText.match(/content-length:\s*(\d+)/i);
      if (!match) {
        // Unusable header block: discard it and look for the next one.
        buffer = buffer.subarray(headerEnd + HEADER_SEPARATOR.length);
        continue;
      }

      const length = Number(match[1]);
      const bodyStart = headerEnd + HEADER_SEPARATOR.length;
      const bodyEnd = bodyStart + length;
      if (buffer.length < bodyEnd) return; // body not fully received yet

      const bodyText = buffer.subarray(bodyStart, bodyEnd).toString("utf8");
      buffer = buffer.subarray(bodyEnd);
      if (!bodyText.trim()) continue;

      dispatch(bodyText, true);
    }
  }

  input.on("data", (chunk) => {
    // A stream with an encoding set delivers strings; normalize to bytes.
    const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
    buffer = Buffer.concat([buffer, bytes]);
    pump();
  });

  input.on("end", () => {
    process.exitCode = 0;
  });
}
|
|
225
|
+
|
|
226
|
+
/**
 * Tell whether this module is the script Node was started with.
 *
 * Both paths are resolved with `realpathSync` before comparison, so launching
 * through a symlink still matches.
 *
 * @param {string} metaUrl - The module's `import.meta.url`.
 * @returns {boolean} True when `process.argv[1]` resolves to this module's file.
 */
function isMainModule(metaUrl) {
  const [, entryScript] = process.argv;
  if (!entryScript) return false;

  const entryPath = realpathSync(entryScript);
  const modulePath = realpathSync(fileURLToPath(metaUrl));
  return entryPath === modulePath;
}
|
|
230
|
+
|
|
231
|
+
if (isMainModule(import.meta.url)) {
|
|
232
|
+
startMcpServer();
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
export {
|
|
236
|
+
buildInitializeResult,
|
|
237
|
+
buildToolDefinition,
|
|
238
|
+
buildToolResult,
|
|
239
|
+
defaultMode,
|
|
240
|
+
handleMcpRequest,
|
|
241
|
+
runResearchTool,
|
|
242
|
+
};
|
package/mcp-server.js
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { realpathSync } from "node:fs";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
|
|
6
|
+
import { startMcpServer } from "./mcp/server.js";
|
|
7
|
+
|
|
8
|
+
export * from "./mcp/server.js";
|
|
9
|
+
|
|
10
|
+
/**
 * Report whether this shim file is the entry script of the current process.
 *
 * @param {string} metaUrl - The module's `import.meta.url`.
 * @returns {boolean} True when the real path of `process.argv[1]` equals the
 *   real path of this module; false when no entry script is set.
 */
function isMainModule(metaUrl) {
  const launchedScript = process.argv[1];
  if (launchedScript) {
    const launchedPath = realpathSync(launchedScript);
    const ownPath = realpathSync(fileURLToPath(metaUrl));
    return launchedPath === ownPath;
  }
  return false;
}
|
|
14
|
+
|
|
15
|
+
if (isMainModule(import.meta.url)) {
|
|
16
|
+
process.stderr.write("mcp-server.js is deprecated; use mcp/server.js instead.\n");
|
|
17
|
+
startMcpServer();
|
|
18
|
+
}
|
package/package.json
CHANGED
|
@@ -1,15 +1,24 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-research",
|
|
3
|
-
"version": "1.1
|
|
3
|
+
"version": "1.2.1",
|
|
4
4
|
"private": false,
|
|
5
5
|
"type": "module",
|
|
6
6
|
"description": "Pi extension for web research.",
|
|
7
7
|
"license": "MIT",
|
|
8
8
|
"main": "./index.js",
|
|
9
|
+
"bin": {
|
|
10
|
+
"pi-research": "./pi-research.js",
|
|
11
|
+
"unblind-mcp": "./unblind-mcp.js"
|
|
12
|
+
},
|
|
9
13
|
"files": [
|
|
14
|
+
"bin",
|
|
10
15
|
"extensions",
|
|
11
16
|
"index.js",
|
|
12
17
|
"lib",
|
|
18
|
+
"mcp",
|
|
19
|
+
"mcp-server.js",
|
|
20
|
+
"pi-research.js",
|
|
21
|
+
"unblind-mcp.js",
|
|
13
22
|
"README.md",
|
|
14
23
|
"THIRD_PARTY_NOTICES.md",
|
|
15
24
|
"package.json"
|
|
@@ -27,7 +36,8 @@
|
|
|
27
36
|
],
|
|
28
37
|
"scripts": {
|
|
29
38
|
"test": "node --test",
|
|
30
|
-
"eval": "node --test test/eval-runner.test.js"
|
|
39
|
+
"eval": "node --test test/eval-runner.test.js",
|
|
40
|
+
"mcp": "node ./mcp/server.js"
|
|
31
41
|
},
|
|
32
42
|
"dependencies": {
|
|
33
43
|
"@mariozechner/pi-ai": "*",
|
package/pi-research.js
ADDED