pi-research 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,90 @@
1
+ # pi-research
2
+
3
+ [![npm version](https://img.shields.io/npm/v/pi-research?color=blue)](https://www.npmjs.com/package/pi-research)
4
+ [![tests](https://img.shields.io/badge/tests-33%2F33-brightgreen)](https://github.com/endgegnerbert-tech/pi-research)
5
+ [![Pi package](https://img.shields.io/badge/pi-package-blueviolet)](https://pi.ai)
6
+
7
+ `pi-research` is a Pi extension for web research.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ pi install npm:pi-research
13
+ ```
14
+
15
+ GitHub repository: https://github.com/endgegnerbert-tech/pi-research
16
+
17
+ You can also fork the repository and install it from a local path while developing.
18
+
19
+ ## What it is for
20
+
21
+ Use `pi-research` when you want the agent to search and synthesize the web.
22
+ It is designed for research, not browser navigation.
23
+ Use `browser_action` for clicks, screenshots, DOM inspection, or page interaction.
24
+
25
+ ## Modes
26
+
27
+ | Mode | Best for |
28
+ | --- | --- |
29
+ | `fast` | quick answers with a quality floor |
30
+ | `deep` | broader retrieval with follow-up rounds |
31
+ | `code` | official docs, READMEs, repos, and code snippets |
32
+ | `academic` | scholarly sources like arXiv, Semantic Scholar, and DOI papers |
33
+
34
+ ## Key features
35
+
36
+ - query-isolated caching and sufficiency gating
37
+ - source scoring with visible `sourceType`, `authoritative`, `score`, and `freshness`
38
+ - `openSubQuestions`, `missingAspects`, `conflictSummary`
39
+ - inline citations in the final answer
40
+ - `minYear`, `maxYear`, and `preferRecent` support
41
+ - `files[]` for local source input
42
+ - `codeBlocks[]` extraction for code-focused answers
43
+
44
+ ## Output
45
+
46
+ The tool returns structured data including:
47
+
48
+ - `answer`
49
+ - `bullets`
50
+ - `sources`
51
+ - `citations`
52
+ - `codeBlocks`
53
+ - `confidence`
54
+ - `confidenceScore`
55
+ - `sufficient`
56
+ - `authoritativeSourcesFound`
57
+ - `followupRounds`
58
+ - `followupQuery`
59
+ - `openSubQuestions`
60
+ - `missingAspects`
61
+ - `conflictSummary`
62
+ - `conflictingSourcePairs`
63
+ - `unverifiedClaims`
64
+
65
+ ## Examples
66
+
67
+ ```text
68
+ What are the trade-offs between B-trees and LSM-trees?
69
+ ```
70
+
71
+ ```text
72
+ Show me the best way to add health checks to Docker Compose.
73
+ ```
74
+
75
+ ```text
76
+ Compare React Server Components with traditional SSR.
77
+ ```
78
+
79
+ ## Package manifest
80
+
81
+ This repo is a Pi package. The extension entrypoint is:
82
+
83
+ - `extensions/pi-research.ts`
84
+
85
+ ## Release notes
86
+
87
+ - Package name: `pi-research`
88
+ - Install command: `pi install npm:pi-research`
89
+ - GitHub: `https://github.com/endgegnerbert-tech/pi-research`
90
+ - Tool name: `pi-research`
@@ -0,0 +1 @@
1
// Pi package entrypoint: re-export the extension's default export from the package root.
export { default } from "../index.js";
package/index.js ADDED
@@ -0,0 +1,144 @@
1
+ import { Type } from "typebox";
2
+
3
+ import { compactResearchPayload, classifyQueryIntent, inferOfficialDocsSite } from "./lib/research.js";
4
+ import { clearResearchMemory, hashResearchQuery, setResearchMemory, shouldSkipResearch } from "./lib/research-memory.js";
5
+ import { runWebResearch } from "./lib/web-research.js";
6
+
// Per-query mutable state (call count, last hash, sufficiency, recovery flag),
// keyed by normalized query hash; cleared on every agent start.
const RESEARCH_STATE = new Map();
8
+
9
function buildWebResearchGuidance() {
  // Static guidance appended to the system prompt, telling the agent when to
  // prefer each research mode.
  const guidance =
    "Use pi-research for web search and research. Prefer fast mode for simple questions, deep mode for comparisons or ambiguous cases, and code/academic modes when source type matters.";
  return guidance;
}
12
+
13
function defaultMode(query) {
  // Map the classified intent of the query onto a research mode;
  // anything unrecognized falls back to the cheap "fast" mode.
  switch (classifyQueryIntent(query)) {
    case "comparison":
    case "comparative":
      return "deep";
    case "academic":
      return "academic";
    default:
      return "fast";
  }
}
19
+
20
function buildFastRecoveryQuery(query) {
  // Rewrite a query for a docs-focused retry: scope it to the inferred
  // official docs site when one is known, otherwise append "official docs".
  const docsSite = inferOfficialDocsSite(query || "");
  if (docsSite) {
    return `site:${docsSite} ${query}`;
  }
  return `${query} official docs`;
}
24
+
25
function toolResponse(payload, text) {
  // Wrap a payload in the tool-result envelope: the text is what the model
  // sees, the payload rides along as structured details.
  const content = [{ type: "text", text }];
  return { content, details: payload };
}
28
+
29
function compactWebResearchToolResult(event) {
  // Render a successful pi-research web_research result as compact text
  // (answer + citations + status). Returns null for any event that is not
  // a successful web_research result, leaving it untouched.
  if (event.isError) return null;
  if (event.toolName !== "pi-research") return null;
  const payload = event.details;
  if (!payload?.ok) return null;
  if (payload.action !== "web_research") return null;

  const compact = compactResearchPayload(payload);
  let citationLines = [];
  if (Array.isArray(compact.citations)) {
    citationLines = compact.citations.map(
      (citation, index) => `${index + 1}. ${citation.text} [source ${citation.sourceIndex}]`,
    );
  }

  const lines = [payload.contentText, "", "## Citations", ""];
  if (citationLines.length) {
    lines.push(...citationLines);
  } else {
    lines.push("None");
  }
  lines.push(
    "",
    "## Status",
    "",
    `sufficient: ${compact.sufficient}`,
    `authoritativeSourcesFound: ${compact.authoritativeSourcesFound}`,
  );
  if (compact.conflictSummary) {
    lines.push(`conflictSummary: ${compact.conflictSummary}`);
  }

  return { content: [{ type: "text", text: lines.join("\n").trim() }] };
}
54
+
55
function getState(queryHash) {
  // Fetch the mutable per-query state record, lazily creating a zeroed one
  // on first access for this hash.
  let state = RESEARCH_STATE.get(queryHash);
  if (!state) {
    state = { count: 0, lastHash: null, lastSufficient: false, fastRecoveryAllowed: false };
    RESEARCH_STATE.set(queryHash, state);
  }
  return state;
}
59
+
60
// Pi extension wiring for the pi-research tool: session lifecycle, call
// gating, result post-processing, and the tool registration itself.
export default function webResearchExtension(pi) {
  // Fresh session: drop all cached research state and inject usage guidance
  // into the system prompt.
  pi.on("before_agent_start", async (event) => {
    RESEARCH_STATE.clear();
    clearResearchMemory();
    return { systemPrompt: `${event.systemPrompt}\n\n${buildWebResearchGuidance()}` };
  });

  // Pre-flight gating for pi-research calls: default the mode, block
  // redundant repeats, and rewrite the query on a fast-mode recovery attempt.
  pi.on("tool_call", async (event) => {
    if (event.toolName !== "pi-research") return;
    if (!event.input.mode) event.input.mode = defaultMode(event.input.query || "");

    const queryHash = hashResearchQuery(event.input.query || "");
    const state = getState(queryHash);
    const mode = event.input.mode;
    // RESEARCH_ISOLATE=1 forces isolation for every call in the session.
    const isolate = Boolean(event.input.isolate || process.env.RESEARCH_ISOLATE === "1");
    const force = Boolean(event.input.force);

    // Block an exact repeat of a query whose previous run already produced a
    // sufficient answer (unless forced or isolated).
    if (shouldSkipResearch({ queryHash, lastHash: state.lastHash, lastWasSufficient: state.lastSufficient, force, isolate })) {
      return { block: true, reason: "Recent pi-research result was already sufficient for this exact query." };
    }

    // Second fast-mode attempt after an unproductive first round: bias the
    // query toward official docs. The flag is one-shot and consumed here.
    if (mode === "fast" && state.count === 1 && state.fastRecoveryAllowed && !force && !isolate) {
      event.input.query = buildFastRecoveryQuery(event.input.query || "");
      state.fastRecoveryAllowed = false;
    }

    state.count += 1;
    state.lastHash = queryHash;
  });

  // Post-processing: record per-query sufficiency, decide whether a fast-mode
  // recovery is worthwhile, and compact the result text shown to the model.
  pi.on("tool_result", async (event) => {
    if (event.toolName === "pi-research" && !event.isError && event.details?.ok) {
      const queryHash = hashResearchQuery(event.input?.query || "");
      const state = getState(queryHash);
      state.lastHash = queryHash;
      state.lastSufficient = Boolean(event.details.sufficient);
      const query = event.input?.query || "";
      // Allow one recovery retry only when the result was insufficient, found
      // no authoritative sources, and the intent suits a docs-focused rewrite.
      state.fastRecoveryAllowed = !event.details.sufficient
        && !event.details.authoritativeSourcesFound
        && ["best_practice", "temporal", "definition"].includes(classifyQueryIntent(query || ""));
      setResearchMemory(`last:${queryHash}`, event.details);
    }
    // Returning undefined leaves the original tool result untouched.
    return compactWebResearchToolResult(event) || undefined;
  });

  // The pi-research tool registration: schema plus execution.
  pi.registerTool({
    name: "pi-research",
    label: "Pi Research",
    description: "Search and research the web.",
    promptSnippet: "Use this for web research when needed.",
    promptGuidelines: ["Use pi-research for search, source ranking, and summarization."],
    parameters: Type.Object({
      query: Type.String({ description: "Research question to answer from the web" }),
      mode: Type.Optional(Type.Union([Type.Literal("fast"), Type.Literal("deep"), Type.Literal("code"), Type.Literal("academic")], { description: "Research mode", default: "fast" })),
      force: Type.Optional(Type.Boolean({ description: "Bypass sufficiency gating and cached answers for this call" })),
      isolate: Type.Optional(Type.Boolean({ description: "Run this query in isolation without session/query cache reuse" })),
      options: Type.Optional(Type.Object({
        allowedSources: Type.Optional(Type.Array(Type.String())),
        maxTurns: Type.Optional(Type.Number()),
        maxSites: Type.Optional(Type.Number()),
        requireAuthoritative: Type.Optional(Type.Boolean()),
        minYear: Type.Optional(Type.Number()),
        maxYear: Type.Optional(Type.Number()),
        preferRecent: Type.Optional(Type.Boolean()),
        files: Type.Optional(Type.Array(Type.String())),
        format: Type.Optional(Type.Union([Type.Literal("markdown"), Type.Literal("json"), Type.Literal("table"), Type.Literal("latex")], { default: "markdown" })),
        deepResearchConfig: Type.Optional(Type.Object({
          depth: Type.Optional(Type.Union([Type.Literal(1), Type.Literal(2), Type.Literal(3)])),
          breadth: Type.Optional(Type.Union([Type.Literal(2), Type.Literal(3), Type.Literal(4)])),
          concurrency: Type.Optional(Type.Union([Type.Literal(1), Type.Literal(2), Type.Literal(3), Type.Literal(4)])),
        })),
      })),
    }),
    async execute(_toolCallId, params, signal, onUpdate, ctx) {
      // Mode may still be unset if the tool_call hook did not run; recompute.
      const mode = params.mode ?? defaultMode(params.query || "");
      const payload = await runWebResearch(params.query || "", ctx, signal, onUpdate, {
        mode,
        force: params.force,
        isolate: params.isolate,
        ...(params.options || {}),
      });
      // On failure, surface the whole payload as pretty-printed JSON.
      return toolResponse(payload, payload.ok ? payload.contentText : JSON.stringify(payload, null, 2));
    },
  });
}
package/lib/planner.js ADDED
@@ -0,0 +1,36 @@
1
+ import { buildDeepQueries, buildFastQueries, classifyQueryIntent, inferOfficialDocsSite } from "./research.js";
2
+
3
// Build a retrieval plan — subqueries plus the source types we expect to
// satisfy them — for a given query and research mode.
export function planResearch(query, mode = "fast") {
  const intent = classifyQueryIntent(query);
  const docsSite = inferOfficialDocsSite(query || "");

  // Academic work: go broad and expect papers first.
  if (mode === "academic" || intent === "academic") {
    return {
      subqueries: buildDeepQueries(query, 4),
      expectedSources: ["paper", "official_doc"],
    };
  }

  // Code research: bias toward repositories and official documentation,
  // deduplicate, and cap at four subqueries.
  if (mode === "code") {
    const base = String(query || "").trim();
    const candidates = [`${base} site:github.com`, `${base} official docs`];
    candidates.push(docsSite ? `${base} site:${docsSite}` : `${base} README`);
    if (intent === "comparison") {
      candidates.push(`${base} benchmark comparison`);
    }
    candidates.push(...buildFastQueries(query, 2));

    return {
      subqueries: [...new Set(candidates)].filter(Boolean).slice(0, 4),
      expectedSources: ["github_readme", "github_repo", "official_doc"],
    };
  }

  // Default (fast/deep): a couple of quick queries against official docs,
  // widening to "other" sources for comparisons.
  const expectedSources = intent === "comparison" ? ["official_doc", "other"] : ["official_doc"];
  return {
    subqueries: buildFastQueries(query, 2),
    expectedSources,
  };
}
@@ -0,0 +1,87 @@
1
+ import { createHash } from "node:crypto";
2
+ import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
3
+ import { homedir } from "node:os";
4
+ import { dirname, join } from "node:path";
5
+
6
// Session-scoped in-memory store; cleared on each agent start.
const memory = new Map();
// On-disk cache file under the user's Pi agent data directory.
const CACHE_PATH = join(homedir(), ".pi", "agent", "lazy-modules", "web-research", ".cache", "research-cache.json");
8
+
9
function ensureCacheDir() {
  // Create the cache directory (and any missing parents) before writing.
  const cacheDir = dirname(CACHE_PATH);
  mkdirSync(cacheDir, { recursive: true });
}
12
+
13
function readPersistentCache() {
  // Best-effort load of the on-disk cache; any read or parse failure
  // (missing file, corrupt JSON) yields an empty cache object.
  let parsed;
  try {
    parsed = JSON.parse(readFileSync(CACHE_PATH, "utf8"));
  } catch {
    parsed = {};
  }
  return parsed;
}
21
+
22
function writePersistentCache(cache) {
  // Serialize the whole cache to disk, creating the cache directory first.
  ensureCacheDir();
  const serialized = JSON.stringify(cache, null, 2);
  writeFileSync(CACHE_PATH, serialized);
}
26
+
27
// Canonicalize a query for hashing/deduplication: lowercase, strip common
// punctuation, and collapse runs of whitespace to single spaces.
export function normalizeResearchQuery(query = "") {
  const lowered = String(query).toLowerCase().trim();
  const depunctuated = lowered.replace(/[?!.,:;]+/g, " ");
  return depunctuated.replace(/\s+/g, " ").trim();
}
35
+
36
// Stable cache key for a query: SHA-1 hex digest of its normalized form,
// so trivially-different phrasings hash identically.
export function hashResearchQuery(query = "") {
  const normalized = normalizeResearchQuery(query);
  return createHash("sha1").update(normalized).digest("hex");
}
39
+
40
// Decide whether a research call can be skipped: only an exact repeat of the
// previous query whose last answer was sufficient, and never when the caller
// forces or isolates the run.
export function shouldSkipResearch({ queryHash, lastHash, lastWasSufficient, force = false, isolate = false }) {
  if (force) return false;
  if (isolate) return false;
  if (!queryHash || !lastHash) return false;
  return queryHash === lastHash && Boolean(lastWasSufficient);
}
44
+
45
/**
 * Look up a session-memory entry by key.
 *
 * Uses `??` instead of `||` so that a legitimately-falsy stored value
 * (0, "", false) is returned rather than being collapsed to null; only a
 * missing (or explicitly undefined/null) entry yields null.
 * @param {string} key - memory key (e.g. `last:<queryHash>`)
 * @returns {*} the stored value, or null when absent
 */
export function getResearchMemory(key) {
  return memory.get(key) ?? null;
}
48
+
49
// Store a value in session memory and return it so callers can chain.
export function setResearchMemory(key, value) {
  memory.set(key, value);
  return value;
}
53
+
54
// Wipe all session memory; called on each agent start.
export function clearResearchMemory() {
  memory.clear();
}
57
+
58
/**
 * Cache a research result under `key` for `ttlMs` milliseconds, both in
 * memory and (best-effort) in the on-disk cache.
 *
 * The read path (`readPersistentCache`) already tolerates a missing or
 * corrupt cache file, so a failed disk write (read-only filesystem,
 * permissions) degrades to memory-only caching instead of throwing out of
 * the research tool.
 * @param {string} key - cache key (typically a query hash)
 * @param {*} value - payload to cache
 * @param {number} ttlMs - time-to-live in milliseconds
 * @returns {*} the value, for chaining
 */
export function writeCachedResult(key, value, ttlMs) {
  const payload = {
    expiresAt: Date.now() + ttlMs,
    value,
  };
  // The in-memory copy is the primary cache and always succeeds.
  memory.set(`persistent:${key}`, payload);
  try {
    const cache = readPersistentCache();
    cache[key] = payload;
    writePersistentCache(cache);
  } catch {
    // Best-effort persistence: ignore disk failures; memory still holds it.
  }
  return value;
}
69
+
70
// Read a cached research result by key, honoring TTL expiry.
// Returns the cached value, or null when absent or expired.
export function readCachedResult(key) {
  // Fast path: in-memory copy of the persisted entry.
  const inMemory = memory.get(`persistent:${key}`);
  if (inMemory) {
    if (inMemory.expiresAt > Date.now()) return inMemory.value;
    // Expired in memory: drop it and fall through to the disk cache.
    memory.delete(`persistent:${key}`);
  }

  // Slow path: consult the on-disk cache.
  const cache = readPersistentCache();
  const entry = cache[key];
  if (!entry) return null;
  if (entry.expiresAt <= Date.now()) {
    // Expired on disk: prune the entry and rewrite the cache file.
    delete cache[key];
    writePersistentCache(cache);
    return null;
  }
  // Warm the in-memory cache with the still-valid disk entry.
  memory.set(`persistent:${key}`, entry);
  return entry.value;
}
@@ -0,0 +1,72 @@
1
+ {
2
+ "fast": {
3
+ "mode": "fast",
4
+ "maxTurns": 1,
5
+ "maxQueries": 2,
6
+ "resultsPerQuery": 4,
7
+ "maxPages": 3,
8
+ "pageTimeoutMs": 6000,
9
+ "pageTextLimit": 4000,
10
+ "minPageText": 300,
11
+ "useJinaFallback": true,
12
+ "maxChunksPerPage": 3,
13
+ "searchProvider": "ddg_html",
14
+ "allowedSourceTypes": ["official_doc", "github_readme", "github_repo", "paper", "blog", "forum", "other", "file"],
15
+ "minAuthoritativeSources": 1,
16
+ "minSources": 3,
17
+ "cacheTtlMs": 86400000
18
+ },
19
+ "deep": {
20
+ "mode": "deep",
21
+ "maxTurns": 2,
22
+ "maxQueries": 10,
23
+ "resultsPerQuery": 5,
24
+ "maxPages": 8,
25
+ "pageTimeoutMs": 10000,
26
+ "pageTextLimit": 8000,
27
+ "minPageText": 300,
28
+ "useJinaFallback": true,
29
+ "maxChunksPerPage": 3,
30
+ "searchProvider": "ddg_html",
31
+ "allowedSourceTypes": ["official_doc", "github_readme", "github_repo", "paper", "blog", "forum", "other", "file"],
32
+ "minAuthoritativeSources": 1,
33
+ "minSources": 4,
34
+ "cacheTtlMs": 86400000
35
+ },
36
+ "code": {
37
+ "mode": "code",
38
+ "maxTurns": 2,
39
+ "maxQueries": 6,
40
+ "resultsPerQuery": 5,
41
+ "maxPages": 6,
42
+ "pageTimeoutMs": 10000,
43
+ "pageTextLimit": 8000,
44
+ "minPageText": 300,
45
+ "useJinaFallback": true,
46
+ "maxChunksPerPage": 3,
47
+ "searchProvider": "ddg_html",
48
+ "allowedSourceTypes": ["official_doc", "github_readme", "github_repo", "other", "file"],
49
+ "minAuthoritativeSources": 1,
50
+ "minSources": 3,
51
+ "cacheTtlMs": 172800000
52
+ },
53
+ "academic": {
54
+ "mode": "academic",
55
+ "maxTurns": 2,
56
+ "maxQueries": 6,
57
+ "resultsPerQuery": 5,
58
+ "maxPages": 6,
59
+ "pageTimeoutMs": 10000,
60
+ "pageTextLimit": 8000,
61
+ "minPageText": 300,
62
+ "useJinaFallback": true,
63
+ "maxChunksPerPage": 3,
64
+ "searchProvider": "academic",
65
+ "allowedSourceTypes": ["paper", "official_doc", "github_readme", "github_repo", "other", "file"],
66
+ "minAuthoritativeSources": 1,
67
+ "minSources": 3,
68
+ "preferRecent": true,
69
+ "minYear": 2018,
70
+ "cacheTtlMs": 604800000
71
+ }
72
+ }