pi-research 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -2
- package/lib/domains/changelog.js +10 -0
- package/lib/domains/forums.js +9 -0
- package/lib/domains/github.js +9 -0
- package/lib/domains/index.js +46 -0
- package/lib/domains/package-registry.js +11 -0
- package/lib/domains/papers.js +11 -0
- package/lib/domains/security.js +11 -0
- package/lib/domains/specs.js +11 -0
- package/lib/domains/template.js +26 -0
- package/lib/domains/vendor-status.js +10 -0
- package/lib/domains/web.js +7 -0
- package/lib/eval/case-loader.js +13 -0
- package/lib/eval/runner.js +8 -0
- package/lib/research-evidence.js +21 -0
- package/lib/research-intent.js +20 -0
- package/lib/research-output.js +7 -0
- package/lib/research.js +44 -5
- package/lib/types.js +2 -0
- package/lib/web-research.js +26 -12
- package/package.json +6 -4
package/README.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# pi-research
|
|
2
2
|
|
|
3
3
|
[](https://www.npmjs.com/package/pi-research)
|
|
4
|
-
[](https://github.com/endgegnerbert-tech/pi-research)
|
|
5
5
|
[](https://pi.ai)
|
|
6
6
|
|
|
7
7
|
`pi-research` is a Pi extension for fast, local-first web research inside the agent.
|
|
@@ -162,13 +162,53 @@ The tool returns structured data including:
|
|
|
162
162
|
- **fact checking**: unsupported answer sentences are marked as unverified
|
|
163
163
|
- **local source input**: files can be added directly to the research context
|
|
164
164
|
|
|
165
|
-
##
|
|
165
|
+
## Limits
|
|
166
166
|
|
|
167
167
|
- it still depends on live web access for web research
|
|
168
168
|
- it does not browse pages like a human user
|
|
169
169
|
- it is not fully offline unless you only use local files
|
|
170
170
|
- it is not a browser interaction tool
|
|
171
171
|
|
|
172
|
+
## Domain packs
|
|
173
|
+
|
|
174
|
+
- `web`
|
|
175
|
+
- `github`
|
|
176
|
+
- `security`
|
|
177
|
+
- `papers`
|
|
178
|
+
- `specs`
|
|
179
|
+
- `changelog`
|
|
180
|
+
- `forums`
|
|
181
|
+
- `package-registry`
|
|
182
|
+
- `vendor-status`
|
|
183
|
+
|
|
184
|
+
## Community packs
|
|
185
|
+
|
|
186
|
+
You can add your own domain pack by copying `lib/domains/template.js`, adapting the `run()` function, and registering it in `lib/domains/index.js`.
|
|
187
|
+
|
|
188
|
+
Minimal starter example:
|
|
189
|
+
|
|
190
|
+
```js
|
|
191
|
+
export default {
|
|
192
|
+
name: "boxing-training",
|
|
193
|
+
sourceHints: ["web"],
|
|
194
|
+
async run(question) {
|
|
195
|
+
return {
|
|
196
|
+
claims: [
|
|
197
|
+
{
|
|
198
|
+
text: `Starter pack example for ${question}`,
|
|
199
|
+
evidence: [{ type: "web", source: "https://example.com", snippet: "Example" }],
|
|
200
|
+
confidence: "medium",
|
|
201
|
+
},
|
|
202
|
+
],
|
|
203
|
+
};
|
|
204
|
+
},
|
|
205
|
+
};
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## Eval
|
|
209
|
+
|
|
210
|
+
Run `npm run eval` to execute the eval harness.
|
|
211
|
+
|
|
172
212
|
## Package info
|
|
173
213
|
|
|
174
214
|
- Package name: `pi-research`
|
|
@@ -181,3 +221,4 @@ The tool returns structured data including:
|
|
|
181
221
|
- Pi install: `pi install npm:pi-research`
|
|
182
222
|
- npm install: `npm install pi-research`
|
|
183
223
|
- GitHub: `https://github.com/endgegnerbert-tech/pi-research`
|
|
224
|
+
- Community packs: copy the template pack and register it in `lib/domains/index.js`
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Domain pack for changelog / release-note questions.
 *
 * The pack is plain configuration: resolveDomainConfig() in lib/domains/index.js
 * copies `allowedSources`, `queryHints` and `requireAuthoritative` from it, and
 * buildQueries() appends each `queryHints` entry to the user query.
 * `run()` performs no research; it only reports the pack name.
 *
 * NOTE(review): "docs." and "release notes" in `allowedSources` are not host
 * names — how entries are matched is not visible here; confirm they behave as
 * intended. `sourceHints` is not read by any code shown in this diff.
 */
export default {
|
|
2
|
+
name: "changelog",
|
|
3
|
+
sourceHints: ["changelog", "release notes", "releases"],
|
|
4
|
+
allowedSources: ["github.com", "docs.", "release notes"],
|
|
5
|
+
queryHints: ["release notes", "changelog", "site:github.com/releases"],
|
|
6
|
+
requireAuthoritative: true,
|
|
7
|
+
async run() {
|
|
8
|
+
return { name: "changelog" };
|
|
9
|
+
},
|
|
10
|
+
};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
 * Domain pack for community-forum questions (Stack Overflow, Discourse, Reddit).
 *
 * Plain configuration consumed by resolveDomainConfig(): `allowedSources` and
 * `queryHints` are copied into the research config, and buildQueries() appends
 * each query hint to the user query. The pack does not set
 * `requireAuthoritative`, so resolveDomainConfig() reports it as false.
 * `run()` performs no research; it only reports the pack name.
 */
export default {
|
|
2
|
+
name: "forums",
|
|
3
|
+
sourceHints: ["stackoverflow", "discourse", "reddit"],
|
|
4
|
+
allowedSources: ["stackoverflow.com", "discourse", "reddit.com"],
|
|
5
|
+
queryHints: ["site:stackoverflow.com", "discourse", "site:reddit.com"],
|
|
6
|
+
async run() {
|
|
7
|
+
return { name: "forums" };
|
|
8
|
+
},
|
|
9
|
+
};
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import web from "./web.js";
|
|
2
|
+
import github from "./github.js";
|
|
3
|
+
import forums from "./forums.js";
|
|
4
|
+
import security from "./security.js";
|
|
5
|
+
import packageRegistry from "./package-registry.js";
|
|
6
|
+
import changelog from "./changelog.js";
|
|
7
|
+
import papers from "./papers.js";
|
|
8
|
+
import specs from "./specs.js";
|
|
9
|
+
import vendorStatus from "./vendor-status.js";
|
|
10
|
+
|
|
11
|
+
/**
 * Registry of built-in domain packs, keyed by the domain name that
 * classifyQuestionDomain() returns and that callers pass to getDomainPack().
 * Hyphenated names need quoted keys because they are not valid identifiers.
 *
 * NOTE(review): this is a plain object literal, so bracket lookups also see
 * inherited Object.prototype members; lookups should check own keys.
 */
const PACKS = {
|
|
12
|
+
web,
|
|
13
|
+
github,
|
|
14
|
+
forums,
|
|
15
|
+
security,
|
|
16
|
+
"package-registry": packageRegistry,
|
|
17
|
+
changelog,
|
|
18
|
+
papers,
|
|
19
|
+
specs,
|
|
20
|
+
"vendor-status": vendorStatus,
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
// Public listing order of the built-in packs. This list is maintained by hand
// and its order differs from the key order of PACKS; a pack added to PACKS
// must also be added here to be listed.
const DOMAIN_NAMES = ["web", "github", "security", "papers", "specs", "changelog", "forums", "package-registry", "vendor-status"];
|
|
24
|
+
|
|
25
|
+
/**
 * List the names of the built-in domain packs.
 *
 * @returns {string[]} A fresh copy of DOMAIN_NAMES, so callers may mutate the
 *   result without affecting later calls.
 */
export function listDomainPacks() {
|
|
26
|
+
return [...DOMAIN_NAMES];
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/**
 * Look up a registered domain pack by name.
 *
 * Only own keys of the PACKS registry count as registered names, so inherited
 * Object.prototype members such as "constructor" or "toString" are never
 * returned as if they were packs.
 *
 * @param {string} [name="web"] - Registry key of the pack.
 * @returns {object} The matching pack, or the `web` pack when the name is not
 *   registered.
 */
export function getDomainPack(name = "web") {
  const registered = Object.hasOwn(PACKS, name) ? PACKS[name] : undefined;
  return registered || web;
}
|
|
32
|
+
|
|
33
|
+
import { classifyQuestionDomain } from "../research-intent.js";
|
|
34
|
+
|
|
35
|
+
/**
 * Resolve the research settings contributed by a domain pack.
 *
 * The argument may be either a registered domain name or a free-text question.
 * A value is treated as a domain name only when it is an own key of PACKS;
 * anything else is classified with classifyQuestionDomain(). Checking own keys
 * keeps a question such as "constructor" or "toString" from being mistaken for
 * a registered domain through Object.prototype.
 *
 * @param {string} [questionOrDomain="web"] - Domain name or user question.
 * @returns {{domain: string, allowedSources: string[], allowedSourceTypes: string[],
 *   queryHints: string[], requireAuthoritative: boolean, format: string}}
 *   Settings copied from the pack, with empty arrays, `false` and "markdown"
 *   as defaults for fields the pack does not define.
 */
export function resolveDomainConfig(questionOrDomain = "web") {
  const isRegistered = (key) => Object.hasOwn(PACKS, key) && Boolean(PACKS[key]);

  const name = isRegistered(questionOrDomain)
    ? questionOrDomain
    : classifyQuestionDomain(questionOrDomain);
  // Fall back to the generic web pack when the classified name has no pack.
  const pack = isRegistered(name) ? PACKS[name] : PACKS.web;

  return {
    domain: name,
    allowedSources: pack.allowedSources || [],
    allowedSourceTypes: pack.allowedSourceTypes || [],
    queryHints: pack.queryHints || [],
    requireAuthoritative: Boolean(pack.requireAuthoritative),
    format: pack.format || "markdown",
  };
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Domain pack for package-registry questions (npm, PyPI, crates.io, Maven).
 *
 * Plain configuration consumed by resolveDomainConfig(): `allowedSources`,
 * `allowedSourceTypes`, `queryHints` and `requireAuthoritative` are copied
 * into the research config, and buildQueries() appends each query hint to the
 * user query. `run()` performs no research; it only reports the pack name.
 */
export default {
|
|
2
|
+
name: "package-registry",
|
|
3
|
+
sourceHints: ["npm", "pypi", "cargo", "maven"],
|
|
4
|
+
allowedSources: ["npmjs.com", "pypi.org", "crates.io", "mvnrepository.com"],
|
|
5
|
+
allowedSourceTypes: ["official_doc", "github_readme"],
|
|
6
|
+
queryHints: ["site:npmjs.com", "site:pypi.org", "site:crates.io", "site:mvnrepository.com"],
|
|
7
|
+
requireAuthoritative: true,
|
|
8
|
+
async run() {
|
|
9
|
+
return { name: "package-registry" };
|
|
10
|
+
},
|
|
11
|
+
};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Domain pack for academic-paper questions (arXiv, Semantic Scholar, DOI, PubMed).
 *
 * Plain configuration consumed by resolveDomainConfig(): `allowedSources`,
 * `allowedSourceTypes`, `queryHints` and `requireAuthoritative` are copied
 * into the research config, and buildQueries() appends each query hint to the
 * user query. `run()` performs no research; it only reports the pack name.
 */
export default {
|
|
2
|
+
name: "papers",
|
|
3
|
+
sourceHints: ["arxiv", "semanticscholar", "doi"],
|
|
4
|
+
allowedSources: ["arxiv.org", "semanticscholar.org", "doi.org", "pubmed.ncbi.nlm.nih.gov"],
|
|
5
|
+
allowedSourceTypes: ["paper"],
|
|
6
|
+
queryHints: ["site:arxiv.org", "site:semanticscholar.org", "site:doi.org"],
|
|
7
|
+
requireAuthoritative: true,
|
|
8
|
+
async run() {
|
|
9
|
+
return { name: "papers" };
|
|
10
|
+
},
|
|
11
|
+
};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Domain pack for security questions (CVEs, advisories, security bulletins).
 *
 * Plain configuration consumed by resolveDomainConfig(): `allowedSources`,
 * `allowedSourceTypes`, `queryHints` and `requireAuthoritative` are copied
 * into the research config, and buildQueries() appends each query hint to the
 * user query. `run()` performs no research; it only reports the pack name.
 */
export default {
|
|
2
|
+
name: "security",
|
|
3
|
+
sourceHints: ["cve", "advisory", "security bulletin"],
|
|
4
|
+
allowedSources: ["nvd.nist.gov", "cisa.gov", "mitre.org", "ubuntu.com", "redhat.com", "debian.org", "suse.com"],
|
|
5
|
+
allowedSourceTypes: ["official_doc", "paper"],
|
|
6
|
+
queryHints: ["nvd", "cisa", "mitre", "advisory", "cve"],
|
|
7
|
+
requireAuthoritative: true,
|
|
8
|
+
async run() {
|
|
9
|
+
return { name: "security" };
|
|
10
|
+
},
|
|
11
|
+
};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Domain pack for specification questions (RFCs, IETF documents, W3C standards).
 *
 * Plain configuration consumed by resolveDomainConfig(): `allowedSources`,
 * `allowedSourceTypes`, `queryHints` and `requireAuthoritative` are copied
 * into the research config, and buildQueries() appends each query hint to the
 * user query. `run()` performs no research; it only reports the pack name.
 */
export default {
|
|
2
|
+
name: "specs",
|
|
3
|
+
sourceHints: ["rfc", "spec", "standard"],
|
|
4
|
+
allowedSources: ["rfc-editor.org", "datatracker.ietf.org", "w3.org"],
|
|
5
|
+
allowedSourceTypes: ["official_doc"],
|
|
6
|
+
queryHints: ["site:rfc-editor.org", "site:datatracker.ietf.org", "RFC"],
|
|
7
|
+
requireAuthoritative: true,
|
|
8
|
+
async run() {
|
|
9
|
+
return { name: "specs" };
|
|
10
|
+
},
|
|
11
|
+
};
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * Starter domain pack meant to be copied when writing a community pack.
 *
 * Unlike the built-in packs shown in this diff, `run()` here returns a result
 * with `claims` (each carrying `text`, `evidence`, `confidence`), an
 * `evidenceSummary` and `sourceTypes`. All values are placeholders that point
 * at example.com; nothing is fetched.
 *
 * `run(question, options)`: `question` is interpolated into the claim text;
 * `options` is accepted but not used by this example.
 *
 * NOTE(review): this pack is not registered in lib/domains/index.js, so it is
 * never selected unless a copy is registered there.
 */
export default {
|
|
2
|
+
name: "template",
|
|
3
|
+
description: "Minimal domain pack example for pi-research",
|
|
4
|
+
sourceHints: ["web"],
|
|
5
|
+
queryHints: ["site:example.com"],
|
|
6
|
+
async run(question, options) {
|
|
7
|
+
return {
|
|
8
|
+
claims: [
|
|
9
|
+
{
|
|
10
|
+
text: `This is a minimal example for a domain pack: ${question}`,
|
|
11
|
+
evidence: [
|
|
12
|
+
{
|
|
13
|
+
type: "web",
|
|
14
|
+
source: "https://example.com",
|
|
15
|
+
snippet: "Minimal example",
|
|
16
|
+
},
|
|
17
|
+
],
|
|
18
|
+
confidence: "medium",
|
|
19
|
+
confidenceDescription: "Just an example",
|
|
20
|
+
},
|
|
21
|
+
],
|
|
22
|
+
evidenceSummary: "Starter example only.",
|
|
23
|
+
sourceTypes: ["other"],
|
|
24
|
+
};
|
|
25
|
+
},
|
|
26
|
+
};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Domain pack for vendor status / incident / outage questions.
 *
 * Plain configuration consumed by resolveDomainConfig(): `allowedSources`,
 * `queryHints` and `requireAuthoritative` are copied into the research config,
 * and buildQueries() appends each query hint to the user query.
 * `run()` performs no research; it only reports the pack name.
 *
 * NOTE(review): the bare "status" entry in `allowedSources` is not a host
 * name — how entries are matched is not visible here; confirm it is intended.
 */
export default {
|
|
2
|
+
name: "vendor-status",
|
|
3
|
+
sourceHints: ["status", "incident", "outage"],
|
|
4
|
+
allowedSources: ["status", "statuspage.io", "status.github.com"],
|
|
5
|
+
queryHints: ["status page", "incident", "outage"],
|
|
6
|
+
requireAuthoritative: true,
|
|
7
|
+
async run() {
|
|
8
|
+
return { name: "vendor-status" };
|
|
9
|
+
},
|
|
10
|
+
};
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { readdirSync, readFileSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
|
|
4
|
+
/**
 * Load the JSON eval cases for one domain from `<cwd>/eval/cases/<domain>`.
 *
 * Loading is best-effort:
 * - a non-string domain, or a missing/unreadable directory, yields `[]`
 *   instead of throwing (path.join rejects non-string segments);
 * - a file that cannot be read or parsed is skipped on its own, so one
 *   malformed case no longer discards every other case in the directory;
 * - files are processed in sorted name order so results do not depend on the
 *   directory listing order of the file system.
 *
 * @param {string} domain - Sub-directory name under `eval/cases`.
 * @returns {unknown[]} Parsed contents of every readable `*.json` file.
 */
export function loadEvalCases(domain) {
  if (typeof domain !== "string") return [];

  const dir = join(process.cwd(), "eval", "cases", domain);

  let entries;
  try {
    entries = readdirSync(dir);
  } catch {
    // No case directory for this domain: treated as "no cases".
    return [];
  }

  const cases = [];
  for (const file of entries.filter((entry) => entry.endsWith(".json")).sort()) {
    try {
      cases.push(JSON.parse(readFileSync(join(dir, file), "utf8")));
    } catch {
      // Unreadable or malformed case file: skip it, keep the remaining cases.
    }
  }
  return cases;
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { loadEvalCases } from "./case-loader.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Summarize the eval cases stored for one domain.
 *
 * Loads the cases with loadEvalCases(domain) and counts how many declare an
 * `expectedDomain` equal to the requested domain. The function is declared
 * async but awaits nothing.
 *
 * NOTE(review): no classifier or research call is made here, so `passed`
 * reflects only how the case files are labelled, not how the package behaves.
 *
 * @param {{domain: string}} options - `domain` selects the case directory.
 * @returns {Promise<{total: number, passed: number, passRate: number}>}
 *   `passRate` is `passed / total`, or 0 when there are no cases.
 */
export async function runEvalSuite({ domain }) {
|
|
4
|
+
const cases = loadEvalCases(domain);
|
|
5
|
+
const passed = cases.filter((item) => item.expectedDomain === domain).length;
|
|
6
|
+
const total = cases.length;
|
|
7
|
+
return { total, passed, passRate: total ? passed / total : 0 };
|
|
8
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * Normalize one evidence record to the `{ type, source, snippet }` shape.
 *
 * A default parameter only covers `undefined`, so `null` is handled
 * explicitly instead of throwing on property access.
 *
 * @param {{type?: string, source?: string, snippet?: string} | null} [evidence]
 *   Raw evidence; any other properties are dropped.
 * @returns {{type: string, source: string, snippet: string}} Record in which
 *   a missing or falsy field becomes "web" (type) or "" (source, snippet).
 */
export function createEvidence(evidence = {}) {
  const { type, source, snippet } = evidence ?? {};
  return {
    type: type || "web",
    source: source || "",
    snippet: snippet || "",
  };
}
|
|
8
|
+
|
|
9
|
+
/**
 * Normalize one claim to the `{ text, confidence, evidence }` shape.
 *
 * `null` claims and `null` evidence entries are tolerated: a default
 * parameter only covers `undefined`, so both previously threw on property
 * access. Each evidence entry is passed to createEvidence() on its own,
 * without the index/array arguments that Array.prototype.map would add.
 *
 * @param {{text?: string, confidence?: string, evidence?: object[]} | null} [claim]
 *   Raw claim; any other properties are dropped.
 * @returns {{text: string, confidence: string, evidence: object[]}} Claim in
 *   which a missing text becomes "", a missing confidence becomes "low", and
 *   a non-array `evidence` becomes `[]`.
 */
export function createClaim(claim = {}) {
  const { text, confidence, evidence } = claim ?? {};
  return {
    text: text || "",
    confidence: confidence || "low",
    evidence: Array.isArray(evidence)
      ? evidence.map((item) => createEvidence(item ?? undefined))
      : [],
  };
}
|
|
16
|
+
|
|
17
|
+
/**
 * Turn a confidence label into a one-sentence explanation.
 *
 * @param {string} [confidence="low"] - Confidence label of the claim.
 * @param {number} [evidenceCount=0] - Number of evidence items for the claim.
 * @returns {string} "Multiple sources…" only for "high" with at least two
 *   evidence items; "Some supporting evidence…" for "medium"; otherwise the
 *   "Limited supporting evidence…" sentence — which is also what "high" with
 *   fewer than two evidence items yields.
 */
export function explainConfidence(confidence = "low", evidenceCount = 0) {
|
|
18
|
+
if (confidence === "high" && evidenceCount >= 2) return "Multiple sources support this claim.";
|
|
19
|
+
if (confidence === "medium") return "Some supporting evidence was found.";
|
|
20
|
+
return "Limited supporting evidence was found.";
|
|
21
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * Coerce any value to a lower-case string; falsy values become "".
 *
 * @param {unknown} value - Value to normalize.
 * @returns {string} Lower-cased string form.
 */
function text(value) {
  return String(value || "").toLowerCase();
}

// Ordered keyword rules: the first matching rule wins, so the order encodes
// precedence (e.g. "github status" is vendor-status, not github).
//
// Every keyword must start on a word boundary, and short keywords must also
// not be followed by a letter. Plain substring matching misrouted ordinary
// questions: "inspect" contains "spec", "tissue" contains "issue",
// "newspaper" contains "paper". Common compounds that should still match
// (cybersecurity, monorepo, whitepaper, subreddit, pnpm) are listed explicitly.
const DOMAIN_RULES = [
  ["security", /\b(?:cves?(?![a-z])|advisor(?:y|ies)|(?:cyber)?security|vulnerabilit(?:y|ies)|exploit)/],
  ["vendor-status", /\b(?:status(?:es)?|outages?|incidents?)(?![a-z])/],
  ["changelog", /\b(?:changelog|release|version history)/],
  ["github", /\b(?:github|issues?(?![a-z])|pull requests?|(?:mono)?repos?(?![a-z])|repositor(?:y|ies)(?![a-z])|discussion)/],
  // The lookbehind keeps the package's own name, "pi-research", from counting as "research".
  ["papers", /\b(?:arxiv|(?:white)?papers?(?![a-z])|stud(?:y|ies)(?![a-z])|(?<!pi-)research|scientific|scholar)/],
  ["specs", /\b(?:rfcs?(?![a-z])|specs?(?![a-z])|specification|standard)/],
  ["forums", /\b(?:stackoverflow|stack overflow|discourse|(?:sub)?reddits?(?![a-z])|forum)/],
  ["package-registry", /\b(?:p?npm(?![a-z])|pypi|cargo(?![a-z])|maven|package|librar(?:y|ies))/],
];

/**
 * Pick the domain pack name that best fits a free-text question.
 *
 * The question is lower-cased and tested against DOMAIN_RULES in order; the
 * first rule whose keywords occur as whole words (or word prefixes, for the
 * longer keywords) decides the domain.
 *
 * @param {unknown} question - User question; non-strings are coerced, and
 *   falsy values are treated as an empty question.
 * @returns {string} One of "security", "vendor-status", "changelog", "github",
 *   "papers", "specs", "forums", "package-registry", or "web" when no rule
 *   matches.
 */
export function classifyQuestionDomain(question) {
  const q = text(question);
  const rule = DOMAIN_RULES.find(([, pattern]) => pattern.test(q));
  return rule ? rule[0] : "web";
}
|
|
17
|
+
|
|
18
|
+
/**
 * Read the research mode from an options object.
 *
 * @param {object} [input={}] - Options that may carry a `mode` property.
 * @param {string} [fallback="fast"] - Mode used when `input` is not a truthy
 *   object or its `mode` is falsy.
 * @returns {string} `input.mode` as given (it is not validated against any
 *   list of known modes here), or `fallback`.
 */
export function normalizeResearchMode(input = {}, fallback = "fast") {
|
|
19
|
+
return input && typeof input === "object" && input.mode ? input.mode : fallback;
|
|
20
|
+
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
 * Read the output format from an options object.
 *
 * @param {object} [input={}] - Options that may carry a `format` property.
 * @param {string} [fallback="markdown"] - Format used when `input` is not a
 *   truthy object or its `format` is falsy.
 * @returns {string} `input.format` as given (it is not validated here), or
 *   `fallback`.
 */
export function resolveOutputFormat(input = {}, fallback = "markdown") {
|
|
2
|
+
return input && typeof input === "object" && input.format ? input.format : fallback;
|
|
3
|
+
}
|
|
4
|
+
|
|
5
|
+
/**
 * Decide whether authoritative sources are required.
 *
 * @param {object} [input={}] - Options that may carry `requireAuthoritative`.
 * @param {boolean} [fallback=false] - Value contributed by the caller's default.
 * @returns {boolean} True when either `input.requireAuthoritative` or
 *   `fallback` is truthy. Because the two are OR-ed, an explicit
 *   `requireAuthoritative: false` on `input` does not override a truthy
 *   `fallback`.
 */
export function shouldRequireAuthoritativeSources(input = {}, fallback = false) {
|
|
6
|
+
return Boolean(input && typeof input === "object" && input.requireAuthoritative) || Boolean(fallback);
|
|
7
|
+
}
|
package/lib/research.js
CHANGED
|
@@ -386,6 +386,25 @@ export function rankFetchedPages(pages, query, limit = pages.length, config = {}
|
|
|
386
386
|
return [...pages].sort((a, b) => scoreFetchedPage(b, query, config) - scoreFetchedPage(a, query, config)).slice(0, limit);
|
|
387
387
|
}
|
|
388
388
|
|
|
389
|
+
/**
 * Heuristically detect whether a set of claims contradicts itself.
 *
 * Each claim is given a single polarity. A claim is negative when it contains
 * a negative phrase or a negated positive keyword ("not supported", "no
 * longer available", "isn't compatible"); otherwise it is positive when it
 * contains a positive keyword. A conflict is reported only when at least one
 * positive claim and at least one negative claim are present.
 *
 * Assigning one polarity per claim matters because the positive keywords are
 * substrings of the negative phrases: "not supported" contains "supported",
 * so a lone negative claim used to be reported as conflicting with itself.
 *
 * @param {Array<string | {text?: string}>} [claims=[]] - Claim strings or
 *   claim objects with a `text` property; a non-array value is treated as no
 *   claims.
 * @returns {{detected: boolean, conflictSummary: string}} `conflictSummary`
 *   is "Claims conflict." when a conflict is detected, otherwise "".
 */
export function detectClaimConflicts(claims = []) {
  const positivePattern = /\b(supported|works|available|recommended|yes|stable|compatible)\b/;
  const negativePattern = /\b(not supported|unsupported|does not|no support|broken|incompatible|removed)\b/;
  const negatedPositivePattern = /(?:\bnot|\bno longer|\bnever|n't)\s+(?:supported|works?|working|available|recommended|stable|compatible)\b/;

  let hasPositive = false;
  let hasNegative = false;

  for (const claim of Array.isArray(claims) ? claims : []) {
    const raw = typeof claim === "string" ? claim : claim?.text;
    const claimText = String(raw ?? "").toLowerCase();

    if (negativePattern.test(claimText) || negatedPositivePattern.test(claimText)) {
      hasNegative = true;
    } else if (positivePattern.test(claimText)) {
      hasPositive = true;
    }
  }

  const detected = hasPositive && hasNegative;
  return {
    detected,
    conflictSummary: detected ? "Claims conflict." : "",
  };
}
|
|
398
|
+
|
|
399
|
+
/**
 * Report whether any claim in a payload is backed by evidence.
 *
 * The check looks only at whether some claim has a non-empty `evidence`
 * array; it does not inspect how authoritative that evidence is, even though
 * the reported gap is labelled "authoritative sources".
 *
 * `null` input is tolerated: a default parameter only covers `undefined`, so
 * it previously threw on property access.
 *
 * NOTE(review): a payload with no claims at all is reported as a gap. Callers
 * that do not supply claims will therefore always see the gap — confirm that
 * this is what evaluateSufficiency() intends.
 *
 * @param {{claims?: Array<{evidence?: unknown[]}>} | null} [input={}] -
 *   Payload whose `claims` are inspected; a non-array `claims` counts as none.
 * @returns {{detected: boolean, missingAspects: string[]}} `detected` is true
 *   when no claim carries evidence, in which case `missingAspects` is
 *   `["authoritative sources"]`; otherwise it is `[]`.
 */
export function detectCoverageGaps(input = {}) {
  const claims = Array.isArray(input?.claims) ? input.claims : [];
  const hasEvidence = claims.some(
    (claim) => Array.isArray(claim?.evidence) && claim.evidence.length > 0,
  );

  return {
    detected: !hasEvidence,
    missingAspects: hasEvidence ? [] : ["authoritative sources"],
  };
}
|
|
407
|
+
|
|
389
408
|
export function detectConflictSignals(pages) {
|
|
390
409
|
if (!Array.isArray(pages) || pages.length < 2) {
|
|
391
410
|
return { detected: false, reason: null, conflictSummary: "", conflictingSourcePairs: [] };
|
|
@@ -592,15 +611,17 @@ export function extractCodeBlocks(text) {
|
|
|
592
611
|
export function evaluateSufficiency(input, legacyPages, legacyConflictDetected = false) {
|
|
593
612
|
const payload = typeof input === "string"
|
|
594
613
|
? { query: input, sources: legacyPages || [], conflictDetected: legacyConflictDetected }
|
|
595
|
-
: { query: input?.query || "", sources: input?.sources || [], conflictDetected: Boolean(input?.conflictDetected), confidence: input?.confidence, minSources: input?.minSources };
|
|
614
|
+
: { query: input?.query || "", sources: input?.sources || [], claims: input?.claims || [], conflictDetected: Boolean(input?.conflictDetected), confidence: input?.confidence, minSources: input?.minSources };
|
|
596
615
|
|
|
597
616
|
const scoredSources = payload.sources.map((page) => scoreSourceEntry(page, payload.query || ""));
|
|
598
617
|
const authoritativeCount = scoredSources.filter((scored) => Boolean(scored.authoritative)).length;
|
|
599
618
|
const authoritativeSourcesFound = authoritativeCount > 0;
|
|
600
619
|
const conflict = detectConflictSignals(payload.sources);
|
|
601
|
-
const
|
|
620
|
+
const claimConflict = detectClaimConflicts(payload.claims);
|
|
621
|
+
const coverage = detectCoverageGaps(payload);
|
|
622
|
+
const conflictDetected = payload.conflictDetected || conflict.detected || claimConflict.detected;
|
|
602
623
|
const missingAspects = [];
|
|
603
|
-
if (!authoritativeSourcesFound) missingAspects.push("authoritative sources");
|
|
624
|
+
if (!authoritativeSourcesFound || coverage.detected) missingAspects.push("authoritative sources");
|
|
604
625
|
if (conflictDetected) missingAspects.push("conflict resolution");
|
|
605
626
|
if (!payload.sources.length) missingAspects.push("readable sources");
|
|
606
627
|
|
|
@@ -654,6 +675,16 @@ export function compactResearchPayload(payload) {
|
|
|
654
675
|
...(typeof source.local === "boolean" ? { local: source.local } : {}),
|
|
655
676
|
}))
|
|
656
677
|
: [],
|
|
678
|
+
claims: Array.isArray(payload.claims) ? payload.claims.slice(0, 8).map((claim) => ({
|
|
679
|
+
text: claim.text,
|
|
680
|
+
confidence: claim.confidence,
|
|
681
|
+
evidence: Array.isArray(claim.evidence) ? claim.evidence.slice(0, 5).map((evidence) => ({
|
|
682
|
+
type: evidence.type,
|
|
683
|
+
source: evidence.source,
|
|
684
|
+
snippet: evidence.snippet,
|
|
685
|
+
})) : [],
|
|
686
|
+
})) : [],
|
|
687
|
+
evidenceSummary: payload.evidenceSummary || "",
|
|
657
688
|
sourceTypes: Array.isArray(payload.sourceTypes) ? payload.sourceTypes.slice(0, 8) : [],
|
|
658
689
|
unverifiedClaims: Array.isArray(payload.unverifiedClaims) ? payload.unverifiedClaims.slice(0, 8) : [],
|
|
659
690
|
meta: payload.meta && typeof payload.meta === "object" ? payload.meta : undefined,
|
|
@@ -675,12 +706,20 @@ export function extractPageSnapshot(html, url) {
|
|
|
675
706
|
return { title, url, text: stripTags(body), codeBlocks: extractCodeBlocks(html) };
|
|
676
707
|
}
|
|
677
708
|
|
|
678
|
-
export function formatResearchResponse({ answer, bullets, sources, confidence }) {
|
|
709
|
+
export function formatResearchResponse({ answer, bullets, sources, confidence, format = "markdown" }) {
|
|
710
|
+
const list = Array.isArray(sources) ? sources : [];
|
|
711
|
+
if (format === "json") {
|
|
712
|
+
return JSON.stringify({ answer: String(answer || "").trim(), bullets: bullets || [], confidence: confidence || "", sources: list });
|
|
713
|
+
}
|
|
714
|
+
if (format === "table") {
|
|
715
|
+
const rows = list.map((source, index) => `| ${index + 1} | ${source.title} | ${source.url} |`).join("\n");
|
|
716
|
+
return ["| # | Title | URL |", "|---|---|---|", rows].filter(Boolean).join("\n").trim();
|
|
717
|
+
}
|
|
679
718
|
const parts = ["## Answer", "", String(answer || "").trim(), "", "## Key points"];
|
|
680
719
|
for (const bullet of bullets || []) parts.push(`- ${bullet}`);
|
|
681
720
|
if (confidence) parts.push("", "## Confidence", "", confidence);
|
|
682
721
|
parts.push("", "## Sources");
|
|
683
|
-
|
|
722
|
+
list.forEach((source, index) => {
|
|
684
723
|
const freshness = source.freshness ? ` (${source.freshness})` : "";
|
|
685
724
|
const meta = [];
|
|
686
725
|
if (source.sourceType) meta.push(source.sourceType);
|
package/lib/types.js
CHANGED
|
@@ -36,6 +36,8 @@ export function createResearchResult(result = {}) {
|
|
|
36
36
|
bullets: Array.isArray(result.bullets) ? result.bullets : [],
|
|
37
37
|
citations: Array.isArray(result.citations) ? result.citations : [],
|
|
38
38
|
sources: Array.isArray(result.sources) ? result.sources.map(createResearchSource) : [],
|
|
39
|
+
claims: Array.isArray(result.claims) ? result.claims : [],
|
|
40
|
+
evidenceSummary: result.evidenceSummary || "",
|
|
39
41
|
codeBlocks: Array.isArray(result.codeBlocks) ? result.codeBlocks : [],
|
|
40
42
|
sufficient: Boolean(result.sufficient),
|
|
41
43
|
missingAspects: Array.isArray(result.missingAspects) ? result.missingAspects : [],
|
package/lib/web-research.js
CHANGED
|
@@ -5,6 +5,8 @@ import { complete } from "@mariozechner/pi-ai";
|
|
|
5
5
|
|
|
6
6
|
import profiles from "./research-profiles.json" with { type: "json" };
|
|
7
7
|
import { createResearchResult } from "./types.js";
|
|
8
|
+
import { resolveDomainConfig } from "./domains/index.js";
|
|
9
|
+
import { classifyQuestionDomain } from "./research-intent.js";
|
|
8
10
|
import {
|
|
9
11
|
buildConfidenceSummary,
|
|
10
12
|
buildDeepQueries,
|
|
@@ -33,6 +35,7 @@ import {
|
|
|
33
35
|
scoreSourceEntry,
|
|
34
36
|
selectRelevantChunks,
|
|
35
37
|
} from "./research.js";
|
|
38
|
+
import { resolveOutputFormat, shouldRequireAuthoritativeSources } from "./research-output.js";
|
|
36
39
|
import { planResearch } from "./planner.js";
|
|
37
40
|
import {
|
|
38
41
|
clearResearchMemory,
|
|
@@ -79,15 +82,18 @@ export function resolveResearchConfig(input = "fast") {
|
|
|
79
82
|
const options = normalizeResearchOptions(input);
|
|
80
83
|
const base = profiles[options.mode] || profiles.fast;
|
|
81
84
|
const deep = options.deepResearchConfig || {};
|
|
85
|
+
const domainConfig = resolveDomainConfig(options.domain || "web");
|
|
82
86
|
|
|
83
87
|
return {
|
|
84
88
|
...base,
|
|
89
|
+
...domainConfig,
|
|
85
90
|
...options,
|
|
86
91
|
mode: base.mode,
|
|
87
92
|
maxTurns: options.maxTurns ?? (deep.depth ? Math.max(base.maxTurns || 1, deep.depth) : (base.maxTurns || 1)),
|
|
88
93
|
maxQueries: options.maxQueries ?? (deep.breadth ? Math.max(base.maxQueries || 2, deep.breadth * (deep.depth || 1)) : (base.maxQueries || 2)),
|
|
89
94
|
maxPages: options.maxSites ?? options.maxPages ?? base.maxPages,
|
|
90
|
-
allowedSourceTypes: options.allowedSourceTypes ?? base.allowedSourceTypes,
|
|
95
|
+
allowedSourceTypes: options.allowedSourceTypes ?? (Array.isArray(domainConfig.allowedSourceTypes) && domainConfig.allowedSourceTypes.length ? domainConfig.allowedSourceTypes : base.allowedSourceTypes),
|
|
96
|
+
allowedSources: options.allowedSources ?? (Array.isArray(domainConfig.allowedSources) && domainConfig.allowedSources.length ? domainConfig.allowedSources : base.allowedSources),
|
|
91
97
|
searchProvider: options.searchProvider ?? base.searchProvider,
|
|
92
98
|
concurrentQueries: deep.concurrency ?? options.concurrentQueries ?? 3,
|
|
93
99
|
depth: deep.depth ?? 1,
|
|
@@ -101,7 +107,10 @@ export function resolveResearchConfig(input = "fast") {
|
|
|
101
107
|
files: Array.isArray(options.files) ? options.files : [],
|
|
102
108
|
isolate: Boolean(options.isolate || process.env.RESEARCH_ISOLATE === "1"),
|
|
103
109
|
force: Boolean(options.force),
|
|
104
|
-
format: options.format
|
|
110
|
+
format: resolveOutputFormat(options, domainConfig.format || "markdown"),
|
|
111
|
+
queryHints: Array.isArray(domainConfig.queryHints) ? domainConfig.queryHints : [],
|
|
112
|
+
requireAuthoritative: Boolean(options.requireAuthoritative ?? domainConfig.requireAuthoritative),
|
|
113
|
+
domain: domainConfig.domain,
|
|
105
114
|
};
|
|
106
115
|
}
|
|
107
116
|
|
|
@@ -150,8 +159,11 @@ async function completeWithResearchModel(ctx, signal, prompt, reasoningEffort =
|
|
|
150
159
|
|
|
151
160
|
export async function buildQueries(query, mode = "fast", ctx, signal) {
|
|
152
161
|
const config = getResearchConfig(mode);
|
|
162
|
+
const hintedQueries = Array.isArray(config.queryHints) && config.queryHints.length
|
|
163
|
+
? config.queryHints.map((hint) => `${query} ${hint}`)
|
|
164
|
+
: [];
|
|
153
165
|
if (config.mode === "code") {
|
|
154
|
-
return planResearch(query, "code").subqueries.slice(0, config.maxQueries);
|
|
166
|
+
return [...new Set([...planResearch(query, "code").subqueries, ...hintedQueries])].slice(0, config.maxQueries);
|
|
155
167
|
}
|
|
156
168
|
if (config.mode === "deep" || config.mode === "academic") {
|
|
157
169
|
const prompt = [
|
|
@@ -165,15 +177,15 @@ export async function buildQueries(query, mode = "fast", ctx, signal) {
|
|
|
165
177
|
|
|
166
178
|
try {
|
|
167
179
|
const text = await completeWithResearchModel(ctx, signal, prompt, "low");
|
|
168
|
-
if (text) return parseDeepQueryPlan(text, query, config.maxQueries);
|
|
180
|
+
if (text) return [...new Set([...parseDeepQueryPlan(text, query, config.maxQueries), ...hintedQueries])].slice(0, config.maxQueries);
|
|
169
181
|
} catch {
|
|
170
182
|
// fall through
|
|
171
183
|
}
|
|
172
184
|
|
|
173
|
-
return buildDeepQueries(query, config.maxQueries);
|
|
185
|
+
return [...new Set([...buildDeepQueries(query, config.maxQueries), ...hintedQueries])].slice(0, config.maxQueries);
|
|
174
186
|
}
|
|
175
187
|
|
|
176
|
-
return buildFastQueries(query, config.maxQueries);
|
|
188
|
+
return [...new Set([...buildFastQueries(query, config.maxQueries), ...hintedQueries])].slice(0, config.maxQueries);
|
|
177
189
|
}
|
|
178
190
|
|
|
179
191
|
function withTimeoutSignal(signal, timeoutMs) {
|
|
@@ -499,8 +511,8 @@ function planSubqueries(rootQuery, currentQuery, config, sufficiency) {
|
|
|
499
511
|
return [...new Set(queries.filter(Boolean))].slice(0, Math.max(1, config.breadth || 2));
|
|
500
512
|
}
|
|
501
513
|
|
|
502
|
-
function formatResultText(result) {
|
|
503
|
-
return formatResearchResponse({ answer: result.answer, bullets: result.bullets, sources: result.sources, confidence: result.confidence });
|
|
514
|
+
function formatResultText(result, format) {
|
|
515
|
+
return formatResearchResponse({ answer: result.answer, bullets: result.bullets, sources: result.sources, confidence: result.confidence, format });
|
|
504
516
|
}
|
|
505
517
|
|
|
506
518
|
function modeCacheKey(query, config) {
|
|
@@ -520,7 +532,8 @@ function modeCacheKey(query, config) {
|
|
|
520
532
|
}
|
|
521
533
|
|
|
522
534
|
export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast") {
|
|
523
|
-
const
|
|
535
|
+
const domain = classifyQuestionDomain(query);
|
|
536
|
+
const config = getResearchConfig(typeof mode === "object" ? { ...mode, domain } : { mode, domain });
|
|
524
537
|
const cacheKey = modeCacheKey(query, config);
|
|
525
538
|
|
|
526
539
|
if (!config.isolate && !config.force) {
|
|
@@ -546,7 +559,7 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
|
|
|
546
559
|
let conflictSummary = "";
|
|
547
560
|
let conflictingSourcePairs = [];
|
|
548
561
|
let sufficiency = { sufficient: false, confidenceScore: 0.1, missingAspects: [], openSubQuestions: [] };
|
|
549
|
-
let currentQueries = await buildQueries(query, config
|
|
562
|
+
let currentQueries = await buildQueries(query, config, ctx, signal);
|
|
550
563
|
subqueries = [...currentQueries];
|
|
551
564
|
|
|
552
565
|
const localPages = await readLocalFiles(config.files || [], config);
|
|
@@ -665,7 +678,7 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
|
|
|
665
678
|
citations: synthesis.citations || [],
|
|
666
679
|
sources,
|
|
667
680
|
codeBlocks,
|
|
668
|
-
sufficient: sufficiency.sufficient && unverifiedRatio <= 0.2,
|
|
681
|
+
sufficient: sufficiency.sufficient && unverifiedRatio <= 0.2 && (!shouldRequireAuthoritativeSources(config) || sufficiency.authoritativeSourcesFound),
|
|
669
682
|
missingAspects: sufficiency.missingAspects,
|
|
670
683
|
openSubQuestions,
|
|
671
684
|
conflictSummary: conflictSummary || sufficiency.conflictSummary || "",
|
|
@@ -698,6 +711,7 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
|
|
|
698
711
|
sources: normalizedResult.sources,
|
|
699
712
|
sourceTypes,
|
|
700
713
|
codeBlocks: normalizedResult.codeBlocks,
|
|
714
|
+
format: config.format,
|
|
701
715
|
confidence,
|
|
702
716
|
meta: normalizedResult.meta,
|
|
703
717
|
confidenceScore: sufficiency.confidenceScore,
|
|
@@ -707,7 +721,7 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
|
|
|
707
721
|
openSubQuestions: normalizedResult.openSubQuestions,
|
|
708
722
|
missingAspects: normalizedResult.missingAspects,
|
|
709
723
|
unverifiedClaims: normalizedResult.unverifiedClaims,
|
|
710
|
-
contentText: formatResultText({ answer: normalizedResult.answer, bullets: normalizedResult.bullets, sources: normalizedResult.sources, confidence }),
|
|
724
|
+
contentText: formatResultText({ answer: normalizedResult.answer, bullets: normalizedResult.bullets, sources: normalizedResult.sources, confidence }, config.format),
|
|
711
725
|
};
|
|
712
726
|
|
|
713
727
|
setResearchMemory(cacheKey, result);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-research",
|
|
3
|
-
"version": "1.0
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"private": false,
|
|
5
5
|
"type": "module",
|
|
6
6
|
"description": "Pi extension for web research.",
|
|
@@ -25,11 +25,13 @@
|
|
|
25
25
|
"pi-package"
|
|
26
26
|
],
|
|
27
27
|
"scripts": {
|
|
28
|
-
"test": "node --test"
|
|
28
|
+
"test": "node --test",
|
|
29
|
+
"eval": "node --test test/eval-runner.test.js"
|
|
29
30
|
},
|
|
30
31
|
"dependencies": {
|
|
31
|
-
"@mariozechner/pi-ai": "
|
|
32
|
-
"
|
|
32
|
+
"@mariozechner/pi-ai": "*",
|
|
33
|
+
"pi-research": "^1.0.2",
|
|
34
|
+
"typebox": "*"
|
|
33
35
|
},
|
|
34
36
|
"peerDependencies": {
|
|
35
37
|
"@mariozechner/pi-ai": "*",
|