@demigodmode/pi-web-agent 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +176 -0
- package/dist/cache/ttl-cache.d.ts +8 -0
- package/dist/cache/ttl-cache.js +21 -0
- package/dist/extension.d.ts +2 -0
- package/dist/extension.js +114 -0
- package/dist/extract/readability.d.ts +8 -0
- package/dist/extract/readability.js +93 -0
- package/dist/fetch/browser-resolution.d.ts +15 -0
- package/dist/fetch/browser-resolution.js +55 -0
- package/dist/fetch/headless-fetch.d.ts +18 -0
- package/dist/fetch/headless-fetch.js +87 -0
- package/dist/fetch/http-fetch.d.ts +4 -0
- package/dist/fetch/http-fetch.js +50 -0
- package/dist/orchestration/index.d.ts +41 -0
- package/dist/orchestration/index.js +9 -0
- package/dist/orchestration/research-orchestrator.d.ts +43 -0
- package/dist/orchestration/research-orchestrator.js +87 -0
- package/dist/orchestration/research-types.d.ts +41 -0
- package/dist/orchestration/research-types.js +1 -0
- package/dist/orchestration/research-worker.d.ts +16 -0
- package/dist/orchestration/research-worker.js +131 -0
- package/dist/search/duckduckgo.d.ts +4 -0
- package/dist/search/duckduckgo.js +42 -0
- package/dist/tools/web-explore.d.ts +44 -0
- package/dist/tools/web-explore.js +50 -0
- package/dist/tools/web-fetch-headless.d.ts +6 -0
- package/dist/tools/web-fetch-headless.js +14 -0
- package/dist/tools/web-fetch.d.ts +6 -0
- package/dist/tools/web-fetch.js +14 -0
- package/dist/tools/web-search.d.ts +10 -0
- package/dist/tools/web-search.js +44 -0
- package/dist/types.d.ts +48 -0
- package/dist/types.js +7 -0
- package/package.json +68 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import type { WebFetchHeadlessResponse, WebFetchResponse, WebSearchResponse } from '../types.js';
|
|
2
|
+
/**
 * Builds a research workflow from optional search, HTTP-fetch and
 * headless-fetch functions.
 *
 * All three dependencies (and the options object itself) may be omitted.
 *
 * @returns An object whose `run({ query })` resolves to one of three result
 * shapes, discriminated by `decision.action`: `"answer"`,
 * `"escalate-headless"` or `"research-again"`.
 */
export declare function createResearchWorkflow({ search, fetchPage, headlessFetch }?: {
    /** Runs a web search for `query`. */
    search?: (input: { query: string }) => Promise<WebSearchResponse>;
    /** Fetches a page by URL. */
    fetchPage?: (input: { url: string }) => Promise<WebFetchResponse>;
    /** Fetches a page by URL, returning a headless-fetch response. */
    headlessFetch?: (input: { url: string }) => Promise<WebFetchHeadlessResponse>;
}): {
    run({ query }: { query: string }): Promise<
        | {
            decision: {
                action: "answer";
                rationale: string;
                approvedEvidence: Array<import("./research-types.js").ResearchEvidence>;
            };
            evidence: Array<import("./research-types.js").ResearchEvidence>;
            workerPass: import("./research-types.js").ResearchWorkerResult;
        }
        | {
            decision: {
                action: "escalate-headless";
                rationale: string;
                url: string;
                approvedEvidence: Array<import("./research-types.js").ResearchEvidence>;
            };
            evidence: Array<import("./research-types.js").ResearchEvidence>;
            workerPass: import("./research-types.js").ResearchWorkerResult;
        }
        | {
            decision: {
                action: "research-again";
                rationale: string;
                followupQuery: string;
            };
            evidence: Array<import("./research-types.js").ResearchEvidence>;
            workerPass: import("./research-types.js").ResearchWorkerResult;
        }
    >;
};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { createWebFetchHeadlessTool } from '../tools/web-fetch-headless.js';
|
|
2
|
+
import { createWebFetchTool } from '../tools/web-fetch.js';
|
|
3
|
+
import { createWebSearchTool } from '../tools/web-search.js';
|
|
4
|
+
import { createResearchOrchestrator } from './research-orchestrator.js';
|
|
5
|
+
import { createResearchWorker } from './research-worker.js';
|
|
6
|
+
/**
 * Wires a research worker and a research orchestrator together.
 *
 * Any dependency that is not supplied is created from the matching tool
 * factory (`createWebSearchTool`, `createWebFetchTool`,
 * `createWebFetchHeadlessTool`).
 *
 * @param options - Optional `search`, `fetchPage` and `headlessFetch` functions.
 * @returns The orchestrator produced by `createResearchOrchestrator`.
 */
export function createResearchWorkflow({ search = createWebSearchTool(), fetchPage = createWebFetchTool(), headlessFetch = createWebFetchHeadlessTool() } = {}) {
    return createResearchOrchestrator({
        // The worker only gets search + plain fetch; headless fetching is
        // handed to the orchestrator alone.
        worker: createResearchWorker({ search, fetchPage }),
        headlessFetch
    });
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { WebFetchHeadlessResponse } from '../types.js';
|
|
2
|
+
import type { ResearchEvidence, ResearchWorkerResult } from './research-types.js';
|
|
3
|
+
/**
 * Creates an orchestrator around a research worker and a headless fetcher.
 *
 * @returns An object whose `run({ query })` resolves to one of three result
 * shapes, discriminated by `decision.action`: `"answer"`,
 * `"escalate-headless"` or `"research-again"`. Every shape carries the
 * `evidence` list and the raw `workerPass`.
 */
export declare function createResearchOrchestrator({ worker, headlessFetch }: {
    /** Worker that performs a single budgeted research pass. */
    worker: {
        run: (input: {
            query: string;
            maxSearchRounds: number;
            maxFetches: number;
        }) => Promise<ResearchWorkerResult>;
    };
    /** Headless fetcher invoked with a single URL. */
    headlessFetch: (input: { url: string }) => Promise<WebFetchHeadlessResponse>;
}): {
    run({ query }: { query: string }): Promise<
        | {
            decision: {
                action: "answer";
                rationale: string;
                approvedEvidence: Array<ResearchEvidence>;
            };
            evidence: Array<ResearchEvidence>;
            workerPass: ResearchWorkerResult;
        }
        | {
            decision: {
                action: "escalate-headless";
                rationale: string;
                url: string;
                approvedEvidence: Array<ResearchEvidence>;
            };
            evidence: Array<ResearchEvidence>;
            workerPass: ResearchWorkerResult;
        }
        | {
            decision: {
                action: "research-again";
                rationale: string;
                followupQuery: string;
            };
            evidence: Array<ResearchEvidence>;
            workerPass: ResearchWorkerResult;
        }
    >;
};
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
 * Maps a source kind to its sort priority; lower values sort first.
 *
 * @param sourceKind - Source classification string.
 * @returns 0–5 for the six kinds listed below, 6 for anything else.
 */
function sourceRank(sourceKind) {
    // A kind's position in this list is its rank.
    const priority = [
        'official-docs',
        'official-api',
        'official-discussion',
        'issue-thread',
        'community',
        'package-page'
    ];
    const position = priority.indexOf(sourceKind);
    return position === -1 ? priority.length : position;
}
|
|
19
|
+
/**
 * Returns a copy of `evidence` ordered by `sourceRank` of each item's
 * `sourceKind`, lowest rank first. The input is not mutated.
 *
 * @param evidence - Evidence items to order.
 * @returns A new, sorted array.
 */
function sortEvidence(evidence) {
    const byRank = (left, right) => sourceRank(left.sourceKind) - sourceRank(right.sourceKind);
    const copy = Array.from(evidence);
    copy.sort(byRank);
    return copy;
}
|
|
22
|
+
/**
 * Keeps only the evidence whose `sourceKind` is one of the three
 * "official" kinds.
 *
 * @param evidence - Evidence items to filter.
 * @returns A new array with the matching items, in their original order.
 */
function strongEvidence(evidence) {
    const strongKinds = ['official-docs', 'official-api', 'official-discussion'];
    return evidence.filter((item) => strongKinds.includes(item.sourceKind));
}
|
|
27
|
+
/**
 * Reports whether at least one evidence item is classified as
 * `official-docs` or `official-api`.
 *
 * @param evidence - Evidence items to inspect.
 * @returns `true` when such an item exists, otherwise `false`.
 */
function hasOfficialDocsOrApi(evidence) {
    const noneOfficial = evidence.every((item) => item.sourceKind !== 'official-docs' && item.sourceKind !== 'official-api');
    return !noneOfficial;
}
|
|
30
|
+
/**
 * Reports whether any low-value outcome has kind `bot-check`.
 *
 * @param outcomes - Low-value outcomes to inspect.
 * @returns `true` when a `bot-check` outcome is present.
 */
function hasBotCheck(outcomes) {
    const isBotCheck = (outcome) => outcome.kind === 'bot-check';
    return outcomes.filter(isBotCheck).length > 0;
}
|
|
33
|
+
/**
 * Decides whether a headless retry should be attempted for the worker's
 * suggested URL.
 *
 * The answer is `false` when the pass has no suggested URL, when any
 * low-value outcome is a bot check, when there are already at least two
 * approved evidence items including an official docs/API source, or when
 * the suggested URL contains `npmjs.com/package/`.
 *
 * @param pass - Worker result for the current pass.
 * @param approvedEvidence - Evidence already approved by the orchestrator.
 * @returns `true` only when none of the exclusions above applies.
 */
function isHeadlessWorthTrying(pass, approvedEvidence) {
    const candidate = pass.suggestedHeadlessUrl;
    if (!candidate || hasBotCheck(pass.lowValueOutcomes)) {
        return false;
    }
    const alreadyWellSupported = approvedEvidence.length >= 2 && hasOfficialDocsOrApi(approvedEvidence);
    return !alreadyWellSupported && !candidate.includes('npmjs.com/package/');
}
|
|
43
|
+
/**
 * Creates the orchestrator that turns one worker pass into a decision.
 *
 * `run({ query })` performs a single worker pass (1 search round, up to 3
 * fetches), drops `package-page` evidence, sorts the rest by source rank and
 * then picks exactly one of:
 *
 * - `answer`: at least two strong sources and an official docs/API source.
 * - `escalate-headless`: `isHeadlessWorthTrying` approved the suggested URL;
 *   `headlessFetch` is invoked once for it.
 * - `research-again`: everything else, with `followupQuery` set to the
 *   original query.
 *
 * @param deps - `worker` (with a `run` method) and `headlessFetch`.
 * @returns An object exposing the async `run` method.
 */
export function createResearchOrchestrator({ worker, headlessFetch }) {
    async function run({ query }) {
        const pass = await worker.run({ query, maxSearchRounds: 1, maxFetches: 3 });
        const withoutPackagePages = pass.evidence.filter((item) => item.sourceKind !== 'package-page');
        const approvedEvidence = sortEvidence(withoutPackagePages);
        // Every outcome shares the same envelope; only the decision differs.
        const finish = (decision) => ({ decision, evidence: approvedEvidence, workerPass: pass });

        if (strongEvidence(approvedEvidence).length >= 2 && hasOfficialDocsOrApi(approvedEvidence)) {
            return finish({
                action: 'answer',
                rationale: 'Two strong sources with official support are enough to answer safely.',
                approvedEvidence
            });
        }

        if (isHeadlessWorthTrying(pass, approvedEvidence)) {
            const url = pass.suggestedHeadlessUrl;
            // NOTE(review): the headless response is awaited but not used; it
            // is neither returned nor merged into the evidence.
            await headlessFetch({ url });
            return finish({
                action: 'escalate-headless',
                rationale: 'One high-value page is worth a single orchestrator-approved headless retry.',
                url,
                approvedEvidence
            });
        }

        const noConcreteGap = pass.gaps.length === 0;
        const nothingButLowValue = pass.lowValueOutcomes.length > 0 && pass.evidence.length === 0;
        // Both remaining outcomes ask for another pass; only the rationale differs.
        const rationale = noConcreteGap || nothingButLowValue
            ? 'Current results did not justify more escalation; continue only with a more targeted pass.'
            : 'The first pass did not gather enough strong evidence to answer safely.';
        return finish({
            action: 'research-again',
            rationale,
            followupQuery: query
        });
    }
    return { run };
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/** Classification assigned to a source. */
export type ResearchSourceKind =
    | 'official-docs'
    | 'official-api'
    | 'official-discussion'
    | 'community'
    | 'issue-thread'
    | 'package-page'
    | 'other';

/** How a piece of information was obtained. */
export type ResearchMethod = 'search' | 'http' | 'headless';

/** One piece of evidence extracted from a fetched page. */
export type ResearchEvidence = {
    title: string;
    url: string;
    sourceKind: ResearchSourceKind;
    /** Fetch method only; `'search'` is excluded. */
    method: Exclude<ResearchMethod, 'search'>;
    summary: string;
    supports: Array<string>;
};

/** A problem recorded during a research pass. */
export type ResearchGap = {
    kind: 'needs-more-evidence' | 'conflict' | 'fetch-failed';
    message: string;
};

/** An outcome that did not add evidence. */
export type ResearchLowValueOutcome = {
    kind: 'empty-search' | 'bot-check' | 'low-value-page' | 'duplicate-evidence';
    url?: string;
    message: string;
};

/** Everything a research worker reports back from a single pass. */
export type ResearchWorkerResult = {
    searchQueries: Array<string>;
    evidence: Array<ResearchEvidence>;
    gaps: Array<ResearchGap>;
    lowValueOutcomes: Array<ResearchLowValueOutcome>;
    suggestedHeadlessUrl?: string;
    exhaustedBudget: boolean;
};

/** Orchestrator decision, discriminated by `action`. */
export type ResearchOrchestratorDecision =
    | {
        action: 'answer';
        rationale: string;
        approvedEvidence: Array<ResearchEvidence>;
    }
    | {
        action: 'research-again';
        rationale: string;
        followupQuery: string;
    }
    | {
        action: 'escalate-headless';
        rationale: string;
        url: string;
        approvedEvidence: Array<ResearchEvidence>;
    };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { WebFetchResponse, WebSearchResponse } from '../types.js';
|
|
2
|
+
import type { ResearchWorkerResult } from './research-types.js';
|
|
3
|
+
/**
 * Creates a research worker from a search function and a page fetcher.
 *
 * @returns An object whose `run` takes the query plus the search-round and
 * fetch budgets and resolves to a `ResearchWorkerResult`.
 */
export declare function createResearchWorker({ search, fetchPage }: {
    /** Runs a web search for `query`. */
    search: (input: { query: string }) => Promise<WebSearchResponse>;
    /** Fetches a page by URL. */
    fetchPage: (input: { url: string }) => Promise<WebFetchResponse>;
}): {
    run({ query, maxSearchRounds, maxFetches }: {
        query: string;
        maxSearchRounds: number;
        maxFetches: number;
    }): Promise<ResearchWorkerResult>;
};
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
 * Classifies a page URL into a research source kind.
 *
 * Host-based rules are matched against the parsed hostname (the domain
 * itself or any subdomain) and path-based rules against the parsed
 * pathname. A known domain that only appears in a query string, fragment
 * or look-alike host (e.g. `https://example.com/?ref=learn.microsoft.com`)
 * is therefore no longer treated as an official source.
 *
 * Rules are evaluated in order; the first match wins:
 * 1. path contains `/docs/api/` or `/config/` -> `official-api`
 * 2. `playwright.dev` under `/docs`, or `vitest.dev` under `/guide/` -> `official-docs`
 * 3. `learn.microsoft.com` -> `official-docs`
 * 4. `github.com` with `/issues/` in the path -> `issue-thread`
 * 5. `npmjs.com` under `/package/` -> `package-page`
 * 6. anything else -> `community`
 *
 * @param url - Page URL. A value without a scheme is retried with
 * `https://` prepended; a value that still cannot be parsed is `community`.
 * @returns The source kind string.
 */
function classifySource(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        try {
            // Tolerate scheme-less input such as "playwright.dev/docs/intro".
            parsed = new URL(`https://${url}`);
        }
        catch {
            return 'community';
        }
    }
    const host = parsed.hostname;
    const path = parsed.pathname;
    // True for the domain itself and for any of its subdomains.
    const onDomain = (domain) => host === domain || host.endsWith(`.${domain}`);
    // NOTE(review): this first rule has no host restriction, as before.
    if (path.includes('/docs/api/') || path.includes('/config/'))
        return 'official-api';
    if ((onDomain('playwright.dev') && path.startsWith('/docs')) ||
        (onDomain('vitest.dev') && path.startsWith('/guide/')))
        return 'official-docs';
    if (onDomain('learn.microsoft.com'))
        return 'official-docs';
    if (onDomain('github.com') && path.includes('/issues/'))
        return 'issue-thread';
    if (onDomain('npmjs.com') && path.startsWith('/package/'))
        return 'package-page';
    return 'community';
}
|
|
14
|
+
/**
 * Produces a short single-line summary of `text`.
 *
 * Each run of whitespace is collapsed to one space, the result is trimmed,
 * and only then cut to `maxLength` characters.
 *
 * @param text - Text to summarise.
 * @param maxLength - Maximum length of the result (default 180).
 * @returns The normalised, truncated text.
 */
function summarizeText(text, maxLength = 180) {
    const singleSpaced = text.replace(/\s+/g, ' ');
    const trimmed = singleSpaced.trim();
    return trimmed.slice(0, maxLength);
}
|
|
17
|
+
/**
 * Converts a fetch response into a research evidence record.
 *
 * @param fetched - Fetch response; must have status `ok` and content to qualify.
 * @param fallbackTitle - Title used when the content's title is null or undefined.
 * @returns The evidence record, or `null` when the response is not `ok`,
 * has no content, or its URL classifies as `package-page`.
 */
function evidenceFromFetch(fetched, fallbackTitle) {
    const { content, status, url } = fetched;
    if (status !== 'ok' || !content) {
        return null;
    }
    const sourceKind = classifySource(url);
    if (sourceKind === 'package-page') {
        return null;
    }
    const title = content.title ?? fallbackTitle;
    return {
        title,
        url,
        sourceKind,
        method: fetched.metadata.method,
        summary: summarizeText(content.text),
        // A single, shorter (120-character) excerpt of the same text.
        supports: [summarizeText(content.text, 120)]
    };
}
|
|
34
|
+
/**
 * Builds a `low-value-page` outcome for a successfully fetched page whose
 * URL classifies as `package-page`.
 *
 * @param fetched - Fetch response to inspect.
 * @returns The outcome record, or `null` when the response is not `ok`,
 * has no content, or is not a package page.
 */
function lowValueOutcomeFromFetch(fetched) {
    const usable = fetched.status === 'ok' && Boolean(fetched.content);
    if (!usable || classifySource(fetched.url) !== 'package-page') {
        return null;
    }
    return {
        kind: 'low-value-page',
        url: fetched.url,
        message: 'Fetched page did not add strong research evidence.'
    };
}
|
|
45
|
+
export function createResearchWorker({ search, fetchPage }) {
|
|
46
|
+
return {
|
|
47
|
+
async run({ query, maxSearchRounds, maxFetches }) {
|
|
48
|
+
const searchQueries = [query];
|
|
49
|
+
const evidence = [];
|
|
50
|
+
const gaps = [];
|
|
51
|
+
const lowValueOutcomes = [];
|
|
52
|
+
let suggestedHeadlessUrl;
|
|
53
|
+
if (maxSearchRounds <= 0 || maxFetches <= 0) {
|
|
54
|
+
return {
|
|
55
|
+
searchQueries: [],
|
|
56
|
+
evidence,
|
|
57
|
+
gaps: [{ kind: 'needs-more-evidence', message: 'Research worker budget was zero.' }],
|
|
58
|
+
lowValueOutcomes,
|
|
59
|
+
suggestedHeadlessUrl,
|
|
60
|
+
exhaustedBudget: true
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
const searchResult = await search({ query });
|
|
64
|
+
if (searchResult.status !== 'ok') {
|
|
65
|
+
return {
|
|
66
|
+
searchQueries,
|
|
67
|
+
evidence,
|
|
68
|
+
gaps: [
|
|
69
|
+
{
|
|
70
|
+
kind: 'fetch-failed',
|
|
71
|
+
message: searchResult.error?.message ?? 'Search failed during research worker pass.'
|
|
72
|
+
}
|
|
73
|
+
],
|
|
74
|
+
lowValueOutcomes,
|
|
75
|
+
suggestedHeadlessUrl,
|
|
76
|
+
exhaustedBudget: false
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
if (searchResult.results.length === 0) {
|
|
80
|
+
return {
|
|
81
|
+
searchQueries,
|
|
82
|
+
evidence,
|
|
83
|
+
gaps,
|
|
84
|
+
lowValueOutcomes: [
|
|
85
|
+
{
|
|
86
|
+
kind: 'empty-search',
|
|
87
|
+
message: 'Search returned no results for this pass.'
|
|
88
|
+
}
|
|
89
|
+
],
|
|
90
|
+
suggestedHeadlessUrl,
|
|
91
|
+
exhaustedBudget: false
|
|
92
|
+
};
|
|
93
|
+
}
|
|
94
|
+
const candidates = searchResult.results.slice(0, maxFetches);
|
|
95
|
+
for (const candidate of candidates) {
|
|
96
|
+
const fetched = await fetchPage({ url: candidate.url });
|
|
97
|
+
if (fetched.status === 'ok') {
|
|
98
|
+
const parsedEvidence = evidenceFromFetch(fetched, candidate.title);
|
|
99
|
+
if (parsedEvidence) {
|
|
100
|
+
evidence.push(parsedEvidence);
|
|
101
|
+
continue;
|
|
102
|
+
}
|
|
103
|
+
const lowValueOutcome = lowValueOutcomeFromFetch(fetched);
|
|
104
|
+
if (lowValueOutcome) {
|
|
105
|
+
lowValueOutcomes.push(lowValueOutcome);
|
|
106
|
+
}
|
|
107
|
+
continue;
|
|
108
|
+
}
|
|
109
|
+
if (fetched.status === 'needs_headless') {
|
|
110
|
+
if (!suggestedHeadlessUrl) {
|
|
111
|
+
suggestedHeadlessUrl = fetched.url;
|
|
112
|
+
}
|
|
113
|
+
gaps.push({ kind: 'fetch-failed', message: `HTTP fetch was weak for ${fetched.url}` });
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
116
|
+
gaps.push({
|
|
117
|
+
kind: 'fetch-failed',
|
|
118
|
+
message: fetched.error?.message ?? `Fetch failed for ${candidate.url}`
|
|
119
|
+
});
|
|
120
|
+
}
|
|
121
|
+
return {
|
|
122
|
+
searchQueries,
|
|
123
|
+
evidence,
|
|
124
|
+
gaps,
|
|
125
|
+
lowValueOutcomes,
|
|
126
|
+
suggestedHeadlessUrl,
|
|
127
|
+
exhaustedBudget: false
|
|
128
|
+
};
|
|
129
|
+
}
|
|
130
|
+
};
|
|
131
|
+
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
/**
 * Unwraps a DuckDuckGo redirect link (`duckduckgo.com/l/?uddg=<target>`)
 * to the target URL it points at.
 *
 * Protocol-relative input (`//duckduckgo.com/...`) is parsed as `https:`.
 *
 * @param rawUrl - The `href` taken from a search result.
 * @returns The `uddg` target for a DuckDuckGo redirect; otherwise `rawUrl`
 * unchanged. This includes input that cannot be parsed and redirects with
 * no (or an empty) `uddg` parameter.
 */
function normalizeDuckDuckGoUrl(rawUrl) {
    try {
        const absolute = rawUrl.startsWith('//') ? `https:${rawUrl}` : rawUrl;
        const parsed = new URL(absolute);
        const isDuckDuckGoRedirect = parsed.hostname === 'duckduckgo.com' && parsed.pathname === '/l/';
        if (!isDuckDuckGoRedirect) {
            return rawUrl;
        }
        // searchParams.get() already returns the percent-decoded value.
        // Decoding it a second time turned "%20" inside the target into a
        // space and threw on a bare "%", so the value is returned as-is.
        return parsed.searchParams.get('uddg') || rawUrl;
    }
    catch {
        // Unparseable input is passed through untouched.
        return rawUrl;
    }
}
|
|
20
|
+
/**
 * Builds the DuckDuckGo HTML search URL for `query`.
 *
 * @param query - Search text; it is form-encoded into the `q` parameter.
 * @returns The absolute `https://html.duckduckgo.com/html/` URL.
 */
export function buildSearchUrl(query) {
    const queryString = new URLSearchParams([['q', query]]).toString();
    return 'https://html.duckduckgo.com/html/?' + queryString;
}
|
|
24
|
+
/**
 * Downloads the DuckDuckGo HTML results page for `query`.
 *
 * @param query - Search text.
 * @returns The response body as text.
 * @throws Error when the response is not OK; the message includes the HTTP status.
 */
export async function fetchDuckDuckGoHtml(query) {
    const searchUrl = buildSearchUrl(query);
    const response = await fetch(searchUrl);
    if (response.ok) {
        return response.text();
    }
    throw new Error(`DuckDuckGo request failed with ${response.status}`);
}
|
|
31
|
+
/**
 * Extracts search results from a DuckDuckGo HTML results page.
 *
 * For every `.result` element the first `.result__a` supplies the title and
 * link (unwrapped with `normalizeDuckDuckGoUrl`) and the first
 * `.result__snippet` supplies the snippet. Elements with an empty title or
 * URL are skipped.
 *
 * @param html - Raw HTML of the results page.
 * @returns `{ title, url, snippet }` records in document order.
 */
export function parseDuckDuckGoResults(html) {
    const $ = cheerio.load(html);
    const results = [];
    for (const element of $('.result').get()) {
        const link = $(element).find('.result__a').first();
        const title = link.text().trim();
        const url = normalizeDuckDuckGoUrl(link.attr('href')?.trim() ?? '');
        const snippet = $(element).find('.result__snippet').first().text().trim();
        if (title && url) {
            results.push({ title, url, snippet });
        }
    }
    return results;
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import type { ResearchEvidence } from '../orchestration/research-types.js';
|
|
2
|
+
/**
 * Creates the `webExplore` tool function.
 *
 * `explore` may be an object with a `run` method or a plain function; both
 * take `{ query }` and resolve to a decision, an evidence list and the raw
 * worker pass.
 *
 * @returns A function that resolves to either an `"error"` result (empty
 * findings and sources plus an `error` object) or an `"ok"` result carrying
 * findings, sources, an optional caveat and formatted text.
 */
export declare function createWebExploreTool({ explore }?: {
    explore?:
        | {
            run: (input: { query: string }) => Promise<{
                decision: {
                    action: 'answer' | 'research-again' | 'escalate-headless';
                };
                evidence: Array<ResearchEvidence>;
                workerPass: unknown;
            }>;
        }
        | ((input: { query: string }) => Promise<{
            decision: {
                action: 'answer' | 'research-again' | 'escalate-headless';
            };
            evidence: Array<ResearchEvidence>;
            workerPass: unknown;
        }>);
}): ({ query }: { query: string }) => Promise<
    | {
        status: "error";
        findings: Array<never>;
        sources: Array<never>;
        error: {
            code: string;
            message: string;
        };
        caveat?: undefined;
        text?: undefined;
    }
    | {
        status: "ok";
        findings: Array<string>;
        sources: Array<{
            title: string;
            url: string;
        }>;
        caveat: string | undefined;
        text: string;
        error?: undefined;
    }
>;
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { createResearchWorkflow } from '../orchestration/index.js';
|
|
2
|
+
/**
 * Turns one evidence item into a finding sentence.
 *
 * If the summary contains one of a few known phrases, a fixed sentence for
 * that phrase is returned (first matching rule wins). Otherwise the summary
 * itself is used, or `Finding <n>` (1-based) when the summary is empty.
 *
 * @param evidence - Evidence item; only `summary` is read.
 * @param index - Zero-based position of the item, used for the fallback label.
 * @returns The finding text.
 */
function findingFromEvidence(evidence, index) {
    const { summary } = evidence;
    const cannedFindings = [
        {
            needles: ['Use channel'],
            finding: 'Use channel for branded Chrome or Edge when possible.'
        },
        {
            needles: ['use at your own risk', 'risky'],
            finding: 'Treat executablePath as a fallback because Playwright documents it as use-at-your-own-risk.'
        },
        {
            needles: ['coverage.provider to v8', '@vitest/coverage-v8'],
            finding: 'Vitest coverage docs say to set coverage.provider to v8 and install @vitest/coverage-v8.'
        }
    ];
    for (const { needles, finding } of cannedFindings) {
        if (needles.some((needle) => summary.includes(needle))) {
            return finding;
        }
    }
    return summary || `Finding ${index + 1}`;
}
|
|
15
|
+
/**
 * Renders findings, sources and an optional caveat as plain text.
 *
 * Layout: a `Findings` heading with one `- ` bullet per finding, a blank
 * line, a `Sources` heading with one `- title: url` bullet per source and,
 * when `caveat` is truthy, a blank line followed by a `Caveat` section.
 *
 * @param parts - `findings` (strings), `sources` (`{ title, url }`) and `caveat`.
 * @returns The formatted text.
 */
function formatExploreText({ findings, sources, caveat }) {
    const bullet = (line) => `- ${line}`;
    const findingLines = findings.map((finding) => bullet(finding)).join('\n');
    const sourceLines = sources.map((source) => bullet(`${source.title}: ${source.url}`)).join('\n');
    let text = ['Findings', findingLines, '', 'Sources', sourceLines].join('\n');
    if (caveat) {
        text += `\n\nCaveat\n${caveat}`;
    }
    return text;
}
|
|
21
|
+
/**
 * Creates the `webExplore` tool function.
 *
 * The returned function trims the query and rejects an empty one with an
 * `INVALID_QUERY` error result. Otherwise it runs the explore workflow and
 * reports up to five findings and up to four sources taken from the
 * returned evidence. A caveat is attached whenever the workflow's decision
 * is anything other than `answer`.
 *
 * @param options - Optional `explore`: either a function or an object with
 * a `run` method. Defaults to `createResearchWorkflow()`.
 * @returns The async `webExplore({ query })` function.
 */
export function createWebExploreTool({ explore = createResearchWorkflow() } = {}) {
    let runExplore;
    if (typeof explore === 'function') {
        runExplore = explore;
    }
    else {
        // Bound so that `run` keeps its receiver.
        runExplore = explore.run.bind(explore);
    }
    return async function webExplore({ query }) {
        const normalizedQuery = query.trim();
        if (normalizedQuery.length === 0) {
            return {
                status: 'error',
                findings: [],
                sources: [],
                error: { code: 'INVALID_QUERY', message: 'Query must not be empty.' }
            };
        }
        const { evidence, decision } = await runExplore({ query: normalizedQuery });
        const findings = evidence.slice(0, 5).map((item, position) => findingFromEvidence(item, position));
        const sources = evidence.slice(0, 4).map(({ title, url }) => ({ title, url }));
        let caveat;
        if (decision.action !== 'answer') {
            caveat = 'Evidence is partial, so this answer is based on the strongest source found so far.';
        }
        const text = formatExploreText({ findings, sources, caveat });
        return { status: 'ok', findings, sources, caveat, text };
    };
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { WebFetchHeadlessResponse } from '../types.js';
|
|
2
|
+
/**
 * Creates the headless web-fetch tool function.
 *
 * @returns A function that takes `{ url }` and resolves to a
 * `WebFetchHeadlessResponse`.
 */
export declare function createWebFetchHeadlessTool(
    { fetchPage }?: {
        /** Optional fetcher, called with the URL string. */
        fetchPage?: (url: string) => Promise<WebFetchHeadlessResponse>;
    }
): ({ url }: { url: string }) => Promise<WebFetchHeadlessResponse>;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { headlessFetch } from '../fetch/headless-fetch.js';
|
|
2
|
+
export function createWebFetchHeadlessTool({ fetchPage = headlessFetch } = {}) {
|
|
3
|
+
return async function webFetchHeadless({ url }) {
|
|
4
|
+
if (!/^https?:\/\//.test(url)) {
|
|
5
|
+
return {
|
|
6
|
+
status: 'unsupported',
|
|
7
|
+
url,
|
|
8
|
+
metadata: { method: 'headless', cacheHit: false },
|
|
9
|
+
error: { code: 'UNSUPPORTED_URL', message: 'Only http and https URLs are supported.' }
|
|
10
|
+
};
|
|
11
|
+
}
|
|
12
|
+
return fetchPage(url);
|
|
13
|
+
};
|
|
14
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { createHttpFetcher } from '../fetch/http-fetch.js';
|
|
2
|
+
export function createWebFetchTool({ fetchPage = createHttpFetcher() } = {}) {
|
|
3
|
+
return async function webFetch({ url }) {
|
|
4
|
+
if (!/^https?:\/\//.test(url)) {
|
|
5
|
+
return {
|
|
6
|
+
status: 'unsupported',
|
|
7
|
+
url,
|
|
8
|
+
metadata: { method: 'http', cacheHit: false },
|
|
9
|
+
error: { code: 'UNSUPPORTED_URL', message: 'Only http and https URLs are supported.' }
|
|
10
|
+
};
|
|
11
|
+
}
|
|
12
|
+
return fetchPage(url);
|
|
13
|
+
};
|
|
14
|
+
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { WebSearchResponse } from '../types.js';
|
|
2
|
+
/**
 * Creates the web-search tool function.
 *
 * @returns A function that takes `{ query }` and resolves to a
 * `WebSearchResponse`.
 */
export declare function createWebSearchTool(
    { searchHtml, cache }?: {
        /** Optional function resolving to the raw results HTML for a query. */
        searchHtml?: (query: string) => Promise<string>;
        /** Optional response cache keyed by string. */
        cache?: {
            get(key: string): WebSearchResponse | undefined;
            set(key: string, value: WebSearchResponse): void;
        };
    }
): ({ query }: { query: string }) => Promise<WebSearchResponse>;
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { createCacheKey, createTtlCache } from '../cache/ttl-cache.js';
|
|
2
|
+
import { fetchDuckDuckGoHtml, parseDuckDuckGoResults } from '../search/duckduckgo.js';
|
|
3
|
+
/**
 * Creates the web-search tool function backed by DuckDuckGo HTML results.
 *
 * The returned function trims the query and rejects an empty one with
 * `INVALID_QUERY`. A cached response for the same trimmed query is returned
 * with `cacheHit: true`. Otherwise the HTML is fetched and parsed, and the
 * `ok` response is cached and returned. Any error thrown while fetching,
 * parsing or caching is reported as `SEARCH_FAILED`.
 *
 * @param options - Optional `searchHtml(query)` and `cache` overrides;
 * defaults are `fetchDuckDuckGoHtml` and a TTL cache with `ttlMs: 30_000`.
 * @returns The async `webSearch({ query })` function.
 */
export function createWebSearchTool({ searchHtml = fetchDuckDuckGoHtml, cache = createTtlCache({ ttlMs: 30_000 }) } = {}) {
    // Shared shape for both error responses.
    const failure = (code, message) => ({
        status: 'error',
        results: [],
        metadata: { backend: 'duckduckgo', cacheHit: false },
        error: { code, message }
    });
    return async function webSearch({ query }) {
        const normalizedQuery = query.trim();
        if (!normalizedQuery) {
            return failure('INVALID_QUERY', 'Query must not be empty.');
        }
        const cacheKey = createCacheKey(['web_search', normalizedQuery]);
        const cached = cache.get(cacheKey);
        if (cached) {
            // Return a copy flagged as a hit; the cached entry is left as stored.
            const metadata = { ...cached.metadata, cacheHit: true };
            return { ...cached, metadata };
        }
        try {
            const html = await searchHtml(normalizedQuery);
            const result = {
                status: 'ok',
                results: parseDuckDuckGoResults(html),
                metadata: { backend: 'duckduckgo', cacheHit: false }
            };
            cache.set(cacheKey, result);
            return result;
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown search failure.';
            return failure('SEARCH_FAILED', message);
        }
    };
}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/** All status values a tool response can carry. */
export declare const TOOL_STATUSES: readonly ["ok", "needs_headless", "blocked", "unsupported", "error"];

/** Union of the entries of `TOOL_STATUSES`. */
export type ToolStatus = (typeof TOOL_STATUSES)[number];

/** One search hit. */
export type SearchResult = {
    title: string;
    url: string;
    snippet: string;
};

/** Error payload attached to a tool response. */
export type ToolError = {
    code: string;
    message: string;
};

/** Metadata attached to a search response. */
export type SearchMetadata = {
    backend: 'duckduckgo';
    cacheHit: boolean;
};

/** Metadata attached to a fetch response. */
export type FetchMetadata = {
    method: 'http' | 'headless';
    cacheHit: boolean;
    contentType?: string;
    truncated?: boolean;
    browser?: 'configured' | 'chrome' | 'edge';
    navigationMs?: number;
};

/** Content extracted from a fetched page. */
export type ExtractedContent = {
    title?: string;
    byline?: string;
    text: string;
};

/** Response of the web-search tool. */
export type WebSearchResponse = {
    status: 'ok' | 'error';
    results: Array<SearchResult>;
    metadata: SearchMetadata;
    error?: ToolError;
};

/** Response of the HTTP web-fetch tool; any `ToolStatus` is possible. */
export type WebFetchResponse = {
    status: ToolStatus;
    url: string;
    content?: ExtractedContent;
    metadata: FetchMetadata;
    error?: ToolError;
};

/** Response of the headless web-fetch tool; never `needs_headless`. */
export type WebFetchHeadlessResponse = {
    status: Exclude<ToolStatus, 'needs_headless'>;
    url: string;
    content?: ExtractedContent;
    metadata: FetchMetadata;
    error?: ToolError;
};
|