@demigodmode/pi-web-agent 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +63 -199
  2. package/dist/scripts/live-web-eval.d.ts +1 -0
  3. package/dist/scripts/live-web-eval.js +411 -0
  4. package/dist/src/cache/ttl-cache.d.ts +8 -0
  5. package/dist/src/cache/ttl-cache.js +21 -0
  6. package/dist/src/extension.d.ts +2 -0
  7. package/dist/src/extension.js +155 -0
  8. package/dist/src/extract/readability.d.ts +8 -0
  9. package/dist/src/extract/readability.js +93 -0
  10. package/dist/src/fetch/browser-resolution.d.ts +15 -0
  11. package/dist/src/fetch/browser-resolution.js +55 -0
  12. package/dist/src/fetch/headless-fetch.d.ts +18 -0
  13. package/dist/src/fetch/headless-fetch.js +87 -0
  14. package/dist/src/fetch/http-fetch.d.ts +4 -0
  15. package/dist/src/fetch/http-fetch.js +50 -0
  16. package/dist/src/orchestration/index.d.ts +41 -0
  17. package/dist/src/orchestration/index.js +9 -0
  18. package/dist/src/orchestration/research-orchestrator.d.ts +43 -0
  19. package/dist/src/orchestration/research-orchestrator.js +87 -0
  20. package/dist/src/orchestration/research-types.d.ts +41 -0
  21. package/dist/src/orchestration/research-types.js +1 -0
  22. package/dist/src/orchestration/research-worker.d.ts +16 -0
  23. package/dist/src/orchestration/research-worker.js +131 -0
  24. package/dist/src/search/duckduckgo.d.ts +9 -0
  25. package/dist/src/search/duckduckgo.js +52 -0
  26. package/dist/src/tools/web-explore.d.ts +44 -0
  27. package/dist/src/tools/web-explore.js +50 -0
  28. package/dist/src/tools/web-fetch-headless.d.ts +6 -0
  29. package/dist/src/tools/web-fetch-headless.js +14 -0
  30. package/dist/src/tools/web-fetch.d.ts +6 -0
  31. package/dist/src/tools/web-fetch.js +14 -0
  32. package/dist/src/tools/web-search.d.ts +10 -0
  33. package/dist/src/tools/web-search.js +103 -0
  34. package/dist/src/types.d.ts +48 -0
  35. package/dist/src/types.js +7 -0
  36. package/dist/tests/cache/ttl-cache.test.d.ts +1 -0
  37. package/dist/tests/cache/ttl-cache.test.js +19 -0
  38. package/dist/tests/contracts.test.d.ts +1 -0
  39. package/dist/tests/contracts.test.js +65 -0
  40. package/dist/tests/extension.test.d.ts +1 -0
  41. package/dist/tests/extension.test.js +123 -0
  42. package/dist/tests/extract/readability.test.d.ts +1 -0
  43. package/dist/tests/extract/readability.test.js +79 -0
  44. package/dist/tests/fetch/browser-resolution.test.d.ts +1 -0
  45. package/dist/tests/fetch/browser-resolution.test.js +37 -0
  46. package/dist/tests/fetch/headless-fetch.smoke.test.d.ts +1 -0
  47. package/dist/tests/fetch/headless-fetch.smoke.test.js +17 -0
  48. package/dist/tests/fetch/headless-fetch.test.d.ts +1 -0
  49. package/dist/tests/fetch/headless-fetch.test.js +150 -0
  50. package/dist/tests/fetch/http-fetch.test.d.ts +1 -0
  51. package/dist/tests/fetch/http-fetch.test.js +129 -0
  52. package/dist/tests/orchestration/research-orchestrator.test.d.ts +1 -0
  53. package/dist/tests/orchestration/research-orchestrator.test.js +298 -0
  54. package/dist/tests/orchestration/research-worker.test.d.ts +1 -0
  55. package/dist/tests/orchestration/research-worker.test.js +171 -0
  56. package/dist/tests/orchestration/research-workflow.test.d.ts +1 -0
  57. package/dist/tests/orchestration/research-workflow.test.js +119 -0
  58. package/dist/tests/package-manifest.test.d.ts +1 -0
  59. package/dist/tests/package-manifest.test.js +29 -0
  60. package/dist/tests/release-foundation.test.d.ts +1 -0
  61. package/dist/tests/release-foundation.test.js +16 -0
  62. package/dist/tests/release-script.test.d.ts +1 -0
  63. package/dist/tests/release-script.test.js +72 -0
  64. package/dist/tests/search/duckduckgo.test.d.ts +1 -0
  65. package/dist/tests/search/duckduckgo.test.js +103 -0
  66. package/dist/tests/tools/web-explore.test.d.ts +1 -0
  67. package/dist/tests/tools/web-explore.test.js +163 -0
  68. package/dist/tests/tools/web-fetch-headless.test.d.ts +1 -0
  69. package/dist/tests/tools/web-fetch-headless.test.js +31 -0
  70. package/dist/tests/tools/web-fetch.test.d.ts +1 -0
  71. package/dist/tests/tools/web-fetch.test.js +27 -0
  72. package/dist/tests/tools/web-search.test.d.ts +1 -0
  73. package/dist/tests/tools/web-search.test.js +125 -0
  74. package/dist/vitest.config.d.ts +2 -0
  75. package/dist/vitest.config.js +13 -0
  76. package/package.json +5 -1
@@ -0,0 +1,87 @@
1
/**
 * Returns the sort priority of a research source kind; lower values sort first.
 *
 * Known kinds map to their position in the priority list below (0-5). Any
 * other value, including `undefined`, maps to 6.
 *
 * @param sourceKind - The source kind label to rank.
 * @returns An integer in the range 0-6.
 */
function sourceRank(sourceKind) {
    // Listed from most to least authoritative; the index is the rank.
    const priority = [
        'official-docs',
        'official-api',
        'official-discussion',
        'issue-thread',
        'community',
        'package-page'
    ];
    const position = priority.indexOf(sourceKind);
    return position === -1 ? priority.length : position;
}
19
/**
 * Returns a copy of the evidence list ordered by `sourceRank` of each item's
 * `sourceKind`, lowest rank first. The input array is not mutated.
 *
 * @param evidence - Evidence items, each carrying a `sourceKind`.
 * @returns A new, sorted array.
 */
function sortEvidence(evidence) {
    const ordered = Array.from(evidence);
    ordered.sort((a, b) => sourceRank(a.sourceKind) - sourceRank(b.sourceKind));
    return ordered;
}
22
/**
 * Keeps only the evidence items whose `sourceKind` is one of the three
 * "official" kinds (docs, API, discussion).
 *
 * @param evidence - Evidence items to filter.
 * @returns A new array containing the matching items in their original order.
 */
function strongEvidence(evidence) {
    const strongKinds = ['official-docs', 'official-api', 'official-discussion'];
    return evidence.filter((item) => strongKinds.includes(item.sourceKind));
}
27
/**
 * Reports whether at least one evidence item is classified as
 * `'official-docs'` or `'official-api'`.
 *
 * @param evidence - Evidence items to inspect.
 * @returns `true` when such an item exists, otherwise `false`.
 */
function hasOfficialDocsOrApi(evidence) {
    const officialKinds = new Set(['official-docs', 'official-api']);
    return evidence.some((item) => officialKinds.has(item.sourceKind));
}
30
/**
 * Reports whether any low-value outcome has kind `'bot-check'`.
 *
 * @param outcomes - Low-value outcomes recorded by a worker pass.
 * @returns `true` when a bot-check outcome is present, otherwise `false`.
 */
function hasBotCheck(outcomes) {
    const botChecks = outcomes.filter((outcome) => outcome.kind === 'bot-check');
    return botChecks.length > 0;
}
33
/**
 * Decides whether the worker's suggested headless URL deserves a retry.
 *
 * Returns `false` when there is no suggested URL, when the pass recorded a
 * bot-check outcome, when there are already two or more approved evidence
 * items including official docs or API, or when the URL is an npm package
 * page. Otherwise returns `true`.
 *
 * @param pass - Worker result; reads `suggestedHeadlessUrl` and `lowValueOutcomes`.
 * @param approvedEvidence - Evidence already accepted by the orchestrator.
 */
function isHeadlessWorthTrying(pass, approvedEvidence) {
    const target = pass.suggestedHeadlessUrl;
    if (!target) {
        return false;
    }
    const blockedByBotCheck = hasBotCheck(pass.lowValueOutcomes);
    if (blockedByBotCheck) {
        return false;
    }
    const alreadyWellSupported = approvedEvidence.length >= 2 && hasOfficialDocsOrApi(approvedEvidence);
    if (alreadyWellSupported) {
        return false;
    }
    return target.includes('npmjs.com/package/') === false;
}
43
/**
 * Builds an orchestrator that runs one worker pass and decides what to do next.
 *
 * The returned `run({ query })`:
 *  1. calls `worker.run` with one search round and up to three fetches;
 *  2. drops `'package-page'` evidence and sorts the rest by source rank;
 *  3. answers when there are at least two strong items and official docs/API;
 *  4. otherwise, when a headless retry is worthwhile, awaits `headlessFetch`
 *     (its resolved value is not used) and returns `'escalate-headless'`;
 *  5. otherwise returns `'research-again'` with the same query.
 *
 * @param worker - Object exposing `run({ query, maxSearchRounds, maxFetches })`.
 * @param headlessFetch - Async function called with `{ url }`.
 * @returns An object with an async `run` method resolving to
 *          `{ decision, evidence, workerPass }`.
 */
export function createResearchOrchestrator({ worker, headlessFetch }) {
    const finish = (decision, evidence, workerPass) => ({ decision, evidence, workerPass });

    async function run({ query }) {
        const workerPass = await worker.run({ query, maxSearchRounds: 1, maxFetches: 3 });
        const withoutPackagePages = workerPass.evidence.filter((item) => item.sourceKind !== 'package-page');
        const approvedEvidence = sortEvidence(withoutPackagePages);

        const strongCount = strongEvidence(approvedEvidence).length;
        if (strongCount >= 2 && hasOfficialDocsOrApi(approvedEvidence)) {
            return finish({
                action: 'answer',
                rationale: 'Two strong sources with official support are enough to answer safely.',
                approvedEvidence
            }, approvedEvidence, workerPass);
        }

        if (isHeadlessWorthTrying(workerPass, approvedEvidence)) {
            const url = workerPass.suggestedHeadlessUrl;
            // The fetch result is intentionally not inspected here; only the
            // decision and the already-approved evidence are returned.
            await headlessFetch({ url });
            return finish({
                action: 'escalate-headless',
                rationale: 'One high-value page is worth a single orchestrator-approved headless retry.',
                url,
                approvedEvidence
            }, approvedEvidence, workerPass);
        }

        // Both remaining outcomes ask for another research pass; only the
        // rationale differs.
        const noConcreteGap = workerPass.gaps.length === 0;
        const onlyLowValueOutcomes = workerPass.lowValueOutcomes.length > 0 && workerPass.evidence.length === 0;
        const rationale = noConcreteGap || onlyLowValueOutcomes
            ? 'Current results did not justify more escalation; continue only with a more targeted pass.'
            : 'The first pass did not gather enough strong evidence to answer safely.';
        return finish({
            action: 'research-again',
            rationale,
            followupQuery: query
        }, approvedEvidence, workerPass);
    }

    return { run };
}
@@ -0,0 +1,41 @@
1
/** Category label attached to a researched page. */
export type ResearchSourceKind =
    | 'official-docs'
    | 'official-api'
    | 'official-discussion'
    | 'community'
    | 'issue-thread'
    | 'package-page'
    | 'other';

/** How a piece of information was obtained. */
export type ResearchMethod = 'search' | 'http' | 'headless';

/** One page that was fetched and accepted as evidence. */
export type ResearchEvidence = {
    title: string;
    url: string;
    sourceKind: ResearchSourceKind;
    /** Fetch method only; `'search'` is excluded. */
    method: Exclude<ResearchMethod, 'search'>;
    summary: string;
    supports: string[];
};

/** A shortcoming recorded during a research pass. */
export type ResearchGap = {
    kind: 'needs-more-evidence' | 'conflict' | 'fetch-failed';
    message: string;
};

/** An attempt that produced nothing usable as evidence. */
export type ResearchLowValueOutcome = {
    kind: 'empty-search' | 'bot-check' | 'low-value-page' | 'duplicate-evidence';
    url?: string;
    message: string;
};

/** Everything a single worker pass reports back. */
export type ResearchWorkerResult = {
    searchQueries: string[];
    evidence: ResearchEvidence[];
    gaps: ResearchGap[];
    lowValueOutcomes: ResearchLowValueOutcome[];
    suggestedHeadlessUrl?: string;
    exhaustedBudget: boolean;
};

/** Orchestrator verdict, discriminated by `action`. */
export type ResearchOrchestratorDecision =
    | {
        action: 'answer';
        rationale: string;
        approvedEvidence: ResearchEvidence[];
    }
    | {
        action: 'research-again';
        rationale: string;
        followupQuery: string;
    }
    | {
        action: 'escalate-headless';
        rationale: string;
        url: string;
        approvedEvidence: ResearchEvidence[];
    };
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,16 @@
1
+ import type { WebFetchResponse, WebSearchResponse } from '../types.js';
2
+ import type { ResearchWorkerResult } from './research-types.js';
3
/**
 * Creates a research worker from injected search and fetch functions.
 *
 * @param search - Async search function taking `{ query }`.
 * @param fetchPage - Async page fetcher taking `{ url }`.
 * @returns An object whose `run` method performs one research pass within the
 *          given search-round and fetch limits.
 */
export declare function createResearchWorker({ search, fetchPage }: {
    search: (input: { query: string; }) => Promise<WebSearchResponse>;
    fetchPage: (input: { url: string; }) => Promise<WebFetchResponse>;
}): {
    run({ query, maxSearchRounds, maxFetches }: {
        query: string;
        maxSearchRounds: number;
        maxFetches: number;
    }): Promise<ResearchWorkerResult>;
};
@@ -0,0 +1,131 @@
1
/**
 * Classifies a URL into a research source kind using substring heuristics.
 *
 * Site-specific patterns (GitHub issue threads, npm package pages) are checked
 * before the generic path patterns. Otherwise a URL such as
 * `https://github.com/acme/config/issues/7` would match the generic `/config/`
 * rule and be promoted to `'official-api'`, the strongest evidence category.
 *
 * @param url - The URL of the fetched page.
 * @returns One of `'issue-thread'`, `'package-page'`, `'official-api'`,
 *          `'official-docs'`, or `'community'` when nothing else matches.
 */
function classifySource(url) {
    // Most specific first: these identify the hosting site, not just a path shape.
    if (url.includes('github.com/') && url.includes('/issues/')) {
        return 'issue-thread';
    }
    if (url.includes('npmjs.com/package/')) {
        return 'package-page';
    }
    // Generic path shapes used by API/config reference pages.
    if (url.includes('/docs/api/') || url.includes('/config/')) {
        return 'official-api';
    }
    if (url.includes('playwright.dev/docs') ||
        url.includes('vitest.dev/guide/') ||
        url.includes('learn.microsoft.com')) {
        return 'official-docs';
    }
    return 'community';
}
14
/**
 * Collapses every run of whitespace to a single space, trims the ends, and
 * cuts the result to at most `maxLength` characters.
 *
 * @param text - Raw text to condense.
 * @param maxLength - Maximum length of the returned string (default 180).
 * @returns The condensed, truncated text.
 */
function summarizeText(text, maxLength = 180) {
    const collapsed = text.split(/\s+/).join(' ');
    const trimmed = collapsed.trim();
    return trimmed.slice(0, maxLength);
}
17
/**
 * Converts a fetch response into an evidence record.
 *
 * Returns `null` when the fetch status is not `'ok'`, when there is no
 * content, or when the URL classifies as a `'package-page'`.
 *
 * @param fetched - Fetch response; reads `status`, `content`, `url`, `metadata.method`.
 * @param fallbackTitle - Title used when the content has no title.
 * @returns An evidence object, or `null` when the response is not usable.
 */
function evidenceFromFetch(fetched, fallbackTitle) {
    const { content, status, url } = fetched;
    const usable = status === 'ok' && Boolean(content);
    if (!usable) {
        return null;
    }
    const sourceKind = classifySource(url);
    if (sourceKind === 'package-page') {
        return null;
    }
    const { text } = content;
    return {
        title: content.title ?? fallbackTitle,
        url,
        sourceKind,
        method: fetched.metadata.method,
        summary: summarizeText(text),
        // A shorter excerpt of the same text.
        supports: [summarizeText(text, 120)]
    };
}
34
/**
 * Produces a `'low-value-page'` outcome for a successful fetch of a page that
 * classifies as a `'package-page'`; returns `null` in every other case.
 *
 * @param fetched - Fetch response; reads `status`, `content`, and `url`.
 * @returns A low-value outcome object, or `null`.
 */
function lowValueOutcomeFromFetch(fetched) {
    const fetchedOk = fetched.status === 'ok' && fetched.content;
    const isPackagePage = fetchedOk && classifySource(fetched.url) === 'package-page';
    if (!isPackagePage) {
        return null;
    }
    return {
        kind: 'low-value-page',
        url: fetched.url,
        message: 'Fetched page did not add strong research evidence.'
    };
}
45
/**
 * Creates a research worker that performs a single search followed by up to
 * `maxFetches` sequential page fetches.
 *
 * `run` returns early when either budget is zero or negative (with
 * `exhaustedBudget: true` and no search queries), when the search status is
 * not `'ok'` (recorded as a `'fetch-failed'` gap), or when the search yields
 * no results (recorded as an `'empty-search'` outcome). Otherwise each
 * candidate is fetched in order and sorted into evidence, low-value outcomes,
 * or gaps; the first `'needs_headless'` URL is kept as the headless suggestion.
 *
 * @param search - Async search function taking `{ query }`.
 * @param fetchPage - Async fetch function taking `{ url }`.
 * @returns An object with an async `run({ query, maxSearchRounds, maxFetches })`.
 */
export function createResearchWorker({ search, fetchPage }) {
    async function run({ query, maxSearchRounds, maxFetches }) {
        const evidence = [];
        const gaps = [];
        const lowValueOutcomes = [];
        let suggestedHeadlessUrl;

        // Builds the result from the current accumulators; `overrides`
        // replaces individual fields for the early-exit cases.
        const snapshot = (overrides = {}) => ({
            searchQueries: [query],
            evidence,
            gaps,
            lowValueOutcomes,
            suggestedHeadlessUrl,
            exhaustedBudget: false,
            ...overrides
        });

        if (maxSearchRounds <= 0 || maxFetches <= 0) {
            return snapshot({
                searchQueries: [],
                gaps: [{ kind: 'needs-more-evidence', message: 'Research worker budget was zero.' }],
                exhaustedBudget: true
            });
        }

        const searchResult = await search({ query });
        if (searchResult.status !== 'ok') {
            return snapshot({
                gaps: [
                    {
                        kind: 'fetch-failed',
                        message: searchResult.error?.message ?? 'Search failed during research worker pass.'
                    }
                ]
            });
        }
        if (searchResult.results.length === 0) {
            return snapshot({
                lowValueOutcomes: [
                    {
                        kind: 'empty-search',
                        message: 'Search returned no results for this pass.'
                    }
                ]
            });
        }

        for (const candidate of searchResult.results.slice(0, maxFetches)) {
            const fetched = await fetchPage({ url: candidate.url });
            if (fetched.status === 'needs_headless') {
                // Only the first weak page becomes the headless suggestion.
                suggestedHeadlessUrl = suggestedHeadlessUrl || fetched.url;
                gaps.push({ kind: 'fetch-failed', message: `HTTP fetch was weak for ${fetched.url}` });
            }
            else if (fetched.status !== 'ok') {
                gaps.push({
                    kind: 'fetch-failed',
                    message: fetched.error?.message ?? `Fetch failed for ${candidate.url}`
                });
            }
            else {
                const parsedEvidence = evidenceFromFetch(fetched, candidate.title);
                if (parsedEvidence) {
                    evidence.push(parsedEvidence);
                }
                else {
                    const lowValueOutcome = lowValueOutcomeFromFetch(fetched);
                    if (lowValueOutcome) {
                        lowValueOutcomes.push(lowValueOutcome);
                    }
                }
            }
        }
        return snapshot();
    }

    return { run };
}
@@ -0,0 +1,9 @@
1
+ import type { SearchResult } from '../types.js';
2
/** Outcome of parsing a DuckDuckGo HTML results page. */
export type ParsedDuckDuckGoResults = {
    /** Parsed result entries. */
    results: SearchResult[];
    /** Whether the page text contained a "no results" phrase. */
    noResults: boolean;
    /** Whether any result container elements were found in the page. */
    hasResultContainers: boolean;
};

/** Builds the DuckDuckGo HTML search URL for a query. */
export declare function buildSearchUrl(query: string): string;

/** Requests the DuckDuckGo HTML results page for a query and resolves to its body. */
export declare function fetchDuckDuckGoHtml(query: string): Promise<string>;

/** Extracts results and page-level flags from DuckDuckGo HTML. */
export declare function parseDuckDuckGoResults(html: string): ParsedDuckDuckGoResults;
@@ -0,0 +1,52 @@
1
+ import * as cheerio from 'cheerio';
2
/**
 * Resolves a result link from DuckDuckGo's HTML page to the real target URL.
 *
 * DuckDuckGo wraps results in redirect links of the form
 * `//duckduckgo.com/l/?uddg=<encoded target>`. This unwraps them, and gives
 * protocol-relative links (`//host/...`) an explicit `https:` scheme so the
 * result is directly fetchable.
 *
 * @param rawUrl - The `href` value taken from the result anchor.
 * @returns The unwrapped target URL; the absolute form of `rawUrl` when it is
 *          not a redirect or has no `uddg` parameter; or `rawUrl` unchanged
 *          when it cannot be parsed as a URL.
 */
function normalizeDuckDuckGoUrl(rawUrl) {
    try {
        const absolute = rawUrl.startsWith('//') ? `https:${rawUrl}` : rawUrl;
        const parsed = new URL(absolute);
        const isDuckDuckGoRedirect = parsed.hostname === 'duckduckgo.com' && parsed.pathname === '/l/';
        if (!isDuckDuckGoRedirect) {
            return absolute;
        }
        // `searchParams.get` already percent-decodes the value. Decoding a
        // second time would corrupt targets that contain their own escapes
        // (`%2520` -> `%20` -> space) and would throw on a literal `%`.
        const target = parsed.searchParams.get('uddg');
        return target ? target : absolute;
    }
    catch {
        // Not a parseable URL (or not a string): hand it back untouched.
        return rawUrl;
    }
}
20
/**
 * Builds the DuckDuckGo HTML endpoint URL for a query, with the query
 * form-encoded into the `q` parameter.
 *
 * @param query - The search query text.
 * @returns The full search URL.
 */
export function buildSearchUrl(query) {
    const params = new URLSearchParams();
    params.set('q', query);
    return 'https://html.duckduckgo.com/html/?' + params.toString();
}
24
/**
 * Fetches the DuckDuckGo HTML results page for a query.
 *
 * @param query - The search query text.
 * @returns A promise for the response body text.
 * @throws Error when the response is not OK; the message includes the HTTP status.
 */
export async function fetchDuckDuckGoHtml(query) {
    const searchUrl = buildSearchUrl(query);
    const response = await fetch(searchUrl);
    if (response.ok) {
        return response.text();
    }
    throw new Error(`DuckDuckGo request failed with ${response.status}`);
}
31
/**
 * Parses a DuckDuckGo HTML results page.
 *
 * Each `.result` element contributes an entry when it has both a non-empty
 * title and a non-empty URL (taken from its first `.result__a` anchor, with
 * the URL passed through `normalizeDuckDuckGoUrl`). The snippet comes from
 * the first `.result__snippet` element.
 *
 * @param html - Raw HTML of the results page.
 * @returns The parsed results, a `noResults` flag set when the lower-cased
 *          page text contains one of the known "no results" phrases, and
 *          `hasResultContainers` telling whether any `.result` element exists.
 */
export function parseDuckDuckGoResults(html) {
    const $ = cheerio.load(html);
    const containers = $('.result');

    const results = [];
    for (const node of containers.toArray()) {
        const entry = $(node);
        const anchor = entry.find('.result__a').first();
        const title = anchor.text().trim();
        const url = normalizeDuckDuckGoUrl(anchor.attr('href')?.trim() ?? '');
        const snippet = entry.find('.result__snippet').first().text().trim();
        if (title && url) {
            results.push({ title, url, snippet });
        }
    }

    const pageText = $.text().toLowerCase();
    const emptyPhrases = ['no results found', 'no more results', 'did not match any documents'];
    const noResults = emptyPhrases.some((phrase) => pageText.includes(phrase));

    return {
        results,
        noResults,
        hasResultContainers: containers.length > 0
    };
}
@@ -0,0 +1,44 @@
1
+ import type { ResearchEvidence } from '../orchestration/research-types.js';
2
/**
 * Creates the web-explore tool.
 *
 * @param explore - Optional research runner: either an object with a `run`
 *        method or a bare function. Both take `{ query }` and resolve to a
 *        decision, evidence list, and worker pass.
 * @returns An async function taking `{ query }` that resolves to either an
 *          `"error"` result (with `error` set) or an `"ok"` result carrying
 *          findings, sources, an optional caveat, and formatted text.
 */
export declare function createWebExploreTool({ explore }?: {
    explore?: {
        run: (input: { query: string; }) => Promise<{
            decision: {
                action: 'answer' | 'research-again' | 'escalate-headless';
            };
            evidence: ResearchEvidence[];
            workerPass: unknown;
        }>;
    } | ((input: { query: string; }) => Promise<{
        decision: {
            action: 'answer' | 'research-again' | 'escalate-headless';
        };
        evidence: ResearchEvidence[];
        workerPass: unknown;
    }>);
}): ({ query }: { query: string; }) => Promise<{
    status: "error";
    findings: never[];
    sources: never[];
    error: {
        code: string;
        message: string;
    };
    caveat?: undefined;
    text?: undefined;
} | {
    status: "ok";
    findings: string[];
    sources: {
        title: string;
        url: string;
    }[];
    caveat: string | undefined;
    text: string;
    error?: undefined;
}>;
@@ -0,0 +1,50 @@
1
+ import { createResearchWorkflow } from '../orchestration/index.js';
2
/**
 * Turns one evidence item into a finding sentence.
 *
 * A few known phrases in the summary map to fixed, hand-written findings.
 * Otherwise the summary itself is used, or `Finding N` (1-based) when the
 * summary is empty.
 *
 * @param evidence - Evidence item; only `summary` is read.
 * @param index - Zero-based position of the item, used for the fallback label.
 * @returns The finding text.
 */
function findingFromEvidence(evidence, index) {
    const { summary } = evidence;
    const mentions = (...phrases) => phrases.some((phrase) => summary.includes(phrase));

    if (mentions('Use channel')) {
        return 'Use channel for branded Chrome or Edge when possible.';
    }
    if (mentions('use at your own risk', 'risky')) {
        return 'Treat executablePath as a fallback because Playwright documents it as use-at-your-own-risk.';
    }
    if (mentions('coverage.provider to v8', '@vitest/coverage-v8')) {
        return 'Vitest coverage docs say to set coverage.provider to v8 and install @vitest/coverage-v8.';
    }
    return summary || `Finding ${index + 1}`;
}
15
/**
 * Renders findings, sources, and an optional caveat as plain text.
 *
 * The output has a `Findings` section and a `Sources` section, each a list of
 * `- ` bullets, separated by a blank line. A `Caveat` section is appended only
 * when `caveat` is truthy.
 *
 * @param findings - Finding sentences.
 * @param sources - Objects with `title` and `url`.
 * @param caveat - Optional caveat text.
 * @returns The formatted text block.
 */
function formatExploreText({ findings, sources, caveat }) {
    const bullet = (line) => `- ${line}`;
    const sections = [
        `Findings\n${findings.map((finding) => bullet(finding)).join('\n')}`,
        `Sources\n${sources.map(({ title, url }) => bullet(`${title}: ${url}`)).join('\n')}`
    ];
    if (caveat) {
        sections.push(`Caveat\n${caveat}`);
    }
    return sections.join('\n\n');
}
21
/**
 * Creates the web-explore tool function.
 *
 * @param explore - Research runner, either a function or an object with a
 *        `run` method; defaults to `createResearchWorkflow()`.
 * @returns An async `webExplore({ query })`. A blank query yields an
 *          `INVALID_QUERY` error result. Otherwise the runner is invoked with
 *          the trimmed query, up to five findings and up to four sources are
 *          derived from its evidence, and a caveat is attached unless the
 *          decision action is `'answer'`.
 */
export function createWebExploreTool({ explore = createResearchWorkflow() } = {}) {
    const runExplore = typeof explore === 'function' ? explore : explore.run.bind(explore);

    return async function webExplore({ query }) {
        const normalizedQuery = query.trim();
        if (normalizedQuery.length === 0) {
            return {
                status: 'error',
                findings: [],
                sources: [],
                error: { code: 'INVALID_QUERY', message: 'Query must not be empty.' }
            };
        }

        const { evidence, decision } = await runExplore({ query: normalizedQuery });
        // `map` supplies the index that `findingFromEvidence` uses for its fallback label.
        const findings = evidence.slice(0, 5).map(findingFromEvidence);
        const sources = evidence.slice(0, 4).map(({ title, url }) => ({ title, url }));

        let caveat;
        if (decision.action !== 'answer') {
            caveat = 'Evidence is partial, so this answer is based on the strongest source found so far.';
        }

        const text = formatExploreText({ findings, sources, caveat });
        return { status: 'ok', findings, sources, caveat, text };
    };
}
@@ -0,0 +1,6 @@
1
+ import type { WebFetchHeadlessResponse } from '../types.js';
2
/**
 * Creates the headless web-fetch tool.
 *
 * @param fetchPage - Optional page fetcher taking a URL string.
 * @returns An async function taking `{ url }` and resolving to a headless fetch response.
 */
export declare function createWebFetchHeadlessTool({ fetchPage }?: {
    fetchPage?: (url: string) => Promise<WebFetchHeadlessResponse>;
}): ({ url }: { url: string; }) => Promise<WebFetchHeadlessResponse>;
@@ -0,0 +1,14 @@
1
+ import { headlessFetch } from '../fetch/headless-fetch.js';
2
/**
 * Creates the headless web-fetch tool function.
 *
 * @param fetchPage - Function that fetches a URL; defaults to `headlessFetch`.
 * @returns An async `webFetchHeadless({ url })`. URLs that do not start with
 *          an `http://` or `https://` scheme resolve to an `'unsupported'`
 *          result with code `UNSUPPORTED_URL`; every other URL is delegated
 *          to `fetchPage`.
 */
export function createWebFetchHeadlessTool({ fetchPage = headlessFetch } = {}) {
    // URI schemes are case-insensitive (RFC 3986 section 3.1), so `HTTPS://...`
    // must be accepted just like `https://...`.
    const supportedScheme = /^https?:\/\//i;
    return async function webFetchHeadless({ url }) {
        if (!supportedScheme.test(url)) {
            return {
                status: 'unsupported',
                url,
                metadata: { method: 'headless', cacheHit: false },
                error: { code: 'UNSUPPORTED_URL', message: 'Only http and https URLs are supported.' }
            };
        }
        return fetchPage(url);
    };
}
@@ -0,0 +1,6 @@
1
+ import type { WebFetchResponse } from '../types.js';
2
/**
 * Creates the HTTP web-fetch tool.
 *
 * @param fetchPage - Optional page fetcher taking a URL string.
 * @returns An async function taking `{ url }` and resolving to a fetch response.
 */
export declare function createWebFetchTool({ fetchPage }?: {
    fetchPage?: (url: string) => Promise<WebFetchResponse>;
}): ({ url }: { url: string; }) => Promise<WebFetchResponse>;
@@ -0,0 +1,14 @@
1
+ import { createHttpFetcher } from '../fetch/http-fetch.js';
2
/**
 * Creates the HTTP web-fetch tool function.
 *
 * @param fetchPage - Function that fetches a URL; defaults to `createHttpFetcher()`.
 * @returns An async `webFetch({ url })`. URLs that do not start with an
 *          `http://` or `https://` scheme resolve to an `'unsupported'` result
 *          with code `UNSUPPORTED_URL`; every other URL is delegated to
 *          `fetchPage`.
 */
export function createWebFetchTool({ fetchPage = createHttpFetcher() } = {}) {
    // URI schemes are case-insensitive (RFC 3986 section 3.1), so `HTTPS://...`
    // must be accepted just like `https://...`.
    const supportedScheme = /^https?:\/\//i;
    return async function webFetch({ url }) {
        if (!supportedScheme.test(url)) {
            return {
                status: 'unsupported',
                url,
                metadata: { method: 'http', cacheHit: false },
                error: { code: 'UNSUPPORTED_URL', message: 'Only http and https URLs are supported.' }
            };
        }
        return fetchPage(url);
    };
}
@@ -0,0 +1,10 @@
1
+ import type { WebSearchResponse } from '../types.js';
2
/**
 * Creates the web-search tool.
 *
 * @param searchHtml - Optional function resolving a query to results-page HTML.
 * @param cache - Optional cache with `get`/`set` keyed by string.
 * @returns An async function taking `{ query }` and resolving to a search response.
 */
export declare function createWebSearchTool({ searchHtml, cache }?: {
    searchHtml?: (query: string) => Promise<string>;
    cache?: {
        get(key: string): WebSearchResponse | undefined;
        set(key: string, value: WebSearchResponse): void;
    };
}): ({ query }: { query: string; }) => Promise<WebSearchResponse>;
@@ -0,0 +1,103 @@
1
+ import { createCacheKey, createTtlCache } from '../cache/ttl-cache.js';
2
+ import { fetchDuckDuckGoHtml, parseDuckDuckGoResults } from '../search/duckduckgo.js';
3
/**
 * Maps a thrown search error to a tool error object.
 *
 * When the lower-cased message contains any block/rate-limit indicator the
 * result is `BLOCKED`; otherwise it is `FETCH_FAILED` with the raw message
 * appended. Values that are not `Error` instances use a generic message.
 *
 * @param error - The caught value.
 * @returns An object with `code` and `message`.
 */
function classifySearchFailure(error) {
    const rawMessage = error instanceof Error ? error.message : 'Unknown search failure.';
    const haystack = rawMessage.toLowerCase();
    const blockSignals = ['blocked', 'rate limit', 'rate-limit', '403', '429', 'captcha', 'challenge'];

    const looksBlocked = blockSignals.some((signal) => haystack.includes(signal));
    if (looksBlocked) {
        return {
            code: 'BLOCKED',
            message: 'DuckDuckGo search appears to be blocked or rate limited.'
        };
    }
    return {
        code: 'FETCH_FAILED',
        message: `DuckDuckGo search request failed: ${rawMessage}`
    };
}
23
/**
 * Reports whether the HTML contains, case-insensitively, any phrase that
 * suggests a bot check or rate-limit page.
 *
 * @param html - Raw page HTML.
 * @returns `true` when a blocking indicator phrase is present.
 */
function htmlLooksBlocked(html) {
    const haystack = html.toLowerCase();
    const indicators = [
        'captcha',
        'challenge',
        'verify you are human',
        'are you a robot',
        'unusual traffic'
    ];
    return indicators.some((indicator) => haystack.includes(indicator));
}
31
/**
 * Creates the web-search tool function backed by DuckDuckGo's HTML endpoint.
 *
 * @param searchHtml - Function resolving a query to results-page HTML;
 *        defaults to `fetchDuckDuckGoHtml`.
 * @param cache - Cache with `get`/`set`; defaults to a TTL cache created with
 *        `ttlMs: 30_000`.
 * @returns An async `webSearch({ query })`. A blank query yields
 *          `INVALID_QUERY`. A cache hit is returned with `cacheHit: true`.
 *          Otherwise the page is fetched and parsed: results are cached and
 *          returned with status `'ok'`; an empty page is reported as
 *          `NO_RESULTS`, a page that looks blocked as `BLOCKED`, anything else
 *          as `PARSE_FAILED`. Thrown errors go through `classifySearchFailure`.
 *          Only successful responses are cached.
 */
export function createWebSearchTool({ searchHtml = fetchDuckDuckGoHtml, cache = createTtlCache({ ttlMs: 30_000 }) } = {}) {
    const freshMetadata = () => ({ backend: 'duckduckgo', cacheHit: false });
    const failure = (error) => ({
        status: 'error',
        results: [],
        metadata: freshMetadata(),
        error
    });

    return async function webSearch({ query }) {
        const normalizedQuery = query.trim();
        if (!normalizedQuery) {
            return failure({ code: 'INVALID_QUERY', message: 'Query must not be empty.' });
        }

        const cacheKey = createCacheKey(['web_search', normalizedQuery]);
        const cached = cache.get(cacheKey);
        if (cached) {
            return {
                ...cached,
                metadata: { ...cached.metadata, cacheHit: true }
            };
        }

        try {
            const html = await searchHtml(normalizedQuery);
            const { results, noResults } = parseDuckDuckGoResults(html);

            if (results.length > 0) {
                const fresh = {
                    status: 'ok',
                    results,
                    metadata: freshMetadata()
                };
                cache.set(cacheKey, fresh);
                return fresh;
            }
            // The order matters: an explicit "no results" page wins over the
            // blocked-page heuristics, which win over the generic parse failure.
            if (noResults) {
                return failure({
                    code: 'NO_RESULTS',
                    message: 'DuckDuckGo returned no usable results for this query.'
                });
            }
            if (htmlLooksBlocked(html)) {
                return failure({
                    code: 'BLOCKED',
                    message: 'DuckDuckGo search appears to be blocked or rate limited.'
                });
            }
            return failure({
                code: 'PARSE_FAILED',
                message: 'DuckDuckGo returned a page, but it did not match the expected results format.'
            });
        }
        catch (error) {
            return failure(classifySearchFailure(error));
        }
    };
}
@@ -0,0 +1,48 @@
1
/** Every status a tool response may carry. */
export declare const TOOL_STATUSES: readonly [
    "ok",
    "needs_headless",
    "blocked",
    "unsupported",
    "error"
];

/** Union of the literal values in `TOOL_STATUSES`. */
export type ToolStatus = (typeof TOOL_STATUSES)[number];

/** One entry of a search results page. */
export type SearchResult = {
    title: string;
    url: string;
    snippet: string;
};

/** Error payload attached to a failed tool response. */
export type ToolError = {
    code: string;
    message: string;
};

/** Metadata attached to a search response. */
export type SearchMetadata = {
    backend: 'duckduckgo';
    cacheHit: boolean;
};

/** Metadata attached to a fetch response. */
export type FetchMetadata = {
    method: 'http' | 'headless';
    cacheHit: boolean;
    contentType?: string;
    truncated?: boolean;
    browser?: 'configured' | 'chrome' | 'edge';
    navigationMs?: number;
};

/** Content extracted from a fetched page. */
export type ExtractedContent = {
    title?: string;
    byline?: string;
    text: string;
};

/** Response of the web-search tool. */
export type WebSearchResponse = {
    status: 'ok' | 'error';
    results: SearchResult[];
    metadata: SearchMetadata;
    error?: ToolError;
};

/** Response of the HTTP web-fetch tool. */
export type WebFetchResponse = {
    status: ToolStatus;
    url: string;
    content?: ExtractedContent;
    metadata: FetchMetadata;
    error?: ToolError;
};

/** Response of the headless web-fetch tool; `'needs_headless'` is excluded from its status. */
export type WebFetchHeadlessResponse = {
    status: Exclude<ToolStatus, 'needs_headless'>;
    url: string;
    content?: ExtractedContent;
    metadata: FetchMetadata;
    error?: ToolError;
};
@@ -0,0 +1,7 @@
1
/** Every status value a tool response may report, in declaration order. */
export const TOOL_STATUSES = ['ok', 'needs_headless', 'blocked', 'unsupported', 'error'];
@@ -0,0 +1 @@
1
+ export {};