@demigodmode/pi-web-agent 0.2.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -661
- package/README.md +61 -5
- package/dist/commands/web-agent-config.d.ts +23 -0
- package/dist/commands/web-agent-config.js +249 -0
- package/dist/extension.js +30 -66
- package/dist/orchestration/answer-synthesizer.d.ts +8 -0
- package/dist/orchestration/answer-synthesizer.js +17 -0
- package/dist/orchestration/candidate-selector.d.ts +6 -0
- package/dist/orchestration/candidate-selector.js +24 -0
- package/dist/orchestration/evidence-ranker.d.ts +4 -0
- package/dist/orchestration/evidence-ranker.js +36 -0
- package/dist/orchestration/index.d.ts +6 -21
- package/dist/orchestration/query-planner.d.ts +7 -0
- package/dist/orchestration/query-planner.js +37 -0
- package/dist/orchestration/research-orchestrator.d.ts +7 -22
- package/dist/orchestration/research-orchestrator.js +185 -73
- package/dist/orchestration/research-types.d.ts +6 -0
- package/dist/orchestration/research-worker.js +8 -1
- package/dist/orchestration/stop-decider.d.ts +19 -0
- package/dist/orchestration/stop-decider.js +14 -0
- package/dist/presentation/config-store.d.ts +23 -0
- package/dist/presentation/config-store.js +64 -0
- package/dist/presentation/config.d.ts +7 -0
- package/dist/presentation/config.js +44 -0
- package/dist/presentation/explore-presentation.d.ts +3 -0
- package/dist/presentation/explore-presentation.js +56 -0
- package/dist/presentation/fetch-presentation.d.ts +5 -0
- package/dist/presentation/fetch-presentation.js +40 -0
- package/dist/presentation/search-presentation.d.ts +3 -0
- package/dist/presentation/search-presentation.js +30 -0
- package/dist/presentation/select-view.d.ts +2 -0
- package/dist/presentation/select-view.js +12 -0
- package/dist/presentation/types.d.ts +50 -0
- package/dist/presentation/types.js +1 -0
- package/dist/search/duckduckgo.d.ts +6 -1
- package/dist/search/duckduckgo.js +11 -1
- package/dist/tools/web-explore.d.ts +16 -16
- package/dist/tools/web-explore.js +21 -29
- package/dist/tools/web-fetch-headless.js +11 -2
- package/dist/tools/web-fetch.js +11 -2
- package/dist/tools/web-search.js +99 -12
- package/dist/types.d.ts +22 -0
- package/package.json +75 -75
- package/dist/scripts/live-web-eval.d.ts +0 -1
- package/dist/scripts/live-web-eval.js +0 -411
- package/dist/src/cache/ttl-cache.d.ts +0 -8
- package/dist/src/cache/ttl-cache.js +0 -21
- package/dist/src/extension.d.ts +0 -2
- package/dist/src/extension.js +0 -155
- package/dist/src/extract/readability.d.ts +0 -8
- package/dist/src/extract/readability.js +0 -93
- package/dist/src/fetch/browser-resolution.d.ts +0 -15
- package/dist/src/fetch/browser-resolution.js +0 -55
- package/dist/src/fetch/headless-fetch.d.ts +0 -18
- package/dist/src/fetch/headless-fetch.js +0 -87
- package/dist/src/fetch/http-fetch.d.ts +0 -4
- package/dist/src/fetch/http-fetch.js +0 -50
- package/dist/src/orchestration/index.d.ts +0 -41
- package/dist/src/orchestration/index.js +0 -9
- package/dist/src/orchestration/research-orchestrator.d.ts +0 -43
- package/dist/src/orchestration/research-orchestrator.js +0 -87
- package/dist/src/orchestration/research-types.d.ts +0 -41
- package/dist/src/orchestration/research-types.js +0 -1
- package/dist/src/orchestration/research-worker.d.ts +0 -16
- package/dist/src/orchestration/research-worker.js +0 -131
- package/dist/src/search/duckduckgo.d.ts +0 -9
- package/dist/src/search/duckduckgo.js +0 -52
- package/dist/src/tools/web-explore.d.ts +0 -44
- package/dist/src/tools/web-explore.js +0 -50
- package/dist/src/tools/web-fetch-headless.d.ts +0 -6
- package/dist/src/tools/web-fetch-headless.js +0 -14
- package/dist/src/tools/web-fetch.d.ts +0 -6
- package/dist/src/tools/web-fetch.js +0 -14
- package/dist/src/tools/web-search.d.ts +0 -10
- package/dist/src/tools/web-search.js +0 -103
- package/dist/src/types.d.ts +0 -48
- package/dist/src/types.js +0 -7
- package/dist/tests/cache/ttl-cache.test.d.ts +0 -1
- package/dist/tests/cache/ttl-cache.test.js +0 -19
- package/dist/tests/contracts.test.d.ts +0 -1
- package/dist/tests/contracts.test.js +0 -65
- package/dist/tests/extension.test.d.ts +0 -1
- package/dist/tests/extension.test.js +0 -123
- package/dist/tests/extract/readability.test.d.ts +0 -1
- package/dist/tests/extract/readability.test.js +0 -79
- package/dist/tests/fetch/browser-resolution.test.d.ts +0 -1
- package/dist/tests/fetch/browser-resolution.test.js +0 -37
- package/dist/tests/fetch/headless-fetch.smoke.test.d.ts +0 -1
- package/dist/tests/fetch/headless-fetch.smoke.test.js +0 -17
- package/dist/tests/fetch/headless-fetch.test.d.ts +0 -1
- package/dist/tests/fetch/headless-fetch.test.js +0 -150
- package/dist/tests/fetch/http-fetch.test.d.ts +0 -1
- package/dist/tests/fetch/http-fetch.test.js +0 -129
- package/dist/tests/orchestration/research-orchestrator.test.d.ts +0 -1
- package/dist/tests/orchestration/research-orchestrator.test.js +0 -298
- package/dist/tests/orchestration/research-worker.test.d.ts +0 -1
- package/dist/tests/orchestration/research-worker.test.js +0 -171
- package/dist/tests/orchestration/research-workflow.test.d.ts +0 -1
- package/dist/tests/orchestration/research-workflow.test.js +0 -119
- package/dist/tests/package-manifest.test.d.ts +0 -1
- package/dist/tests/package-manifest.test.js +0 -29
- package/dist/tests/release-foundation.test.d.ts +0 -1
- package/dist/tests/release-foundation.test.js +0 -16
- package/dist/tests/release-script.test.d.ts +0 -1
- package/dist/tests/release-script.test.js +0 -72
- package/dist/tests/search/duckduckgo.test.d.ts +0 -1
- package/dist/tests/search/duckduckgo.test.js +0 -103
- package/dist/tests/tools/web-explore.test.d.ts +0 -1
- package/dist/tests/tools/web-explore.test.js +0 -163
- package/dist/tests/tools/web-fetch-headless.test.d.ts +0 -1
- package/dist/tests/tools/web-fetch-headless.test.js +0 -31
- package/dist/tests/tools/web-fetch.test.d.ts +0 -1
- package/dist/tests/tools/web-fetch.test.js +0 -27
- package/dist/tests/tools/web-search.test.d.ts +0 -1
- package/dist/tests/tools/web-search.test.js +0 -125
- package/dist/vitest.config.d.ts +0 -2
- package/dist/vitest.config.js +0 -13
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
import { Readability } from '@mozilla/readability';
|
|
2
|
-
import { JSDOM, VirtualConsole } from 'jsdom';
|
|
3
|
-
/**
 * Parse an HTML string with JSDOM and pull the main article out with Readability.
 *
 * @param html Raw HTML markup to analyse.
 * @param maxLength Maximum number of characters kept in the returned text.
 * @returns An object with `title`, `byline` and `text` (trimmed, then cut to `maxLength`).
 * @throws The first JSDOM error whose message mentions an unparsable CSS stylesheet.
 */
export function extractReadableContent(html, maxLength = 4000) {
    // Only the first stylesheet parse failure reported by JSDOM is remembered.
    let cssFailure;
    const jsdomConsole = new VirtualConsole();
    jsdomConsole.on('jsdomError', (err) => {
        if (cssFailure) {
            return;
        }
        if (err.message.includes('Could not parse CSS stylesheet')) {
            cssFailure = err;
        }
    });
    const dom = new JSDOM(html, { url: 'https://example.com', virtualConsole: jsdomConsole });
    if (cssFailure) {
        throw cssFailure;
    }
    const doc = dom.window.document;
    const parsed = new Readability(doc).parse();
    // Fall back to the whole body text when Readability yields no article text.
    const fullText = (parsed?.textContent ?? doc.body.textContent ?? '').trim();
    const pageTitle = doc.title || undefined;
    return {
        title: parsed?.title ?? pageTitle,
        byline: parsed?.byline || undefined,
        text: fullText.slice(0, maxLength)
    };
}
|
|
28
|
-
/**
 * Decode the HTML character references found in fallback-extracted text.
 *
 * Supported references: the named entities `nbsp` (mapped to a plain space),
 * `amp`, `lt`, `gt`, `quot` and `apos` (matched case-insensitively), plus any
 * decimal (`&#39;`) or hexadecimal (`&#x2F;`) numeric reference.
 *
 * The whole string is decoded in a single pass, so already-decoded output is
 * never decoded again (`&amp;lt;` yields `&lt;`, not `<`).
 *
 * @param text Text that may contain HTML character references.
 * @returns The text with every recognised reference replaced; unknown names
 *          and out-of-range code points are left untouched.
 */
function decodeHtmlEntities(text) {
    const named = new Map([
        ['nbsp', ' '],
        ['amp', '&'],
        ['lt', '<'],
        ['gt', '>'],
        ['quot', '"'],
        ['apos', "'"]
    ]);
    return text.replace(/&(?:#(\d+)|#x([\da-f]+)|([a-z]+));/gi, (match, decimal, hex, name) => {
        if (name !== undefined) {
            return named.get(name.toLowerCase()) ?? match;
        }
        const codePoint = decimal !== undefined ? Number(decimal) : parseInt(hex, 16);
        // fromCodePoint handles astral characters and throws above U+10FFFF, so guard the range.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    });
}
|
|
41
|
-
/**
 * Read the document title straight from the markup with a regular expression.
 *
 * Nested tags inside `<title>` are replaced by spaces, whitespace is collapsed,
 * and the result is entity-decoded.
 *
 * @param html Raw HTML markup.
 * @returns The cleaned title, or `undefined` when there is no `<title>` element
 *          or the cleaned title is empty.
 */
function extractTitle(html) {
    const found = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
    if (found === null) {
        return undefined;
    }
    const withoutTags = found[1].replace(/<[^>]+>/g, ' ');
    const flattened = withoutTags.replace(/\s+/g, ' ').trim();
    return decodeHtmlEntities(flattened) || undefined;
}
|
|
47
|
-
/**
 * Replace every `<tagName ...>...</tagName>` element, including its content,
 * with a single space. Matching is case-insensitive and non-greedy.
 *
 * @param html Markup to clean.
 * @param tagName Element name to remove; it is interpolated into the pattern as-is.
 * @returns The markup with all matching elements replaced by a space.
 */
function stripTagContent(html, tagName) {
    const elementPattern = new RegExp(`<${tagName}\\b[^>]*>[\\s\\S]*?<\\/${tagName}>`, 'gi');
    const cleaned = html.replace(elementPattern, ' ');
    return cleaned;
}
|
|
50
|
-
/**
 * Pick the most content-bearing part of the markup.
 *
 * The inner HTML of the first `<main>` is preferred, then the first `<article>`,
 * then `<body>`; when none of them is present the input is returned unchanged.
 *
 * @param html Raw HTML markup.
 * @returns The inner markup of the preferred section, or `html` itself.
 */
function extractPreferredSection(html) {
    // Ordered from most to least specific container.
    const sectionPatterns = [
        /<main\b[^>]*>([\s\S]*?)<\/main>/i,
        /<article\b[^>]*>([\s\S]*?)<\/article>/i,
        /<body\b[^>]*>([\s\S]*?)<\/body>/i
    ];
    for (const pattern of sectionPatterns) {
        const hit = html.match(pattern);
        if (hit) {
            return hit[1];
        }
    }
    return html;
}
|
|
62
|
-
/**
 * Regex-based text extraction used when the Readability path throws.
 *
 * Takes the preferred section of the page, drops non-content elements
 * (script, style, noscript, svg, template), strips the remaining tags,
 * decodes character references, collapses whitespace and cuts the result
 * to `maxLength`.
 *
 * @param html Raw HTML markup.
 * @param maxLength Maximum number of characters kept in the returned text.
 * @returns An object with the regex-extracted `title` and the plain `text`.
 */
function extractFallbackText(html, maxLength) {
    const title = extractTitle(html);
    let section = extractPreferredSection(html);
    for (const tagName of ['script', 'style', 'noscript', 'svg', 'template']) {
        section = stripTagContent(section, tagName);
    }
    // Strip tags BEFORE decoding entities (same order as extractTitle): decoding
    // first turns escaped text such as "a &lt; b &gt; c" into "a < b > c", which
    // the tag pattern would then delete as if it were markup.
    const withoutTags = section.replace(/<[^>]+>/g, ' ');
    const text = decodeHtmlEntities(withoutTags)
        .replace(/\s+/g, ' ')
        .trim()
        .slice(0, maxLength);
    return {
        title,
        text
    };
}
|
|
80
|
-
/**
 * Extract readable content without ever throwing on extraction failure.
 *
 * Tries the Readability-based extractor first; if it throws for any reason,
 * the regex-based fallback extractor is used instead.
 *
 * @param html Raw HTML markup.
 * @param maxLength Maximum number of characters kept in the extracted text.
 * @returns `{ mode, content }` where `mode` is `'readability'` or `'fallback'`.
 */
export function extractReadableContentSafely(html, maxLength = 4000) {
    let mode = 'readability';
    let content;
    try {
        content = extractReadableContent(html, maxLength);
    }
    catch {
        // Any failure of the primary extractor is deliberately absorbed here.
        mode = 'fallback';
        content = extractFallbackText(html, maxLength);
    }
    return { mode, content };
}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
/**
 * Outcome of looking for a local browser executable, discriminated on `ok`:
 * a success carries the executable path and which browser it belongs to,
 * a failure carries an error code and message.
 */
export type BrowserResolutionResult =
    | {
        ok: true;
        executablePath: string;
        browser: 'configured' | 'chrome' | 'edge';
    }
    | {
        ok: false;
        error: {
            code: 'BROWSER_NOT_FOUND' | 'CONFIGURED_BROWSER_NOT_FOUND';
            message: string;
        };
    };
|
|
12
|
-
/**
 * Locate a browser executable.
 *
 * @param configuredPath Optional explicit path to a browser executable.
 * @param fileExists Optional existence check for a path.
 * @returns A promise of the resolution outcome.
 */
export declare function resolveBrowserExecutable(
    { configuredPath, fileExists }: {
        configuredPath?: string;
        fileExists?: (path: string) => Promise<boolean>;
    }
): Promise<BrowserResolutionResult>;
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
// Default install locations probed per browser, in probing order.
const WINDOWS_CANDIDATES = {
    chrome: [
        'C:/Program Files/Google/Chrome/Application/chrome.exe',
        'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe'
    ],
    edge: [
        'C:/Program Files/Microsoft/Edge/Application/msedge.exe',
        'C:/Program Files (x86)/Microsoft/Edge/Application/msedge.exe'
    ]
};
/**
 * Find a browser executable for headless fetching.
 *
 * When `configuredPath` is given, only that path is checked and no automatic
 * discovery happens. Otherwise the Chrome candidates are probed first, then
 * the Edge candidates, and the first existing path wins.
 *
 * @param configuredPath Optional explicit executable path.
 * @param fileExists Async existence check; defaults to a filesystem check.
 * @returns `{ ok: true, executablePath, browser }` or `{ ok: false, error }`.
 */
export async function resolveBrowserExecutable({ configuredPath, fileExists = defaultFileExists }) {
    if (configuredPath) {
        const configuredExists = await fileExists(configuredPath);
        return configuredExists
            ? { ok: true, executablePath: configuredPath, browser: 'configured' }
            : {
                ok: false,
                error: {
                    code: 'CONFIGURED_BROWSER_NOT_FOUND',
                    message: `Configured browser path was not found: ${configuredPath}`
                }
            };
    }
    // Object key order (chrome, then edge) defines the probing priority.
    for (const [browser, candidates] of Object.entries(WINDOWS_CANDIDATES)) {
        for (const candidate of candidates) {
            if (await fileExists(candidate)) {
                return { ok: true, executablePath: candidate, browser };
            }
        }
    }
    return {
        ok: false,
        error: {
            code: 'BROWSER_NOT_FOUND',
            message: 'No compatible local browser was found for headless fetch.'
        }
    };
}
|
|
46
|
-
/**
 * Report whether `path` is accessible on the local filesystem.
 *
 * `node:fs/promises` is loaded lazily; a failure to load it or to access the
 * path both resolve to `false` instead of rejecting.
 *
 * @param path Filesystem path to probe.
 * @returns A promise of `true` when `access` succeeds, otherwise `false`.
 */
async function defaultFileExists(path) {
    return import('node:fs/promises')
        .then((fsPromises) => fsPromises.access(path))
        .then(() => true, () => false);
}
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
import { type BrowserResolutionResult } from './browser-resolution.js';
|
|
2
|
-
import type { WebFetchHeadlessResponse } from '../types.js';
|
|
3
|
-
/**
 * Fetch a URL through a headless browser.
 *
 * Every option is optional and lets the caller substitute a collaborator:
 * the browser path, the browser resolver, the browser launcher and the clock.
 *
 * @param url Address of the page to load.
 * @returns A promise of the headless fetch response.
 */
export declare function headlessFetch(
    url: string,
    { configuredPath, resolveBrowser, launchBrowser, now }?: {
        configuredPath?: string;
        resolveBrowser?: (options?: {
            configuredPath?: string;
        }) => Promise<BrowserResolutionResult>;
        launchBrowser?: (options: {
            executablePath: string;
        }) => Promise<{
            newContext: () => Promise<{
                newPage: () => Promise<any>;
                close: () => Promise<void>;
            }>;
            close: () => Promise<void>;
        }>;
        now?: () => number;
    }
): Promise<WebFetchHeadlessResponse>;
|
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
import { chromium } from 'playwright-core';
|
|
2
|
-
import { extractReadableContentSafely } from '../extract/readability.js';
|
|
3
|
-
import { resolveBrowserExecutable } from './browser-resolution.js';
|
|
4
|
-
/**
 * Tidy text extracted from a rendered page.
 *
 * Consecutive repeats of the phrases "Show more" and "Privacy Terms" are
 * collapsed to a single occurrence, then all whitespace runs become one space
 * and the result is trimmed.
 *
 * @param text Extracted page text.
 * @returns The cleaned, single-line text.
 */
function cleanupRenderedText(text) {
    return text
        .replace(/(Show more)(\s+\1){1,}/gi, '$1')
        .replace(/(Privacy Terms)(\s+\1){1,}/gi, '$1')
        .replace(/\s+/g, ' ')
        .trim();
}
|
|
10
|
-
/**
 * Load a page in a locally installed browser and extract its readable text.
 *
 * Flow: resolve a browser executable, launch it, navigate to `url`
 * (DOM content loaded, then `load`, then a best-effort wait for network idle),
 * extract readable content from the rendered HTML and clean it up.
 *
 * @param url Address of the page to load.
 * @param configuredPath Optional explicit browser executable path.
 * @param resolveBrowser Browser resolver; defaults to `resolveBrowserExecutable`.
 * @param launchBrowser Browser launcher; defaults to `chromium.launch` in headless mode.
 * @param now Clock used to measure navigation time; defaults to `Date.now`.
 * @returns A response whose `status` is `'ok'` (with `content`), `'blocked'`
 *          (fewer than 40 characters of cleaned text) or `'error'` (browser not
 *          found, or any failure while launching/navigating). Never rejects for
 *          launch or navigation failures; those are reported as `'error'`.
 */
export async function headlessFetch(url, { configuredPath, resolveBrowser = (options) => resolveBrowserExecutable({ configuredPath: options?.configuredPath }), launchBrowser = ({ executablePath }) => chromium.launch({ executablePath, headless: true }), now = () => Date.now() } = {}) {
    // Same limit the extractor applies by default; passed explicitly so the
    // truncation check below cannot drift from the extraction limit.
    const maxContentLength = 4000;
    const resolved = await resolveBrowser({ configuredPath });
    if (!resolved.ok) {
        return {
            status: 'error',
            url,
            metadata: { method: 'headless', cacheHit: false },
            error: resolved.error
        };
    }
    let browser;
    let context;
    let page;
    try {
        browser = await launchBrowser({ executablePath: resolved.executablePath });
        context = await browser.newContext();
        page = await context.newPage();
        const startedAt = now();
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 20000 });
        await page.waitForLoadState('load', { timeout: 10000 });
        // Network idle is best-effort: pages that never go idle are still extracted.
        await page.waitForLoadState('networkidle', { timeout: 5000 }).catch(() => undefined);
        const html = await page.content();
        const finishedAt = now();
        const extraction = extractReadableContentSafely(html, maxContentLength);
        // Decide truncation from the extractor's output. The cleanup below
        // collapses whitespace and shortens the text, so checking the cleaned
        // length would report truncated pages as complete.
        const truncated = extraction.content.text.length >= maxContentLength;
        const cleanedContent = {
            ...extraction.content,
            text: cleanupRenderedText(extraction.content.text)
        };
        const timedMetadata = {
            method: 'headless',
            cacheHit: false,
            browser: resolved.browser,
            navigationMs: finishedAt - startedAt
        };
        if (!cleanedContent.text || cleanedContent.text.length < 40) {
            return {
                status: 'blocked',
                url,
                metadata: timedMetadata,
                error: {
                    code: 'HEADLESS_EXTRACTION_WEAK',
                    message: 'Rendered page did not produce enough readable content.'
                }
            };
        }
        return {
            status: 'ok',
            url,
            content: cleanedContent,
            metadata: { ...timedMetadata, truncated }
        };
    }
    catch (error) {
        return {
            status: 'error',
            url,
            metadata: {
                method: 'headless',
                cacheHit: false,
                browser: resolved.browser
            },
            error: {
                code: 'HEADLESS_NAVIGATION_FAILED',
                message: error instanceof Error ? error.message : 'Unknown headless navigation failure.'
            }
        };
    }
    finally {
        // Close innermost first; close failures are ignored so they cannot mask the result.
        await page?.close?.().catch(() => undefined);
        await context?.close?.().catch(() => undefined);
        await browser?.close?.().catch(() => undefined);
    }
}
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
import { extractReadableContentSafely } from '../extract/readability.js';
|
|
2
|
-
/**
 * Heuristic for client-rendered pages: the markup contains a `<script` tag and
 * an element whose id attribute is written as `id="app"` or `id="root"`
 * (compared case-insensitively).
 *
 * @param html Raw HTML markup.
 * @returns `true` when both markers are present.
 */
function looksLikeScriptShell(html) {
    const haystack = html.toLowerCase();
    if (!haystack.includes('<script')) {
        return false;
    }
    const mountPoints = ['id="app"', 'id="root"'];
    return mountPoints.some((marker) => haystack.includes(marker));
}
|
|
6
|
-
/**
 * Decide whether HTTP-extracted text is too thin to trust.
 *
 * The text counts as weak only when it is non-empty but shorter than 120
 * characters (after whitespace normalisation) AND either it makes up less than
 * 2% of the normalised markup or the markup contains a "please enable
 * JavaScript"-style notice.
 *
 * @param options Object with the page `html` and the extracted `text`.
 * @returns `true` when the extraction looks unreliable.
 */
function isWeakHttpContent(options) {
    const collapse = (value) => value.replace(/\s+/g, ' ').trim();
    const textLength = collapse(options.text).length;
    const htmlLength = collapse(options.html).length;
    const mentionsJavascriptNotice = /enable javascript|javascript required|please turn on javascript/i.test(options.html);
    if (textLength === 0 || textLength >= 120) {
        return false;
    }
    if (htmlLength > 0 && textLength / htmlLength < 0.02) {
        return true;
    }
    return mentionsJavascriptNotice;
}
|
|
16
|
-
/**
 * Build a plain-HTTP page fetcher.
 *
 * The returned function requests the URL, refuses anything whose content type
 * does not include `text/html`, extracts readable content, and asks for a
 * headless retry when the extraction looks unreliable.
 *
 * @param fetchImpl Fetch implementation to use; defaults to the global `fetch`.
 * @returns An async `httpFetch(url)` resolving to a response whose `status` is
 *          `'unsupported'`, `'needs_headless'` or `'ok'`.
 */
export function createHttpFetcher({ fetchImpl = fetch } = {}) {
    async function httpFetch(url) {
        const response = await fetchImpl(url);
        const contentType = response.headers.get('content-type') ?? '';
        const finalUrl = () => response.url;
        if (!contentType.includes('text/html')) {
            return {
                status: 'unsupported',
                url: finalUrl(),
                metadata: { method: 'http', cacheHit: false, contentType }
            };
        }
        const html = await response.text();
        const { content } = extractReadableContentSafely(html);
        // Any one of these signals means the HTTP result should not be trusted.
        const unreliable = looksLikeScriptShell(html) ||
            content.text.length < 40 ||
            isWeakHttpContent({ html, title: content.title, text: content.text });
        if (unreliable) {
            return {
                status: 'needs_headless',
                url: finalUrl(),
                metadata: { method: 'http', cacheHit: false, contentType },
                error: {
                    code: 'WEAK_EXTRACTION',
                    message: 'HTTP extraction was not reliable enough.'
                }
            };
        }
        const truncated = content.text.length >= 4000;
        return {
            status: 'ok',
            url: finalUrl(),
            content,
            metadata: { method: 'http', cacheHit: false, contentType, truncated }
        };
    }
    return httpFetch;
}
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
import type { WebFetchHeadlessResponse, WebFetchResponse, WebSearchResponse } from '../types.js';
|
|
2
|
-
/**
 * Build a research workflow from a search tool, an HTTP page fetcher and a
 * headless fetcher; all three are optional.
 *
 * @returns An object whose `run({ query })` resolves to a decision
 *          (`answer`, `escalate-headless` or `research-again`) together with
 *          the evidence and the worker pass it was based on.
 */
export declare function createResearchWorkflow(
    { search, fetchPage, headlessFetch }?: {
        search?: (input: { query: string; }) => Promise<WebSearchResponse>;
        fetchPage?: (input: { url: string; }) => Promise<WebFetchResponse>;
        headlessFetch?: (input: { url: string; }) => Promise<WebFetchHeadlessResponse>;
    }
): {
    run({ query }: { query: string; }): Promise<
        | {
            decision: {
                action: "answer";
                rationale: string;
                approvedEvidence: import("./research-types.js").ResearchEvidence[];
            };
            evidence: import("./research-types.js").ResearchEvidence[];
            workerPass: import("./research-types.js").ResearchWorkerResult;
        }
        | {
            decision: {
                action: "escalate-headless";
                rationale: string;
                url: string;
                approvedEvidence: import("./research-types.js").ResearchEvidence[];
            };
            evidence: import("./research-types.js").ResearchEvidence[];
            workerPass: import("./research-types.js").ResearchWorkerResult;
        }
        | {
            decision: {
                action: "research-again";
                rationale: string;
                followupQuery: string;
            };
            evidence: import("./research-types.js").ResearchEvidence[];
            workerPass: import("./research-types.js").ResearchWorkerResult;
        }
    >;
};
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
import { createWebFetchHeadlessTool } from '../tools/web-fetch-headless.js';
|
|
2
|
-
import { createWebFetchTool } from '../tools/web-fetch.js';
|
|
3
|
-
import { createWebSearchTool } from '../tools/web-search.js';
|
|
4
|
-
import { createResearchOrchestrator } from './research-orchestrator.js';
|
|
5
|
-
import { createResearchWorker } from './research-worker.js';
|
|
6
|
-
/**
 * Wire the default tools into a research workflow.
 *
 * A worker is built from `search` and `fetchPage`, then handed to the
 * orchestrator together with `headlessFetch`.
 *
 * @param search Search tool; defaults to `createWebSearchTool()`.
 * @param fetchPage HTTP page fetcher; defaults to `createWebFetchTool()`.
 * @param headlessFetch Headless fetcher; defaults to `createWebFetchHeadlessTool()`.
 * @returns The orchestrator created by `createResearchOrchestrator`.
 */
export function createResearchWorkflow({ search = createWebSearchTool(), fetchPage = createWebFetchTool(), headlessFetch = createWebFetchHeadlessTool() } = {}) {
    return createResearchOrchestrator({
        worker: createResearchWorker({ search, fetchPage }),
        headlessFetch
    });
}
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
import type { WebFetchHeadlessResponse } from '../types.js';
|
|
2
|
-
import type { ResearchEvidence, ResearchWorkerResult } from './research-types.js';
|
|
3
|
-
/**
 * Build the orchestrator that turns one worker pass into a decision.
 *
 * @param worker Object whose `run` performs a budgeted research pass.
 * @param headlessFetch Fetcher used for a headless retry of a single URL.
 * @returns An object whose `run({ query })` resolves to a decision
 *          (`answer`, `escalate-headless` or `research-again`) together with
 *          the evidence and the worker pass it was based on.
 */
export declare function createResearchOrchestrator(
    { worker, headlessFetch }: {
        worker: {
            run: (input: {
                query: string;
                maxSearchRounds: number;
                maxFetches: number;
            }) => Promise<ResearchWorkerResult>;
        };
        headlessFetch: (input: { url: string; }) => Promise<WebFetchHeadlessResponse>;
    }
): {
    run({ query }: { query: string; }): Promise<
        | {
            decision: {
                action: "answer";
                rationale: string;
                approvedEvidence: ResearchEvidence[];
            };
            evidence: ResearchEvidence[];
            workerPass: ResearchWorkerResult;
        }
        | {
            decision: {
                action: "escalate-headless";
                rationale: string;
                url: string;
                approvedEvidence: ResearchEvidence[];
            };
            evidence: ResearchEvidence[];
            workerPass: ResearchWorkerResult;
        }
        | {
            decision: {
                action: "research-again";
                rationale: string;
                followupQuery: string;
            };
            evidence: ResearchEvidence[];
            workerPass: ResearchWorkerResult;
        }
    >;
};
|
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
/**
 * Map a source kind to its sort priority; lower numbers sort first.
 *
 * @param sourceKind Kind label of an evidence source.
 * @returns 0-5 for the known kinds in priority order, 6 for anything else.
 */
function sourceRank(sourceKind) {
    // Position in this list is the rank.
    const priority = [
        'official-docs',
        'official-api',
        'official-discussion',
        'issue-thread',
        'community',
        'package-page'
    ];
    const position = priority.indexOf(sourceKind);
    return position === -1 ? 6 : position;
}
|
|
19
|
-
/**
 * Return a copy of the evidence ordered by source rank, best sources first.
 * The input array is not modified.
 *
 * @param evidence Evidence items, each with a `sourceKind`.
 * @returns A new, sorted array.
 */
function sortEvidence(evidence) {
    const byRank = (a, b) => sourceRank(a.sourceKind) - sourceRank(b.sourceKind);
    const copy = Array.from(evidence);
    copy.sort(byRank);
    return copy;
}
|
|
22
|
-
/**
 * Keep only evidence from official sources (docs, API or discussion).
 *
 * @param evidence Evidence items, each with a `sourceKind`.
 * @returns A new array with the official-source items, in their original order.
 */
function strongEvidence(evidence) {
    const officialKinds = new Set(['official-docs', 'official-api', 'official-discussion']);
    return evidence.filter(({ sourceKind }) => officialKinds.has(sourceKind));
}
|
|
27
|
-
/**
 * Check whether any evidence item comes from official docs or an official API.
 *
 * @param evidence Evidence items, each with a `sourceKind`.
 * @returns `true` when at least one such item exists.
 */
function hasOfficialDocsOrApi(evidence) {
    for (const { sourceKind } of evidence) {
        if (sourceKind === 'official-docs' || sourceKind === 'official-api') {
            return true;
        }
    }
    return false;
}
|
|
30
|
-
/**
 * Check whether any low-value outcome was recorded as a bot check.
 *
 * @param outcomes Low-value outcomes, each with a `kind`.
 * @returns `true` when at least one outcome has kind `'bot-check'`.
 */
function hasBotCheck(outcomes) {
    const botCheckIndex = outcomes.findIndex(({ kind }) => kind === 'bot-check');
    return botCheckIndex !== -1;
}
|
|
33
|
-
/**
 * Decide whether a headless retry of the worker's suggested URL is justified.
 *
 * A retry is rejected when there is no suggested URL, when the pass hit a bot
 * check, when there are already two or more approved items including official
 * docs or API evidence, or when the URL is an npm package page.
 *
 * @param pass Result of the worker pass.
 * @param approvedEvidence Evidence already approved by the orchestrator.
 * @returns `true` when the headless retry should be attempted.
 */
function isHeadlessWorthTrying(pass, approvedEvidence) {
    const candidate = pass.suggestedHeadlessUrl;
    if (!candidate) {
        return false;
    }
    if (hasBotCheck(pass.lowValueOutcomes)) {
        return false;
    }
    const alreadyCovered = approvedEvidence.length >= 2 && hasOfficialDocsOrApi(approvedEvidence);
    if (alreadyCovered) {
        return false;
    }
    const isNpmPackagePage = candidate.includes('npmjs.com/package/');
    return !isNpmPackagePage;
}
|
|
43
|
-
/**
 * Build the orchestrator that runs one worker pass and decides what to do next.
 *
 * Decision order:
 *  1. `answer` when at least two official-source items exist and one of them is
 *     official docs or API evidence;
 *  2. `escalate-headless` when a headless retry is judged worthwhile (the
 *     headless fetch is performed before returning);
 *  3. otherwise `research-again` with the original query as follow-up.
 * Package-page evidence is always excluded from the approved evidence.
 *
 * @param worker Object whose `run` performs the research pass.
 * @param headlessFetch Fetcher used for the headless retry.
 * @returns An object with an async `run({ query })` resolving to
 *          `{ decision, evidence, workerPass }`.
 */
export function createResearchOrchestrator({ worker, headlessFetch }) {
    async function run({ query }) {
        const pass = await worker.run({ query, maxSearchRounds: 1, maxFetches: 3 });
        const withoutPackagePages = pass.evidence.filter((item) => item.sourceKind !== 'package-page');
        const approvedEvidence = sortEvidence(withoutPackagePages);
        // Every outcome shares the same envelope around its decision.
        const conclude = (decision) => ({ decision, evidence: approvedEvidence, workerPass: pass });
        const strongCount = strongEvidence(approvedEvidence).length;
        if (strongCount >= 2 && hasOfficialDocsOrApi(approvedEvidence)) {
            return conclude({
                action: 'answer',
                rationale: 'Two strong sources with official support are enough to answer safely.',
                approvedEvidence
            });
        }
        if (isHeadlessWorthTrying(pass, approvedEvidence)) {
            const url = pass.suggestedHeadlessUrl;
            // NOTE(review): the headless response is awaited but not used or
            // returned - confirm whether callers are expected to fetch again.
            await headlessFetch({ url });
            return conclude({
                action: 'escalate-headless',
                rationale: 'One high-value page is worth a single orchestrator-approved headless retry.',
                url,
                approvedEvidence
            });
        }
        const noConcreteGap = pass.gaps.length === 0;
        const onlyLowValueOutcomes = pass.lowValueOutcomes.length > 0 && pass.evidence.length === 0;
        const rationale = noConcreteGap || onlyLowValueOutcomes
            ? 'Current results did not justify more escalation; continue only with a more targeted pass.'
            : 'The first pass did not gather enough strong evidence to answer safely.';
        return conclude({
            action: 'research-again',
            rationale,
            followupQuery: query
        });
    }
    return { run };
}
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
/** Label describing where a piece of research evidence came from. */
export type ResearchSourceKind = 'official-docs' | 'official-api' | 'official-discussion' | 'community' | 'issue-thread' | 'package-page' | 'other';
|
|
2
|
-
/** Research method label: search, plain HTTP fetch, or headless browser fetch. */
export type ResearchMethod = 'search' | 'http' | 'headless';
|
|
3
|
-
/**
 * One piece of evidence gathered from a fetched page.
 * `method` is any research method except `'search'`.
 */
export type ResearchEvidence = {
    title: string;
    url: string;
    sourceKind: ResearchSourceKind;
    method: Exclude<ResearchMethod, 'search'>;
    summary: string;
    supports: string[];
};
|
|
11
|
-
/** A gap reported by a research pass, with its category and a message. */
export type ResearchGap = {
    kind: 'needs-more-evidence' | 'conflict' | 'fetch-failed';
    message: string;
};
|
|
15
|
-
/**
 * A low-value outcome recorded during a research pass; `url` is optional.
 */
export type ResearchLowValueOutcome = {
    kind: 'empty-search' | 'bot-check' | 'low-value-page' | 'duplicate-evidence';
    url?: string;
    message: string;
};
|
|
20
|
-
/**
 * Everything a research worker pass reports back: the queries it issued, the
 * evidence, gaps and low-value outcomes it collected, an optional URL suggested
 * for a headless retry, and whether its budget was exhausted.
 */
export type ResearchWorkerResult = {
    searchQueries: string[];
    evidence: ResearchEvidence[];
    gaps: ResearchGap[];
    lowValueOutcomes: ResearchLowValueOutcome[];
    suggestedHeadlessUrl?: string;
    exhaustedBudget: boolean;
};
|
|
28
|
-
/**
 * Decision produced by the orchestrator, discriminated on `action`:
 * answer now, run another research pass with a follow-up query, or escalate a
 * single URL to a headless fetch.
 */
export type ResearchOrchestratorDecision =
    | {
        action: 'answer';
        rationale: string;
        approvedEvidence: ResearchEvidence[];
    }
    | {
        action: 'research-again';
        rationale: string;
        followupQuery: string;
    }
    | {
        action: 'escalate-headless';
        rationale: string;
        url: string;
        approvedEvidence: ResearchEvidence[];
    };
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import type { WebFetchResponse, WebSearchResponse } from '../types.js';
|
|
2
|
-
import type { ResearchWorkerResult } from './research-types.js';
|
|
3
|
-
/**
 * Build a research worker from a search function and a page fetcher.
 *
 * @param search Function that runs a web search for a query.
 * @param fetchPage Function that fetches a page by URL.
 * @returns An object whose `run` takes a query plus search-round and fetch
 *          budgets and resolves to the worker result.
 */
export declare function createResearchWorker(
    { search, fetchPage }: {
        search: (input: { query: string; }) => Promise<WebSearchResponse>;
        fetchPage: (input: { url: string; }) => Promise<WebFetchResponse>;
    }
): {
    run({ query, maxSearchRounds, maxFetches }: {
        query: string;
        maxSearchRounds: number;
        maxFetches: number;
    }): Promise<ResearchWorkerResult>;
};
|