@demigodmode/pi-web-agent 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -199
- package/dist/scripts/live-web-eval.d.ts +1 -0
- package/dist/scripts/live-web-eval.js +411 -0
- package/dist/src/cache/ttl-cache.d.ts +8 -0
- package/dist/src/cache/ttl-cache.js +21 -0
- package/dist/src/extension.d.ts +2 -0
- package/dist/src/extension.js +155 -0
- package/dist/src/extract/readability.d.ts +8 -0
- package/dist/src/extract/readability.js +93 -0
- package/dist/src/fetch/browser-resolution.d.ts +15 -0
- package/dist/src/fetch/browser-resolution.js +55 -0
- package/dist/src/fetch/headless-fetch.d.ts +18 -0
- package/dist/src/fetch/headless-fetch.js +87 -0
- package/dist/src/fetch/http-fetch.d.ts +4 -0
- package/dist/src/fetch/http-fetch.js +50 -0
- package/dist/src/orchestration/index.d.ts +41 -0
- package/dist/src/orchestration/index.js +9 -0
- package/dist/src/orchestration/research-orchestrator.d.ts +43 -0
- package/dist/src/orchestration/research-orchestrator.js +87 -0
- package/dist/src/orchestration/research-types.d.ts +41 -0
- package/dist/src/orchestration/research-types.js +1 -0
- package/dist/src/orchestration/research-worker.d.ts +16 -0
- package/dist/src/orchestration/research-worker.js +131 -0
- package/dist/src/search/duckduckgo.d.ts +9 -0
- package/dist/src/search/duckduckgo.js +52 -0
- package/dist/src/tools/web-explore.d.ts +44 -0
- package/dist/src/tools/web-explore.js +50 -0
- package/dist/src/tools/web-fetch-headless.d.ts +6 -0
- package/dist/src/tools/web-fetch-headless.js +14 -0
- package/dist/src/tools/web-fetch.d.ts +6 -0
- package/dist/src/tools/web-fetch.js +14 -0
- package/dist/src/tools/web-search.d.ts +10 -0
- package/dist/src/tools/web-search.js +103 -0
- package/dist/src/types.d.ts +48 -0
- package/dist/src/types.js +7 -0
- package/dist/tests/cache/ttl-cache.test.d.ts +1 -0
- package/dist/tests/cache/ttl-cache.test.js +19 -0
- package/dist/tests/contracts.test.d.ts +1 -0
- package/dist/tests/contracts.test.js +65 -0
- package/dist/tests/extension.test.d.ts +1 -0
- package/dist/tests/extension.test.js +123 -0
- package/dist/tests/extract/readability.test.d.ts +1 -0
- package/dist/tests/extract/readability.test.js +79 -0
- package/dist/tests/fetch/browser-resolution.test.d.ts +1 -0
- package/dist/tests/fetch/browser-resolution.test.js +37 -0
- package/dist/tests/fetch/headless-fetch.smoke.test.d.ts +1 -0
- package/dist/tests/fetch/headless-fetch.smoke.test.js +17 -0
- package/dist/tests/fetch/headless-fetch.test.d.ts +1 -0
- package/dist/tests/fetch/headless-fetch.test.js +150 -0
- package/dist/tests/fetch/http-fetch.test.d.ts +1 -0
- package/dist/tests/fetch/http-fetch.test.js +129 -0
- package/dist/tests/orchestration/research-orchestrator.test.d.ts +1 -0
- package/dist/tests/orchestration/research-orchestrator.test.js +298 -0
- package/dist/tests/orchestration/research-worker.test.d.ts +1 -0
- package/dist/tests/orchestration/research-worker.test.js +171 -0
- package/dist/tests/orchestration/research-workflow.test.d.ts +1 -0
- package/dist/tests/orchestration/research-workflow.test.js +119 -0
- package/dist/tests/package-manifest.test.d.ts +1 -0
- package/dist/tests/package-manifest.test.js +29 -0
- package/dist/tests/release-foundation.test.d.ts +1 -0
- package/dist/tests/release-foundation.test.js +16 -0
- package/dist/tests/release-script.test.d.ts +1 -0
- package/dist/tests/release-script.test.js +72 -0
- package/dist/tests/search/duckduckgo.test.d.ts +1 -0
- package/dist/tests/search/duckduckgo.test.js +103 -0
- package/dist/tests/tools/web-explore.test.d.ts +1 -0
- package/dist/tests/tools/web-explore.test.js +163 -0
- package/dist/tests/tools/web-fetch-headless.test.d.ts +1 -0
- package/dist/tests/tools/web-fetch-headless.test.js +31 -0
- package/dist/tests/tools/web-fetch.test.d.ts +1 -0
- package/dist/tests/tools/web-fetch.test.js +27 -0
- package/dist/tests/tools/web-search.test.d.ts +1 -0
- package/dist/tests/tools/web-search.test.js +125 -0
- package/dist/vitest.config.d.ts +2 -0
- package/dist/vitest.config.js +13 -0
- package/package.json +5 -1
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { createTtlCache } from '../../src/cache/ttl-cache.js';
|
|
3
|
+
/**
 * Checks createTtlCache against an injected clock: a stored entry stays
 * readable while the clock is inside the 100 ms TTL and reads back as
 * undefined once the clock has moved past it.
 */
describe('TTL cache', () => {
    // Builds a 100 ms cache driven by a mutable clock object, so each test can
    // move time forward without real timers.
    const buildCache = (startMs) => {
        const clock = { ms: startMs };
        const cache = createTtlCache({ ttlMs: 100, now: () => clock.ms });
        return { cache, clock };
    };

    it('returns cached values before expiry', () => {
        const { cache, clock } = buildCache(1_000);
        cache.set('key', 'value');
        expect(cache.get('key')).toBe('value');
        // 50 ms after the write: still inside the TTL window.
        clock.ms = 1_050;
        expect(cache.get('key')).toBe('value');
    });

    it('drops cached values after expiry', () => {
        const { cache, clock } = buildCache(1_000);
        cache.set('key', 'value');
        // 101 ms after the write: one millisecond past the TTL.
        clock.ms = 1_101;
        expect(cache.get('key')).toBeUndefined();
    });
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { TOOL_STATUSES } from '../src/types.js';
|
|
3
|
+
import extension from '../src/extension.js';
|
|
4
|
+
/**
 * Contract checks for the shared tool result shapes. Only the first test reads
 * a value exported by the package (TOOL_STATUSES); the remaining tests assert
 * on object literals declared inside the test itself.
 */
describe('shared tool contracts', () => {
    it('exposes the allowed tool statuses', () => {
        const expectedStatuses = ['ok', 'needs_headless', 'blocked', 'unsupported', 'error'];
        expect(TOOL_STATUSES).toEqual(expectedStatuses);
    });

    it('allows normalized search results', () => {
        const searchHit = {
            title: 'Example',
            url: 'https://example.com',
            snippet: 'Example snippet'
        };
        expect(searchHit.url).toContain('https://');
    });

    it('shapes search and fetch responses around status + metadata', () => {
        const searchResponse = {
            status: 'ok',
            results: [],
            metadata: { backend: 'duckduckgo', cacheHit: false }
        };
        const httpResponse = {
            status: 'needs_headless',
            url: 'https://example.com',
            metadata: { method: 'http', cacheHit: false }
        };
        const headlessResponse = {
            status: 'error',
            url: 'https://example.com',
            metadata: { method: 'headless', cacheHit: false },
            error: { code: 'NOT_IMPLEMENTED', message: 'stub' }
        };

        expect(searchResponse.status).toBe('ok');
        expect(httpResponse.status).toBe('needs_headless');
        expect(headlessResponse.metadata.method).toBe('headless');
    });
});
|
|
44
|
+
/** Confirms that the default export of the extension module is callable. */
describe('extension surface', () => {
    it('exports a Pi extension function', () => {
        const exportedKind = typeof extension;
        expect(exportedKind).toBe('function');
    });
});
|
|
49
|
+
/**
 * Shape check for a headless result literal whose metadata carries the
 * browser name and the navigation time alongside method and cacheHit.
 */
describe('headless metadata', () => {
    it('allows headless metadata to carry browser and navigation timing', () => {
        const headlessResult = {
            status: 'ok',
            url: 'https://example.com',
            metadata: {
                method: 'headless',
                cacheHit: false,
                browser: 'chrome',
                navigationMs: 1500
            },
            content: { text: 'Rendered text' }
        };
        const { browser, navigationMs } = headlessResult.metadata;

        expect(browser).toBe('chrome');
        expect(navigationMs).toBe(1500);
    });
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import extension from '../src/extension.js';
|
|
3
|
+
/**
 * Tests for the extension entrypoint. Each test calls extension(pi) with a
 * stub host exposing registerTool and on, then inspects what was registered.
 * The three tests with a 15000 ms timeout call the registered tools' execute
 * functions directly.
 */
describe('Pi extension entrypoint', () => {
    // Looks a registered tool definition up by its name.
    const findTool = (tools, name) => tools.find((tool) => tool.name === name);

    // Registers the extension against a host whose registerTool is a spy and
    // returns the tool definitions that spy received, in call order.
    const registerAndListTools = () => {
        const registerTool = vi.fn();
        extension({ registerTool, on: vi.fn() });
        return { registerTool, tools: registerTool.mock.calls.map(([tool]) => tool) };
    };

    // Registers the extension and records every event handler passed to on().
    const captureHandlers = () => {
        const handlers = new Map();
        extension({
            registerTool: vi.fn(),
            on: vi.fn((eventName, handler) => {
                handlers.set(eventName, handler);
            })
        });
        return handlers;
    };

    // Registers the extension and collects the tool definitions in a plain array.
    const collectTools = () => {
        const tools = [];
        extension({
            registerTool: (tool) => tools.push(tool),
            on: vi.fn()
        });
        return tools;
    };

    it('registers four tools with Pi', () => {
        const { registerTool, tools } = registerAndListTools();
        expect(registerTool).toHaveBeenCalledTimes(4);
        expect(tools.map((tool) => tool.name)).toEqual([
            'web_search',
            'web_fetch',
            'web_fetch_headless',
            'web_explore'
        ]);
    });

    it('describes web_explore as the preferred tool for research-style web questions', () => {
        const { tools } = registerAndListTools();
        const webExplore = findTool(tools, 'web_explore');
        expect(webExplore).toBeDefined();

        const expectedPhrases = [
            'Prefer this for multi-source web research',
            'current docs/discussion lookups',
            'Use this instead of chaining low-level web tools'
        ];
        for (const phrase of expectedPhrases) {
            expect(webExplore.description).toContain(phrase);
        }

        const parameterProps = webExplore.parameters.properties;
        expect(parameterProps).toHaveProperty('query');
        expect(Object.keys(parameterProps)).toEqual(['query']);
    });

    it('describes low-level tools as direct/manual tools and points research prompts to web_explore', () => {
        const { tools } = registerAndListTools();
        const webSearch = findTool(tools, 'web_search');
        const webFetch = findTool(tools, 'web_fetch');
        const webFetchHeadless = findTool(tools, 'web_fetch_headless');

        expect(webSearch.description).toContain('Direct search tool for manual discovery');
        expect(webSearch.description).toContain('Prefer web_explore for broader research questions');
        expect(webFetch.description).toContain('Direct HTTP page fetch for a specific URL');
        expect(webFetch.description).toContain('Prefer web_explore for broader research across multiple sources');
        expect(webFetchHeadless.description).toContain('Direct headless page fetch for a specific URL');
        expect(webFetchHeadless.description).toContain('Prefer web_explore for research tasks');
    });

    it('adds a short research hint that prefers web_explore for multi-source web questions', async () => {
        const beforeAgentStart = captureHandlers().get('before_agent_start');
        expect(beforeAgentStart).toBeDefined();

        const event = {
            prompt: 'Find current docs or discussions about Playwright launching an installed Chrome or Edge executable instead of a bundled browser, then summarize the recommended approach.',
            images: [],
            systemPrompt: 'Base system prompt'
        };
        const { systemPrompt } = await beforeAgentStart(event, {});

        const expectedHints = [
            'prefer web_explore',
            'finding and comparing multiple sources',
            'Use web_search, web_fetch, and web_fetch_headless for direct/manual operations',
            'After using web_explore, only call low-level web tools if there is a specific unresolved gap',
            'Do not keep searching or fetching just for extra confirmation'
        ];
        for (const hint of expectedHints) {
            expect(systemPrompt).toContain(hint);
        }
    });

    it('does not register a context hook that injects reminder text into the visible session', () => {
        const handlers = captureHandlers();
        expect(handlers.has('context')).toBe(false);
    });

    it('returns human-readable content for web_explore instead of only raw json', async () => {
        const webExplore = findTool(collectTools(), 'web_explore');
        const result = await webExplore.execute('tool-call-1', {
            query: 'example query'
        });
        const [firstPart] = result.content;
        expect(firstPart.text).toContain('Findings');
        expect(firstPart.text).toContain('Sources');
    }, 15000);

    it('blocks low-level web_search after a successful web_explore in the same tool flow', async () => {
        const tools = collectTools();
        const webExplore = findTool(tools, 'web_explore');
        const webSearch = findTool(tools, 'web_search');

        // web_explore must run first; the guard is asserted on the follow-up search.
        await webExplore.execute('tool-call-1', { query: 'example query' });
        const result = await webSearch.execute('tool-call-2', { query: 'follow-up search' });

        expect(result.isError).toBe(true);
        expect(result.details).toMatchObject({
            status: 'error',
            error: {
                code: 'POST_WEB_EXPLORE_GUARD',
                message: 'web_explore already ran for this research task. Only use low-level web tools if there is a specific unresolved gap.'
            }
        });
    }, 15000);

    it('still allows low-level tools before web_explore runs', async () => {
        const webSearch = findTool(collectTools(), 'web_search');
        const { details } = await webSearch.execute('tool-call-1', { query: 'plain search' });
        expect(details.status).not.toBe('error');
        expect(details.error?.code).not.toBe('POST_WEB_EXPLORE_GUARD');
    }, 15000);
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { extractReadableContent, extractReadableContentSafely } from '../../src/extract/readability.js';
|
|
3
|
+
/**
 * Tests for extractReadableContent and extractReadableContentSafely. The two
 * "Safely" cases feed HTML whose <style> block uses nested `&:hover` rules and
 * expect the result to report mode 'fallback'.
 */
describe('readability extraction', () => {
    // Article-shaped page with a title and two paragraphs.
    const ARTICLE_HTML = `
<html>
<head><title>Example Title</title></head>
<body>
<article>
<h1>Example Title</h1>
<p>First paragraph.</p>
<p>Second paragraph.</p>
</article>
</body>
</html>
`;

    // Single paragraph of 200 repeated words, used with a 50-character limit.
    const LONG_ARTICLE_HTML = `
<html><body><article><p>${'word '.repeat(200)}</p></article></body></html>
`;

    // Page with a nested-rule stylesheet and its content inside <main>.
    const BROKEN_CSS_MAIN_HTML = `
<html>
<head>
<title>Broken CSS Page</title>
<style>
.btn {
color: red;
&:hover {
color: blue;
}
}
</style>
</head>
<body>
<main>
<h1>Broken CSS Page</h1>
<p>This content should still be recoverable even if JSDOM rejects the stylesheet.</p>
<p>The fallback path should keep this text readable.</p>
</main>
</body>
</html>
`;

    // Page with a nested-rule stylesheet and only bare <div>s in the body.
    const BROKEN_CSS_BODY_HTML = `
<html>
<head>
<title>Body Fallback</title>
<style>
.card {
&:hover {
opacity: 1;
}
}
</style>
</head>
<body>
<div>First useful paragraph with enough readable content to keep.</div>
<div>Second useful paragraph that should also survive fallback extraction.</div>
</body>
</html>
`;

    it('extracts readable text from article-like HTML', () => {
        const extracted = extractReadableContent(ARTICLE_HTML);
        expect(extracted).toMatchObject({
            title: 'Example Title',
            text: expect.stringContaining('First paragraph.')
        });
    });

    it('truncates long extracted text', () => {
        const maxLength = 50;
        const extracted = extractReadableContent(LONG_ARTICLE_HTML, maxLength);
        expect(extracted.text.length).toBeLessThanOrEqual(50);
    });

    it('falls back to lightweight extraction when stylesheet parsing breaks the primary path', () => {
        const { mode, content } = extractReadableContentSafely(BROKEN_CSS_MAIN_HTML);
        expect(mode).toBe('fallback');
        expect(content.title).toBe('Broken CSS Page');
        expect(content.text).toContain('This content should still be recoverable');
        // Stylesheet source must not leak into the extracted text.
        expect(content.text).not.toContain('color: red');
    });

    it('returns fallback text from body content when main/article tags are missing', () => {
        const { mode, content } = extractReadableContentSafely(BROKEN_CSS_BODY_HTML);
        expect(mode).toBe('fallback');
        expect(content.title).toBe('Body Fallback');
        expect(content.text).toContain('First useful paragraph');
    });
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { resolveBrowserExecutable } from '../../src/fetch/browser-resolution.js';
|
|
3
|
+
/**
 * Tests for resolveBrowserExecutable with an injected fileExists probe, so no
 * real filesystem access is needed.
 */
describe('browser resolution', () => {
    const CONFIGURED_PATH = 'C:/Browsers/Chrome/chrome.exe';
    const WINDOWS_CHROME_PATH = 'C:/Program Files/Google/Chrome/Application/chrome.exe';

    // Spy probe that reports only `existingPath` as present on disk.
    const onlyExists = (existingPath) => vi.fn(async (candidate) => candidate === existingPath);

    it('prefers an explicit configured browser path', async () => {
        const resolution = await resolveBrowserExecutable({
            configuredPath: CONFIGURED_PATH,
            fileExists: onlyExists(CONFIGURED_PATH)
        });
        expect(resolution).toEqual({
            ok: true,
            executablePath: 'C:/Browsers/Chrome/chrome.exe',
            browser: 'configured'
        });
    });

    it('falls back to detected Windows Chrome before Edge', async () => {
        const resolution = await resolveBrowserExecutable({
            fileExists: onlyExists(WINDOWS_CHROME_PATH)
        });
        expect(resolution).toEqual({
            ok: true,
            executablePath: 'C:/Program Files/Google/Chrome/Application/chrome.exe',
            browser: 'chrome'
        });
    });

    it('returns a structured startup error when no browser exists', async () => {
        const nothingExists = vi.fn(async () => false);
        const resolution = await resolveBrowserExecutable({ fileExists: nothingExists });
        expect(resolution).toEqual({
            ok: false,
            error: {
                code: 'BROWSER_NOT_FOUND',
                message: 'No compatible local browser was found for headless fetch.'
            }
        });
    });
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { resolveBrowserExecutable } from '../../src/fetch/browser-resolution.js';
|
|
3
|
+
import { headlessFetch } from '../../src/fetch/headless-fetch.js';
|
|
4
|
+
// Opt-in switch: the smoke suite only runs when PI_HEADLESS_SMOKE is exactly '1'.
const runSmoke = process.env.PI_HEADLESS_SMOKE === '1';

/**
 * Smoke test that resolves a browser with default options and fetches
 * https://example.com through headlessFetch. Skipped unless runSmoke is true.
 */
describe.skipIf(!runSmoke)('headless fetch smoke', () => {
    it('launches a local browser when one is available', async () => {
        const resolution = await resolveBrowserExecutable({});
        expect(resolution.ok).toBe(true);
        if (!resolution.ok) {
            return;
        }

        const { executablePath } = resolution;
        const fetched = await headlessFetch('https://example.com', {
            configuredPath: executablePath
        });

        // Either outcome is accepted here, as long as the headless path produced it.
        const acceptedStatuses = ['ok', 'blocked'];
        expect(acceptedStatuses).toContain(fetched.status);
        expect(fetched.metadata.method).toBe('headless');
    }, 30000);
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { headlessFetch } from '../../src/fetch/headless-fetch.js';
|
|
3
|
+
/**
 * Unit tests for headlessFetch. The resolveBrowser, launchBrowser and now
 * collaborators are all injected, so no real browser is started.
 */
describe('headless fetch', () => {
    const CHROME_PATH = 'C:/Program Files/Google/Chrome/Application/chrome.exe';
    const EDGE_PATH = 'C:/Program Files/Microsoft/Edge/Application/msedge.exe';

    /**
     * Builds a browser -> context -> page chain of test doubles.
     *
     * @param {object} [pageOverrides] - Page members that replace the no-op defaults
     *   (for example a failing `goto` or a `content` returning fixture HTML).
     * @returns {{ browser: object, closePage: Function, closeContext: Function, closeBrowser: Function }}
     *   The browser double plus the three close spies, so tests can assert cleanup.
     */
    function createBrowserDouble(pageOverrides = {}) {
        const closePage = vi.fn(async () => undefined);
        const closeContext = vi.fn(async () => undefined);
        const closeBrowser = vi.fn(async () => undefined);
        const page = {
            goto: vi.fn(async () => undefined),
            waitForLoadState: vi.fn(async () => undefined),
            content: vi.fn(async () => ''),
            close: closePage,
            ...pageOverrides
        };
        const context = {
            newPage: vi.fn(async () => page),
            close: closeContext
        };
        const browser = {
            newContext: vi.fn(async () => context),
            close: closeBrowser
        };
        return { browser, closePage, closeContext, closeBrowser };
    }

    it('returns a startup error when no browser can be resolved', async () => {
        const result = await headlessFetch('https://example.com', {
            resolveBrowser: vi.fn().mockResolvedValue({
                ok: false,
                error: {
                    code: 'BROWSER_NOT_FOUND',
                    message: 'No compatible local browser was found for headless fetch.'
                }
            })
        });
        expect(result).toMatchObject({
            status: 'error',
            error: { code: 'BROWSER_NOT_FOUND' },
            metadata: { method: 'headless', cacheHit: false }
        });
    });

    it('extracts rendered page content and includes navigation metadata', async () => {
        const { browser, closePage, closeContext, closeBrowser } = createBrowserDouble({
            content: vi.fn(async () => '<html><body><article><p>Rendered text with enough content to pass extraction.</p></article></body></html>')
        });
        const result = await headlessFetch('https://example.com', {
            resolveBrowser: vi.fn().mockResolvedValue({
                ok: true,
                executablePath: CHROME_PATH,
                browser: 'chrome'
            }),
            launchBrowser: vi.fn(async () => browser),
            // Two clock reads 1600 ms apart; navigationMs is asserted against that gap.
            now: vi.fn()
                .mockReturnValueOnce(1000)
                .mockReturnValueOnce(2600)
        });
        expect(result.status).toBe('ok');
        expect(result.metadata.method).toBe('headless');
        expect(result.metadata.browser).toBe('chrome');
        expect(result.metadata.navigationMs).toBe(1600);
        expect(result.content?.text).toContain('Rendered text');
        expect(closePage).toHaveBeenCalledTimes(1);
        expect(closeContext).toHaveBeenCalledTimes(1);
        expect(closeBrowser).toHaveBeenCalledTimes(1);
    });

    it('still cleans up browser resources when navigation fails', async () => {
        const { browser, closePage, closeContext, closeBrowser } = createBrowserDouble({
            goto: vi.fn(async () => {
                throw new Error('navigation failed');
            })
        });
        const result = await headlessFetch('https://example.com', {
            resolveBrowser: vi.fn().mockResolvedValue({
                ok: true,
                executablePath: CHROME_PATH,
                browser: 'chrome'
            }),
            launchBrowser: vi.fn(async () => browser)
        });
        expect(result).toMatchObject({
            status: 'error',
            error: { code: 'HEADLESS_NAVIGATION_FAILED' }
        });
        expect(closePage).toHaveBeenCalledTimes(1);
        expect(closeContext).toHaveBeenCalledTimes(1);
        expect(closeBrowser).toHaveBeenCalledTimes(1);
    });

    it('uses safe extraction for rendered pages with broken stylesheet content', async () => {
        const { browser } = createBrowserDouble({
            content: vi.fn(async () => `
<html>
<head>
<title>Broken CSS Page</title>
<style>
.btn {
color: red;
&:hover {
color: blue;
}
}
</style>
</head>
<body>
<main>
<h1>Broken CSS Page</h1>
<p>Main content starts here.</p>
<p>Show more Show more Show more Show more</p>
<p>Useful details for the user.</p>
<p>Privacy Terms Privacy Terms Privacy Terms</p>
</main>
</body>
</html>
`)
        });
        const result = await headlessFetch('https://example.com/broken-css', {
            resolveBrowser: vi.fn().mockResolvedValue({
                ok: true,
                browser: 'edge',
                executablePath: EDGE_PATH
            }),
            launchBrowser: vi.fn(async () => browser),
            // Monotonic fake clock: each read is 100 ms later than the previous one.
            now: (() => {
                let tick = 0;
                return () => (tick += 100);
            })()
        });
        expect(result.status).toBe('ok');
        // Fail loudly when content is missing. The previous early `return` let the
        // test pass without running any of the content assertions below.
        expect(result.content).toBeTruthy();
        expect(result.content.title).toBe('Broken CSS Page');
        expect(result.content.text).toContain('Main content starts here.');
        expect(result.content.text).toContain('Useful details for the user.');
        // The fixture repeats each phrase 4 and 3 times; fewer must survive extraction.
        expect(result.content.text.match(/Show more/g)?.length ?? 0).toBeLessThan(4);
        expect(result.content.text.match(/Privacy Terms/g)?.length ?? 0).toBeLessThan(3);
    });
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { createHttpFetcher } from '../../src/fetch/http-fetch.js';
|
|
3
|
+
/**
 * Tests for createHttpFetcher. Every case injects a fetchImpl spy resolving to
 * a canned response (ok, url, headers, text) and asserts on the status that
 * the fetcher reports for it.
 */
describe('http fetcher', () => {
    // Builds a fetcher whose fetchImpl resolves to a successful response with the
    // given content type and body, then requests `url` through it.
    const fetchCanned = (url, contentType, body) => {
        const fetcher = createHttpFetcher({
            fetchImpl: vi.fn().mockResolvedValue({
                ok: true,
                url,
                headers: new Headers({ 'content-type': contentType }),
                text: async () => body
            })
        });
        return fetcher(url);
    };

    it('returns ok for clearly readable HTML', async () => {
        const result = await fetchCanned(
            'https://example.com/article',
            'text/html',
            '<html><body><article><p>Readable article text with enough content to look solid.</p></article></body></html>'
        );
        expect(result.status).toBe('ok');
        expect(result.metadata.method).toBe('http');
    });

    it('returns needs_headless for weak script-shell pages', async () => {
        const result = await fetchCanned(
            'https://example.com/app',
            'text/html',
            '<html><body><div id="app"></div><script src="app.js"></script></body></html>'
        );
        expect(result.status).toBe('needs_headless');
    });

    it('returns needs_headless for weak content on a js-heavy shell page', async () => {
        const result = await fetchCanned(
            'https://www.airbnb.com/',
            'text/html;charset=utf-8',
            '<html><head><title>Airbnb | Vacation rentals, cabins, beach houses, & more</title></head><body><div id="root"></div><noscript>Enable JavaScript</noscript><main><p>Become a host</p><p>It\'s easy to start hosting and earn extra income.</p></main></body></html>'
        );
        expect(result.status).toBe('needs_headless');
        expect(result.metadata.method).toBe('http');
    });

    it('returns ok for a short but legitimate simple page', async () => {
        const result = await fetchCanned(
            'https://example.com/',
            'text/html',
            '<html><head><title>Example Domain</title></head><body><main><p>This domain is for use in documentation examples without needing permission.</p><p>Avoid use in operations.</p><a href="https://iana.org">Learn more</a></main></body></html>'
        );
        expect(result.status).toBe('ok');
        expect(result.metadata.method).toBe('http');
    });

    it('falls back to simple extraction and returns ok when content is still readable', async () => {
        const brokenCssPage = `<html>
<head>
<title>Broken CSS Page</title>
<style>
.btn {
color: red;
&:hover {
color: blue;
}
}
</style>
</head>
<body>
<main>
<h1>Broken CSS Page</h1>
<p>This page should still produce useful readable text through the fallback path.</p>
<p>It contains enough content to count as a legitimate HTTP success.</p>
</main>
</body>
</html>`;
        const result = await fetchCanned(
            'https://example.com/broken-css',
            'text/html; charset=utf-8',
            brokenCssPage
        );
        expect(result.status).toBe('ok');
        expect(result.content?.title).toBe('Broken CSS Page');
        expect(result.content?.text).toContain('useful readable text through the fallback path');
    });

    it('returns needs_headless instead of crashing when fallback content is still weak', async () => {
        const brokenShellPage = `<html>
<head>
<title>Broken Shell</title>
<style>
.card {
&:hover {
opacity: 1;
}
}
</style>
</head>
<body>
<div id="app"></div>
<noscript>Enable JavaScript</noscript>
<main><p>Hi</p></main>
</body>
</html>`;
        const result = await fetchCanned(
            'https://example.com/broken-shell',
            'text/html; charset=utf-8',
            brokenShellPage
        );
        expect(result.status).toBe('needs_headless');
        expect(result.error?.code).toBe('WEAK_EXTRACTION');
    });

    it('returns unsupported for binary content', async () => {
        const result = await fetchCanned('https://example.com/file.pdf', 'application/pdf', '');
        expect(result.status).toBe('unsupported');
    });
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|