page-analyzer 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +36 -0
- package/README.md +27 -1
- package/extractors/block-assigner.js +1 -1
- package/index.d.ts +318 -0
- package/index.js +192 -36
- package/llm/analyzers/event-analyzer/event-analyzer-blocks.js +19 -0
- package/llm/providers/claude-cli-provider.js +137 -0
- package/llm/providers/cli-runner.js +129 -0
- package/llm/providers/codex-cli-provider.js +154 -0
- package/llm/providers/index.js +61 -0
- package/package.json +6 -1
- package/page-extractor.js +153 -5
- package/scripts/analyze.js +10 -5
- package/test/smoke.test.js +82 -1
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import { BaseLlmProvider } from './base-provider.js';
|
|
2
|
+
import { runCli, makeTmpOutFile } from './cli-runner.js';
|
|
3
|
+
|
|
4
|
+
const FAST_MODEL_NAME = 'gpt-5.5';
|
|
5
|
+
|
|
6
|
+
/**
 * Codex CLI provider — invokes the locally installed `codex exec` command.
 *
 * Auth is whatever the user's local codex install already has; no API key needed.
 * Returns the final agent message (read from `-o` outfile) as a plain string,
 * matching the BaseLlmProvider contract.
 *
 * Security note: the prompt handed to the agent is built by callers and may
 * embed text taken from arbitrary web pages, so by default the agent is run
 * under the read-only sandbox. `--dangerously-bypass-approvals-and-sandbox`
 * disables the sandbox entirely (it takes precedence over `--sandbox`), so it
 * is only passed when `config.dangerouslyBypassSandbox === true`, e.g. when the
 * whole process already runs inside an external sandbox.
 */
export class CodexCliProvider extends BaseLlmProvider {
  /**
   * @param {Object} [config] - Provider configuration; also forwarded to BaseLlmProvider.
   * @param {string} config.model - Model name passed to `codex exec -m` (required).
   * @param {string} [config.cliPath='codex'] - Executable to spawn.
   * @param {string} [config.cwd] - Working directory for the CLI process.
   * @param {boolean} [config.fast] - Force the fast service tier on/off; when
   *   omitted it is enabled only for the FAST_MODEL_NAME model.
   * @param {boolean} [config.dangerouslyBypassSandbox=false] - Opt in to running
   *   codex without any sandbox. Leave off unless externally sandboxed.
   * @throws {Error} When no model is configured.
   */
  constructor(config = {}) {
    super(config);
    // NOTE(review): this.model is presumably assigned by BaseLlmProvider from config.model.
    if (!this.model) {
      throw new Error('CodexCliProvider: model is required');
    }
    this.cliPath = config.cliPath || 'codex';
    this.cwd = config.cwd || null;
    this.fast = typeof config.fast === 'boolean'
      ? config.fast
      : this.model === FAST_MODEL_NAME;
    this.dangerouslyBypassSandbox = config.dangerouslyBypassSandbox === true;
  }

  /**
   * Build the argv (without the executable) for one `codex exec` run.
   *
   * @param {string} outFile - Path the CLI writes the final agent message to (`-o`).
   * @returns {string[]} Argument list.
   */
  buildArgs(outFile) {
    const args = [
      'exec',
      '-m', this.model,
      '--skip-git-repo-check',
      '--ephemeral',
      '--color', 'never'
    ];
    // The two sandbox settings are mutually exclusive in effect: the bypass flag
    // overrides `--sandbox`, so never send both.
    if (this.dangerouslyBypassSandbox) {
      args.push('--dangerously-bypass-approvals-and-sandbox');
    } else {
      args.push('--sandbox', 'read-only');
    }
    args.push('-o', outFile);
    if (this.fast) {
      args.push('-c', 'service_tier="fast"');
    }
    return args;
  }

  /**
   * Normalize request metadata into the fields attached to interaction logs.
   *
   * @param {Object} [metadata] - Caller-supplied metadata (domain, nodeId, operation, chunkLabel).
   * @returns {{domain: string, nodeId: string, operation: string, chunkLabel: (string|null)}|null}
   *   `null` when either `domain` or `nodeId` is missing, which disables logging.
   */
  resolveInteractionContext(metadata) {
    const context = metadata && typeof metadata === 'object' ? metadata : {};
    const domain = String(context.domain || '').trim();
    const nodeId = String(context.nodeId || '').trim();
    if (!domain || !nodeId) {
      return null;
    }
    return {
      domain,
      nodeId,
      operation: String(context.operation || 'analysis').trim() || 'analysis',
      chunkLabel: String(context.chunkLabel || '').trim() || null
    };
  }

  /**
   * Run `codex exec` once with the given prompt and return the agent's final message.
   *
   * @param {string} prompt - Prompt text handed to the CLI runner.
   * @param {Object} [options]
   * @param {Object} [options.metadata] - Interaction-log metadata; see resolveInteractionContext.
   * @returns {Promise<string>} Trimmed content of the `-o` output file.
   * @throws {Error} When the CLI exits non-zero or leaves the output file empty;
   *   errors raised by the runner are rethrown unchanged.
   */
  async makeRequest(prompt, options = {}) {
    const metadata = options && typeof options.metadata === 'object' ? options.metadata : {};
    const interactionContext = this.resolveInteractionContext(metadata);
    const { filePath: outFile, cleanup } = await makeTmpOutFile('codex-cli', 'last-message.txt');
    const args = this.buildArgs(outFile);
    const requestPayload = { argv: [this.cliPath, ...args], model: this.model, fast: this.fast };

    // Emits one interaction-log record; resolves to true only if a record was written.
    // `result` is the runner result, or null when the runner itself failed.
    const logInteraction = async (outputText, result) => {
      if (!interactionContext) {
        return false;
      }
      await this.emitInteractionLog({
        ...interactionContext,
        provider: 'Codex',
        model: this.model,
        requestId: null,
        inputText: prompt,
        outputText,
        requestPayload,
        responsePayload: result
          ? { stdout: result.stdout, stderr: result.stderr, code: result.code, signal: result.signal }
          : null,
        // The CLI does not report token usage or cost.
        usagePromptTokens: null,
        usageCompletionTokens: null,
        usageReasoningTokens: null,
        usageCost: null
      });
      return true;
    };

    let failureLogged = false;
    try {
      const result = await runCli({
        command: this.cliPath,
        args,
        prompt,
        timeoutMs: this.timeout,
        outFile,
        cwd: this.cwd || undefined
      });

      const outputText = String(result.outFileContent ?? '').trim();
      if (result.code !== 0 || !outputText) {
        failureLogged = await logInteraction(outputText || null, result);
        const reason = result.code !== 0
          ? `exited with code ${result.code}${result.signal ? ` (signal ${result.signal})` : ''}`
          : 'produced empty output file';
        // Only the tail of stderr is surfaced to keep the error message bounded.
        const stderrTail = String(result.stderr || '').slice(-500);
        throw new Error(`codex exec ${reason}${stderrTail ? `: ${stderrTail}` : ''}`);
      }

      await logInteraction(outputText, result);
      return outputText;
    } catch (error) {
      // Failures that were not already recorded above (e.g. the runner threw)
      // are logged without a response payload.
      if (!failureLogged) {
        await logInteraction(null, null);
      }
      throw error;
    } finally {
      await cleanup();
    }
  }

  /**
   * Send `content` to Codex through the inherited retry wrapper.
   *
   * @param {*} content - Prompt content; coerced to a string (`null`/`undefined` become '').
   * @param {Object} [options] - Request options; `parseJson` is dropped before forwarding.
   * @returns {Promise<string>} Whatever makeRequestWithRetry resolves to.
   */
  async analyze(content, options = {}) {
    const requestOptions = { ...options };
    delete requestOptions.parseJson;
    return this.makeRequestWithRetry(() => this.makeRequest(String(content ?? ''), requestOptions));
  }

  /**
   * Check the minimum configuration for this provider.
   *
   * @throws {Error} When no model is configured.
   */
  validateConfig() {
    if (!this.model) {
      throw new Error('Model is required');
    }
  }
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { OpenAiProvider } from './openai-provider.js';
|
|
2
|
+
import { CodexCliProvider } from './codex-cli-provider.js';
|
|
3
|
+
import { ClaudeCliProvider } from './claude-cli-provider.js';
|
|
4
|
+
|
|
5
|
+
export const LLM_PROVIDER_TYPES = Object.freeze(['openai', 'codex', 'claude']);
|
|
6
|
+
|
|
7
|
+
/**
 * Build the right LLM provider instance from an llmConfig object.
 *
 * `type` is matched case-insensitively with surrounding whitespace ignored, and
 * falls back to 'openai' when it is missing or empty. A `null` / non-object
 * `llmConfig` is treated like an empty config.
 *
 * @param {Object} [llmConfig]
 * @param {'openai'|'codex'|'claude'} [llmConfig.type='openai']
 * @param {string} llmConfig.model
 * @param {string} [llmConfig.apiKey] - openai only
 * @param {string} [llmConfig.apiEndpoint] - openai only
 * @param {string} [llmConfig.cliPath] - codex / claude only
 * @param {string} [llmConfig.cwd] - codex / claude only
 * @param {boolean} [llmConfig.fast] - codex only
 * @param {number} [llmConfig.maxTokens]
 * @param {number} [llmConfig.temperature] - openai only
 * @param {number} [llmConfig.timeout]
 * @param {number} [llmConfig.maxRetries]
 * @param {Function} [llmConfig.interactionLogger]
 * @returns {OpenAiProvider|CodexCliProvider|ClaudeCliProvider} The provider for `type`.
 * @throws {Error} When `type` is not one of LLM_PROVIDER_TYPES.
 */
export function createLlmProvider(llmConfig = {}) {
  // Default parameters only replace `undefined`; guard against `null` and
  // primitives so callers get the normal defaults instead of a raw TypeError.
  const config = llmConfig !== null && typeof llmConfig === 'object' ? llmConfig : {};
  // Trim so values coming from env files (e.g. "codex ") still match.
  const type = String(config.type || '').trim().toLowerCase() || 'openai';

  // Options every provider receives.
  const shared = {
    model: config.model,
    timeout: config.timeout,
    maxRetries: config.maxRetries,
    maxTokens: config.maxTokens,
    interactionLogger: config.interactionLogger
  };

  switch (type) {
    case 'openai':
      return new OpenAiProvider({
        ...shared,
        apiKey: config.apiKey,
        apiEndpoint: config.apiEndpoint,
        temperature: config.temperature
      });
    case 'codex':
      return new CodexCliProvider({
        ...shared,
        cliPath: config.cliPath,
        cwd: config.cwd,
        fast: config.fast
      });
    case 'claude':
      return new ClaudeCliProvider({
        ...shared,
        cliPath: config.cliPath,
        cwd: config.cwd
      });
    default:
      throw new Error(`Unknown llm.type: ${config.type}. Expected one of: ${LLM_PROVIDER_TYPES.join(', ')}`);
  }
}
|
|
59
|
+
|
|
60
|
+
export { OpenAiProvider, CodexCliProvider, ClaudeCliProvider };
|
|
61
|
+
export { BaseLlmProvider } from './base-provider.js';
|
package/package.json
CHANGED
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "page-analyzer",
|
|
3
|
-
"version": "1.2.2",
|
|
3
|
+
"version": "1.2.3",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Standalone page analysis module.",
|
|
6
6
|
"license": "MIT",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "git+https://github.com/liuzemei/page-analyzer.git"
|
|
10
|
+
},
|
|
7
11
|
"main": "index.js",
|
|
12
|
+
"types": "index.d.ts",
|
|
8
13
|
"scripts": {
|
|
9
14
|
"test": "node test/smoke.test.js",
|
|
10
15
|
"analyze": "node scripts/analyze.js",
|
package/page-extractor.js
CHANGED
|
@@ -161,6 +161,10 @@ export class PageExtractor {
|
|
|
161
161
|
textPreviewMaxChars: Number.isInteger(config.textPreviewMaxChars)
|
|
162
162
|
? Math.max(120, config.textPreviewMaxChars)
|
|
163
163
|
: 1200,
|
|
164
|
+
sizedElementsEnabled: Boolean(config.sizedElementsEnabled),
|
|
165
|
+
sizedElementMinSize: Number.isInteger(config.sizedElementMinSize)
|
|
166
|
+
? Math.max(0, config.sizedElementMinSize)
|
|
167
|
+
: 24,
|
|
164
168
|
waitForImagesLoaded: Boolean(config.waitForImagesLoaded),
|
|
165
169
|
fullPageScreenshot: Boolean(config.fullPageScreenshot),
|
|
166
170
|
blockScreenshots: Boolean(config.blockScreenshots),
|
|
@@ -476,6 +480,147 @@ export class PageExtractor {
|
|
|
476
480
|
});
|
|
477
481
|
}
|
|
478
482
|
|
|
483
|
+
/**
|
|
484
|
+
* Collect ALL visible DOM elements that have "some size" (width > minSize OR
|
|
485
|
+
* height > minSize), each annotated with basic raw info. Broader than
|
|
486
|
+
* collectElementGeometries (which is interactive-only); used to nest element
|
|
487
|
+
* detail under each block in the analysis output.
|
|
488
|
+
*/
|
|
489
|
+
async collectSizedElements(page) {
|
|
490
|
+
const minSize = this.config.sizedElementMinSize;
|
|
491
|
+
const records = await page.evaluate((minSizePx) => {
|
|
492
|
+
const INTERACTIVE_SELECTOR = 'a, button, form, input, select, textarea, [onclick], [role="button"], [role="link"]';
|
|
493
|
+
const SKIP_TAGS = new Set([
|
|
494
|
+
'script', 'style', 'noscript', 'template', 'meta', 'link', 'br', 'wbr', 'head', 'title'
|
|
495
|
+
]);
|
|
496
|
+
const MEDIA_TAGS = new Set(['img', 'source', 'video', 'iframe', 'audio', 'embed']);
|
|
497
|
+
const records = [];
|
|
498
|
+
|
|
499
|
+
const normalizeTextInPage = (value, maxLength = 240) => String(value || '')
|
|
500
|
+
.replace(/\s+/g, ' ')
|
|
501
|
+
.trim()
|
|
502
|
+
.slice(0, maxLength);
|
|
503
|
+
|
|
504
|
+
const normalizeHref = (value) => {
|
|
505
|
+
const raw = String(value || '').trim();
|
|
506
|
+
if (!raw) return '';
|
|
507
|
+
try {
|
|
508
|
+
const resolved = new URL(raw, location.href);
|
|
509
|
+
resolved.hash = '';
|
|
510
|
+
const href = resolved.href;
|
|
511
|
+
return href.endsWith('/') && href !== `${resolved.origin}/`
|
|
512
|
+
? href.slice(0, -1)
|
|
513
|
+
: href;
|
|
514
|
+
} catch {
|
|
515
|
+
return raw;
|
|
516
|
+
}
|
|
517
|
+
};
|
|
518
|
+
|
|
519
|
+
const buildPath = (el, useNthOfType) => {
|
|
520
|
+
if (!el || !el.tagName) return '';
|
|
521
|
+
const parts = [];
|
|
522
|
+
let current = el;
|
|
523
|
+
while (current && current.tagName && current.tagName.toLowerCase() !== 'body') {
|
|
524
|
+
const parent = current.parentElement;
|
|
525
|
+
if (!parent || parent.tagName.toLowerCase() === 'html') break;
|
|
526
|
+
const tag = current.tagName.toLowerCase();
|
|
527
|
+
const siblings = Array.from(parent.children).filter((child) => {
|
|
528
|
+
if (!(child instanceof Element)) return false;
|
|
529
|
+
return useNthOfType ? child.tagName.toLowerCase() === tag : true;
|
|
530
|
+
});
|
|
531
|
+
const index = siblings.indexOf(current) + 1;
|
|
532
|
+
const suffix = useNthOfType
|
|
533
|
+
? `:nth-of-type(${index})`
|
|
534
|
+
: `:nth-child(${index})`;
|
|
535
|
+
parts.unshift(`${tag}${suffix}`);
|
|
536
|
+
current = parent;
|
|
537
|
+
}
|
|
538
|
+
if (parts.length === 0) return '';
|
|
539
|
+
return `body > ${parts.join(' > ')}`;
|
|
540
|
+
};
|
|
541
|
+
|
|
542
|
+
const pageWidth = Math.max(
|
|
543
|
+
document.documentElement?.scrollWidth || 0,
|
|
544
|
+
document.body?.scrollWidth || 0
|
|
545
|
+
);
|
|
546
|
+
const pageHeight = Math.max(
|
|
547
|
+
document.documentElement?.scrollHeight || 0,
|
|
548
|
+
document.body?.scrollHeight || 0
|
|
549
|
+
);
|
|
550
|
+
|
|
551
|
+
for (const element of document.querySelectorAll('body *')) {
|
|
552
|
+
if (!(element instanceof Element)) continue;
|
|
553
|
+
const tag = element.tagName.toLowerCase();
|
|
554
|
+
if (SKIP_TAGS.has(tag)) continue;
|
|
555
|
+
|
|
556
|
+
const style = getComputedStyle(element);
|
|
557
|
+
if (style.display === 'none') continue;
|
|
558
|
+
if (style.visibility === 'hidden' || style.visibility === 'collapse') continue;
|
|
559
|
+
if (parseFloat(style.opacity) === 0) continue;
|
|
560
|
+
|
|
561
|
+
const rect = element.getBoundingClientRect();
|
|
562
|
+
if (rect.width <= 0 || rect.height <= 0) continue;
|
|
563
|
+
if (!(rect.width > minSizePx || rect.height > minSizePx)) continue;
|
|
564
|
+
|
|
565
|
+
const top = rect.top + window.scrollY;
|
|
566
|
+
const left = rect.left + window.scrollX;
|
|
567
|
+
// Skip elements positioned fully outside the document (e.g. a11y skip-links at top:-17000)
|
|
568
|
+
if (top + rect.height <= 0 || left + rect.width <= 0) continue;
|
|
569
|
+
if (pageHeight > 0 && top >= pageHeight) continue;
|
|
570
|
+
if (pageWidth > 0 && left >= pageWidth) continue;
|
|
571
|
+
|
|
572
|
+
let imageAlt = '';
|
|
573
|
+
if (tag === 'img') {
|
|
574
|
+
imageAlt = normalizeTextInPage(element.getAttribute('alt') || '', 240);
|
|
575
|
+
} else {
|
|
576
|
+
const childImg = element.querySelector(':scope > img');
|
|
577
|
+
if (childImg) {
|
|
578
|
+
imageAlt = normalizeTextInPage(childImg.getAttribute('alt') || '', 240);
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
let src = '';
|
|
583
|
+
if (MEDIA_TAGS.has(tag)) {
|
|
584
|
+
src = normalizeHref(element.getAttribute('src') || element.currentSrc || '');
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
const role = element.getAttribute('role') || '';
|
|
588
|
+
const roleLower = role.toLowerCase();
|
|
589
|
+
|
|
590
|
+
records.push({
|
|
591
|
+
tag,
|
|
592
|
+
text: normalizeTextInPage(element.innerText || element.textContent || ''),
|
|
593
|
+
href: normalizeHref(
|
|
594
|
+
element.getAttribute('href')
|
|
595
|
+
|| element.getAttribute('action')
|
|
596
|
+
|| element.getAttribute('formaction')
|
|
597
|
+
|| ''
|
|
598
|
+
),
|
|
599
|
+
src,
|
|
600
|
+
width: rect.width,
|
|
601
|
+
height: rect.height,
|
|
602
|
+
top,
|
|
603
|
+
left,
|
|
604
|
+
cssSelector: buildPath(element, true),
|
|
605
|
+
id: element.id || '',
|
|
606
|
+
class: normalizeTextInPage(element.getAttribute('class') || '', 240),
|
|
607
|
+
role,
|
|
608
|
+
ariaLabel: normalizeTextInPage(element.getAttribute('aria-label') || '', 120),
|
|
609
|
+
imageAlt,
|
|
610
|
+
interactive: element.matches(INTERACTIVE_SELECTOR)
|
|
611
|
+
|| element.hasAttribute('onclick')
|
|
612
|
+
|| roleLower === 'button'
|
|
613
|
+
|| roleLower === 'link'
|
|
614
|
+
});
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
return records;
|
|
618
|
+
}, minSize);
|
|
619
|
+
|
|
620
|
+
console.log(`[page-analyzer] Collected ${records.length} sized elements (width or height > ${minSize}px)`);
|
|
621
|
+
return records;
|
|
622
|
+
}
|
|
623
|
+
|
|
479
624
|
async collectPageSize(page) {
|
|
480
625
|
return page.evaluate(() => {
|
|
481
626
|
const html = document.documentElement;
|
|
@@ -741,11 +886,11 @@ export class PageExtractor {
|
|
|
741
886
|
|
|
742
887
|
/**
|
|
743
888
|
* Extract page data from an already prepared Playwright page:
|
|
744
|
-
* html, blocks, elementGeometries, screenshots.
|
|
889
|
+
* html, blocks, elementGeometries, sizedElements, screenshots.
|
|
745
890
|
* When config.s3 is provided, screenshots are uploaded to S3 and returned as URLs.
|
|
746
891
|
* @param {import('playwright').Page} page - Prepared Playwright page
|
|
747
892
|
* @param {string} targetUrl - URL loaded in the page
|
|
748
|
-
* @returns {Promise<{html, blocks, elementGeometries, screenshots, pageSize}>}
|
|
893
|
+
* @returns {Promise<{html, blocks, elementGeometries, sizedElements, screenshots, pageSize}>}
|
|
749
894
|
*/
|
|
750
895
|
async extractPreparedPage(page, targetUrl) {
|
|
751
896
|
const viewport = {
|
|
@@ -766,17 +911,20 @@ export class PageExtractor {
|
|
|
766
911
|
});
|
|
767
912
|
const blocks = Array.isArray(blocksResult?.blocks) ? blocksResult.blocks : [];
|
|
768
913
|
const elementGeometries = await this.collectElementGeometries(page);
|
|
914
|
+
const sizedElements = this.config.sizedElementsEnabled
|
|
915
|
+
? await this.collectSizedElements(page)
|
|
916
|
+
: [];
|
|
769
917
|
const finalPageSize = await this.collectPageSize(page);
|
|
770
918
|
const screenshots = await this.captureScreenshots(page, targetUrl, blocks);
|
|
771
919
|
|
|
772
|
-
return { html, blocks, elementGeometries, screenshots, pageSize: finalPageSize };
|
|
920
|
+
return { html, blocks, elementGeometries, sizedElements, screenshots, pageSize: finalPageSize };
|
|
773
921
|
}
|
|
774
922
|
|
|
775
923
|
/**
|
|
776
|
-
* Extract page data: html, blocks, elementGeometries, screenshots.
|
|
924
|
+
* Extract page data: html, blocks, elementGeometries, sizedElements, screenshots.
|
|
777
925
|
* When config.s3 is provided, screenshots are uploaded to S3 and returned as URLs.
|
|
778
926
|
* @param {string} url - URL to extract
|
|
779
|
-
* @returns {Promise<{html, blocks, elementGeometries, screenshots, pageSize}>}
|
|
927
|
+
* @returns {Promise<{html, blocks, elementGeometries, sizedElements, screenshots, pageSize}>}
|
|
780
928
|
*/
|
|
781
929
|
async extract(url) {
|
|
782
930
|
return this.withPreparedPage(url, async (page, targetUrl) => {
|
package/scripts/analyze.js
CHANGED
|
@@ -33,14 +33,19 @@ loadDotEnv(path.join(projectRoot, '.env'));
|
|
|
33
33
|
|
|
34
34
|
const url = process.argv[2] || 'https://www.jcb.co.jp/ordercard/kojin_card/os_card_w2.html';
|
|
35
35
|
|
|
36
|
+
const llmType = (process.env.LLM_TYPE || 'openai').toLowerCase();
|
|
37
|
+
const llmConfig = { type: llmType, model: process.env.LLM_MODEL };
|
|
38
|
+
if (llmType === 'openai') {
|
|
39
|
+
llmConfig.apiKey = process.env.LLM_API_KEY;
|
|
40
|
+
llmConfig.apiEndpoint = process.env.LLM_API_ENDPOINT;
|
|
41
|
+
}
|
|
42
|
+
|
|
36
43
|
const result = await analyzeUrl(url, {
|
|
37
44
|
fullPageScreenshot: true,
|
|
38
45
|
blockScreenshots: true,
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
model: process.env.LLM_MODEL
|
|
43
|
-
}
|
|
46
|
+
showElement: true,
|
|
47
|
+
elementSize: 24,
|
|
48
|
+
llm: llmConfig
|
|
44
49
|
});
|
|
45
50
|
|
|
46
51
|
fs.writeFileSync(
|
package/test/smoke.test.js
CHANGED
|
@@ -3,6 +3,11 @@ import { createHash } from 'node:crypto';
|
|
|
3
3
|
import { EventAnalyzer } from '../llm/analyzers/event-analyzer/event-analyzer.js';
|
|
4
4
|
import { buildBlockAnalysisArtifact } from '../llm/analyzers/event-analyzer/event-analyzer-blocks.js';
|
|
5
5
|
import { OpenAiProvider } from '../llm/providers/openai-provider.js';
|
|
6
|
+
import {
|
|
7
|
+
createLlmProvider,
|
|
8
|
+
CodexCliProvider,
|
|
9
|
+
ClaudeCliProvider
|
|
10
|
+
} from '../llm/providers/index.js';
|
|
6
11
|
import { PageExtractor } from '../page-extractor.js';
|
|
7
12
|
import { analyzeUrl } from '../index.js';
|
|
8
13
|
|
|
@@ -205,6 +210,23 @@ async function analyzeWith(options = {}) {
|
|
|
205
210
|
height: 24,
|
|
206
211
|
selectorNthOfType: 'body > main:nth-of-type(1) > section:nth-of-type(1) > a:nth-of-type(1)'
|
|
207
212
|
}],
|
|
213
|
+
sizedElements: [{
|
|
214
|
+
tag: 'a',
|
|
215
|
+
text: 'Sign up',
|
|
216
|
+
href: 'https://example.com/signup',
|
|
217
|
+
src: '',
|
|
218
|
+
width: 80,
|
|
219
|
+
height: 30,
|
|
220
|
+
top: 0,
|
|
221
|
+
left: 0,
|
|
222
|
+
cssSelector: 'body > main:nth-of-type(1) > section:nth-of-type(1) > a:nth-of-type(1)',
|
|
223
|
+
id: '',
|
|
224
|
+
class: 'cta',
|
|
225
|
+
role: '',
|
|
226
|
+
ariaLabel: '',
|
|
227
|
+
imageAlt: '',
|
|
228
|
+
interactive: true
|
|
229
|
+
}],
|
|
208
230
|
screenshots: { fullPage: '/tmp/full-page.png' },
|
|
209
231
|
pageSize: { width: 1000, height: 800 }
|
|
210
232
|
};
|
|
@@ -241,7 +263,9 @@ async function analyzeWith(options = {}) {
|
|
|
241
263
|
},
|
|
242
264
|
fullPageScreenshot: true,
|
|
243
265
|
blockScreenshots: true,
|
|
244
|
-
showBlockIdx: true
|
|
266
|
+
showBlockIdx: true,
|
|
267
|
+
showElement: true,
|
|
268
|
+
elementSize: 24
|
|
245
269
|
});
|
|
246
270
|
|
|
247
271
|
assert.equal(calls.filter((call) => call[0] === 'withPreparedPage').length, 1);
|
|
@@ -253,6 +277,16 @@ async function analyzeWith(options = {}) {
|
|
|
253
277
|
result.analysis.block_analysis.blocks[0].blockScreenshotPaths[0],
|
|
254
278
|
'/tmp/logical-block-0.png'
|
|
255
279
|
);
|
|
280
|
+
|
|
281
|
+
const firstBlockElements = result.analysis.block_analysis.blocks[0].elements;
|
|
282
|
+
assert.ok(Array.isArray(firstBlockElements), 'block should carry a sized elements array');
|
|
283
|
+
assert.equal(firstBlockElements.length, 1);
|
|
284
|
+
assert.equal(firstBlockElements[0].tag, 'a');
|
|
285
|
+
assert.equal(firstBlockElements[0].interactive, true);
|
|
286
|
+
assert.equal(firstBlockElements[0].cssSelector, 'body > main:nth-of-type(1) > section:nth-of-type(1) > a:nth-of-type(1)');
|
|
287
|
+
assert.equal(firstBlockElements[0].class, 'cta');
|
|
288
|
+
assert.equal(firstBlockElements[0].semantic, undefined);
|
|
289
|
+
assert.equal(firstBlockElements[0].event_type, undefined);
|
|
256
290
|
} finally {
|
|
257
291
|
PageExtractor.prototype.withPreparedPage = originalWithPreparedPage;
|
|
258
292
|
PageExtractor.prototype.extractPreparedPage = originalExtractPreparedPage;
|
|
@@ -507,4 +541,51 @@ async function analyzeWith(options = {}) {
|
|
|
507
541
|
assert.equal(artifact.blocks[0].blockCssPath, 'body > main:nth-of-type(1)');
|
|
508
542
|
}
|
|
509
543
|
|
|
544
|
+
{
|
|
545
|
+
// Factory dispatches to the right provider class
|
|
546
|
+
const openaiProvider = createLlmProvider({
|
|
547
|
+
type: 'openai',
|
|
548
|
+
apiKey: 'k',
|
|
549
|
+
apiEndpoint: 'https://example.invalid/v1/chat/completions',
|
|
550
|
+
model: 'm'
|
|
551
|
+
});
|
|
552
|
+
assert.ok(openaiProvider instanceof OpenAiProvider, 'type=openai → OpenAiProvider');
|
|
553
|
+
|
|
554
|
+
const codexProvider = createLlmProvider({ type: 'codex', model: 'gpt-5.5' });
|
|
555
|
+
assert.ok(codexProvider instanceof CodexCliProvider, 'type=codex → CodexCliProvider');
|
|
556
|
+
assert.equal(codexProvider.fast, true, 'gpt-5.5 auto-enables fast');
|
|
557
|
+
assert.ok(codexProvider.buildArgs('/tmp/x').includes('service_tier="fast"'), 'fast injects -c service_tier');
|
|
558
|
+
|
|
559
|
+
const codexOther = createLlmProvider({ type: 'codex', model: 'gpt-5-codex' });
|
|
560
|
+
assert.equal(codexOther.fast, false, 'other models do not auto-enable fast');
|
|
561
|
+
assert.ok(!codexOther.buildArgs('/tmp/x').includes('service_tier="fast"'));
|
|
562
|
+
|
|
563
|
+
const codexExplicitFast = createLlmProvider({ type: 'codex', model: 'gpt-5-codex', fast: true });
|
|
564
|
+
assert.equal(codexExplicitFast.fast, true, 'explicit fast:true overrides');
|
|
565
|
+
|
|
566
|
+
const codexExplicitOff = createLlmProvider({ type: 'codex', model: 'gpt-5.5', fast: false });
|
|
567
|
+
assert.equal(codexExplicitOff.fast, false, 'explicit fast:false overrides gpt-5.5');
|
|
568
|
+
|
|
569
|
+
const claudeProvider = createLlmProvider({ type: 'claude', model: 'sonnet' });
|
|
570
|
+
assert.ok(claudeProvider instanceof ClaudeCliProvider, 'type=claude → ClaudeCliProvider');
|
|
571
|
+
|
|
572
|
+
// Default type is openai
|
|
573
|
+
const defaultProvider = createLlmProvider({
|
|
574
|
+
apiKey: 'k',
|
|
575
|
+
apiEndpoint: 'https://example.invalid/v1/chat/completions',
|
|
576
|
+
model: 'm'
|
|
577
|
+
});
|
|
578
|
+
assert.ok(defaultProvider instanceof OpenAiProvider, 'missing type → openai default');
|
|
579
|
+
|
|
580
|
+
// Unknown type rejects
|
|
581
|
+
assert.throws(
|
|
582
|
+
() => createLlmProvider({ type: 'unknown', model: 'm' }),
|
|
583
|
+
/Unknown llm\.type/
|
|
584
|
+
);
|
|
585
|
+
|
|
586
|
+
// CLI providers require model
|
|
587
|
+
assert.throws(() => createLlmProvider({ type: 'codex' }), /model is required/);
|
|
588
|
+
assert.throws(() => createLlmProvider({ type: 'claude' }), /model is required/);
|
|
589
|
+
}
|
|
590
|
+
|
|
510
591
|
console.log('smoke tests passed');
|