page-analyzer 1.2.1 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +36 -0
- package/README.md +29 -3
- package/extractors/block-assigner.js +1 -1
- package/index.d.ts +318 -0
- package/index.js +192 -36
- package/llm/analyzers/event-analyzer/event-analyzer-blocks.js +19 -0
- package/llm/providers/claude-cli-provider.js +137 -0
- package/llm/providers/cli-runner.js +129 -0
- package/llm/providers/codex-cli-provider.js +154 -0
- package/llm/providers/index.js +61 -0
- package/package.json +6 -1
- package/page-extractor.js +210 -17
- package/scripts/analyze.js +10 -5
- package/test/smoke.test.js +151 -12
package/test/smoke.test.js
CHANGED
|
@@ -1,10 +1,20 @@
|
|
|
1
1
|
import assert from 'node:assert/strict';
|
|
2
|
+
import { createHash } from 'node:crypto';
|
|
2
3
|
import { EventAnalyzer } from '../llm/analyzers/event-analyzer/event-analyzer.js';
|
|
3
4
|
import { buildBlockAnalysisArtifact } from '../llm/analyzers/event-analyzer/event-analyzer-blocks.js';
|
|
4
5
|
import { OpenAiProvider } from '../llm/providers/openai-provider.js';
|
|
6
|
+
import {
|
|
7
|
+
createLlmProvider,
|
|
8
|
+
CodexCliProvider,
|
|
9
|
+
ClaudeCliProvider
|
|
10
|
+
} from '../llm/providers/index.js';
|
|
5
11
|
import { PageExtractor } from '../page-extractor.js';
|
|
6
12
|
import { analyzeUrl } from '../index.js';
|
|
7
13
|
|
|
14
|
+
function md5(value) {
|
|
15
|
+
return createHash('md5').update(value).digest('hex');
|
|
16
|
+
}
|
|
17
|
+
|
|
8
18
|
class FakeProvider {
|
|
9
19
|
constructor() {
|
|
10
20
|
this.calls = [];
|
|
@@ -26,9 +36,10 @@ class FakeProvider {
|
|
|
26
36
|
}
|
|
27
37
|
|
|
28
38
|
class FakeLocator {
|
|
29
|
-
constructor({ count = 1, throwOnScreenshot = false } = {}) {
|
|
39
|
+
constructor({ count = 1, throwOnScreenshot = false, screenshotBodies = null } = {}) {
|
|
30
40
|
this.countValue = count;
|
|
31
41
|
this.throwOnScreenshot = throwOnScreenshot;
|
|
42
|
+
this.screenshotBodies = Array.isArray(screenshotBodies) ? screenshotBodies : null;
|
|
32
43
|
this.screenshots = [];
|
|
33
44
|
}
|
|
34
45
|
|
|
@@ -41,11 +52,14 @@ class FakeLocator {
|
|
|
41
52
|
}
|
|
42
53
|
|
|
43
54
|
async screenshot(options) {
|
|
55
|
+
const screenshotIndex = this.screenshots.length;
|
|
44
56
|
this.screenshots.push(options);
|
|
45
57
|
if (this.throwOnScreenshot) {
|
|
46
58
|
throw new Error('selector screenshot failed');
|
|
47
59
|
}
|
|
48
|
-
return Buffer.from(
|
|
60
|
+
return Buffer.from(
|
|
61
|
+
this.screenshotBodies?.[screenshotIndex] || `locator screenshot:${options?.path || 'buffer'}`
|
|
62
|
+
);
|
|
49
63
|
}
|
|
50
64
|
}
|
|
51
65
|
|
|
@@ -74,22 +88,36 @@ class FakePage {
|
|
|
74
88
|
}
|
|
75
89
|
|
|
76
90
|
class FakeS3Client {
|
|
77
|
-
constructor({ failPredicate = null } = {}) {
|
|
91
|
+
constructor({ failPredicate = null, existingKeys = [] } = {}) {
|
|
78
92
|
this.failPredicate = failPredicate;
|
|
93
|
+
this.existingKeys = new Set(existingKeys);
|
|
94
|
+
this.headCommands = [];
|
|
79
95
|
this.commands = [];
|
|
80
96
|
this.attemptsByKey = new Map();
|
|
81
97
|
}
|
|
82
98
|
|
|
83
99
|
async send(command) {
|
|
84
100
|
const input = command.input;
|
|
85
|
-
|
|
101
|
+
if (command.constructor.name === 'HeadObjectCommand') {
|
|
102
|
+
this.headCommands.push(input);
|
|
103
|
+
if (this.existingKeys.has(input.Key)) {
|
|
104
|
+
return {};
|
|
105
|
+
}
|
|
106
|
+
const error = new Error(`s3 object not found for ${input.Key}`);
|
|
107
|
+
error.name = 'NotFound';
|
|
108
|
+
error.$metadata = { httpStatusCode: 404 };
|
|
109
|
+
throw error;
|
|
110
|
+
}
|
|
111
|
+
|
|
86
112
|
const attempts = (this.attemptsByKey.get(input.Key) || 0) + 1;
|
|
87
113
|
this.attemptsByKey.set(input.Key, attempts);
|
|
114
|
+
this.commands.push(input);
|
|
88
115
|
|
|
89
116
|
if (this.failPredicate?.(input, attempts)) {
|
|
90
117
|
throw new Error(`s3 upload failed for ${input.Key}`);
|
|
91
118
|
}
|
|
92
119
|
|
|
120
|
+
this.existingKeys.add(input.Key);
|
|
93
121
|
return {};
|
|
94
122
|
}
|
|
95
123
|
}
|
|
@@ -182,6 +210,23 @@ async function analyzeWith(options = {}) {
|
|
|
182
210
|
height: 24,
|
|
183
211
|
selectorNthOfType: 'body > main:nth-of-type(1) > section:nth-of-type(1) > a:nth-of-type(1)'
|
|
184
212
|
}],
|
|
213
|
+
sizedElements: [{
|
|
214
|
+
tag: 'a',
|
|
215
|
+
text: 'Sign up',
|
|
216
|
+
href: 'https://example.com/signup',
|
|
217
|
+
src: '',
|
|
218
|
+
width: 80,
|
|
219
|
+
height: 30,
|
|
220
|
+
top: 0,
|
|
221
|
+
left: 0,
|
|
222
|
+
cssSelector: 'body > main:nth-of-type(1) > section:nth-of-type(1) > a:nth-of-type(1)',
|
|
223
|
+
id: '',
|
|
224
|
+
class: 'cta',
|
|
225
|
+
role: '',
|
|
226
|
+
ariaLabel: '',
|
|
227
|
+
imageAlt: '',
|
|
228
|
+
interactive: true
|
|
229
|
+
}],
|
|
185
230
|
screenshots: { fullPage: '/tmp/full-page.png' },
|
|
186
231
|
pageSize: { width: 1000, height: 800 }
|
|
187
232
|
};
|
|
@@ -218,7 +263,9 @@ async function analyzeWith(options = {}) {
|
|
|
218
263
|
},
|
|
219
264
|
fullPageScreenshot: true,
|
|
220
265
|
blockScreenshots: true,
|
|
221
|
-
showBlockIdx: true
|
|
266
|
+
showBlockIdx: true,
|
|
267
|
+
showElement: true,
|
|
268
|
+
elementSize: 24
|
|
222
269
|
});
|
|
223
270
|
|
|
224
271
|
assert.equal(calls.filter((call) => call[0] === 'withPreparedPage').length, 1);
|
|
@@ -230,6 +277,16 @@ async function analyzeWith(options = {}) {
|
|
|
230
277
|
result.analysis.block_analysis.blocks[0].blockScreenshotPaths[0],
|
|
231
278
|
'/tmp/logical-block-0.png'
|
|
232
279
|
);
|
|
280
|
+
|
|
281
|
+
const firstBlockElements = result.analysis.block_analysis.blocks[0].elements;
|
|
282
|
+
assert.ok(Array.isArray(firstBlockElements), 'block should carry a sized elements array');
|
|
283
|
+
assert.equal(firstBlockElements.length, 1);
|
|
284
|
+
assert.equal(firstBlockElements[0].tag, 'a');
|
|
285
|
+
assert.equal(firstBlockElements[0].interactive, true);
|
|
286
|
+
assert.equal(firstBlockElements[0].cssSelector, 'body > main:nth-of-type(1) > section:nth-of-type(1) > a:nth-of-type(1)');
|
|
287
|
+
assert.equal(firstBlockElements[0].class, 'cta');
|
|
288
|
+
assert.equal(firstBlockElements[0].semantic, undefined);
|
|
289
|
+
assert.equal(firstBlockElements[0].event_type, undefined);
|
|
233
290
|
} finally {
|
|
234
291
|
PageExtractor.prototype.withPreparedPage = originalWithPreparedPage;
|
|
235
292
|
PageExtractor.prototype.extractPreparedPage = originalExtractPreparedPage;
|
|
@@ -348,14 +405,16 @@ async function analyzeWith(options = {}) {
|
|
|
348
405
|
assert.deepEqual(page.pageScreenshots[0], { fullPage: true });
|
|
349
406
|
assert.equal(locator.screenshots.length, 1);
|
|
350
407
|
assert.deepEqual(locator.screenshots[0], {});
|
|
408
|
+
assert.equal(s3Client.headCommands.length, 2);
|
|
351
409
|
assert.equal(s3Client.commands.length, 2);
|
|
352
410
|
|
|
353
411
|
const [fullPageUpload, blockUpload] = s3Client.commands;
|
|
354
412
|
assert.equal(fullPageUpload.Bucket, 'page-analyzer-test');
|
|
355
413
|
assert.equal(fullPageUpload.ContentType, 'image/png');
|
|
356
414
|
assert.equal(Buffer.isBuffer(fullPageUpload.Body), true);
|
|
357
|
-
assert.match(fullPageUpload.Key, /^page-analyzer\/snapshots\/example
|
|
358
|
-
assert.match(blockUpload.Key, /^page-analyzer\/snapshots\/example
|
|
415
|
+
assert.match(fullPageUpload.Key, /^page-analyzer\/snapshots\/example\.com\/[a-f0-9]{32}\.png$/);
|
|
416
|
+
assert.match(blockUpload.Key, /^page-analyzer\/snapshots\/example\.com\/[a-f0-9]{32}\.png$/);
|
|
417
|
+
assert.notEqual(fullPageUpload.Key, blockUpload.Key);
|
|
359
418
|
|
|
360
419
|
assert.equal(
|
|
361
420
|
screenshots.fullPage,
|
|
@@ -384,21 +443,49 @@ async function analyzeWith(options = {}) {
|
|
|
384
443
|
});
|
|
385
444
|
|
|
386
445
|
const uploadedKey = s3Client.commands[0].Key;
|
|
387
|
-
assert.match(uploadedKey, /^nested\/prefix\/example
|
|
446
|
+
assert.match(uploadedKey, /^nested\/prefix\/example\.com\/[a-f0-9]{32}\.png$/);
|
|
388
447
|
assert.equal(
|
|
389
448
|
screenshots.blocks[0].path,
|
|
390
449
|
`https://page-analyzer-test.s3.ap-northeast-1.amazonaws.com/${uploadedKey}`
|
|
391
450
|
);
|
|
392
451
|
}
|
|
393
452
|
|
|
453
|
+
{
|
|
454
|
+
const body = Buffer.from('already uploaded screenshot');
|
|
455
|
+
const existingKey = `page-analyzer/snapshots/example.com/${md5(body)}.png`;
|
|
456
|
+
const s3Client = new FakeS3Client({
|
|
457
|
+
existingKeys: [existingKey]
|
|
458
|
+
});
|
|
459
|
+
const extractor = new PageExtractor({
|
|
460
|
+
s3: {
|
|
461
|
+
bucket: 'page-analyzer-test',
|
|
462
|
+
region: 'ap-northeast-1',
|
|
463
|
+
prefix: 'page-analyzer/snapshots',
|
|
464
|
+
publicBaseUrl: 'https://cdn.example.com',
|
|
465
|
+
client: s3Client
|
|
466
|
+
}
|
|
467
|
+
});
|
|
468
|
+
|
|
469
|
+
const url = await extractor.uploadScreenshotToS3('https://example.com/demo', body);
|
|
470
|
+
|
|
471
|
+
assert.equal(s3Client.headCommands.length, 1);
|
|
472
|
+
assert.equal(s3Client.headCommands[0].Key, existingKey);
|
|
473
|
+
assert.equal(s3Client.commands.length, 0);
|
|
474
|
+
assert.equal(url, `https://cdn.example.com/${existingKey}`);
|
|
475
|
+
}
|
|
476
|
+
|
|
394
477
|
{
|
|
395
478
|
const originalWarn = console.warn;
|
|
396
479
|
const warnings = [];
|
|
397
480
|
console.warn = (message) => warnings.push(message);
|
|
398
481
|
|
|
399
482
|
try {
|
|
483
|
+
const failingBlockKey =
|
|
484
|
+
`page-analyzer/snapshots/example.com/${md5('locator screenshot:block-0')}.png`;
|
|
485
|
+
const successfulBlockKey =
|
|
486
|
+
`page-analyzer/snapshots/example.com/${md5('locator screenshot:block-1')}.png`;
|
|
400
487
|
const s3Client = new FakeS3Client({
|
|
401
|
-
failPredicate: (input) => input.Key
|
|
488
|
+
failPredicate: (input) => input.Key === failingBlockKey
|
|
402
489
|
});
|
|
403
490
|
const extractor = new PageExtractor({
|
|
404
491
|
s3: {
|
|
@@ -409,7 +496,12 @@ async function analyzeWith(options = {}) {
|
|
|
409
496
|
client: s3Client
|
|
410
497
|
}
|
|
411
498
|
});
|
|
412
|
-
const locator = new FakeLocator(
|
|
499
|
+
const locator = new FakeLocator({
|
|
500
|
+
screenshotBodies: [
|
|
501
|
+
'locator screenshot:block-0',
|
|
502
|
+
'locator screenshot:block-1'
|
|
503
|
+
]
|
|
504
|
+
});
|
|
413
505
|
const page = new FakePage(locator);
|
|
414
506
|
const screenshots = await extractor.captureScreenshots(page, 'https://example.com/demo', [
|
|
415
507
|
{ blockName: 'Hero', blockCssPath: '#hero' },
|
|
@@ -421,8 +513,8 @@ async function analyzeWith(options = {}) {
|
|
|
421
513
|
|
|
422
514
|
assert.equal(screenshots.blocks.length, 1);
|
|
423
515
|
assert.equal(screenshots.blocks[0].blockIdx, 1);
|
|
424
|
-
assert.equal(s3Client.commands.filter((input) => input.Key
|
|
425
|
-
assert.equal(s3Client.commands.filter((input) => input.Key
|
|
516
|
+
assert.equal(s3Client.commands.filter((input) => input.Key === failingBlockKey).length, 3);
|
|
517
|
+
assert.equal(s3Client.commands.filter((input) => input.Key === successfulBlockKey).length, 1);
|
|
426
518
|
assert.equal(warnings.some((message) => message.includes('retrying')), true);
|
|
427
519
|
assert.equal(warnings.some((message) => message.includes('Failed to capture/upload block 0')), true);
|
|
428
520
|
} finally {
|
|
@@ -449,4 +541,51 @@ async function analyzeWith(options = {}) {
|
|
|
449
541
|
assert.equal(artifact.blocks[0].blockCssPath, 'body > main:nth-of-type(1)');
|
|
450
542
|
}
|
|
451
543
|
|
|
544
|
+
{
|
|
545
|
+
// Factory dispatches to the right provider class
|
|
546
|
+
const openaiProvider = createLlmProvider({
|
|
547
|
+
type: 'openai',
|
|
548
|
+
apiKey: 'k',
|
|
549
|
+
apiEndpoint: 'https://example.invalid/v1/chat/completions',
|
|
550
|
+
model: 'm'
|
|
551
|
+
});
|
|
552
|
+
assert.ok(openaiProvider instanceof OpenAiProvider, 'type=openai → OpenAiProvider');
|
|
553
|
+
|
|
554
|
+
const codexProvider = createLlmProvider({ type: 'codex', model: 'gpt-5.5' });
|
|
555
|
+
assert.ok(codexProvider instanceof CodexCliProvider, 'type=codex → CodexCliProvider');
|
|
556
|
+
assert.equal(codexProvider.fast, true, 'gpt-5.5 auto-enables fast');
|
|
557
|
+
assert.ok(codexProvider.buildArgs('/tmp/x').includes('service_tier="fast"'), 'fast injects -c service_tier');
|
|
558
|
+
|
|
559
|
+
const codexOther = createLlmProvider({ type: 'codex', model: 'gpt-5-codex' });
|
|
560
|
+
assert.equal(codexOther.fast, false, 'other models do not auto-enable fast');
|
|
561
|
+
assert.ok(!codexOther.buildArgs('/tmp/x').includes('service_tier="fast"'));
|
|
562
|
+
|
|
563
|
+
const codexExplicitFast = createLlmProvider({ type: 'codex', model: 'gpt-5-codex', fast: true });
|
|
564
|
+
assert.equal(codexExplicitFast.fast, true, 'explicit fast:true overrides');
|
|
565
|
+
|
|
566
|
+
const codexExplicitOff = createLlmProvider({ type: 'codex', model: 'gpt-5.5', fast: false });
|
|
567
|
+
assert.equal(codexExplicitOff.fast, false, 'explicit fast:false overrides gpt-5.5');
|
|
568
|
+
|
|
569
|
+
const claudeProvider = createLlmProvider({ type: 'claude', model: 'sonnet' });
|
|
570
|
+
assert.ok(claudeProvider instanceof ClaudeCliProvider, 'type=claude → ClaudeCliProvider');
|
|
571
|
+
|
|
572
|
+
// Default type is openai
|
|
573
|
+
const defaultProvider = createLlmProvider({
|
|
574
|
+
apiKey: 'k',
|
|
575
|
+
apiEndpoint: 'https://example.invalid/v1/chat/completions',
|
|
576
|
+
model: 'm'
|
|
577
|
+
});
|
|
578
|
+
assert.ok(defaultProvider instanceof OpenAiProvider, 'missing type → openai default');
|
|
579
|
+
|
|
580
|
+
// Unknown type rejects
|
|
581
|
+
assert.throws(
|
|
582
|
+
() => createLlmProvider({ type: 'unknown', model: 'm' }),
|
|
583
|
+
/Unknown llm\.type/
|
|
584
|
+
);
|
|
585
|
+
|
|
586
|
+
// CLI providers require model
|
|
587
|
+
assert.throws(() => createLlmProvider({ type: 'codex' }), /model is required/);
|
|
588
|
+
assert.throws(() => createLlmProvider({ type: 'claude' }), /model is required/);
|
|
589
|
+
}
|
|
590
|
+
|
|
452
591
|
console.log('smoke tests passed');
|