page-analyzer 1.0.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +430 -0
- package/index.js +186 -6
- package/llm/analyzers/event-analyzer/event-analyzer-blocks.js +23 -2
- package/llm/analyzers/event-analyzer/event-analyzer-constants.js +1 -1
- package/llm/analyzers/event-analyzer/event-analyzer.js +1 -1
- package/package.json +5 -3
- package/page-extractor.js +364 -17
- package/result-viewer.html +879 -0
- package/scripts/analyze.js +51 -0
- package/scripts/build-result-viewer.js +891 -0
- package/scripts/serve-result-viewer.js +68 -0
- package/test/smoke.test.js +213 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import http from 'node:http';
|
|
2
|
+
import fs from 'node:fs/promises';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
import { fileURLToPath } from 'node:url';
|
|
5
|
+
|
|
6
|
+
// Absolute directory of this script; ES modules have no built-in __dirname.
const scriptFile = fileURLToPath(import.meta.url);
const __dirname = path.dirname(scriptFile);

// Files are served relative to the directory one level above this script.
const projectRoot = path.resolve(__dirname, '..');

// Port preference order: first CLI argument, then the PORT env var, then 4173.
const requestedPort = process.argv[2] || process.env.PORT || '4173';
const preferredPort = Number.parseInt(requestedPort, 10);

// Lower-case file extension -> Content-Type header value.
const contentTypes = new Map(
  Object.entries({
    '.html': 'text/html; charset=utf-8',
    '.json': 'application/json; charset=utf-8',
    '.js': 'text/javascript; charset=utf-8',
    '.css': 'text/css; charset=utf-8',
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.webp': 'image/webp',
    '.svg': 'image/svg+xml',
  }),
);
|
|
21
|
+
|
|
22
|
+
/**
 * Map a raw request URL to an absolute file path inside the served directory.
 *
 * The bare path `/` maps to `result-viewer.html`. Any other path is
 * percent-decoded and resolved relative to the root.
 *
 * @param {string | undefined} requestUrl - Raw `request.url`; an empty value is treated as `/`.
 * @param {string} [rootDir=projectRoot] - Directory that every served file must live under.
 * @returns {string | null} The absolute file path, or `null` when the URL is
 *   malformed or resolves to a location outside `rootDir`.
 */
function resolveRequestPath(requestUrl, rootDir = projectRoot) {
  let pathname;
  try {
    const url = new URL(requestUrl || '/', 'http://127.0.0.1');
    pathname = url.pathname === '/' ? '/result-viewer.html' : decodeURIComponent(url.pathname);
  } catch {
    // decodeURIComponent throws URIError on malformed percent-encoding
    // (e.g. "/%E0%A4%A"); reject the request instead of letting it propagate.
    return null;
  }

  const root = path.resolve(rootDir);
  const filePath = path.resolve(root, `.${pathname}`);

  // A plain string-prefix check would accept sibling directories such as
  // "<root>-backup", so containment is decided from the relative path instead.
  const relative = path.relative(root, filePath);
  const escapesRoot =
    relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);

  return escapesRoot ? null : filePath;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Static file server for the result viewer.
 *
 * Responses:
 * - 400 when the request URL cannot be resolved because resolution threw;
 * - 403 when the resolver rejects the path (returns a falsy value);
 * - 404 when the file cannot be read;
 * - 200 with the file contents otherwise, with caching disabled.
 */
const server = http.createServer(async (request, response) => {
  let filePath;
  try {
    filePath = resolveRequestPath(request.url);
  } catch {
    // A throw here would otherwise escape this async handler as an unhandled
    // rejection and leave the client without any response.
    response.writeHead(400);
    response.end('Bad request');
    return;
  }

  if (!filePath) {
    response.writeHead(403);
    response.end('Forbidden');
    return;
  }

  // Only the read is guarded: if writing the 200 response failed after the
  // headers were sent, a second writeHead(404) would throw again.
  let body;
  try {
    body = await fs.readFile(filePath);
  } catch {
    response.writeHead(404);
    response.end('Not found');
    return;
  }

  response.writeHead(200, {
    'Content-Type': contentTypes.get(path.extname(filePath).toLowerCase()) || 'application/octet-stream',
    'Cache-Control': 'no-store'
  });
  response.end(body);
});
|
|
52
|
+
|
|
53
|
+
/**
 * Bind the server to 127.0.0.1, probing successive ports while they are busy.
 *
 * @param {number} port - Port to try first.
 * @param {number} [attemptsLeft=20] - How many further ports may be tried
 *   after `port` when the bind fails with EADDRINUSE.
 * @throws Rethrows any listen error that is not EADDRINUSE, or EADDRINUSE once
 *   the attempts are exhausted (raised from the 'error' event handler).
 */
function listen(port, attemptsLeft = 20) {
  // Passing a callback to server.listen() registers it as a one-time
  // 'listening' listener on every attempt, so after N failed attempts the URL
  // would be printed N + 1 times. Each attempt therefore installs a matched
  // pair of listeners, and whichever fires removes the other.
  function handleError(error) {
    server.off('listening', handleListening);
    if (error.code === 'EADDRINUSE' && attemptsLeft > 0) {
      listen(port + 1, attemptsLeft - 1);
      return;
    }
    throw error;
  }

  function handleListening() {
    server.off('error', handleError);
    const address = server.address();
    console.log(`Result viewer: http://127.0.0.1:${address.port}/result-viewer.html`);
  }

  server.once('error', handleError);
  server.once('listening', handleListening);
  server.listen(port, '127.0.0.1');
}
|
|
67
|
+
|
|
68
|
+
// Start on the preferred port; if it is not a finite number (parseInt yields
// NaN for non-numeric input), start on 4173 instead.
listen(Number.isFinite(preferredPort) ? preferredPort : 4173);
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import assert from 'node:assert/strict';
|
|
2
|
+
import { EventAnalyzer } from '../llm/analyzers/event-analyzer/event-analyzer.js';
|
|
3
|
+
import { buildBlockAnalysisArtifact } from '../llm/analyzers/event-analyzer/event-analyzer-blocks.js';
|
|
4
|
+
import { PageExtractor } from '../page-extractor.js';
|
|
5
|
+
import '../index.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Stand-in provider that returns canned replies and records which kind of
 * prompt it was asked to analyze.
 */
class FakeProvider {
  constructor() {
    // One entry per analyze() call: 'event' or 'special'.
    this.calls = [];
  }

  /**
   * @param {string} prompt - Prompt text; prompts containing "DOM CSV" get the event reply.
   * @returns {Promise<string>} Canned CSV-style reply text.
   */
  async analyze(prompt) {
    const isEventPrompt = prompt.includes('DOM CSV');
    this.calls.push(isEventPrompt ? 'event' : 'special');

    if (isEventPrompt) {
      return 'csv_id,event_type,attributes_kv\n0,signup,intent=cta';
    }

    const replyLines = [];
    replyLines.push('Demo page with a primary CTA');
    replyLines.push('blockIdxs,blockName,blockDescription,blockPossibleEvents');
    replyLines.push('0,CTASection,Primary CTA section,signup.cta_click');
    return replyLines.join('\n');
  }
}
|
|
26
|
+
|
|
27
|
+
/**
 * Stand-in locator exposing first(), count() and screenshot(), with a
 * configurable match count and an optional screenshot failure.
 */
class FakeLocator {
  /**
   * @param {{ count?: number, throwOnScreenshot?: boolean }} [settings]
   */
  constructor(settings = {}) {
    const { count = 1, throwOnScreenshot = false } = settings;
    this.countValue = count;
    this.throwOnScreenshot = throwOnScreenshot;
    // Options passed to every screenshot() call, including failed ones.
    this.screenshots = [];
  }

  /** @returns {FakeLocator} This same locator. */
  first() {
    return this;
  }

  /** @returns {Promise<number>} The configured match count. */
  async count() {
    return this.countValue;
  }

  /**
   * Record the call, then fail if the locator was configured to.
   * @param {object} options - Screenshot options to record.
   */
  async screenshot(options) {
    this.screenshots.push(options);
    if (!this.throwOnScreenshot) {
      return;
    }
    throw new Error('selector screenshot failed');
  }
}
|
|
49
|
+
|
|
50
|
+
/**
 * Stand-in page that always hands out one pre-built locator and records
 * every locator(), evaluate() and screenshot() call made against it.
 */
class FakePage {
  /**
   * @param {object} locator - Object returned from every locator() call.
   */
  constructor(locator) {
    Object.assign(this, {
      locatorInstance: locator,
      locatorSelectors: [],
      evaluateCalls: [],
      pageScreenshots: [],
    });
  }

  /**
   * @param {string} selector - Selector to record.
   * @returns {object} The locator supplied to the constructor.
   */
  locator(selector) {
    this.locatorSelectors.push(selector);
    return this.locatorInstance;
  }

  /**
   * Record the argument without running the function.
   * @returns {Promise<number>} Always 1.
   */
  async evaluate(_fn, arg) {
    this.evaluateCalls.push(arg);
    return 1;
  }

  /**
   * @param {object} options - Page screenshot options to record.
   */
  async screenshot(options) {
    this.pageScreenshots.push(options);
  }
}
|
|
72
|
+
|
|
73
|
+
// DOM CSV fixture: a header row plus one "Sign up" link row in block 0.
const csvContent =
  'idx,blockIdx,tag,imageAlt,text,context,href\n' +
  '0,0,a,,Sign up,,https://example.com/signup';

// Block fixture: a single <section> block with index 0.
const blocks = [
  {
    // Identity and position in the DOM.
    blockIdx: 0,
    branchPath: 'body.0',
    depth: 1,
    domOrder: 1,
    tag: 'section',
    fixed: false,

    // Geometry.
    top: 0,
    left: 0,
    width: 1000,
    height: 200,

    // Content summary.
    textPreview: 'Sign up',
    childInteractiveCount: 1,
  },
];
|
|
92
|
+
|
|
93
|
+
/**
 * Run EventAnalyzer.analyzeEvents over the shared CSV and block fixtures
 * using a fresh FakeProvider.
 *
 * @param {object} [options] - Extra analyzeEvents options; these override the default `blocks` entry.
 * @returns {Promise<{ provider: FakeProvider, result: object }>} The provider
 *   (for inspecting recorded calls) and the analysis result.
 */
async function analyzeWith(options = {}) {
  const provider = new FakeProvider();
  const analyzer = new EventAnalyzer(provider, {});
  const analysisOptions = { blocks, ...options };
  const result = await analyzer.analyzeEvents(csvContent, '', [], analysisOptions);
  return { provider, result };
}
|
|
103
|
+
|
|
104
|
+
{
  // Default options: only the 'special' prompt is expected, and no per-node
  // events are produced.
  const outcome = await analyzeWith();
  const { calls } = outcome.provider;
  const analysis = outcome.result;

  assert.deepEqual(calls, ['special']);
  assert.equal(analysis.events_by_node.length, 0);
  assert.equal(analysis.block_analysis.stats.llm_blocks, 1);
}
|
|
111
|
+
|
|
112
|
+
{
  // With analyzeNodeEvents enabled, the 'event' prompt is expected after the
  // 'special' one and yields a single node event.
  const outcome = await analyzeWith({ analyzeNodeEvents: true });
  const nodeEvents = outcome.result.events_by_node;

  assert.deepEqual(outcome.provider.calls, ['special', 'event']);
  assert.equal(nodeEvents.length, 1);
  assert.equal(nodeEvents[0].event_type, 'signup');
}
|
|
119
|
+
|
|
120
|
+
{
  // Locator matches nothing: the capture is expected to report false without
  // evaluating in the page or taking a page-level screenshot.
  const selector = 'main > section:nth-of-type(1)';
  const page = new FakePage(new FakeLocator({ count: 0 }));
  const block = {
    blockCssPath: selector,
    left: 0,
    top: 0,
    width: 1200,
    height: 300
  };

  const captured = await new PageExtractor().captureBlockScreenshot(page, block, '/tmp/block.png');

  assert.equal(captured, false);
  assert.deepEqual(page.locatorSelectors, [selector]);
  assert.equal(page.pageScreenshots.length, 0);
  assert.equal(page.evaluateCalls.length, 0);
}
|
|
137
|
+
|
|
138
|
+
{
  // Locator matches and its screenshot succeeds: the capture is expected to
  // go through the locator only, with two page.evaluate calls.
  const outputPath = '/tmp/block.png';
  const locator = new FakeLocator();
  const page = new FakePage(locator);
  const block = { blockCssPath: 'main > section:nth-of-type(2)' };

  const captured = await new PageExtractor().captureBlockScreenshot(page, block, outputPath);

  assert.equal(captured, true);
  assert.deepEqual(locator.screenshots, [{ path: outputPath }]);
  assert.equal(page.pageScreenshots.length, 0);
  assert.equal(page.evaluateCalls.length, 2);
}
|
|
151
|
+
|
|
152
|
+
{
  // Locator screenshot throws: the capture is expected to report false and
  // not take a page-level screenshot instead.
  const page = new FakePage(new FakeLocator({ throwOnScreenshot: true }));
  const block = {
    blockCssPath: 'main > section:nth-of-type(3)',
    left: 0,
    top: 0,
    width: 1200,
    height: 300
  };

  const captured = await new PageExtractor().captureBlockScreenshot(page, block, '/tmp/block.png');

  assert.equal(captured, false);
  assert.equal(page.pageScreenshots.length, 0);
  assert.equal(page.evaluateCalls.length, 2);
}
|
|
168
|
+
|
|
169
|
+
{
  // Block screenshots only (full-page capture disabled): one entry per block
  // is expected, indexed in input order, with no page-level screenshot.
  const extractor = new PageExtractor({
    snapshotDir: '/tmp/page-analyzer-smoke-snapshots'
  });
  const page = new FakePage(new FakeLocator());
  const requestedBlocks = [
    { blockName: 'Hero', blockCssPath: '#hero' },
    { blockName: 'Footer', blockCssPath: '#footer' }
  ];
  const captureOptions = {
    fullPageScreenshot: false,
    blockScreenshots: true
  };

  const screenshots = await extractor.captureScreenshots(
    page,
    'https://example.com/demo',
    requestedBlocks,
    captureOptions
  );

  assert.equal(screenshots.blocks.length, 2);

  const summary = screenshots.blocks.map(({ blockIdx, blockName }) => ({ blockIdx, blockName }));
  assert.deepEqual(summary, [
    { blockIdx: 0, blockName: 'Hero' },
    { blockIdx: 1, blockName: 'Footer' }
  ]);
  assert.equal(page.pageScreenshots.length, 0);
}
|
|
193
|
+
|
|
194
|
+
{
  // A merged block built from two sibling sections is expected to report
  // their shared parent as its blockCssPath.
  const sharedParent = 'body > main:nth-of-type(1)';
  const mergedBlock = {
    blockIdx: 3,
    blockIdxKey: '3.4',
    blockName: 'ContentSection',
    blockDescription: '',
    possibleEvents: [],
    semanticLabels: [],
    semanticGroups: [],
    rows: [],
    sourceBlocks: [
      { blockCssPath: 'body > main:nth-of-type(1) > section:nth-of-type(1)' },
      { blockCssPath: 'body > main:nth-of-type(1) > section:nth-of-type(2)' }
    ]
  };

  const artifact = buildBlockAnalysisArtifact('Demo', [mergedBlock], []);

  assert.equal(artifact.blocks[0].blockCssPath, sharedParent);
}
|
|
212
|
+
|
|
213
|
+
// Reached only if no assertion above threw.
console.log('smoke tests passed');
|