page-analyzer 1.0.1 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +39 -9
- package/index.js +186 -6
- package/llm/analyzers/event-analyzer/event-analyzer-blocks.js +23 -2
- package/llm/analyzers/event-analyzer/event-analyzer-constants.js +1 -1
- package/llm/analyzers/event-analyzer/event-analyzer.js +1 -1
- package/package.json +5 -3
- package/page-extractor.js +364 -17
- package/result-viewer.html +879 -0
- package/scripts/analyze.js +51 -0
- package/scripts/build-result-viewer.js +891 -0
- package/scripts/serve-result-viewer.js +68 -0
- package/test/smoke.test.js +213 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 page-analyzer contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -71,21 +71,21 @@ LLM_API_ENDPOINT=https://api.openai.com/v1/chat/completions
|
|
|
71
71
|
LLM_MODEL=gpt-4o-mini
|
|
72
72
|
```
|
|
73
73
|
|
|
74
|
-
##
|
|
74
|
+
## 运行测试和示例
|
|
75
75
|
|
|
76
|
-
|
|
76
|
+
本地测试不会调用真实网页或 LLM 接口:
|
|
77
77
|
|
|
78
78
|
```bash
|
|
79
79
|
npm test
|
|
80
80
|
```
|
|
81
81
|
|
|
82
|
-
|
|
82
|
+
如需手动分析真实页面,可以运行示例脚本。它会读取项目根目录下的 `.env`,分析指定 URL,并把结果写入 `result.json`。
|
|
83
83
|
|
|
84
84
|
```bash
|
|
85
|
-
|
|
85
|
+
npm run analyze -- https://example.com
|
|
86
86
|
```
|
|
87
87
|
|
|
88
|
-
注意:`
|
|
88
|
+
注意:`npm run analyze` 依赖以下环境变量:
|
|
89
89
|
|
|
90
90
|
- `LLM_API_KEY`
|
|
91
91
|
- `LLM_API_ENDPOINT`
|
|
@@ -114,6 +114,9 @@ const result = await analyzeUrl('https://example.com', {
|
|
|
114
114
|
},
|
|
115
115
|
showEvents: true,
|
|
116
116
|
showBlockIdx: true,
|
|
117
|
+
fullPageScreenshot: true,
|
|
118
|
+
blockScreenshots: true,
|
|
119
|
+
waitForImagesLoaded: true,
|
|
117
120
|
knownEventTypes: ['click_link', 'submit_form'],
|
|
118
121
|
extractorConfig: {
|
|
119
122
|
viewportWidth: 1440,
|
|
@@ -145,6 +148,9 @@ const result = await analyzeUrl('https://example.com', {
|
|
|
145
148
|
| `options.extractorConfig` | `object` | 否 | Playwright 页面抓取配置 |
|
|
146
149
|
| `options.showEvents` | `boolean` | 否 | 是否返回完整事件数组和元素明细 |
|
|
147
150
|
| `options.showBlockIdx` | `boolean` | 否 | 是否返回 CSV 与区块索引相关字段 |
|
|
151
|
+
| `options.fullPageScreenshot` | `boolean` | 否 | 是否保存整页截图到当前运行目录的 `snapshots/` 并返回文件路径 |
|
|
152
|
+
| `options.blockScreenshots` | `boolean` | 否 | 是否在 LLM 合并区块后,保存每个逻辑区块截图到当前运行目录的 `snapshots/` 并返回文件路径 |
|
|
153
|
+
| `options.waitForImagesLoaded` | `boolean` | 否 | 是否在提取区块、分析和截图前等待页面图片加载完成,默认 `false` |
|
|
148
154
|
|
|
149
155
|
### analyzePageEvents(input)
|
|
150
156
|
|
|
@@ -239,6 +245,28 @@ const result = await analyzePageEvents({
|
|
|
239
245
|
|
|
240
246
|
启用 `showBlockIdx: true` 后,区块结果中会额外包含 `blockIdxs`、`blockSemanticGroups`、`rowCount` 等字段,并返回 `csvContent`。
|
|
241
247
|
|
|
248
|
+
启用 `fullPageScreenshot: true` 后,返回结果会包含 `screenshots.fullPage`,值为整页截图文件路径。
|
|
249
|
+
|
|
250
|
+
启用 `blockScreenshots: true` 后,模块会在 LLM 合并区块后再截图。返回结果会包含 `screenshots.blocks`,每项包含逻辑区块序号 `blockIdx` 和对应截图 `path`;区块分析结果中的每个 block 也会额外带上 `blockScreenshotPaths`,每个逻辑区块最多对应一张截图。无法通过 `blockCssPath` 截图的隐藏或空区块会被跳过。
|
|
251
|
+
|
|
252
|
+
启用 `waitForImagesLoaded: true` 后,模块会先滚动页面触发懒加载,再等待当前 DOM 中的 `<img>` 完成加载或失败,之后再提取区块、分析和截图;等待时间受 `extractorConfig.timeoutMs` 控制。
|
|
253
|
+
|
|
254
|
+
截图参数启用后的新增输出示例:
|
|
255
|
+
|
|
256
|
+
```js
|
|
257
|
+
{
|
|
258
|
+
screenshots: {
|
|
259
|
+
fullPage: '/path/to/page-analyzer/snapshots/example-com-20260507-095500-full-page.png',
|
|
260
|
+
blocks: [
|
|
261
|
+
{
|
|
262
|
+
blockIdx: 0,
|
|
263
|
+
path: '/path/to/page-analyzer/snapshots/example-com-20260507-095500-block-000.png'
|
|
264
|
+
}
|
|
265
|
+
]
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
```
|
|
269
|
+
|
|
242
270
|
## 配置项
|
|
243
271
|
|
|
244
272
|
### extractorConfig
|
|
@@ -255,6 +283,7 @@ const result = await analyzePageEvents({
|
|
|
255
283
|
| `blockMaxHeightRatio` | `1.5` | 最大区块高度占视口高度比例 |
|
|
256
284
|
| `blockMaxDepth` | `15` | 区块提取最大 DOM 深度 |
|
|
257
285
|
| `textPreviewMaxChars` | `1200` | 区块文本预览最大长度 |
|
|
286
|
+
| `waitForImagesLoaded` | `false` | 是否在提取区块、分析和截图前等待页面图片加载完成 |
|
|
258
287
|
|
|
259
288
|
### parserConfig
|
|
260
289
|
|
|
@@ -320,7 +349,7 @@ data.choices[0].message.content
|
|
|
320
349
|
npm install
|
|
321
350
|
```
|
|
322
351
|
|
|
323
|
-
|
|
352
|
+
运行本地测试:
|
|
324
353
|
|
|
325
354
|
```bash
|
|
326
355
|
npm test
|
|
@@ -355,12 +384,13 @@ page-analyzer/
|
|
|
355
384
|
models/ # 上下文数据模型
|
|
356
385
|
utils/ # 文本、URL、选择器工具
|
|
357
386
|
vendor/ # 浏览器内区块提取脚本
|
|
358
|
-
|
|
387
|
+
scripts/analyze.js # 手动真实页面分析脚本
|
|
388
|
+
test/smoke.test.js # 本地 smoke test
|
|
359
389
|
```
|
|
360
390
|
|
|
361
391
|
## 常见问题
|
|
362
392
|
|
|
363
|
-
### npm
|
|
393
|
+
### npm run analyze 报 LLM 配置缺失
|
|
364
394
|
|
|
365
395
|
确认项目根目录存在 `.env`,并且包含:
|
|
366
396
|
|
|
@@ -397,4 +427,4 @@ extractorConfig: {
|
|
|
397
427
|
|
|
398
428
|
## License
|
|
399
429
|
|
|
400
|
-
|
|
430
|
+
MIT License. See [LICENSE](./LICENSE).
|
package/index.js
CHANGED
|
@@ -38,6 +38,131 @@ function normalizeDisplayOptions(options = {}) {
|
|
|
38
38
|
};
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
+
/**
 * Normalize a block-index value into a flat list of integers.
 *
 * Accepted shapes:
 *  - an array: every item is stringified and parsed base-10;
 *  - an integer: returned as a single-element list;
 *  - anything else: stringified (falsy values become '') and split on
 *    dots, commas and whitespace before parsing.
 * Tokens that do not parse to an integer are dropped.
 *
 * @param {*} value - Raw block index value (array, number, or delimited string).
 * @returns {number[]} Parsed integer indexes, in input order.
 */
function parseBlockIdxs(value) {
  // Shared parse-and-filter step used by both the array and the string path.
  const collectIntegers = (tokens) => {
    const parsedIdxs = [];
    for (const token of tokens) {
      const parsed = Number.parseInt(String(token), 10);
      if (Number.isInteger(parsed)) {
        parsedIdxs.push(parsed);
      }
    }
    return parsedIdxs;
  };

  if (Array.isArray(value)) {
    return collectIntegers(value);
  }

  if (Number.isInteger(value)) {
    return [value];
  }

  const tokens = String(value || '').split(/[.,\s]+/);
  return collectIntegers(tokens);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Index block screenshot paths by their block index.
 *
 * Entries are read from `screenshots.blocks`. An entry is kept only when its
 * `blockIdx` is (or parses base-10 to) an integer and its `path` is a
 * non-empty string. When two entries share an index, the later one wins.
 *
 * @param {Object} [screenshots] - Screenshot bundle; may be null/undefined.
 * @returns {Map<number, string>} Map from block index to screenshot path.
 */
function buildBlockScreenshotMap(screenshots) {
  const entries = Array.isArray(screenshots?.blocks) ? screenshots.blocks : [];

  return entries.reduce((lookup, entry) => {
    const rawIdx = entry?.blockIdx;
    const idx = Number.isInteger(rawIdx)
      ? rawIdx
      : Number.parseInt(String(rawIdx), 10);
    if (!Number.isInteger(idx)) {
      return lookup;
    }

    const candidatePath = entry?.path;
    if (typeof candidatePath !== 'string' || candidatePath === '') {
      return lookup;
    }

    lookup.set(idx, candidatePath);
    return lookup;
  }, new Map());
}
|
|
71
|
+
|
|
72
|
+
/**
 * Return a copy of `analysis` whose blocks carry `blockScreenshotPaths`.
 *
 * Each block's `blockIdxs` (falling back to `blockIdx`) is parsed into
 * integers and looked up in the screenshot map; blocks with at least one
 * match get a new `blockScreenshotPaths` array, other blocks are passed
 * through untouched. The input objects are not mutated.
 *
 * The original `analysis` is returned as-is when there are no block
 * screenshots, when `analysis.block_analysis` is not an object, or when
 * `block_analysis.blocks` is not an array.
 *
 * @param {Object} analysis - Analysis object containing `block_analysis.blocks`.
 * @param {Object} [screenshots] - Screenshot bundle with a `blocks` list.
 * @returns {Object} The original analysis or a shallow copy with decorated blocks.
 */
function attachBlockScreenshotPaths(analysis, screenshots) {
  const pathByBlockIdx = buildBlockScreenshotMap(screenshots);
  const blockAnalysis = analysis?.block_analysis;
  const canAttach =
    pathByBlockIdx.size > 0 &&
    isObject(blockAnalysis) &&
    Array.isArray(blockAnalysis.blocks);
  if (!canAttach) {
    return analysis;
  }

  // Collect the screenshot paths matching one block's indexes.
  const decorate = (block) => {
    const matchedPaths = [];
    for (const idx of parseBlockIdxs(block?.blockIdxs ?? block?.blockIdx)) {
      const matchedPath = pathByBlockIdx.get(idx);
      if (matchedPath) {
        matchedPaths.push(matchedPath);
      }
    }
    return matchedPaths.length > 0
      ? { ...block, blockScreenshotPaths: matchedPaths }
      : block;
  };

  return {
    ...analysis,
    block_analysis: {
      ...blockAnalysis,
      blocks: blockAnalysis.blocks.map((block) => decorate(block))
    }
  };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Tell whether a screenshot bundle contains anything worth returning.
 *
 * @param {Object} [screenshots] - Screenshot bundle; may be null/undefined.
 * @returns {boolean} True when `fullPage` is truthy or `blocks` is a
 *   non-empty array.
 */
function hasScreenshots(screenshots) {
  if (screenshots?.fullPage) {
    return true;
  }

  const blockShots = screenshots?.blocks;
  return Array.isArray(blockShots) && blockShots.length > 0;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Combine two screenshot bundles, with `secondary` taking precedence.
 *
 * - `fullPage`: the secondary value when truthy, otherwise the primary one.
 * - `blocks`: the secondary list when it is a non-empty array, otherwise the
 *   primary list (only kept when non-empty).
 *
 * @param {Object} [primary] - Baseline bundle (e.g. captured during extraction).
 * @param {Object} [secondary] - Overriding bundle (e.g. captured afterwards).
 * @returns {Object|null} Merged bundle, or null when nothing was captured.
 */
function mergeScreenshots(primary, secondary) {
  const blocksOf = (bundle) => (Array.isArray(bundle?.blocks) ? bundle.blocks : []);

  const fullPage = secondary?.fullPage || primary?.fullPage;
  const overridingBlocks = blocksOf(secondary);
  const blocks = overridingBlocks.length > 0 ? overridingBlocks : blocksOf(primary);

  const merged = {};
  if (fullPage) {
    merged.fullPage = fullPage;
  }
  if (blocks.length > 0) {
    merged.blocks = blocks;
  }

  // Keys are only ever set to a truthy fullPage or a non-empty blocks array,
  // so "has any key" is the same condition as "has screenshots".
  return Object.keys(merged).length > 0 ? merged : null;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Attach screenshot paths to logical blocks by their position in the result.
 *
 * The screenshot map is keyed by `blockIdx`, which is matched here against
 * each block's array position in `result.analysis.block_analysis.blocks`.
 * A matching block gets `blockScreenshotPaths` set to a single-element
 * array; other blocks are passed through. Inputs are not mutated.
 *
 * `result` is returned unchanged when it has no blocks or when there are no
 * usable block screenshots.
 *
 * @param {Object} result - Analysis result containing `analysis.block_analysis.blocks`.
 * @param {Object} [screenshots] - Screenshot bundle with a `blocks` list.
 * @returns {Object} The original result or a shallow copy with decorated blocks.
 */
function attachLogicalBlockScreenshotPaths(result, screenshots) {
  const logicalBlocks = result?.analysis?.block_analysis?.blocks;
  const hasLogicalBlocks = Array.isArray(logicalBlocks) && logicalBlocks.length > 0;
  if (!hasLogicalBlocks) {
    return result;
  }

  const pathByPosition = buildBlockScreenshotMap(screenshots);
  if (pathByPosition.size === 0) {
    return result;
  }

  const decoratedBlocks = logicalBlocks.map((block, position) => {
    const matchedPath = pathByPosition.get(position);
    return matchedPath
      ? { ...block, blockScreenshotPaths: [matchedPath] }
      : block;
  });

  const { analysis } = result;
  const blockAnalysis = { ...analysis.block_analysis, blocks: decoratedBlocks };
  return {
    ...result,
    analysis: { ...analysis, block_analysis: blockAnalysis }
  };
}
|
|
165
|
+
|
|
41
166
|
function compactBlockAnalysisBlock(block, displayOptions) {
|
|
42
167
|
const source = isObject(block) ? block : {};
|
|
43
168
|
const out = {
|
|
@@ -63,6 +188,10 @@ function compactBlockAnalysisBlock(block, displayOptions) {
|
|
|
63
188
|
out.mode = source.mode;
|
|
64
189
|
}
|
|
65
190
|
|
|
191
|
+
if (Array.isArray(source.blockScreenshotPaths) && source.blockScreenshotPaths.length > 0) {
|
|
192
|
+
out.blockScreenshotPaths = source.blockScreenshotPaths;
|
|
193
|
+
}
|
|
194
|
+
|
|
66
195
|
return out;
|
|
67
196
|
}
|
|
68
197
|
|
|
@@ -121,14 +250,20 @@ function buildPageAnalysisResult({
|
|
|
121
250
|
csvContent,
|
|
122
251
|
pageData,
|
|
123
252
|
analysis,
|
|
124
|
-
displayOptions
|
|
253
|
+
displayOptions,
|
|
254
|
+
screenshots
|
|
125
255
|
}) {
|
|
256
|
+
const analysisWithScreenshots = attachBlockScreenshotPaths(analysis, screenshots);
|
|
126
257
|
const result = {
|
|
127
258
|
title: pageData.title,
|
|
128
259
|
parseMetrics: pageData.metrics,
|
|
129
|
-
analysis: buildAnalysisResult(
|
|
260
|
+
analysis: buildAnalysisResult(analysisWithScreenshots, displayOptions)
|
|
130
261
|
};
|
|
131
262
|
|
|
263
|
+
if (hasScreenshots(screenshots)) {
|
|
264
|
+
result.screenshots = screenshots;
|
|
265
|
+
}
|
|
266
|
+
|
|
132
267
|
if (displayOptions.showEvents) {
|
|
133
268
|
result.elements = elements;
|
|
134
269
|
result.csvContent = csvContent;
|
|
@@ -154,19 +289,40 @@ function buildPageAnalysisResult({
|
|
|
154
289
|
* @param {boolean} [options.showEvents=false] - Include event arrays and full event-related metadata.
|
|
155
290
|
* Also enables node-level event classification.
|
|
156
291
|
* @param {boolean} [options.showBlockIdx=false] - Include CSV/block index alignment fields.
|
|
292
|
+
* @param {boolean} [options.fullPageScreenshot=false] - Save a full-page screenshot to snapshots/ and return its path.
|
|
293
|
+
* @param {boolean} [options.blockScreenshots=false] - Save one screenshot per merged logical block to snapshots/ and return their paths.
|
|
294
|
+
* @param {boolean} [options.waitForImagesLoaded=false] - Wait for page images before extracting and screenshotting.
|
|
157
295
|
* @returns {Promise<Object>} Analysis result. Event and idx fields are omitted unless requested.
|
|
158
296
|
*/
|
|
159
297
|
export async function analyzeUrl(url, options = {}) {
|
|
160
|
-
const {
|
|
298
|
+
const {
|
|
299
|
+
llm: llmConfig,
|
|
300
|
+
knownEventTypes,
|
|
301
|
+
parserConfig,
|
|
302
|
+
extractorConfig,
|
|
303
|
+
showEvents,
|
|
304
|
+
showBlockIdx,
|
|
305
|
+
fullPageScreenshot,
|
|
306
|
+
blockScreenshots,
|
|
307
|
+
waitForImagesLoaded
|
|
308
|
+
} = options;
|
|
161
309
|
|
|
162
310
|
if (!url) throw new Error('url is required');
|
|
163
311
|
if (!llmConfig?.apiKey || !llmConfig?.apiEndpoint || !llmConfig?.model) {
|
|
164
312
|
throw new Error('options.llm.apiKey, apiEndpoint, and model are required');
|
|
165
313
|
}
|
|
166
314
|
|
|
315
|
+
const shouldCaptureFullPage = fullPageScreenshot ?? extractorConfig?.fullPageScreenshot;
|
|
316
|
+
const shouldCaptureBlocks = blockScreenshots ?? extractorConfig?.blockScreenshots;
|
|
317
|
+
|
|
167
318
|
// Step 0: Playwright extraction
|
|
168
319
|
console.log(`[page-analyzer] Extracting ${url} ...`);
|
|
169
|
-
const extractor = new PageExtractor(
|
|
320
|
+
const extractor = new PageExtractor({
|
|
321
|
+
...extractorConfig,
|
|
322
|
+
fullPageScreenshot: shouldCaptureFullPage,
|
|
323
|
+
blockScreenshots: false,
|
|
324
|
+
waitForImagesLoaded: waitForImagesLoaded ?? extractorConfig?.waitForImagesLoaded
|
|
325
|
+
});
|
|
170
326
|
const bundle = await extractor.extract(url);
|
|
171
327
|
console.log(`[page-analyzer] Extracted: ${bundle.blocks.length} blocks, ${bundle.elementGeometries.length} geometries`);
|
|
172
328
|
|
|
@@ -174,7 +330,7 @@ export async function analyzeUrl(url, options = {}) {
|
|
|
174
330
|
let domain = '';
|
|
175
331
|
try { domain = new URL(url).hostname.replace(/^www\./, ''); } catch { /* ignore */ }
|
|
176
332
|
|
|
177
|
-
|
|
333
|
+
let result = await analyzePageEvents({
|
|
178
334
|
html: bundle.html,
|
|
179
335
|
url,
|
|
180
336
|
blocks: bundle.blocks,
|
|
@@ -184,9 +340,30 @@ export async function analyzeUrl(url, options = {}) {
|
|
|
184
340
|
parserConfig,
|
|
185
341
|
showEvents,
|
|
186
342
|
showBlockIdx,
|
|
343
|
+
screenshots: bundle.screenshots,
|
|
187
344
|
domain,
|
|
188
345
|
nodeId: `${domain}-root`
|
|
189
346
|
});
|
|
347
|
+
|
|
348
|
+
if (shouldCaptureBlocks) {
|
|
349
|
+
const logicalBlocks = Array.isArray(result?.analysis?.block_analysis?.blocks)
|
|
350
|
+
? result.analysis.block_analysis.blocks
|
|
351
|
+
: [];
|
|
352
|
+
const blockScreenshotsBundle = await extractor.captureUrlScreenshots(url, logicalBlocks, {
|
|
353
|
+
fullPageScreenshot: false,
|
|
354
|
+
blockScreenshots: true
|
|
355
|
+
});
|
|
356
|
+
const screenshots = mergeScreenshots(result.screenshots, blockScreenshotsBundle);
|
|
357
|
+
result = attachLogicalBlockScreenshotPaths(
|
|
358
|
+
{
|
|
359
|
+
...result,
|
|
360
|
+
...(screenshots ? { screenshots } : {})
|
|
361
|
+
},
|
|
362
|
+
screenshots
|
|
363
|
+
);
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
return result;
|
|
190
367
|
}
|
|
191
368
|
|
|
192
369
|
/**
|
|
@@ -213,6 +390,7 @@ export async function analyzeUrl(url, options = {}) {
|
|
|
213
390
|
* @param {boolean} [input.showEvents=false] - Include event arrays and full event-related metadata.
|
|
214
391
|
* Also enables node-level event classification.
|
|
215
392
|
* @param {boolean} [input.showBlockIdx=false] - Include CSV/block index alignment fields.
|
|
393
|
+
* @param {Object} [input.screenshots] - Screenshot paths captured during extraction.
|
|
216
394
|
* @param {string} [input.nodeId] - Node ID for logging context
|
|
217
395
|
* @param {string} [input.domain] - Domain for logging context
|
|
218
396
|
* @returns {Promise<Object>} Analysis result. Event and idx fields are omitted unless requested.
|
|
@@ -229,6 +407,7 @@ export async function analyzePageEvents(input) {
|
|
|
229
407
|
parserConfig = {},
|
|
230
408
|
showEvents = false,
|
|
231
409
|
showBlockIdx = false,
|
|
410
|
+
screenshots = null,
|
|
232
411
|
nodeId = '',
|
|
233
412
|
domain = ''
|
|
234
413
|
} = input;
|
|
@@ -289,7 +468,8 @@ export async function analyzePageEvents(input) {
|
|
|
289
468
|
csvContent,
|
|
290
469
|
pageData,
|
|
291
470
|
analysis,
|
|
292
|
-
displayOptions
|
|
471
|
+
displayOptions,
|
|
472
|
+
screenshots
|
|
293
473
|
});
|
|
294
474
|
}
|
|
295
475
|
|
|
@@ -115,13 +115,34 @@ function buildLogicalBlockPosition(sourceBlocks = []) {
|
|
|
115
115
|
}
|
|
116
116
|
|
|
117
117
|
/**
 * Pick one CSS path to represent a logical block built from several source blocks.
 *
 * Each source block contributes its `blockCssPath` (or `cssPath`), cleaned
 * and capped at 500 characters; empty paths are ignored.
 *  - No paths: returns ''.
 *  - One path: returns it unchanged.
 *  - Several paths: splits each on '>' and returns the shared leading
 *    segments joined with ' > ', provided at least two segments are shared;
 *    otherwise falls back to the first path.
 *
 * @param {Array<Object>} [sourceBlocks=[]] - Source blocks merged into the logical block.
 * @returns {string} The resolved CSS path, or '' when none is available.
 */
function resolveLogicalBlockCssPath(sourceBlocks = []) {
  const blockList = Array.isArray(sourceBlocks) ? sourceBlocks : [];
  const candidatePaths = Array.from(
    blockList,
    (block) => cleanText(block?.blockCssPath || block?.cssPath || '', 500)
  ).filter(Boolean);

  if (candidatePaths.length <= 1) {
    return candidatePaths[0] ?? '';
  }

  const segmentLists = candidatePaths.map((candidate) =>
    candidate
      .split('>')
      .map((segment) => segment.trim())
      .filter(Boolean)
  );

  // Count how many leading segments every path has in common.
  const [referenceSegments] = segmentLists;
  let sharedDepth = 0;
  while (
    sharedDepth < referenceSegments.length &&
    segmentLists.every((segments) => segments[sharedDepth] === referenceSegments[sharedDepth])
  ) {
    sharedDepth += 1;
  }

  // A single shared segment is not used as the path; keep the first block's path.
  return sharedDepth > 1
    ? referenceSegments.slice(0, sharedDepth).join(' > ')
    : candidatePaths[0];
}
|
|
126
147
|
|
|
127
148
|
function normalizePossibleEvents(responseHelper, value) {
|
|
@@ -307,7 +307,7 @@ class EventAnalyzer {
|
|
|
307
307
|
}
|
|
308
308
|
|
|
309
309
|
async analyzeEvents(csvData, _mdData, knownEventTypes = [], options = {}) {
|
|
310
|
-
const analyzeNodeEvents =
|
|
310
|
+
const analyzeNodeEvents = options?.analyzeNodeEvents === true;
|
|
311
311
|
const configuredKnownEventTypes = this.response.normalizeStringList(
|
|
312
312
|
this.config?.knownEventTypes,
|
|
313
313
|
{ eventType: true }
|
package/package.json
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "page-analyzer",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.1.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Standalone page analysis module.",
|
|
6
|
+
"license": "MIT",
|
|
6
7
|
"main": "index.js",
|
|
7
8
|
"scripts": {
|
|
8
|
-
"test": "node test.js",
|
|
9
|
-
"analyze": "node
|
|
9
|
+
"test": "node test/smoke.test.js",
|
|
10
|
+
"analyze": "node scripts/analyze.js",
|
|
11
|
+
"viewer": "node scripts/serve-result-viewer.js"
|
|
10
12
|
},
|
|
11
13
|
"dependencies": {
|
|
12
14
|
"cheerio": "^1.2.0",
|