page-analyzer 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/package.json +1 -1
- package/page-extractor.js +60 -15
- package/result-viewer.html +25 -8
- package/scripts/build-result-viewer.js +25 -8
- package/test/smoke.test.js +73 -17
package/README.md
CHANGED
|
@@ -250,7 +250,7 @@ const result = await analyzePageEvents({
|
|
|
250
250
|
|
|
251
251
|
启用 `blockScreenshots: true` 后,模块会在 LLM 合并区块后再截图。返回结果会包含 `screenshots.blocks`,每项包含逻辑区块序号 `blockIdx` 和对应截图 `path`;区块分析结果中的每个 block 也会额外带上 `blockScreenshotPaths`,每个逻辑区块最多对应一张截图。无法通过 `blockCssPath` 截图的隐藏或空区块会被跳过。
|
|
252
252
|
|
|
253
|
-
如果配置 `extractorConfig.s3`,截图不会写入本地 `snapshots/`,而是直接上传到 S3;`screenshots.fullPage`、`screenshots.blocks[].path` 和 `blockScreenshotPaths` 会返回 HTTPS URL
|
|
253
|
+
如果配置 `extractorConfig.s3`,截图不会写入本地 `snapshots/`,而是直接上传到 S3;`screenshots.fullPage`、`screenshots.blocks[].path` 和 `blockScreenshotPaths` 会返回 HTTPS URL。S3 对象 key 使用 `<prefix>/<domain>/<file-md5>.png`,上传前会先检查对象是否已存在,已存在时直接返回对应 URL,避免重复上传和冗余对象。上传不会设置 ACL,访问权限沿用 bucket 策略。单张截图的检查或上传失败时会自动重试,总共最多尝试 3 次;仍失败则跳过该截图。
|
|
254
254
|
|
|
255
255
|
启用 `waitForImagesLoaded: true` 后,模块会先滚动页面触发懒加载,再等待当前 DOM 中的 `<img>` 完成加载或失败,之后再提取区块、分析和截图;等待时间受 `extractorConfig.timeoutMs` 控制。
|
|
256
256
|
|
|
@@ -305,7 +305,7 @@ const result = await analyzeUrl('https://example.com', {
|
|
|
305
305
|
bucket: 'my-bucket',
|
|
306
306
|
region: 'ap-northeast-1',
|
|
307
307
|
prefix: 'page-analyzer/snapshots',
|
|
308
|
-
publicBaseUrl: 'https://cdn.example.com
|
|
308
|
+
publicBaseUrl: 'https://cdn.example.com',
|
|
309
309
|
credentials: {
|
|
310
310
|
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
|
|
311
311
|
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
|
|
@@ -316,7 +316,7 @@ const result = await analyzeUrl('https://example.com', {
|
|
|
316
316
|
});
|
|
317
317
|
```
|
|
318
318
|
|
|
319
|
-
`extractorConfig.s3.bucket` 和 `extractorConfig.s3.region` 必填。`credentials` 可省略,省略时使用 AWS SDK 默认凭证链。`publicBaseUrl` 可省略,省略时返回 `https://<bucket>.s3.<region>.amazonaws.com/<key
|
|
319
|
+
`extractorConfig.s3.bucket` 和 `extractorConfig.s3.region` 必填。`credentials` 可省略,省略时使用 AWS SDK 默认凭证链。`publicBaseUrl` 可省略,省略时返回 `https://<bucket>.s3.<region>.amazonaws.com/<key>`;配置后返回 `${publicBaseUrl}/<key>`。启用 S3 上传时,需要凭证具备 `s3:GetObject`(上传前的存在性检查通过 HeadObject 完成)和 `s3:PutObject` 权限;如果希望不存在的对象能被稳定识别为 404,还需要对应 bucket/prefix 的 `s3:ListBucket` 权限,否则 S3 会对不存在的对象返回 403,存在性检查会被当作失败处理。
|
|
320
320
|
|
|
321
321
|
### parserConfig
|
|
322
322
|
|
package/package.json
CHANGED
package/page-extractor.js
CHANGED
|
@@ -5,7 +5,8 @@
|
|
|
5
5
|
|
|
6
6
|
import fs from 'node:fs/promises';
|
|
7
7
|
import path from 'node:path';
|
|
8
|
-
import {
|
|
8
|
+
import { createHash } from 'node:crypto';
|
|
9
|
+
import { HeadObjectCommand, PutObjectCommand, S3Client } from '@aws-sdk/client-s3';
|
|
9
10
|
|
|
10
11
|
// In-browser block extraction function (serialized into page.evaluate)
|
|
11
12
|
// Imported from the project's extract-blocks script
|
|
@@ -41,6 +42,24 @@ function createSnapshotRunId() {
|
|
|
41
42
|
.replace(/^-+|-+$/g, '');
|
|
42
43
|
}
|
|
43
44
|
|
|
45
|
+
function createS3DomainSegment(url) {
|
|
46
|
+
const source = String(url || '').trim();
|
|
47
|
+
try {
|
|
48
|
+
const parsed = new URL(source);
|
|
49
|
+
const hostname = parsed.hostname
|
|
50
|
+
.toLowerCase()
|
|
51
|
+
.replace(/[^a-z0-9.-]+/g, '-')
|
|
52
|
+
.replace(/^-+|-+$/g, '');
|
|
53
|
+
return hostname || 'page';
|
|
54
|
+
} catch {
|
|
55
|
+
return createSnapshotSlug(source);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
function createFileMd5(body) {
|
|
60
|
+
return createHash('md5').update(body).digest('hex');
|
|
61
|
+
}
|
|
62
|
+
|
|
44
63
|
function getBlockNumber(block, fallbackIndex) {
|
|
45
64
|
return Number.isInteger(block?.blockIdx) ? block.blockIdx : fallbackIndex;
|
|
46
65
|
}
|
|
@@ -96,8 +115,8 @@ function normalizeS3Config(config) {
|
|
|
96
115
|
};
|
|
97
116
|
}
|
|
98
117
|
|
|
99
|
-
function joinS3Key(
|
|
100
|
-
return
|
|
118
|
+
function joinS3Key(...parts) {
|
|
119
|
+
return parts.filter(Boolean).join('/');
|
|
101
120
|
}
|
|
102
121
|
|
|
103
122
|
function encodeS3Key(key) {
|
|
@@ -107,9 +126,9 @@ function encodeS3Key(key) {
|
|
|
107
126
|
.join('/');
|
|
108
127
|
}
|
|
109
128
|
|
|
110
|
-
function buildS3Url(s3Config, key
|
|
129
|
+
function buildS3Url(s3Config, key) {
|
|
111
130
|
if (s3Config.publicBaseUrl) {
|
|
112
|
-
return `${s3Config.publicBaseUrl}/${
|
|
131
|
+
return `${s3Config.publicBaseUrl}/${encodeS3Key(key)}`;
|
|
113
132
|
}
|
|
114
133
|
|
|
115
134
|
return `https://${s3Config.bucket}.s3.${s3Config.region}.amazonaws.com/${encodeS3Key(key)}`;
|
|
@@ -119,6 +138,12 @@ function getErrorMessage(error) {
|
|
|
119
138
|
return error instanceof Error ? error.message : String(error);
|
|
120
139
|
}
|
|
121
140
|
|
|
141
|
+
function isS3NotFoundError(error) {
|
|
142
|
+
const statusCode = error?.$metadata?.httpStatusCode;
|
|
143
|
+
const errorName = String(error?.name || error?.Code || error?.code || '');
|
|
144
|
+
return statusCode === 404 || errorName === 'NotFound' || errorName === 'NoSuchKey';
|
|
145
|
+
}
|
|
146
|
+
|
|
122
147
|
export class PageExtractor {
|
|
123
148
|
constructor(config = {}) {
|
|
124
149
|
this.config = {
|
|
@@ -176,13 +201,30 @@ export class PageExtractor {
|
|
|
176
201
|
return this.s3Client;
|
|
177
202
|
}
|
|
178
203
|
|
|
179
|
-
async
|
|
204
|
+
async s3ObjectExists(client, key) {
|
|
205
|
+
const s3Config = this.config.s3;
|
|
206
|
+
try {
|
|
207
|
+
await client.send(new HeadObjectCommand({
|
|
208
|
+
Bucket: s3Config.bucket,
|
|
209
|
+
Key: key
|
|
210
|
+
}));
|
|
211
|
+
return true;
|
|
212
|
+
} catch (error) {
|
|
213
|
+
if (isS3NotFoundError(error)) {
|
|
214
|
+
return false;
|
|
215
|
+
}
|
|
216
|
+
throw error;
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
async uploadScreenshotToS3(targetUrl, body) {
|
|
180
221
|
const s3Config = this.config.s3;
|
|
181
222
|
if (!s3Config) {
|
|
182
223
|
throw new Error('S3 is not configured');
|
|
183
224
|
}
|
|
184
225
|
|
|
185
|
-
const
|
|
226
|
+
const domain = createS3DomainSegment(targetUrl);
|
|
227
|
+
const key = joinS3Key(s3Config.prefix, domain, `${createFileMd5(body)}.png`);
|
|
186
228
|
const client = this.getS3Client();
|
|
187
229
|
const commandInput = {
|
|
188
230
|
Bucket: s3Config.bucket,
|
|
@@ -194,14 +236,17 @@ export class PageExtractor {
|
|
|
194
236
|
let lastError = null;
|
|
195
237
|
for (let attempt = 1; attempt <= s3Config.maxUploadAttempts; attempt += 1) {
|
|
196
238
|
try {
|
|
239
|
+
if (await this.s3ObjectExists(client, key)) {
|
|
240
|
+
return buildS3Url(s3Config, key);
|
|
241
|
+
}
|
|
197
242
|
const command = new PutObjectCommand(commandInput);
|
|
198
243
|
await client.send(command);
|
|
199
|
-
return buildS3Url(s3Config, key
|
|
244
|
+
return buildS3Url(s3Config, key);
|
|
200
245
|
} catch (error) {
|
|
201
246
|
lastError = error;
|
|
202
247
|
if (attempt < s3Config.maxUploadAttempts) {
|
|
203
248
|
console.warn(
|
|
204
|
-
`[page-analyzer] Failed to upload ${key} to S3, retrying ` +
|
|
249
|
+
`[page-analyzer] Failed to check/upload ${key} to S3, retrying ` +
|
|
205
250
|
`(${attempt}/${s3Config.maxUploadAttempts}): ${getErrorMessage(error)}`
|
|
206
251
|
);
|
|
207
252
|
}
|
|
@@ -572,16 +617,16 @@ export class PageExtractor {
|
|
|
572
617
|
await fs.mkdir(this.config.snapshotDir, { recursive: true });
|
|
573
618
|
}
|
|
574
619
|
|
|
575
|
-
const
|
|
620
|
+
const localPrefix = `${createSnapshotSlug(targetUrl)}-${createSnapshotRunId()}`;
|
|
576
621
|
const screenshots = {};
|
|
577
622
|
|
|
578
623
|
if (fullPageScreenshot) {
|
|
579
|
-
const fullPageFilename = `${prefix}-full-page.png`;
|
|
580
624
|
try {
|
|
581
625
|
if (useS3) {
|
|
582
626
|
const body = await page.screenshot({ fullPage: true });
|
|
583
|
-
screenshots.fullPage = await this.uploadScreenshotToS3(
|
|
627
|
+
screenshots.fullPage = await this.uploadScreenshotToS3(targetUrl, body);
|
|
584
628
|
} else {
|
|
629
|
+
const fullPageFilename = `${localPrefix}-full-page.png`;
|
|
585
630
|
const fullPagePath = path.join(this.config.snapshotDir, fullPageFilename);
|
|
586
631
|
await page.screenshot({
|
|
587
632
|
path: fullPagePath,
|
|
@@ -603,15 +648,13 @@ export class PageExtractor {
|
|
|
603
648
|
const block = blocks[index];
|
|
604
649
|
const blockIdx = getBlockNumber(block, index);
|
|
605
650
|
|
|
606
|
-
const blockLabel = String(blockIdx).padStart(3, '0').replace(/[^0-9a-z-]+/gi, '-');
|
|
607
|
-
const blockFilename = `${prefix}-block-${blockLabel}.png`;
|
|
608
651
|
try {
|
|
609
652
|
if (useS3) {
|
|
610
653
|
const body = await this.captureBlockScreenshotData(page, block);
|
|
611
654
|
if (!body) {
|
|
612
655
|
continue;
|
|
613
656
|
}
|
|
614
|
-
const url = await this.uploadScreenshotToS3(
|
|
657
|
+
const url = await this.uploadScreenshotToS3(targetUrl, body);
|
|
615
658
|
const screenshotRecord = {
|
|
616
659
|
blockIdx,
|
|
617
660
|
path: url
|
|
@@ -626,6 +669,8 @@ export class PageExtractor {
|
|
|
626
669
|
continue;
|
|
627
670
|
}
|
|
628
671
|
|
|
672
|
+
const blockLabel = String(blockIdx).padStart(3, '0').replace(/[^0-9a-z-]+/gi, '-');
|
|
673
|
+
const blockFilename = `${localPrefix}-block-${blockLabel}.png`;
|
|
629
674
|
const blockPath = path.join(this.config.snapshotDir, blockFilename);
|
|
630
675
|
const captured = await this.captureBlockScreenshot(page, block, blockPath);
|
|
631
676
|
if (captured) {
|
package/result-viewer.html
CHANGED
|
@@ -689,6 +689,19 @@
|
|
|
689
689
|
return text;
|
|
690
690
|
}
|
|
691
691
|
|
|
692
|
+
function safeResourceUrl(value) {
|
|
693
|
+
const url = pathToUrl(value);
|
|
694
|
+
if (!url) return '';
|
|
695
|
+
if (/^(https?:|file:|blob:)/i.test(url)) return url;
|
|
696
|
+
if (/^data:image\/(png|jpe?g|gif|webp);/i.test(url)) return url;
|
|
697
|
+
if (!/^[a-z][a-z0-9+.-]*:/i.test(url)) return url;
|
|
698
|
+
return '';
|
|
699
|
+
}
|
|
700
|
+
|
|
701
|
+
function imageSrcAttr(value) {
|
|
702
|
+
return escapeHtml(safeResourceUrl(value));
|
|
703
|
+
}
|
|
704
|
+
|
|
692
705
|
function getShot(block, index) {
|
|
693
706
|
const direct = asArray(block.blockScreenshotPaths)[0] || block.blockScreenshotPath || block.screenshotPath || '';
|
|
694
707
|
if (direct) return { path: direct };
|
|
@@ -792,8 +805,9 @@
|
|
|
792
805
|
}
|
|
793
806
|
els.allBlocks.innerHTML = rows.map(({ block, index }) => {
|
|
794
807
|
const shot = getShot(block, index);
|
|
795
|
-
const
|
|
796
|
-
|
|
808
|
+
const shotUrl = shot?.path ? imageSrcAttr(shot.path) : '';
|
|
809
|
+
const image = shotUrl
|
|
810
|
+
? '<img src="' + shotUrl + '" alt="Screenshot for block ' + index + '">'
|
|
797
811
|
: '<div class="empty-thumb">No selector screenshot</div>';
|
|
798
812
|
return '<article class="mini" data-index="' + index + '">' +
|
|
799
813
|
image +
|
|
@@ -815,8 +829,9 @@
|
|
|
815
829
|
els.selectedTitle.textContent = 'No blocks found';
|
|
816
830
|
els.selectedDescription.textContent = 'Loaded JSON does not contain block analysis rows.';
|
|
817
831
|
els.copySelector.disabled = true;
|
|
818
|
-
|
|
819
|
-
els.fullPageLink.
|
|
832
|
+
const fullPageUrl = safeResourceUrl(data.screenshots?.fullPage || '');
|
|
833
|
+
els.fullPageLink.href = fullPageUrl;
|
|
834
|
+
els.fullPageLink.style.display = fullPageUrl ? 'inline-flex' : 'none';
|
|
820
835
|
els.screenshot.innerHTML = '<div class="missing-shot">Load a Page Analyzer result with analysis.block_analysis.blocks.</div>';
|
|
821
836
|
els.info.innerHTML = '';
|
|
822
837
|
els.raw.textContent = JSON.stringify(data, null, 2);
|
|
@@ -828,12 +843,14 @@
|
|
|
828
843
|
els.selectedTitle.textContent = '#' + selectedIndex + ' ' + (block.blockName || 'Unnamed block');
|
|
829
844
|
els.selectedDescription.textContent = block.blockDescription || 'No description available.';
|
|
830
845
|
els.copySelector.disabled = !block.blockCssPath;
|
|
831
|
-
|
|
832
|
-
els.fullPageLink.
|
|
846
|
+
const fullPageUrl = safeResourceUrl(data.screenshots?.fullPage || '');
|
|
847
|
+
els.fullPageLink.href = fullPageUrl;
|
|
848
|
+
els.fullPageLink.style.display = fullPageUrl ? 'inline-flex' : 'none';
|
|
833
849
|
|
|
834
|
-
|
|
850
|
+
const shotUrl = shot?.path ? imageSrcAttr(shot.path) : '';
|
|
851
|
+
if (shotUrl) {
|
|
835
852
|
els.screenshot.innerHTML =
|
|
836
|
-
'<div class="screenshot-frame"><img src="' +
|
|
853
|
+
'<div class="screenshot-frame"><img src="' + shotUrl + '" alt="Screenshot for selected block"></div>' +
|
|
837
854
|
'<div class="info wide"><label>Screenshot path</label><span>' + escapeHtml(shot.path) + '</span></div>';
|
|
838
855
|
} else {
|
|
839
856
|
els.screenshot.innerHTML =
|
package/scripts/build-result-viewer.js
CHANGED
|
@@ -697,6 +697,19 @@ const html = `<!doctype html>
|
|
|
697
697
|
return text;
|
|
698
698
|
}
|
|
699
699
|
|
|
700
|
+
function safeResourceUrl(value) {
|
|
701
|
+
const url = pathToUrl(value);
|
|
702
|
+
if (!url) return '';
|
|
703
|
+
if (/^(https?:|file:|blob:)/i.test(url)) return url;
|
|
704
|
+
if (/^data:image\\/(png|jpe?g|gif|webp);/i.test(url)) return url;
|
|
705
|
+
if (!/^[a-z][a-z0-9+.-]*:/i.test(url)) return url;
|
|
706
|
+
return '';
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
function imageSrcAttr(value) {
|
|
710
|
+
return escapeHtml(safeResourceUrl(value));
|
|
711
|
+
}
|
|
712
|
+
|
|
700
713
|
function getShot(block, index) {
|
|
701
714
|
const direct = asArray(block.blockScreenshotPaths)[0] || block.blockScreenshotPath || block.screenshotPath || '';
|
|
702
715
|
if (direct) return { path: direct };
|
|
@@ -800,8 +813,9 @@ const html = `<!doctype html>
|
|
|
800
813
|
}
|
|
801
814
|
els.allBlocks.innerHTML = rows.map(({ block, index }) => {
|
|
802
815
|
const shot = getShot(block, index);
|
|
803
|
-
const
|
|
804
|
-
|
|
816
|
+
const shotUrl = shot?.path ? imageSrcAttr(shot.path) : '';
|
|
817
|
+
const image = shotUrl
|
|
818
|
+
? '<img src="' + shotUrl + '" alt="Screenshot for block ' + index + '">'
|
|
805
819
|
: '<div class="empty-thumb">No selector screenshot</div>';
|
|
806
820
|
return '<article class="mini" data-index="' + index + '">' +
|
|
807
821
|
image +
|
|
@@ -823,8 +837,9 @@ const html = `<!doctype html>
|
|
|
823
837
|
els.selectedTitle.textContent = 'No blocks found';
|
|
824
838
|
els.selectedDescription.textContent = 'Loaded JSON does not contain block analysis rows.';
|
|
825
839
|
els.copySelector.disabled = true;
|
|
826
|
-
|
|
827
|
-
els.fullPageLink.
|
|
840
|
+
const fullPageUrl = safeResourceUrl(data.screenshots?.fullPage || '');
|
|
841
|
+
els.fullPageLink.href = fullPageUrl;
|
|
842
|
+
els.fullPageLink.style.display = fullPageUrl ? 'inline-flex' : 'none';
|
|
828
843
|
els.screenshot.innerHTML = '<div class="missing-shot">Load a Page Analyzer result with analysis.block_analysis.blocks.</div>';
|
|
829
844
|
els.info.innerHTML = '';
|
|
830
845
|
els.raw.textContent = JSON.stringify(data, null, 2);
|
|
@@ -836,12 +851,14 @@ const html = `<!doctype html>
|
|
|
836
851
|
els.selectedTitle.textContent = '#' + selectedIndex + ' ' + (block.blockName || 'Unnamed block');
|
|
837
852
|
els.selectedDescription.textContent = block.blockDescription || 'No description available.';
|
|
838
853
|
els.copySelector.disabled = !block.blockCssPath;
|
|
839
|
-
|
|
840
|
-
els.fullPageLink.
|
|
854
|
+
const fullPageUrl = safeResourceUrl(data.screenshots?.fullPage || '');
|
|
855
|
+
els.fullPageLink.href = fullPageUrl;
|
|
856
|
+
els.fullPageLink.style.display = fullPageUrl ? 'inline-flex' : 'none';
|
|
841
857
|
|
|
842
|
-
|
|
858
|
+
const shotUrl = shot?.path ? imageSrcAttr(shot.path) : '';
|
|
859
|
+
if (shotUrl) {
|
|
843
860
|
els.screenshot.innerHTML =
|
|
844
|
-
'<div class="screenshot-frame"><img src="' +
|
|
861
|
+
'<div class="screenshot-frame"><img src="' + shotUrl + '" alt="Screenshot for selected block"></div>' +
|
|
845
862
|
'<div class="info wide"><label>Screenshot path</label><span>' + escapeHtml(shot.path) + '</span></div>';
|
|
846
863
|
} else {
|
|
847
864
|
els.screenshot.innerHTML =
|
package/test/smoke.test.js
CHANGED
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
import assert from 'node:assert/strict';
|
|
2
|
+
import { createHash } from 'node:crypto';
|
|
2
3
|
import { EventAnalyzer } from '../llm/analyzers/event-analyzer/event-analyzer.js';
|
|
3
4
|
import { buildBlockAnalysisArtifact } from '../llm/analyzers/event-analyzer/event-analyzer-blocks.js';
|
|
4
5
|
import { OpenAiProvider } from '../llm/providers/openai-provider.js';
|
|
5
6
|
import { PageExtractor } from '../page-extractor.js';
|
|
6
7
|
import { analyzeUrl } from '../index.js';
|
|
7
8
|
|
|
9
|
+
function md5(value) {
|
|
10
|
+
return createHash('md5').update(value).digest('hex');
|
|
11
|
+
}
|
|
12
|
+
|
|
8
13
|
class FakeProvider {
|
|
9
14
|
constructor() {
|
|
10
15
|
this.calls = [];
|
|
@@ -26,9 +31,10 @@ class FakeProvider {
|
|
|
26
31
|
}
|
|
27
32
|
|
|
28
33
|
class FakeLocator {
|
|
29
|
-
constructor({ count = 1, throwOnScreenshot = false } = {}) {
|
|
34
|
+
constructor({ count = 1, throwOnScreenshot = false, screenshotBodies = null } = {}) {
|
|
30
35
|
this.countValue = count;
|
|
31
36
|
this.throwOnScreenshot = throwOnScreenshot;
|
|
37
|
+
this.screenshotBodies = Array.isArray(screenshotBodies) ? screenshotBodies : null;
|
|
32
38
|
this.screenshots = [];
|
|
33
39
|
}
|
|
34
40
|
|
|
@@ -41,11 +47,14 @@ class FakeLocator {
|
|
|
41
47
|
}
|
|
42
48
|
|
|
43
49
|
async screenshot(options) {
|
|
50
|
+
const screenshotIndex = this.screenshots.length;
|
|
44
51
|
this.screenshots.push(options);
|
|
45
52
|
if (this.throwOnScreenshot) {
|
|
46
53
|
throw new Error('selector screenshot failed');
|
|
47
54
|
}
|
|
48
|
-
return Buffer.from(
|
|
55
|
+
return Buffer.from(
|
|
56
|
+
this.screenshotBodies?.[screenshotIndex] || `locator screenshot:${options?.path || 'buffer'}`
|
|
57
|
+
);
|
|
49
58
|
}
|
|
50
59
|
}
|
|
51
60
|
|
|
@@ -74,22 +83,36 @@ class FakePage {
|
|
|
74
83
|
}
|
|
75
84
|
|
|
76
85
|
class FakeS3Client {
|
|
77
|
-
constructor({ failPredicate = null } = {}) {
|
|
86
|
+
constructor({ failPredicate = null, existingKeys = [] } = {}) {
|
|
78
87
|
this.failPredicate = failPredicate;
|
|
88
|
+
this.existingKeys = new Set(existingKeys);
|
|
89
|
+
this.headCommands = [];
|
|
79
90
|
this.commands = [];
|
|
80
91
|
this.attemptsByKey = new Map();
|
|
81
92
|
}
|
|
82
93
|
|
|
83
94
|
async send(command) {
|
|
84
95
|
const input = command.input;
|
|
85
|
-
|
|
96
|
+
if (command.constructor.name === 'HeadObjectCommand') {
|
|
97
|
+
this.headCommands.push(input);
|
|
98
|
+
if (this.existingKeys.has(input.Key)) {
|
|
99
|
+
return {};
|
|
100
|
+
}
|
|
101
|
+
const error = new Error(`s3 object not found for ${input.Key}`);
|
|
102
|
+
error.name = 'NotFound';
|
|
103
|
+
error.$metadata = { httpStatusCode: 404 };
|
|
104
|
+
throw error;
|
|
105
|
+
}
|
|
106
|
+
|
|
86
107
|
const attempts = (this.attemptsByKey.get(input.Key) || 0) + 1;
|
|
87
108
|
this.attemptsByKey.set(input.Key, attempts);
|
|
109
|
+
this.commands.push(input);
|
|
88
110
|
|
|
89
111
|
if (this.failPredicate?.(input, attempts)) {
|
|
90
112
|
throw new Error(`s3 upload failed for ${input.Key}`);
|
|
91
113
|
}
|
|
92
114
|
|
|
115
|
+
this.existingKeys.add(input.Key);
|
|
93
116
|
return {};
|
|
94
117
|
}
|
|
95
118
|
}
|
|
@@ -331,7 +354,7 @@ async function analyzeWith(options = {}) {
|
|
|
331
354
|
bucket: 'page-analyzer-test',
|
|
332
355
|
region: 'ap-northeast-1',
|
|
333
356
|
prefix: '/page-analyzer/snapshots/',
|
|
334
|
-
publicBaseUrl: 'https://cdn.example.com/
|
|
357
|
+
publicBaseUrl: 'https://cdn.example.com/',
|
|
335
358
|
client: s3Client
|
|
336
359
|
}
|
|
337
360
|
});
|
|
@@ -348,22 +371,22 @@ async function analyzeWith(options = {}) {
|
|
|
348
371
|
assert.deepEqual(page.pageScreenshots[0], { fullPage: true });
|
|
349
372
|
assert.equal(locator.screenshots.length, 1);
|
|
350
373
|
assert.deepEqual(locator.screenshots[0], {});
|
|
374
|
+
assert.equal(s3Client.headCommands.length, 2);
|
|
351
375
|
assert.equal(s3Client.commands.length, 2);
|
|
352
376
|
|
|
353
377
|
const [fullPageUpload, blockUpload] = s3Client.commands;
|
|
354
378
|
assert.equal(fullPageUpload.Bucket, 'page-analyzer-test');
|
|
355
379
|
assert.equal(fullPageUpload.ContentType, 'image/png');
|
|
356
380
|
assert.equal(Buffer.isBuffer(fullPageUpload.Body), true);
|
|
357
|
-
assert.match(fullPageUpload.Key, /^page-analyzer\/snapshots\/example
|
|
358
|
-
assert.match(blockUpload.Key, /^page-analyzer\/snapshots\/example
|
|
381
|
+
assert.match(fullPageUpload.Key, /^page-analyzer\/snapshots\/example\.com\/[a-f0-9]{32}\.png$/);
|
|
382
|
+
assert.match(blockUpload.Key, /^page-analyzer\/snapshots\/example\.com\/[a-f0-9]{32}\.png$/);
|
|
383
|
+
assert.notEqual(fullPageUpload.Key, blockUpload.Key);
|
|
359
384
|
|
|
360
|
-
const fullPageFilename = fullPageUpload.Key.split('/').pop();
|
|
361
|
-
const blockFilename = blockUpload.Key.split('/').pop();
|
|
362
385
|
assert.equal(
|
|
363
386
|
screenshots.fullPage,
|
|
364
|
-
`https://cdn.example.com
|
|
387
|
+
`https://cdn.example.com/${fullPageUpload.Key}`
|
|
365
388
|
);
|
|
366
|
-
assert.equal(screenshots.blocks[0].path, `https://cdn.example.com
|
|
389
|
+
assert.equal(screenshots.blocks[0].path, `https://cdn.example.com/${blockUpload.Key}`);
|
|
367
390
|
}
|
|
368
391
|
|
|
369
392
|
{
|
|
@@ -386,32 +409,65 @@ async function analyzeWith(options = {}) {
|
|
|
386
409
|
});
|
|
387
410
|
|
|
388
411
|
const uploadedKey = s3Client.commands[0].Key;
|
|
389
|
-
assert.match(uploadedKey, /^nested\/prefix\/example
|
|
412
|
+
assert.match(uploadedKey, /^nested\/prefix\/example\.com\/[a-f0-9]{32}\.png$/);
|
|
390
413
|
assert.equal(
|
|
391
414
|
screenshots.blocks[0].path,
|
|
392
415
|
`https://page-analyzer-test.s3.ap-northeast-1.amazonaws.com/${uploadedKey}`
|
|
393
416
|
);
|
|
394
417
|
}
|
|
395
418
|
|
|
419
|
+
{
|
|
420
|
+
const body = Buffer.from('already uploaded screenshot');
|
|
421
|
+
const existingKey = `page-analyzer/snapshots/example.com/${md5(body)}.png`;
|
|
422
|
+
const s3Client = new FakeS3Client({
|
|
423
|
+
existingKeys: [existingKey]
|
|
424
|
+
});
|
|
425
|
+
const extractor = new PageExtractor({
|
|
426
|
+
s3: {
|
|
427
|
+
bucket: 'page-analyzer-test',
|
|
428
|
+
region: 'ap-northeast-1',
|
|
429
|
+
prefix: 'page-analyzer/snapshots',
|
|
430
|
+
publicBaseUrl: 'https://cdn.example.com',
|
|
431
|
+
client: s3Client
|
|
432
|
+
}
|
|
433
|
+
});
|
|
434
|
+
|
|
435
|
+
const url = await extractor.uploadScreenshotToS3('https://example.com/demo', body);
|
|
436
|
+
|
|
437
|
+
assert.equal(s3Client.headCommands.length, 1);
|
|
438
|
+
assert.equal(s3Client.headCommands[0].Key, existingKey);
|
|
439
|
+
assert.equal(s3Client.commands.length, 0);
|
|
440
|
+
assert.equal(url, `https://cdn.example.com/${existingKey}`);
|
|
441
|
+
}
|
|
442
|
+
|
|
396
443
|
{
|
|
397
444
|
const originalWarn = console.warn;
|
|
398
445
|
const warnings = [];
|
|
399
446
|
console.warn = (message) => warnings.push(message);
|
|
400
447
|
|
|
401
448
|
try {
|
|
449
|
+
const failingBlockKey =
|
|
450
|
+
`page-analyzer/snapshots/example.com/${md5('locator screenshot:block-0')}.png`;
|
|
451
|
+
const successfulBlockKey =
|
|
452
|
+
`page-analyzer/snapshots/example.com/${md5('locator screenshot:block-1')}.png`;
|
|
402
453
|
const s3Client = new FakeS3Client({
|
|
403
|
-
failPredicate: (input) => input.Key
|
|
454
|
+
failPredicate: (input) => input.Key === failingBlockKey
|
|
404
455
|
});
|
|
405
456
|
const extractor = new PageExtractor({
|
|
406
457
|
s3: {
|
|
407
458
|
bucket: 'page-analyzer-test',
|
|
408
459
|
region: 'ap-northeast-1',
|
|
409
460
|
prefix: 'page-analyzer/snapshots',
|
|
410
|
-
publicBaseUrl: 'https://cdn.example.com
|
|
461
|
+
publicBaseUrl: 'https://cdn.example.com',
|
|
411
462
|
client: s3Client
|
|
412
463
|
}
|
|
413
464
|
});
|
|
414
|
-
const locator = new FakeLocator(
|
|
465
|
+
const locator = new FakeLocator({
|
|
466
|
+
screenshotBodies: [
|
|
467
|
+
'locator screenshot:block-0',
|
|
468
|
+
'locator screenshot:block-1'
|
|
469
|
+
]
|
|
470
|
+
});
|
|
415
471
|
const page = new FakePage(locator);
|
|
416
472
|
const screenshots = await extractor.captureScreenshots(page, 'https://example.com/demo', [
|
|
417
473
|
{ blockName: 'Hero', blockCssPath: '#hero' },
|
|
@@ -423,8 +479,8 @@ async function analyzeWith(options = {}) {
|
|
|
423
479
|
|
|
424
480
|
assert.equal(screenshots.blocks.length, 1);
|
|
425
481
|
assert.equal(screenshots.blocks[0].blockIdx, 1);
|
|
426
|
-
assert.equal(s3Client.commands.filter((input) => input.Key
|
|
427
|
-
assert.equal(s3Client.commands.filter((input) => input.Key
|
|
482
|
+
assert.equal(s3Client.commands.filter((input) => input.Key === failingBlockKey).length, 3);
|
|
483
|
+
assert.equal(s3Client.commands.filter((input) => input.Key === successfulBlockKey).length, 1);
|
|
428
484
|
assert.equal(warnings.some((message) => message.includes('retrying')), true);
|
|
429
485
|
assert.equal(warnings.some((message) => message.includes('Failed to capture/upload block 0')), true);
|
|
430
486
|
} finally {
|