page-analyzer 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -250,7 +250,7 @@ const result = await analyzePageEvents({
250
250
 
251
251
  启用 `blockScreenshots: true` 后,模块会在 LLM 合并区块后再截图。返回结果会包含 `screenshots.blocks`,每项包含逻辑区块序号 `blockIdx` 和对应截图 `path`;区块分析结果中的每个 block 也会额外带上 `blockScreenshotPaths`,每个逻辑区块最多对应一张截图。无法通过 `blockCssPath` 截图的隐藏或空区块会被跳过。
252
252
 
253
- 如果配置 `extractorConfig.s3`,截图不会写入本地 `snapshots/`,而是直接上传到 S3;`screenshots.fullPage`、`screenshots.blocks[].path` 和 `blockScreenshotPaths` 会返回 HTTPS URL。上传不会设置 ACL,访问权限沿用 bucket 策略。单张截图上传失败会重试 3 次,仍失败则跳过该截图。
253
+ 如果配置 `extractorConfig.s3`,截图不会写入本地 `snapshots/`,而是直接上传到 S3;`screenshots.fullPage`、`screenshots.blocks[].path` 和 `blockScreenshotPaths` 会返回 HTTPS URL。S3 对象 key 使用 `<prefix>/<domain>/<file-md5>.png`,上传前会先检查对象是否已存在,已存在时直接返回对应 URL,避免重复上传和冗余对象。上传不会设置 ACL,访问权限沿用 bucket 策略。单张截图的检查或上传最多尝试 3 次(含首次),全部失败则跳过该截图。
254
254
 
255
255
  启用 `waitForImagesLoaded: true` 后,模块会先滚动页面触发懒加载,再等待当前 DOM 中的 `<img>` 完成加载或失败,之后再提取区块、分析和截图;等待时间受 `extractorConfig.timeoutMs` 控制。
256
256
 
@@ -305,7 +305,7 @@ const result = await analyzeUrl('https://example.com', {
305
305
  bucket: 'my-bucket',
306
306
  region: 'ap-northeast-1',
307
307
  prefix: 'page-analyzer/snapshots',
308
- publicBaseUrl: 'https://cdn.example.com/page-analyzer/snapshots',
308
+ publicBaseUrl: 'https://cdn.example.com',
309
309
  credentials: {
310
310
  accessKeyId: process.env.AWS_ACCESS_KEY_ID,
311
311
  secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
@@ -316,7 +316,7 @@ const result = await analyzeUrl('https://example.com', {
316
316
  });
317
317
  ```
318
318
 
319
- `extractorConfig.s3.bucket` 和 `extractorConfig.s3.region` 必填。`credentials` 可省略,省略时使用 AWS SDK 默认凭证链。`publicBaseUrl` 可省略,省略时返回 `https://<bucket>.s3.<region>.amazonaws.com/<key>`。
319
+ `extractorConfig.s3.bucket` 和 `extractorConfig.s3.region` 必填。`credentials` 可省略,省略时使用 AWS SDK 默认凭证链。`publicBaseUrl` 可省略,省略时返回 `https://<bucket>.s3.<region>.amazonaws.com/<key>`;配置后返回 `${publicBaseUrl}/<key>`(`<key>` 已包含 `prefix`,因此 `publicBaseUrl` 不应再重复该前缀)。启用 S3 上传时,需要凭证具备 `s3:GetObject` 和 `s3:PutObject` 权限;如果希望不存在的对象能被稳定识别为 404,还需要对应 bucket/prefix 的 `s3:ListBucket` 权限,否则 S3 会对不存在的对象返回 403,存在性检查会按失败处理(重试后跳过该截图)。
320
320
 
321
321
  ### parserConfig
322
322
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "page-analyzer",
3
- "version": "1.2.0",
3
+ "version": "1.2.2",
4
4
  "type": "module",
5
5
  "description": "Standalone page analysis module.",
6
6
  "license": "MIT",
package/page-extractor.js CHANGED
@@ -5,7 +5,8 @@
5
5
 
6
6
  import fs from 'node:fs/promises';
7
7
  import path from 'node:path';
8
- import { PutObjectCommand, S3Client } from '@aws-sdk/client-s3';
8
+ import { createHash } from 'node:crypto';
9
+ import { HeadObjectCommand, PutObjectCommand, S3Client } from '@aws-sdk/client-s3';
9
10
 
10
11
  // In-browser block extraction function (serialized into page.evaluate)
11
12
  // Imported from the project's extract-blocks script
@@ -41,6 +42,24 @@ function createSnapshotRunId() {
41
42
  .replace(/^-+|-+$/g, '');
42
43
  }
43
44
 
45
+ function createS3DomainSegment(url) {
46
+ const source = String(url || '').trim();
47
+ try {
48
+ const parsed = new URL(source);
49
+ const hostname = parsed.hostname
50
+ .toLowerCase()
51
+ .replace(/[^a-z0-9.-]+/g, '-')
52
+ .replace(/^-+|-+$/g, '');
53
+ return hostname || 'page';
54
+ } catch {
55
+ return createSnapshotSlug(source);
56
+ }
57
+ }
58
+
59
+ function createFileMd5(body) {
60
+ return createHash('md5').update(body).digest('hex');
61
+ }
62
+
44
63
  function getBlockNumber(block, fallbackIndex) {
45
64
  return Number.isInteger(block?.blockIdx) ? block.blockIdx : fallbackIndex;
46
65
  }
@@ -96,8 +115,8 @@ function normalizeS3Config(config) {
96
115
  };
97
116
  }
98
117
 
99
- function joinS3Key(prefix, filename) {
100
- return [prefix, filename].filter(Boolean).join('/');
118
+ function joinS3Key(...parts) {
119
+ return parts.filter(Boolean).join('/');
101
120
  }
102
121
 
103
122
  function encodeS3Key(key) {
@@ -107,9 +126,9 @@ function encodeS3Key(key) {
107
126
  .join('/');
108
127
  }
109
128
 
110
- function buildS3Url(s3Config, key, filename) {
129
+ function buildS3Url(s3Config, key) {
111
130
  if (s3Config.publicBaseUrl) {
112
- return `${s3Config.publicBaseUrl}/${encodeURIComponent(filename)}`;
131
+ return `${s3Config.publicBaseUrl}/${encodeS3Key(key)}`;
113
132
  }
114
133
 
115
134
  return `https://${s3Config.bucket}.s3.${s3Config.region}.amazonaws.com/${encodeS3Key(key)}`;
@@ -119,6 +138,12 @@ function getErrorMessage(error) {
119
138
  return error instanceof Error ? error.message : String(error);
120
139
  }
121
140
 
141
+ function isS3NotFoundError(error) {
142
+ const statusCode = error?.$metadata?.httpStatusCode;
143
+ const errorName = String(error?.name || error?.Code || error?.code || '');
144
+ return statusCode === 404 || errorName === 'NotFound' || errorName === 'NoSuchKey';
145
+ }
146
+
122
147
  export class PageExtractor {
123
148
  constructor(config = {}) {
124
149
  this.config = {
@@ -176,13 +201,30 @@ export class PageExtractor {
176
201
  return this.s3Client;
177
202
  }
178
203
 
179
- async uploadScreenshotToS3(filename, body) {
204
+ async s3ObjectExists(client, key) {
205
+ const s3Config = this.config.s3;
206
+ try {
207
+ await client.send(new HeadObjectCommand({
208
+ Bucket: s3Config.bucket,
209
+ Key: key
210
+ }));
211
+ return true;
212
+ } catch (error) {
213
+ if (isS3NotFoundError(error)) {
214
+ return false;
215
+ }
216
+ throw error;
217
+ }
218
+ }
219
+
220
+ async uploadScreenshotToS3(targetUrl, body) {
180
221
  const s3Config = this.config.s3;
181
222
  if (!s3Config) {
182
223
  throw new Error('S3 is not configured');
183
224
  }
184
225
 
185
- const key = joinS3Key(s3Config.prefix, filename);
226
+ const domain = createS3DomainSegment(targetUrl);
227
+ const key = joinS3Key(s3Config.prefix, domain, `${createFileMd5(body)}.png`);
186
228
  const client = this.getS3Client();
187
229
  const commandInput = {
188
230
  Bucket: s3Config.bucket,
@@ -194,14 +236,17 @@ export class PageExtractor {
194
236
  let lastError = null;
195
237
  for (let attempt = 1; attempt <= s3Config.maxUploadAttempts; attempt += 1) {
196
238
  try {
239
+ if (await this.s3ObjectExists(client, key)) {
240
+ return buildS3Url(s3Config, key);
241
+ }
197
242
  const command = new PutObjectCommand(commandInput);
198
243
  await client.send(command);
199
- return buildS3Url(s3Config, key, filename);
244
+ return buildS3Url(s3Config, key);
200
245
  } catch (error) {
201
246
  lastError = error;
202
247
  if (attempt < s3Config.maxUploadAttempts) {
203
248
  console.warn(
204
- `[page-analyzer] Failed to upload ${key} to S3, retrying ` +
249
+ `[page-analyzer] Failed to check/upload ${key} to S3, retrying ` +
205
250
  `(${attempt}/${s3Config.maxUploadAttempts}): ${getErrorMessage(error)}`
206
251
  );
207
252
  }
@@ -572,16 +617,16 @@ export class PageExtractor {
572
617
  await fs.mkdir(this.config.snapshotDir, { recursive: true });
573
618
  }
574
619
 
575
- const prefix = `${createSnapshotSlug(targetUrl)}-${createSnapshotRunId()}`;
620
+ const localPrefix = `${createSnapshotSlug(targetUrl)}-${createSnapshotRunId()}`;
576
621
  const screenshots = {};
577
622
 
578
623
  if (fullPageScreenshot) {
579
- const fullPageFilename = `${prefix}-full-page.png`;
580
624
  try {
581
625
  if (useS3) {
582
626
  const body = await page.screenshot({ fullPage: true });
583
- screenshots.fullPage = await this.uploadScreenshotToS3(fullPageFilename, body);
627
+ screenshots.fullPage = await this.uploadScreenshotToS3(targetUrl, body);
584
628
  } else {
629
+ const fullPageFilename = `${localPrefix}-full-page.png`;
585
630
  const fullPagePath = path.join(this.config.snapshotDir, fullPageFilename);
586
631
  await page.screenshot({
587
632
  path: fullPagePath,
@@ -603,15 +648,13 @@ export class PageExtractor {
603
648
  const block = blocks[index];
604
649
  const blockIdx = getBlockNumber(block, index);
605
650
 
606
- const blockLabel = String(blockIdx).padStart(3, '0').replace(/[^0-9a-z-]+/gi, '-');
607
- const blockFilename = `${prefix}-block-${blockLabel}.png`;
608
651
  try {
609
652
  if (useS3) {
610
653
  const body = await this.captureBlockScreenshotData(page, block);
611
654
  if (!body) {
612
655
  continue;
613
656
  }
614
- const url = await this.uploadScreenshotToS3(blockFilename, body);
657
+ const url = await this.uploadScreenshotToS3(targetUrl, body);
615
658
  const screenshotRecord = {
616
659
  blockIdx,
617
660
  path: url
@@ -626,6 +669,8 @@ export class PageExtractor {
626
669
  continue;
627
670
  }
628
671
 
672
+ const blockLabel = String(blockIdx).padStart(3, '0').replace(/[^0-9a-z-]+/gi, '-');
673
+ const blockFilename = `${localPrefix}-block-${blockLabel}.png`;
629
674
  const blockPath = path.join(this.config.snapshotDir, blockFilename);
630
675
  const captured = await this.captureBlockScreenshot(page, block, blockPath);
631
676
  if (captured) {
@@ -689,6 +689,19 @@
689
689
  return text;
690
690
  }
691
691
 
692
+ function safeResourceUrl(value) {
693
+ const url = pathToUrl(value);
694
+ if (!url) return '';
695
+ if (/^(https?:|file:|blob:)/i.test(url)) return url;
696
+ if (/^data:image\/(png|jpe?g|gif|webp);/i.test(url)) return url;
697
+ if (!/^[a-z][a-z0-9+.-]*:/i.test(url)) return url;
698
+ return '';
699
+ }
700
+
701
+ function imageSrcAttr(value) {
702
+ return escapeHtml(safeResourceUrl(value));
703
+ }
704
+
692
705
  function getShot(block, index) {
693
706
  const direct = asArray(block.blockScreenshotPaths)[0] || block.blockScreenshotPath || block.screenshotPath || '';
694
707
  if (direct) return { path: direct };
@@ -792,8 +805,9 @@
792
805
  }
793
806
  els.allBlocks.innerHTML = rows.map(({ block, index }) => {
794
807
  const shot = getShot(block, index);
795
- const image = shot
796
- ? '<img src="' + pathToUrl(shot.path) + '" alt="Screenshot for block ' + index + '">'
808
+ const shotUrl = shot?.path ? imageSrcAttr(shot.path) : '';
809
+ const image = shotUrl
810
+ ? '<img src="' + shotUrl + '" alt="Screenshot for block ' + index + '">'
797
811
  : '<div class="empty-thumb">No selector screenshot</div>';
798
812
  return '<article class="mini" data-index="' + index + '">' +
799
813
  image +
@@ -815,8 +829,9 @@
815
829
  els.selectedTitle.textContent = 'No blocks found';
816
830
  els.selectedDescription.textContent = 'Loaded JSON does not contain block analysis rows.';
817
831
  els.copySelector.disabled = true;
818
- els.fullPageLink.href = pathToUrl(data.screenshots?.fullPage || '');
819
- els.fullPageLink.style.display = data.screenshots?.fullPage ? 'inline-flex' : 'none';
832
+ const fullPageUrl = safeResourceUrl(data.screenshots?.fullPage || '');
833
+ els.fullPageLink.href = fullPageUrl;
834
+ els.fullPageLink.style.display = fullPageUrl ? 'inline-flex' : 'none';
820
835
  els.screenshot.innerHTML = '<div class="missing-shot">Load a Page Analyzer result with analysis.block_analysis.blocks.</div>';
821
836
  els.info.innerHTML = '';
822
837
  els.raw.textContent = JSON.stringify(data, null, 2);
@@ -828,12 +843,14 @@
828
843
  els.selectedTitle.textContent = '#' + selectedIndex + ' ' + (block.blockName || 'Unnamed block');
829
844
  els.selectedDescription.textContent = block.blockDescription || 'No description available.';
830
845
  els.copySelector.disabled = !block.blockCssPath;
831
- els.fullPageLink.href = pathToUrl(data.screenshots?.fullPage || '');
832
- els.fullPageLink.style.display = data.screenshots?.fullPage ? 'inline-flex' : 'none';
846
+ const fullPageUrl = safeResourceUrl(data.screenshots?.fullPage || '');
847
+ els.fullPageLink.href = fullPageUrl;
848
+ els.fullPageLink.style.display = fullPageUrl ? 'inline-flex' : 'none';
833
849
 
834
- if (shot?.path) {
850
+ const shotUrl = shot?.path ? imageSrcAttr(shot.path) : '';
851
+ if (shotUrl) {
835
852
  els.screenshot.innerHTML =
836
- '<div class="screenshot-frame"><img src="' + pathToUrl(shot.path) + '" alt="Screenshot for selected block"></div>' +
853
+ '<div class="screenshot-frame"><img src="' + shotUrl + '" alt="Screenshot for selected block"></div>' +
837
854
  '<div class="info wide"><label>Screenshot path</label><span>' + escapeHtml(shot.path) + '</span></div>';
838
855
  } else {
839
856
  els.screenshot.innerHTML =
@@ -697,6 +697,19 @@ const html = `<!doctype html>
697
697
  return text;
698
698
  }
699
699
 
700
+ function safeResourceUrl(value) {
701
+ const url = pathToUrl(value);
702
+ if (!url) return '';
703
+ if (/^(https?:|file:|blob:)/i.test(url)) return url;
704
+ if (/^data:image\\/(png|jpe?g|gif|webp);/i.test(url)) return url;
705
+ if (!/^[a-z][a-z0-9+.-]*:/i.test(url)) return url;
706
+ return '';
707
+ }
708
+
709
+ function imageSrcAttr(value) {
710
+ return escapeHtml(safeResourceUrl(value));
711
+ }
712
+
700
713
  function getShot(block, index) {
701
714
  const direct = asArray(block.blockScreenshotPaths)[0] || block.blockScreenshotPath || block.screenshotPath || '';
702
715
  if (direct) return { path: direct };
@@ -800,8 +813,9 @@ const html = `<!doctype html>
800
813
  }
801
814
  els.allBlocks.innerHTML = rows.map(({ block, index }) => {
802
815
  const shot = getShot(block, index);
803
- const image = shot
804
- ? '<img src="' + pathToUrl(shot.path) + '" alt="Screenshot for block ' + index + '">'
816
+ const shotUrl = shot?.path ? imageSrcAttr(shot.path) : '';
817
+ const image = shotUrl
818
+ ? '<img src="' + shotUrl + '" alt="Screenshot for block ' + index + '">'
805
819
  : '<div class="empty-thumb">No selector screenshot</div>';
806
820
  return '<article class="mini" data-index="' + index + '">' +
807
821
  image +
@@ -823,8 +837,9 @@ const html = `<!doctype html>
823
837
  els.selectedTitle.textContent = 'No blocks found';
824
838
  els.selectedDescription.textContent = 'Loaded JSON does not contain block analysis rows.';
825
839
  els.copySelector.disabled = true;
826
- els.fullPageLink.href = pathToUrl(data.screenshots?.fullPage || '');
827
- els.fullPageLink.style.display = data.screenshots?.fullPage ? 'inline-flex' : 'none';
840
+ const fullPageUrl = safeResourceUrl(data.screenshots?.fullPage || '');
841
+ els.fullPageLink.href = fullPageUrl;
842
+ els.fullPageLink.style.display = fullPageUrl ? 'inline-flex' : 'none';
828
843
  els.screenshot.innerHTML = '<div class="missing-shot">Load a Page Analyzer result with analysis.block_analysis.blocks.</div>';
829
844
  els.info.innerHTML = '';
830
845
  els.raw.textContent = JSON.stringify(data, null, 2);
@@ -836,12 +851,14 @@ const html = `<!doctype html>
836
851
  els.selectedTitle.textContent = '#' + selectedIndex + ' ' + (block.blockName || 'Unnamed block');
837
852
  els.selectedDescription.textContent = block.blockDescription || 'No description available.';
838
853
  els.copySelector.disabled = !block.blockCssPath;
839
- els.fullPageLink.href = pathToUrl(data.screenshots?.fullPage || '');
840
- els.fullPageLink.style.display = data.screenshots?.fullPage ? 'inline-flex' : 'none';
854
+ const fullPageUrl = safeResourceUrl(data.screenshots?.fullPage || '');
855
+ els.fullPageLink.href = fullPageUrl;
856
+ els.fullPageLink.style.display = fullPageUrl ? 'inline-flex' : 'none';
841
857
 
842
- if (shot?.path) {
858
+ const shotUrl = shot?.path ? imageSrcAttr(shot.path) : '';
859
+ if (shotUrl) {
843
860
  els.screenshot.innerHTML =
844
- '<div class="screenshot-frame"><img src="' + pathToUrl(shot.path) + '" alt="Screenshot for selected block"></div>' +
861
+ '<div class="screenshot-frame"><img src="' + shotUrl + '" alt="Screenshot for selected block"></div>' +
845
862
  '<div class="info wide"><label>Screenshot path</label><span>' + escapeHtml(shot.path) + '</span></div>';
846
863
  } else {
847
864
  els.screenshot.innerHTML =
@@ -1,10 +1,15 @@
1
1
  import assert from 'node:assert/strict';
2
+ import { createHash } from 'node:crypto';
2
3
  import { EventAnalyzer } from '../llm/analyzers/event-analyzer/event-analyzer.js';
3
4
  import { buildBlockAnalysisArtifact } from '../llm/analyzers/event-analyzer/event-analyzer-blocks.js';
4
5
  import { OpenAiProvider } from '../llm/providers/openai-provider.js';
5
6
  import { PageExtractor } from '../page-extractor.js';
6
7
  import { analyzeUrl } from '../index.js';
7
8
 
9
+ function md5(value) {
10
+ return createHash('md5').update(value).digest('hex');
11
+ }
12
+
8
13
  class FakeProvider {
9
14
  constructor() {
10
15
  this.calls = [];
@@ -26,9 +31,10 @@ class FakeProvider {
26
31
  }
27
32
 
28
33
  class FakeLocator {
29
- constructor({ count = 1, throwOnScreenshot = false } = {}) {
34
+ constructor({ count = 1, throwOnScreenshot = false, screenshotBodies = null } = {}) {
30
35
  this.countValue = count;
31
36
  this.throwOnScreenshot = throwOnScreenshot;
37
+ this.screenshotBodies = Array.isArray(screenshotBodies) ? screenshotBodies : null;
32
38
  this.screenshots = [];
33
39
  }
34
40
 
@@ -41,11 +47,14 @@ class FakeLocator {
41
47
  }
42
48
 
43
49
  async screenshot(options) {
50
+ const screenshotIndex = this.screenshots.length;
44
51
  this.screenshots.push(options);
45
52
  if (this.throwOnScreenshot) {
46
53
  throw new Error('selector screenshot failed');
47
54
  }
48
- return Buffer.from(`locator screenshot:${options?.path || 'buffer'}`);
55
+ return Buffer.from(
56
+ this.screenshotBodies?.[screenshotIndex] || `locator screenshot:${options?.path || 'buffer'}`
57
+ );
49
58
  }
50
59
  }
51
60
 
@@ -74,22 +83,36 @@ class FakePage {
74
83
  }
75
84
 
76
85
  class FakeS3Client {
77
- constructor({ failPredicate = null } = {}) {
86
+ constructor({ failPredicate = null, existingKeys = [] } = {}) {
78
87
  this.failPredicate = failPredicate;
88
+ this.existingKeys = new Set(existingKeys);
89
+ this.headCommands = [];
79
90
  this.commands = [];
80
91
  this.attemptsByKey = new Map();
81
92
  }
82
93
 
83
94
  async send(command) {
84
95
  const input = command.input;
85
- this.commands.push(input);
96
+ if (command.constructor.name === 'HeadObjectCommand') {
97
+ this.headCommands.push(input);
98
+ if (this.existingKeys.has(input.Key)) {
99
+ return {};
100
+ }
101
+ const error = new Error(`s3 object not found for ${input.Key}`);
102
+ error.name = 'NotFound';
103
+ error.$metadata = { httpStatusCode: 404 };
104
+ throw error;
105
+ }
106
+
86
107
  const attempts = (this.attemptsByKey.get(input.Key) || 0) + 1;
87
108
  this.attemptsByKey.set(input.Key, attempts);
109
+ this.commands.push(input);
88
110
 
89
111
  if (this.failPredicate?.(input, attempts)) {
90
112
  throw new Error(`s3 upload failed for ${input.Key}`);
91
113
  }
92
114
 
115
+ this.existingKeys.add(input.Key);
93
116
  return {};
94
117
  }
95
118
  }
@@ -331,7 +354,7 @@ async function analyzeWith(options = {}) {
331
354
  bucket: 'page-analyzer-test',
332
355
  region: 'ap-northeast-1',
333
356
  prefix: '/page-analyzer/snapshots/',
334
- publicBaseUrl: 'https://cdn.example.com/page-analyzer/snapshots/',
357
+ publicBaseUrl: 'https://cdn.example.com/',
335
358
  client: s3Client
336
359
  }
337
360
  });
@@ -348,22 +371,22 @@ async function analyzeWith(options = {}) {
348
371
  assert.deepEqual(page.pageScreenshots[0], { fullPage: true });
349
372
  assert.equal(locator.screenshots.length, 1);
350
373
  assert.deepEqual(locator.screenshots[0], {});
374
+ assert.equal(s3Client.headCommands.length, 2);
351
375
  assert.equal(s3Client.commands.length, 2);
352
376
 
353
377
  const [fullPageUpload, blockUpload] = s3Client.commands;
354
378
  assert.equal(fullPageUpload.Bucket, 'page-analyzer-test');
355
379
  assert.equal(fullPageUpload.ContentType, 'image/png');
356
380
  assert.equal(Buffer.isBuffer(fullPageUpload.Body), true);
357
- assert.match(fullPageUpload.Key, /^page-analyzer\/snapshots\/example-com-demo-.*-full-page\.png$/);
358
- assert.match(blockUpload.Key, /^page-analyzer\/snapshots\/example-com-demo-.*-block-000\.png$/);
381
+ assert.match(fullPageUpload.Key, /^page-analyzer\/snapshots\/example\.com\/[a-f0-9]{32}\.png$/);
382
+ assert.match(blockUpload.Key, /^page-analyzer\/snapshots\/example\.com\/[a-f0-9]{32}\.png$/);
383
+ assert.notEqual(fullPageUpload.Key, blockUpload.Key);
359
384
 
360
- const fullPageFilename = fullPageUpload.Key.split('/').pop();
361
- const blockFilename = blockUpload.Key.split('/').pop();
362
385
  assert.equal(
363
386
  screenshots.fullPage,
364
- `https://cdn.example.com/page-analyzer/snapshots/${fullPageFilename}`
387
+ `https://cdn.example.com/${fullPageUpload.Key}`
365
388
  );
366
- assert.equal(screenshots.blocks[0].path, `https://cdn.example.com/page-analyzer/snapshots/${blockFilename}`);
389
+ assert.equal(screenshots.blocks[0].path, `https://cdn.example.com/${blockUpload.Key}`);
367
390
  }
368
391
 
369
392
  {
@@ -386,32 +409,65 @@ async function analyzeWith(options = {}) {
386
409
  });
387
410
 
388
411
  const uploadedKey = s3Client.commands[0].Key;
389
- assert.match(uploadedKey, /^nested\/prefix\/example-com-demo-.*-block-000\.png$/);
412
+ assert.match(uploadedKey, /^nested\/prefix\/example\.com\/[a-f0-9]{32}\.png$/);
390
413
  assert.equal(
391
414
  screenshots.blocks[0].path,
392
415
  `https://page-analyzer-test.s3.ap-northeast-1.amazonaws.com/${uploadedKey}`
393
416
  );
394
417
  }
395
418
 
419
+ {
420
+ const body = Buffer.from('already uploaded screenshot');
421
+ const existingKey = `page-analyzer/snapshots/example.com/${md5(body)}.png`;
422
+ const s3Client = new FakeS3Client({
423
+ existingKeys: [existingKey]
424
+ });
425
+ const extractor = new PageExtractor({
426
+ s3: {
427
+ bucket: 'page-analyzer-test',
428
+ region: 'ap-northeast-1',
429
+ prefix: 'page-analyzer/snapshots',
430
+ publicBaseUrl: 'https://cdn.example.com',
431
+ client: s3Client
432
+ }
433
+ });
434
+
435
+ const url = await extractor.uploadScreenshotToS3('https://example.com/demo', body);
436
+
437
+ assert.equal(s3Client.headCommands.length, 1);
438
+ assert.equal(s3Client.headCommands[0].Key, existingKey);
439
+ assert.equal(s3Client.commands.length, 0);
440
+ assert.equal(url, `https://cdn.example.com/${existingKey}`);
441
+ }
442
+
396
443
  {
397
444
  const originalWarn = console.warn;
398
445
  const warnings = [];
399
446
  console.warn = (message) => warnings.push(message);
400
447
 
401
448
  try {
449
+ const failingBlockKey =
450
+ `page-analyzer/snapshots/example.com/${md5('locator screenshot:block-0')}.png`;
451
+ const successfulBlockKey =
452
+ `page-analyzer/snapshots/example.com/${md5('locator screenshot:block-1')}.png`;
402
453
  const s3Client = new FakeS3Client({
403
- failPredicate: (input) => input.Key.endsWith('-block-000.png')
454
+ failPredicate: (input) => input.Key === failingBlockKey
404
455
  });
405
456
  const extractor = new PageExtractor({
406
457
  s3: {
407
458
  bucket: 'page-analyzer-test',
408
459
  region: 'ap-northeast-1',
409
460
  prefix: 'page-analyzer/snapshots',
410
- publicBaseUrl: 'https://cdn.example.com/page-analyzer/snapshots',
461
+ publicBaseUrl: 'https://cdn.example.com',
411
462
  client: s3Client
412
463
  }
413
464
  });
414
- const locator = new FakeLocator();
465
+ const locator = new FakeLocator({
466
+ screenshotBodies: [
467
+ 'locator screenshot:block-0',
468
+ 'locator screenshot:block-1'
469
+ ]
470
+ });
415
471
  const page = new FakePage(locator);
416
472
  const screenshots = await extractor.captureScreenshots(page, 'https://example.com/demo', [
417
473
  { blockName: 'Hero', blockCssPath: '#hero' },
@@ -423,8 +479,8 @@ async function analyzeWith(options = {}) {
423
479
 
424
480
  assert.equal(screenshots.blocks.length, 1);
425
481
  assert.equal(screenshots.blocks[0].blockIdx, 1);
426
- assert.equal(s3Client.commands.filter((input) => input.Key.endsWith('-block-000.png')).length, 3);
427
- assert.equal(s3Client.commands.filter((input) => input.Key.endsWith('-block-001.png')).length, 1);
482
+ assert.equal(s3Client.commands.filter((input) => input.Key === failingBlockKey).length, 3);
483
+ assert.equal(s3Client.commands.filter((input) => input.Key === successfulBlockKey).length, 1);
428
484
  assert.equal(warnings.some((message) => message.includes('retrying')), true);
429
485
  assert.equal(warnings.some((message) => message.includes('Failed to capture/upload block 0')), true);
430
486
  } finally {