page-analyzer 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@ const html = `<!doctype html>
11
11
  <head>
12
12
  <meta charset="utf-8">
13
13
  <meta name="viewport" content="width=device-width, initial-scale=1">
14
+ <link rel="icon" href="data:,">
14
15
  <title>Page Analyzer Result Viewer</title>
15
16
  <style>
16
17
  :root {
@@ -116,12 +117,28 @@ const html = `<!doctype html>
116
117
  color: var(--missing);
117
118
  }
118
119
 
120
+ .url-loader,
119
121
  .file-loader {
120
122
  display: grid;
121
123
  gap: 8px;
122
124
  margin-top: 8px;
123
125
  }
124
126
 
127
+ .url-loader label,
128
+ .file-loader span {
129
+ color: var(--muted);
130
+ font-size: 11px;
131
+ font-weight: 800;
132
+ text-transform: uppercase;
133
+ }
134
+
135
+ .url-row {
136
+ display: grid;
137
+ grid-template-columns: minmax(0, 1fr) auto;
138
+ gap: 8px;
139
+ }
140
+
141
+ .url-loader input,
125
142
  .file-loader input {
126
143
  width: 100%;
127
144
  border: 1px solid var(--line);
@@ -131,6 +148,16 @@ const html = `<!doctype html>
131
148
  font-size: 12px;
132
149
  }
133
150
 
151
+ .url-loader button {
152
+ min-width: 68px;
153
+ border: 1px solid var(--ink);
154
+ background: var(--ink);
155
+ color: white;
156
+ padding: 0 12px;
157
+ cursor: pointer;
158
+ font-size: 12px;
159
+ }
160
+
134
161
  .metrics {
135
162
  display: grid;
136
163
  grid-template-columns: repeat(2, minmax(0, 1fr));
@@ -530,7 +557,14 @@ const html = `<!doctype html>
530
557
  <h1>Block Review</h1>
531
558
  <div class="title" id="page-title"></div>
532
559
  <div class="load-state" id="load-state">Loading result.json...</div>
533
- <label class="file-loader" id="file-loader" hidden>
560
+ <form class="url-loader" id="url-loader">
561
+ <label for="result-url">Result JSON URL</label>
562
+ <div class="url-row">
563
+ <input id="result-url" type="text" inputmode="url" autocomplete="url" placeholder="https://example.com/result.json">
564
+ <button type="submit">Load</button>
565
+ </div>
566
+ </form>
567
+ <label class="file-loader" id="file-loader">
534
568
  <span>Choose result.json manually</span>
535
569
  <input id="result-file" type="file" accept="application/json,.json">
536
570
  </label>
@@ -606,6 +640,9 @@ const html = `<!doctype html>
606
640
  let selectedIndex = 0;
607
641
  let activeFilter = 'all';
608
642
  let query = '';
643
+ let resultSourceUrl = '';
644
+
645
+ const DEFAULT_RESULT_URL = './result.json';
609
646
 
610
647
  const els = {
611
648
  pageTitle: document.getElementById('page-title'),
@@ -622,13 +659,36 @@ const html = `<!doctype html>
622
659
  fullPageLink: document.getElementById('full-page-link'),
623
660
  loadState: document.getElementById('load-state'),
624
661
  fileLoader: document.getElementById('file-loader'),
625
- resultFile: document.getElementById('result-file')
662
+ resultFile: document.getElementById('result-file'),
663
+ urlLoader: document.getElementById('url-loader'),
664
+ resultUrl: document.getElementById('result-url')
626
665
  };
627
666
 
667
+ function asArray(value) {
668
+ if (Array.isArray(value)) return value;
669
+ if (value === undefined || value === null || value === '') return [];
670
+ return [value];
671
+ }
672
+
673
+ function isLikelyLocalAbsolutePath(value) {
674
+ return /^\\/(Users|Volumes|Applications|System|Library|private|tmp|var|home)\\//.test(value);
675
+ }
676
+
628
677
  function pathToUrl(value) {
629
- const text = String(value || '');
678
+ const text = String(value || '').trim();
630
679
  if (!text) return '';
631
- if (/^(https?:|file:|data:)/i.test(text)) return text;
680
+ if (/^(https?:|file:|data:|blob:)/i.test(text)) return text;
681
+ if (resultSourceUrl && /^(https?:|file:)/i.test(resultSourceUrl)) {
682
+ const shouldResolveFromSource = !text.startsWith('/') ||
683
+ (/^https?:/i.test(resultSourceUrl) && !isLikelyLocalAbsolutePath(text));
684
+ if (shouldResolveFromSource) {
685
+ try {
686
+ return new URL(text, resultSourceUrl).href;
687
+ } catch {
688
+ // Fall through to local snapshot handling.
689
+ }
690
+ }
691
+ }
632
692
  const snapshotIndex = text.lastIndexOf('/snapshots/');
633
693
  if (snapshotIndex >= 0) {
634
694
  return encodeURI('./snapshots/' + text.slice(snapshotIndex + '/snapshots/'.length));
@@ -638,7 +698,7 @@ const html = `<!doctype html>
638
698
  }
639
699
 
640
700
  function getShot(block, index) {
641
- const direct = Array.isArray(block.blockScreenshotPaths) ? block.blockScreenshotPaths[0] : '';
701
+ const direct = asArray(block.blockScreenshotPaths)[0] || block.blockScreenshotPath || block.screenshotPath || '';
642
702
  if (direct) return { path: direct };
643
703
  return screenshotByBlockIdx.get(index) || null;
644
704
  }
@@ -659,8 +719,9 @@ const html = `<!doctype html>
659
719
  block.blockDescription,
660
720
  block.blockCssPath,
661
721
  block.blockIdxs,
662
- ...(block.blockSemantics || []),
663
- ...(block.blockPossibleEvents || [])
722
+ ...asArray(block.blockSemantics),
723
+ ...asArray(block.blockPossibleEvents),
724
+ ...asArray(block.blockSemanticGroups).map((item) => JSON.stringify(item))
664
725
  ].join(' ').toLowerCase();
665
726
  return haystack.includes(query.toLowerCase());
666
727
  }
@@ -678,11 +739,24 @@ const html = `<!doctype html>
678
739
  .filter(({ block, index }) => textMatches(block, index) && isVisibleByFilter(block, index));
679
740
  }
680
741
 
742
+ function screenshotCount() {
743
+ const paths = new Set();
744
+ for (const item of screenshotRows) {
745
+ if (item?.path) paths.add(item.path);
746
+ }
747
+ for (const block of blocks) {
748
+ for (const path of asArray(block.blockScreenshotPaths)) {
749
+ if (path) paths.add(path);
750
+ }
751
+ }
752
+ return paths.size;
753
+ }
754
+
681
755
  function renderMetrics() {
682
756
  const stats = data.analysis?.block_analysis?.stats || {};
683
757
  const metrics = [
684
758
  ['Blocks', blocks.length],
685
- ['Screenshots', screenshotRows.length],
759
+ ['Screenshots', screenshotCount()],
686
760
  ['Elements', data.parseMetrics?.elementsCount || 0],
687
761
  ['Parse ms', data.parseMetrics?.parseMs || 0]
688
762
  ];
@@ -698,6 +772,10 @@ const html = `<!doctype html>
698
772
 
699
773
  function renderList() {
700
774
  const rows = visibleBlocks();
775
+ if (!rows.length) {
776
+ els.list.innerHTML = '<div class="load-state">No blocks match the current search or filter.</div>';
777
+ return;
778
+ }
701
779
  els.list.innerHTML = rows.map(({ block, index }) => {
702
780
  const hasShot = Boolean(getShot(block, index));
703
781
  const status = hasShot ? 'shot' : 'no shot';
@@ -715,7 +793,12 @@ const html = `<!doctype html>
715
793
  }
716
794
 
717
795
  function renderAllBlocks() {
718
- els.allBlocks.innerHTML = visibleBlocks().map(({ block, index }) => {
796
+ const rows = visibleBlocks();
797
+ if (!rows.length) {
798
+ els.allBlocks.innerHTML = '<div class="missing-shot">No blocks to show.</div>';
799
+ return;
800
+ }
801
+ els.allBlocks.innerHTML = rows.map(({ block, index }) => {
719
802
  const shot = getShot(block, index);
720
803
  const image = shot
721
804
  ? '<img src="' + pathToUrl(shot.path) + '" alt="Screenshot for block ' + index + '">'
@@ -736,6 +819,18 @@ const html = `<!doctype html>
736
819
  }
737
820
 
738
821
  function renderSelected() {
822
+ if (!blocks.length) {
823
+ els.selectedTitle.textContent = 'No blocks found';
824
+ els.selectedDescription.textContent = 'Loaded JSON does not contain block analysis rows.';
825
+ els.copySelector.disabled = true;
826
+ els.fullPageLink.href = pathToUrl(data.screenshots?.fullPage || '');
827
+ els.fullPageLink.style.display = data.screenshots?.fullPage ? 'inline-flex' : 'none';
828
+ els.screenshot.innerHTML = '<div class="missing-shot">Load a Page Analyzer result with analysis.block_analysis.blocks.</div>';
829
+ els.info.innerHTML = '';
830
+ els.raw.textContent = JSON.stringify(data, null, 2);
831
+ return;
832
+ }
833
+ if (!blocks[selectedIndex]) selectedIndex = 0;
739
834
  const block = blocks[selectedIndex] || {};
740
835
  const shot = getShot(block, selectedIndex);
741
836
  els.selectedTitle.textContent = '#' + selectedIndex + ' ' + (block.blockName || 'Unnamed block');
@@ -753,9 +848,9 @@ const html = `<!doctype html>
753
848
  '<div class="missing-shot">No screenshot was generated for this block.<br>Most likely the selector was empty, hidden, or not screenshotable.</div>';
754
849
  }
755
850
 
756
- const semantics = block.blockSemantics || [];
757
- const events = block.blockPossibleEvents || [];
758
- const groups = block.blockSemanticGroups || [];
851
+ const semantics = asArray(block.blockSemantics);
852
+ const events = asArray(block.blockPossibleEvents);
853
+ const groups = asArray(block.blockSemanticGroups);
759
854
  els.info.innerHTML = [
760
855
  info('Block name', block.blockName || 'n/a'),
761
856
  info('Block idxs', block.blockIdxs || String(selectedIndex)),
@@ -831,44 +926,132 @@ const html = `<!doctype html>
831
926
  els.loadState.classList.toggle('error', isError);
832
927
  }
833
928
 
834
- function initialize(nextData, sourceLabel) {
929
+ function getBlocks(nextData) {
930
+ const candidates = [
931
+ nextData?.analysis?.block_analysis?.blocks,
932
+ nextData?.block_analysis?.blocks,
933
+ nextData?.analysis?.blocks,
934
+ nextData?.blocks
935
+ ];
936
+ return candidates.find(Array.isArray) || [];
937
+ }
938
+
939
+ function getScreenshotRows(nextData) {
940
+ const rows = nextData?.screenshots?.blocks || nextData?.blockScreenshots || [];
941
+ if (Array.isArray(rows)) {
942
+ return rows
943
+ .map((item, index) => typeof item === 'string' ? { blockIdx: index, path: item } : item)
944
+ .filter((item) => item && item.path);
945
+ }
946
+ if (rows && typeof rows === 'object') {
947
+ return Object.entries(rows).map(([blockIdx, path]) => ({ blockIdx, path }));
948
+ }
949
+ return [];
950
+ }
951
+
952
+ function buildScreenshotMap(rows) {
953
+ return new Map(rows.map((item, index) => [
954
+ Number(item.blockIdx ?? item.blockIndex ?? item.index ?? index),
955
+ item
956
+ ]));
957
+ }
958
+
959
+ function sourceLabelForUrl(value) {
960
+ try {
961
+ const url = new URL(value, window.location.href);
962
+ return url.pathname.split('/').filter(Boolean).pop() || url.host || value;
963
+ } catch {
964
+ return value;
965
+ }
966
+ }
967
+
968
+ function normalizeResultUrl(value) {
969
+ const text = String(value || '').trim();
970
+ if (!text) {
971
+ throw new Error('Result JSON URL is empty');
972
+ }
973
+ return new URL(text, window.location.href).href;
974
+ }
975
+
976
+ function getInitialResultUrl() {
977
+ const params = new URLSearchParams(window.location.search);
978
+ return params.get('result') || params.get('url') || '';
979
+ }
980
+
981
+ function rememberResultUrl(value) {
982
+ const url = new URL(window.location.href);
983
+ url.searchParams.set('result', value);
984
+ url.searchParams.delete('url');
985
+ window.history.replaceState(null, '', url);
986
+ }
987
+
988
+ function forgetResultUrl() {
989
+ const url = new URL(window.location.href);
990
+ url.searchParams.delete('result');
991
+ url.searchParams.delete('url');
992
+ window.history.replaceState(null, '', url);
993
+ }
994
+
995
+ function renderLoadError(error, title = 'result.json not loaded') {
996
+ if (data) return;
997
+ els.selectedTitle.textContent = title;
998
+ els.selectedDescription.textContent = error.message || 'Unknown load error';
999
+ els.screenshot.innerHTML = '<div class="missing-shot">Load result.json to inspect blocks.</div>';
1000
+ els.info.innerHTML = '';
1001
+ els.raw.textContent = '';
1002
+ els.allBlocks.innerHTML = '';
1003
+ els.list.innerHTML = '';
1004
+ els.metrics.innerHTML = '';
1005
+ }
1006
+
1007
+ function initialize(nextData, sourceLabel, sourceUrl = '') {
835
1008
  data = nextData || {};
836
- blocks = data.analysis?.block_analysis?.blocks || [];
837
- screenshotRows = data.screenshots?.blocks || [];
838
- screenshotByBlockIdx = new Map(screenshotRows.map((item) => [Number(item.blockIdx), item]));
1009
+ resultSourceUrl = sourceUrl ? normalizeResultUrl(sourceUrl) : '';
1010
+ blocks = getBlocks(data);
1011
+ screenshotRows = getScreenshotRows(data);
1012
+ screenshotByBlockIdx = buildScreenshotMap(screenshotRows);
839
1013
  selectedIndex = 0;
840
1014
  query = '';
841
1015
  els.search.value = '';
842
1016
  els.pageTitle.textContent = data.title || 'Untitled page';
843
- setLoadState(sourceLabel + ' loaded. ' + blocks.length + ' blocks, ' + screenshotRows.length + ' screenshots.');
844
- els.fileLoader.hidden = true;
1017
+ setLoadState(sourceLabel + ' loaded. ' + blocks.length + ' blocks, ' + screenshotCount() + ' screenshots.');
845
1018
  renderMetrics();
846
1019
  renderList();
847
1020
  renderAllBlocks();
848
1021
  renderSelected();
849
1022
  }
850
1023
 
851
- async function loadResultJson() {
1024
+ async function loadResultFromUrl(rawUrl, updateAddress = false) {
1025
+ const resolvedUrl = normalizeResultUrl(rawUrl);
1026
+ els.resultUrl.value = rawUrl;
1027
+ setLoadState('Loading ' + rawUrl + '...');
852
1028
  try {
853
- const response = await fetch('./result.json', { cache: 'no-store' });
1029
+ const response = await fetch(resolvedUrl, { cache: 'no-store' });
854
1030
  if (!response.ok) {
855
- throw new Error('HTTP ' + response.status + ' while loading result.json');
1031
+ throw new Error('HTTP ' + response.status + ' while loading result JSON');
856
1032
  }
857
- initialize(await response.json(), 'result.json');
1033
+ initialize(await response.json(), sourceLabelForUrl(rawUrl), resolvedUrl);
1034
+ if (updateAddress) rememberResultUrl(rawUrl);
858
1035
  } catch (error) {
859
1036
  setLoadState(
860
- 'Could not load ./result.json automatically. If this page is opened with file://, your browser may block local JSON reads. Use the picker below or serve this folder locally.',
1037
+ 'Could not load ' + rawUrl + '. ' + (error.message || 'Unknown load error') + '. Remote URLs must allow browser CORS access.',
861
1038
  true
862
1039
  );
863
- els.fileLoader.hidden = false;
864
- els.selectedTitle.textContent = 'result.json not loaded';
865
- els.selectedDescription.textContent = error.message || 'Unknown load error';
866
- els.screenshot.innerHTML = '<div class="missing-shot">Load result.json to inspect blocks.</div>';
867
- els.info.innerHTML = '';
868
- els.raw.textContent = '';
1040
+ renderLoadError(error);
869
1041
  }
870
1042
  }
871
1043
 
1044
+ async function loadResultJson() {
1045
+ const initialUrl = getInitialResultUrl() || DEFAULT_RESULT_URL;
1046
+ els.resultUrl.value = initialUrl;
1047
+ await loadResultFromUrl(initialUrl);
1048
+ }
1049
+
1050
+ els.urlLoader.addEventListener('submit', async (event) => {
1051
+ event.preventDefault();
1052
+ await loadResultFromUrl(els.resultUrl.value, true);
1053
+ });
1054
+
872
1055
  els.resultFile.addEventListener('change', async () => {
873
1056
  const file = els.resultFile.files?.[0];
874
1057
  if (!file) {
@@ -876,6 +1059,8 @@ const html = `<!doctype html>
876
1059
  }
877
1060
  try {
878
1061
  initialize(JSON.parse(await file.text()), file.name);
1062
+ els.resultUrl.value = '';
1063
+ forgetResultUrl();
879
1064
  } catch (error) {
880
1065
  setLoadState('Could not parse selected JSON: ' + error.message, true);
881
1066
  }
@@ -1,8 +1,9 @@
1
1
  import assert from 'node:assert/strict';
2
2
  import { EventAnalyzer } from '../llm/analyzers/event-analyzer/event-analyzer.js';
3
3
  import { buildBlockAnalysisArtifact } from '../llm/analyzers/event-analyzer/event-analyzer-blocks.js';
4
+ import { OpenAiProvider } from '../llm/providers/openai-provider.js';
4
5
  import { PageExtractor } from '../page-extractor.js';
5
- import '../index.js';
6
+ import { analyzeUrl } from '../index.js';
6
7
 
7
8
  class FakeProvider {
8
9
  constructor() {
@@ -44,6 +45,7 @@ class FakeLocator {
44
45
  if (this.throwOnScreenshot) {
45
46
  throw new Error('selector screenshot failed');
46
47
  }
48
+ return Buffer.from(`locator screenshot:${options?.path || 'buffer'}`);
47
49
  }
48
50
  }
49
51
 
@@ -67,6 +69,28 @@ class FakePage {
67
69
 
68
70
  async screenshot(options) {
69
71
  this.pageScreenshots.push(options);
72
+ return Buffer.from(`page screenshot:${options?.path || 'buffer'}`);
73
+ }
74
+ }
75
+
76
+ class FakeS3Client {
77
+ constructor({ failPredicate = null } = {}) {
78
+ this.failPredicate = failPredicate;
79
+ this.commands = [];
80
+ this.attemptsByKey = new Map();
81
+ }
82
+
83
+ async send(command) {
84
+ const input = command.input;
85
+ this.commands.push(input);
86
+ const attempts = (this.attemptsByKey.get(input.Key) || 0) + 1;
87
+ this.attemptsByKey.set(input.Key, attempts);
88
+
89
+ if (this.failPredicate?.(input, attempts)) {
90
+ throw new Error(`s3 upload failed for ${input.Key}`);
91
+ }
92
+
93
+ return {};
70
94
  }
71
95
  }
72
96
 
@@ -117,6 +141,104 @@ async function analyzeWith(options = {}) {
117
141
  assert.equal(result.events_by_node[0].event_type, 'signup');
118
142
  }
119
143
 
144
+ {
145
+ const originalWithPreparedPage = PageExtractor.prototype.withPreparedPage;
146
+ const originalExtractPreparedPage = PageExtractor.prototype.extractPreparedPage;
147
+ const originalCaptureScreenshots = PageExtractor.prototype.captureScreenshots;
148
+ const originalCaptureUrlScreenshots = PageExtractor.prototype.captureUrlScreenshots;
149
+ const originalAnalyze = OpenAiProvider.prototype.analyze;
150
+ const calls = [];
151
+ const fakePage = { pageId: 'prepared-page' };
152
+
153
+ PageExtractor.prototype.withPreparedPage = async function withPreparedPage(inputUrl, callback) {
154
+ calls.push(['withPreparedPage', inputUrl]);
155
+ return await callback(fakePage, String(inputUrl || '').trim());
156
+ };
157
+ PageExtractor.prototype.extractPreparedPage = async function extractPreparedPage(page, targetUrl) {
158
+ calls.push(['extractPreparedPage', page, targetUrl]);
159
+ assert.equal(page, fakePage);
160
+ return {
161
+ html: [
162
+ '<!doctype html><html><head><title>Demo</title></head><body>',
163
+ '<main><section><a href="/signup">Sign up</a></section></main>',
164
+ '</body></html>'
165
+ ].join(''),
166
+ blocks: [{
167
+ blockIdx: 0,
168
+ blockCssPath: 'body > main:nth-of-type(1) > section:nth-of-type(1)',
169
+ top: 0,
170
+ left: 0,
171
+ width: 1000,
172
+ height: 200,
173
+ textPreview: 'Sign up'
174
+ }],
175
+ elementGeometries: [{
176
+ tag: 'a',
177
+ text: 'Sign up',
178
+ href: 'https://example.com/signup',
179
+ top: 0,
180
+ left: 0,
181
+ width: 80,
182
+ height: 24,
183
+ selectorNthOfType: 'body > main:nth-of-type(1) > section:nth-of-type(1) > a:nth-of-type(1)'
184
+ }],
185
+ screenshots: { fullPage: '/tmp/full-page.png' },
186
+ pageSize: { width: 1000, height: 800 }
187
+ };
188
+ };
189
+ PageExtractor.prototype.captureScreenshots = async function captureScreenshots(page, targetUrl, screenshotBlocks, options) {
190
+ calls.push(['captureScreenshots', page, targetUrl, screenshotBlocks.length, options]);
191
+ assert.equal(page, fakePage);
192
+ return {
193
+ blocks: screenshotBlocks.map((_block, index) => ({
194
+ blockIdx: index,
195
+ path: `/tmp/logical-block-${index}.png`
196
+ }))
197
+ };
198
+ };
199
+ PageExtractor.prototype.captureUrlScreenshots = async function captureUrlScreenshots() {
200
+ calls.push(['captureUrlScreenshots']);
201
+ throw new Error('captureUrlScreenshots should not be called by analyzeUrl block screenshots');
202
+ };
203
+ OpenAiProvider.prototype.analyze = async function analyze(prompt) {
204
+ calls.push(['llm', prompt.includes('DOM CSV') ? 'event' : 'special']);
205
+ return [
206
+ 'Demo page with a primary CTA',
207
+ 'blockIdxs,blockName,blockDescription,blockPossibleEvents',
208
+ '0,CTASection,Primary CTA section,signup.cta_click'
209
+ ].join('\n');
210
+ };
211
+
212
+ try {
213
+ const result = await analyzeUrl(' https://example.com/demo ', {
214
+ llm: {
215
+ apiKey: 'test-key',
216
+ apiEndpoint: 'https://llm.example.invalid/v1/chat/completions',
217
+ model: 'test-model'
218
+ },
219
+ fullPageScreenshot: true,
220
+ blockScreenshots: true,
221
+ showBlockIdx: true
222
+ });
223
+
224
+ assert.equal(calls.filter((call) => call[0] === 'withPreparedPage').length, 1);
225
+ assert.equal(calls.some((call) => call[0] === 'captureUrlScreenshots'), false);
226
+ assert.equal(calls.filter((call) => call[0] === 'captureScreenshots').length, 1);
227
+ assert.equal(result.screenshots.fullPage, '/tmp/full-page.png');
228
+ assert.equal(result.screenshots.blocks[0].path, '/tmp/logical-block-0.png');
229
+ assert.equal(
230
+ result.analysis.block_analysis.blocks[0].blockScreenshotPaths[0],
231
+ '/tmp/logical-block-0.png'
232
+ );
233
+ } finally {
234
+ PageExtractor.prototype.withPreparedPage = originalWithPreparedPage;
235
+ PageExtractor.prototype.extractPreparedPage = originalExtractPreparedPage;
236
+ PageExtractor.prototype.captureScreenshots = originalCaptureScreenshots;
237
+ PageExtractor.prototype.captureUrlScreenshots = originalCaptureUrlScreenshots;
238
+ OpenAiProvider.prototype.analyze = originalAnalyze;
239
+ }
240
+ }
241
+
120
242
  {
121
243
  const extractor = new PageExtractor();
122
244
  const locator = new FakeLocator({ count: 0 });
@@ -191,6 +313,125 @@ async function analyzeWith(options = {}) {
191
313
  assert.equal(page.pageScreenshots.length, 0);
192
314
  }
193
315
 
316
+ {
317
+ assert.throws(
318
+ () => new PageExtractor({ s3: { region: 'ap-northeast-1' } }),
319
+ /extractorConfig\.s3\.bucket is required/
320
+ );
321
+ assert.throws(
322
+ () => new PageExtractor({ s3: { bucket: 'page-analyzer-test' } }),
323
+ /extractorConfig\.s3\.region is required/
324
+ );
325
+ }
326
+
327
+ {
328
+ const s3Client = new FakeS3Client();
329
+ const extractor = new PageExtractor({
330
+ s3: {
331
+ bucket: 'page-analyzer-test',
332
+ region: 'ap-northeast-1',
333
+ prefix: '/page-analyzer/snapshots/',
334
+ publicBaseUrl: 'https://cdn.example.com/page-analyzer/snapshots/',
335
+ client: s3Client
336
+ }
337
+ });
338
+ const locator = new FakeLocator();
339
+ const page = new FakePage(locator);
340
+ const screenshots = await extractor.captureScreenshots(page, 'https://example.com/demo', [
341
+ { blockName: 'Hero', blockCssPath: '#hero' }
342
+ ], {
343
+ fullPageScreenshot: true,
344
+ blockScreenshots: true
345
+ });
346
+
347
+ assert.equal(page.pageScreenshots.length, 1);
348
+ assert.deepEqual(page.pageScreenshots[0], { fullPage: true });
349
+ assert.equal(locator.screenshots.length, 1);
350
+ assert.deepEqual(locator.screenshots[0], {});
351
+ assert.equal(s3Client.commands.length, 2);
352
+
353
+ const [fullPageUpload, blockUpload] = s3Client.commands;
354
+ assert.equal(fullPageUpload.Bucket, 'page-analyzer-test');
355
+ assert.equal(fullPageUpload.ContentType, 'image/png');
356
+ assert.equal(Buffer.isBuffer(fullPageUpload.Body), true);
357
+ assert.match(fullPageUpload.Key, /^page-analyzer\/snapshots\/example-com-demo-.*-full-page\.png$/);
358
+ assert.match(blockUpload.Key, /^page-analyzer\/snapshots\/example-com-demo-.*-block-000\.png$/);
359
+
360
+ const fullPageFilename = fullPageUpload.Key.split('/').pop();
361
+ const blockFilename = blockUpload.Key.split('/').pop();
362
+ assert.equal(
363
+ screenshots.fullPage,
364
+ `https://cdn.example.com/page-analyzer/snapshots/${fullPageFilename}`
365
+ );
366
+ assert.equal(screenshots.blocks[0].path, `https://cdn.example.com/page-analyzer/snapshots/${blockFilename}`);
367
+ }
368
+
369
+ {
370
+ const s3Client = new FakeS3Client();
371
+ const extractor = new PageExtractor({
372
+ s3: {
373
+ bucket: 'page-analyzer-test',
374
+ region: 'ap-northeast-1',
375
+ prefix: 'nested/prefix',
376
+ client: s3Client
377
+ }
378
+ });
379
+ const locator = new FakeLocator();
380
+ const page = new FakePage(locator);
381
+ const screenshots = await extractor.captureScreenshots(page, 'https://example.com/demo', [
382
+ { blockName: 'Hero', blockCssPath: '#hero' }
383
+ ], {
384
+ fullPageScreenshot: false,
385
+ blockScreenshots: true
386
+ });
387
+
388
+ const uploadedKey = s3Client.commands[0].Key;
389
+ assert.match(uploadedKey, /^nested\/prefix\/example-com-demo-.*-block-000\.png$/);
390
+ assert.equal(
391
+ screenshots.blocks[0].path,
392
+ `https://page-analyzer-test.s3.ap-northeast-1.amazonaws.com/${uploadedKey}`
393
+ );
394
+ }
395
+
396
+ {
397
+ const originalWarn = console.warn;
398
+ const warnings = [];
399
+ console.warn = (message) => warnings.push(message);
400
+
401
+ try {
402
+ const s3Client = new FakeS3Client({
403
+ failPredicate: (input) => input.Key.endsWith('-block-000.png')
404
+ });
405
+ const extractor = new PageExtractor({
406
+ s3: {
407
+ bucket: 'page-analyzer-test',
408
+ region: 'ap-northeast-1',
409
+ prefix: 'page-analyzer/snapshots',
410
+ publicBaseUrl: 'https://cdn.example.com/page-analyzer/snapshots',
411
+ client: s3Client
412
+ }
413
+ });
414
+ const locator = new FakeLocator();
415
+ const page = new FakePage(locator);
416
+ const screenshots = await extractor.captureScreenshots(page, 'https://example.com/demo', [
417
+ { blockName: 'Hero', blockCssPath: '#hero' },
418
+ { blockName: 'Footer', blockCssPath: '#footer' }
419
+ ], {
420
+ fullPageScreenshot: false,
421
+ blockScreenshots: true
422
+ });
423
+
424
+ assert.equal(screenshots.blocks.length, 1);
425
+ assert.equal(screenshots.blocks[0].blockIdx, 1);
426
+ assert.equal(s3Client.commands.filter((input) => input.Key.endsWith('-block-000.png')).length, 3);
427
+ assert.equal(s3Client.commands.filter((input) => input.Key.endsWith('-block-001.png')).length, 1);
428
+ assert.equal(warnings.some((message) => message.includes('retrying')), true);
429
+ assert.equal(warnings.some((message) => message.includes('Failed to capture/upload block 0')), true);
430
+ } finally {
431
+ console.warn = originalWarn;
432
+ }
433
+ }
434
+
194
435
  {
195
436
  const artifact = buildBlockAnalysisArtifact('Demo', [{
196
437
  blockIdx: 3,