page-analyzer 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@ const html = `<!doctype html>
11
11
  <head>
12
12
  <meta charset="utf-8">
13
13
  <meta name="viewport" content="width=device-width, initial-scale=1">
14
+ <link rel="icon" href="data:,">
14
15
  <title>Page Analyzer Result Viewer</title>
15
16
  <style>
16
17
  :root {
@@ -116,12 +117,28 @@ const html = `<!doctype html>
116
117
  color: var(--missing);
117
118
  }
118
119
 
120
+ .url-loader,
119
121
  .file-loader {
120
122
  display: grid;
121
123
  gap: 8px;
122
124
  margin-top: 8px;
123
125
  }
124
126
 
127
+ .url-loader label,
128
+ .file-loader span {
129
+ color: var(--muted);
130
+ font-size: 11px;
131
+ font-weight: 800;
132
+ text-transform: uppercase;
133
+ }
134
+
135
+ .url-row {
136
+ display: grid;
137
+ grid-template-columns: minmax(0, 1fr) auto;
138
+ gap: 8px;
139
+ }
140
+
141
+ .url-loader input,
125
142
  .file-loader input {
126
143
  width: 100%;
127
144
  border: 1px solid var(--line);
@@ -131,6 +148,16 @@ const html = `<!doctype html>
131
148
  font-size: 12px;
132
149
  }
133
150
 
151
+ .url-loader button {
152
+ min-width: 68px;
153
+ border: 1px solid var(--ink);
154
+ background: var(--ink);
155
+ color: white;
156
+ padding: 0 12px;
157
+ cursor: pointer;
158
+ font-size: 12px;
159
+ }
160
+
134
161
  .metrics {
135
162
  display: grid;
136
163
  grid-template-columns: repeat(2, minmax(0, 1fr));
@@ -530,7 +557,14 @@ const html = `<!doctype html>
530
557
  <h1>Block Review</h1>
531
558
  <div class="title" id="page-title"></div>
532
559
  <div class="load-state" id="load-state">Loading result.json...</div>
533
- <label class="file-loader" id="file-loader" hidden>
560
+ <form class="url-loader" id="url-loader">
561
+ <label for="result-url">Result JSON URL</label>
562
+ <div class="url-row">
563
+ <input id="result-url" type="text" inputmode="url" autocomplete="url" placeholder="https://example.com/result.json">
564
+ <button type="submit">Load</button>
565
+ </div>
566
+ </form>
567
+ <label class="file-loader" id="file-loader">
534
568
  <span>Choose result.json manually</span>
535
569
  <input id="result-file" type="file" accept="application/json,.json">
536
570
  </label>
@@ -606,6 +640,9 @@ const html = `<!doctype html>
606
640
  let selectedIndex = 0;
607
641
  let activeFilter = 'all';
608
642
  let query = '';
643
+ let resultSourceUrl = '';
644
+
645
+ const DEFAULT_RESULT_URL = './result.json';
609
646
 
610
647
  const els = {
611
648
  pageTitle: document.getElementById('page-title'),
@@ -622,13 +659,36 @@ const html = `<!doctype html>
622
659
  fullPageLink: document.getElementById('full-page-link'),
623
660
  loadState: document.getElementById('load-state'),
624
661
  fileLoader: document.getElementById('file-loader'),
625
- resultFile: document.getElementById('result-file')
662
+ resultFile: document.getElementById('result-file'),
663
+ urlLoader: document.getElementById('url-loader'),
664
+ resultUrl: document.getElementById('result-url')
626
665
  };
627
666
 
667
+ function asArray(value) {
668
+ if (Array.isArray(value)) return value;
669
+ if (value === undefined || value === null || value === '') return [];
670
+ return [value];
671
+ }
672
+
673
+ function isLikelyLocalAbsolutePath(value) {
674
+ return /^\\/(Users|Volumes|Applications|System|Library|private|tmp|var|home)\\//.test(value);
675
+ }
676
+
628
677
  function pathToUrl(value) {
629
- const text = String(value || '');
678
+ const text = String(value || '').trim();
630
679
  if (!text) return '';
631
- if (/^(https?:|file:|data:)/i.test(text)) return text;
680
+ if (/^(https?:|file:|data:|blob:)/i.test(text)) return text;
681
+ if (resultSourceUrl && /^(https?:|file:)/i.test(resultSourceUrl)) {
682
+ const shouldResolveFromSource = !text.startsWith('/') ||
683
+ (/^https?:/i.test(resultSourceUrl) && !isLikelyLocalAbsolutePath(text));
684
+ if (shouldResolveFromSource) {
685
+ try {
686
+ return new URL(text, resultSourceUrl).href;
687
+ } catch {
688
+ // Fall through to local snapshot handling.
689
+ }
690
+ }
691
+ }
632
692
  const snapshotIndex = text.lastIndexOf('/snapshots/');
633
693
  if (snapshotIndex >= 0) {
634
694
  return encodeURI('./snapshots/' + text.slice(snapshotIndex + '/snapshots/'.length));
@@ -637,8 +697,21 @@ const html = `<!doctype html>
637
697
  return text;
638
698
  }
639
699
 
700
/**
 * Resolve a path or URL from the loaded result and return it only when it is
 * safe to use as an image source or link target.
 *
 * Accepted: http(s), file and blob URLs, raster data:image URLs
 * (png, jpeg, gif, webp) and scheme-less (relative) paths.
 * Anything carrying another scheme is rejected.
 *
 * @param {*} value - Raw path or URL taken from the result JSON.
 * @returns {string} The resolved URL, or '' when it is empty or rejected.
 */
function safeResourceUrl(value) {
  const url = pathToUrl(value);
  if (!url) return '';

  // Browsers remove ASCII tab, LF and CR anywhere in a URL and strip leading
  // control characters or spaces before parsing it. A value such as
  // "java[TAB]script:..." would otherwise look scheme-less to the checks
  // below and still be executed as a javascript: URL once assigned to href.
  // Classify a normalized copy; the value handed back is left untouched.
  const kept = Array.from(url).filter((ch) => {
    const code = ch.charCodeAt(0);
    return code !== 9 && code !== 10 && code !== 13;
  });
  let start = 0;
  while (start < kept.length && kept[start].charCodeAt(0) <= 32) start += 1;
  const probe = kept.slice(start).join('');
  if (!probe) return '';

  if (/^(https?:|file:|blob:)/i.test(probe)) return url;
  // [/] keeps the pattern free of backslashes, which would need doubling
  // because this script is emitted from inside a template literal.
  if (/^data:image[/](png|jpe?g|gif|webp);/i.test(probe)) return url;
  // No scheme at all: treat as a relative path.
  if (!/^[a-z][a-z0-9+.-]*:/i.test(probe)) return url;
  return '';
}
708
+
709
+ function imageSrcAttr(value) {
710
+ return escapeHtml(safeResourceUrl(value));
711
+ }
712
+
640
713
  function getShot(block, index) {
641
- const direct = Array.isArray(block.blockScreenshotPaths) ? block.blockScreenshotPaths[0] : '';
714
+ const direct = asArray(block.blockScreenshotPaths)[0] || block.blockScreenshotPath || block.screenshotPath || '';
642
715
  if (direct) return { path: direct };
643
716
  return screenshotByBlockIdx.get(index) || null;
644
717
  }
@@ -659,8 +732,9 @@ const html = `<!doctype html>
659
732
  block.blockDescription,
660
733
  block.blockCssPath,
661
734
  block.blockIdxs,
662
- ...(block.blockSemantics || []),
663
- ...(block.blockPossibleEvents || [])
735
+ ...asArray(block.blockSemantics),
736
+ ...asArray(block.blockPossibleEvents),
737
+ ...asArray(block.blockSemanticGroups).map((item) => JSON.stringify(item))
664
738
  ].join(' ').toLowerCase();
665
739
  return haystack.includes(query.toLowerCase());
666
740
  }
@@ -678,11 +752,24 @@ const html = `<!doctype html>
678
752
  .filter(({ block, index }) => textMatches(block, index) && isVisibleByFilter(block, index));
679
753
  }
680
754
 
755
/**
 * Count the distinct screenshot paths referenced by the loaded result.
 *
 * Paths come from the top-level screenshot rows and from every block.
 * Blocks are inspected through the same three fields that getShot reads
 * (blockScreenshotPaths, blockScreenshotPath, screenshotPath), so a block
 * that displays a screenshot is always reflected in the total.
 *
 * @returns {number} Number of unique, non-empty screenshot paths.
 */
function screenshotCount() {
  const paths = new Set();
  for (const item of screenshotRows) {
    if (item?.path) paths.add(item.path);
  }
  for (const block of blocks) {
    const candidates = [
      ...asArray(block?.blockScreenshotPaths),
      block?.blockScreenshotPath,
      block?.screenshotPath
    ];
    for (const path of candidates) {
      if (path) paths.add(path);
    }
  }
  return paths.size;
}
767
+
681
768
  function renderMetrics() {
682
769
  const stats = data.analysis?.block_analysis?.stats || {};
683
770
  const metrics = [
684
771
  ['Blocks', blocks.length],
685
- ['Screenshots', screenshotRows.length],
772
+ ['Screenshots', screenshotCount()],
686
773
  ['Elements', data.parseMetrics?.elementsCount || 0],
687
774
  ['Parse ms', data.parseMetrics?.parseMs || 0]
688
775
  ];
@@ -698,6 +785,10 @@ const html = `<!doctype html>
698
785
 
699
786
  function renderList() {
700
787
  const rows = visibleBlocks();
788
+ if (!rows.length) {
789
+ els.list.innerHTML = '<div class="load-state">No blocks match the current search or filter.</div>';
790
+ return;
791
+ }
701
792
  els.list.innerHTML = rows.map(({ block, index }) => {
702
793
  const hasShot = Boolean(getShot(block, index));
703
794
  const status = hasShot ? 'shot' : 'no shot';
@@ -715,10 +806,16 @@ const html = `<!doctype html>
715
806
  }
716
807
 
717
808
  function renderAllBlocks() {
718
- els.allBlocks.innerHTML = visibleBlocks().map(({ block, index }) => {
809
+ const rows = visibleBlocks();
810
+ if (!rows.length) {
811
+ els.allBlocks.innerHTML = '<div class="missing-shot">No blocks to show.</div>';
812
+ return;
813
+ }
814
+ els.allBlocks.innerHTML = rows.map(({ block, index }) => {
719
815
  const shot = getShot(block, index);
720
- const image = shot
721
- ? '<img src="' + pathToUrl(shot.path) + '" alt="Screenshot for block ' + index + '">'
816
+ const shotUrl = shot?.path ? imageSrcAttr(shot.path) : '';
817
+ const image = shotUrl
818
+ ? '<img src="' + shotUrl + '" alt="Screenshot for block ' + index + '">'
722
819
  : '<div class="empty-thumb">No selector screenshot</div>';
723
820
  return '<article class="mini" data-index="' + index + '">' +
724
821
  image +
@@ -736,26 +833,41 @@ const html = `<!doctype html>
736
833
  }
737
834
 
738
835
  function renderSelected() {
836
+ if (!blocks.length) {
837
+ els.selectedTitle.textContent = 'No blocks found';
838
+ els.selectedDescription.textContent = 'Loaded JSON does not contain block analysis rows.';
839
+ els.copySelector.disabled = true;
840
+ const fullPageUrl = safeResourceUrl(data.screenshots?.fullPage || '');
841
+ els.fullPageLink.href = fullPageUrl;
842
+ els.fullPageLink.style.display = fullPageUrl ? 'inline-flex' : 'none';
843
+ els.screenshot.innerHTML = '<div class="missing-shot">Load a Page Analyzer result with analysis.block_analysis.blocks.</div>';
844
+ els.info.innerHTML = '';
845
+ els.raw.textContent = JSON.stringify(data, null, 2);
846
+ return;
847
+ }
848
+ if (!blocks[selectedIndex]) selectedIndex = 0;
739
849
  const block = blocks[selectedIndex] || {};
740
850
  const shot = getShot(block, selectedIndex);
741
851
  els.selectedTitle.textContent = '#' + selectedIndex + ' ' + (block.blockName || 'Unnamed block');
742
852
  els.selectedDescription.textContent = block.blockDescription || 'No description available.';
743
853
  els.copySelector.disabled = !block.blockCssPath;
744
- els.fullPageLink.href = pathToUrl(data.screenshots?.fullPage || '');
745
- els.fullPageLink.style.display = data.screenshots?.fullPage ? 'inline-flex' : 'none';
854
+ const fullPageUrl = safeResourceUrl(data.screenshots?.fullPage || '');
855
+ els.fullPageLink.href = fullPageUrl;
856
+ els.fullPageLink.style.display = fullPageUrl ? 'inline-flex' : 'none';
746
857
 
747
- if (shot?.path) {
858
+ const shotUrl = shot?.path ? imageSrcAttr(shot.path) : '';
859
+ if (shotUrl) {
748
860
  els.screenshot.innerHTML =
749
- '<div class="screenshot-frame"><img src="' + pathToUrl(shot.path) + '" alt="Screenshot for selected block"></div>' +
861
+ '<div class="screenshot-frame"><img src="' + shotUrl + '" alt="Screenshot for selected block"></div>' +
750
862
  '<div class="info wide"><label>Screenshot path</label><span>' + escapeHtml(shot.path) + '</span></div>';
751
863
  } else {
752
864
  els.screenshot.innerHTML =
753
865
  '<div class="missing-shot">No screenshot was generated for this block.<br>Most likely the selector was empty, hidden, or not screenshotable.</div>';
754
866
  }
755
867
 
756
- const semantics = block.blockSemantics || [];
757
- const events = block.blockPossibleEvents || [];
758
- const groups = block.blockSemanticGroups || [];
868
+ const semantics = asArray(block.blockSemantics);
869
+ const events = asArray(block.blockPossibleEvents);
870
+ const groups = asArray(block.blockSemanticGroups);
759
871
  els.info.innerHTML = [
760
872
  info('Block name', block.blockName || 'n/a'),
761
873
  info('Block idxs', block.blockIdxs || String(selectedIndex)),
@@ -831,44 +943,132 @@ const html = `<!doctype html>
831
943
  els.loadState.classList.toggle('error', isError);
832
944
  }
833
945
 
834
- function initialize(nextData, sourceLabel) {
946
+ function getBlocks(nextData) {
947
+ const candidates = [
948
+ nextData?.analysis?.block_analysis?.blocks,
949
+ nextData?.block_analysis?.blocks,
950
+ nextData?.analysis?.blocks,
951
+ nextData?.blocks
952
+ ];
953
+ return candidates.find(Array.isArray) || [];
954
+ }
955
+
956
/**
 * Extract the per-block screenshot rows from a loaded result.
 *
 * Reads screenshots.blocks, falling back to blockScreenshots. Two layouts
 * are accepted:
 *  - an array whose items are path strings (the array position becomes
 *    blockIdx) or row objects, which are passed through unchanged;
 *  - an object keyed by block index whose values are path strings or row
 *    objects (the key becomes blockIdx unless the row carries its own).
 * Rows without a usable path are dropped in both layouts.
 *
 * @param {object|null|undefined} nextData - Parsed result JSON.
 * @returns {Array<object>} Rows that each have a truthy path property.
 */
function getScreenshotRows(nextData) {
  const rows = nextData?.screenshots?.blocks || nextData?.blockScreenshots || [];
  if (!rows || typeof rows !== 'object') return [];

  const hasPath = (item) => Boolean(item && item.path);

  if (Array.isArray(rows)) {
    return rows
      .map((item, index) => (typeof item === 'string' ? { blockIdx: index, path: item } : item))
      .filter(hasPath);
  }

  // Keyed map: normalize object-valued entries instead of storing the whole
  // object as the path, and apply the same empty-path filter as above.
  return Object.entries(rows)
    .map(([blockIdx, entry]) => (
      entry && typeof entry === 'object' ? { blockIdx, ...entry } : { blockIdx, path: entry }
    ))
    .filter(hasPath);
}
968
+
969
+ function buildScreenshotMap(rows) {
970
+ return new Map(rows.map((item, index) => [
971
+ Number(item.blockIdx ?? item.blockIndex ?? item.index ?? index),
972
+ item
973
+ ]));
974
+ }
975
+
976
+ function sourceLabelForUrl(value) {
977
+ try {
978
+ const url = new URL(value, window.location.href);
979
+ return url.pathname.split('/').filter(Boolean).pop() || url.host || value;
980
+ } catch {
981
+ return value;
982
+ }
983
+ }
984
+
985
+ function normalizeResultUrl(value) {
986
+ const text = String(value || '').trim();
987
+ if (!text) {
988
+ throw new Error('Result JSON URL is empty');
989
+ }
990
+ return new URL(text, window.location.href).href;
991
+ }
992
+
993
+ function getInitialResultUrl() {
994
+ const params = new URLSearchParams(window.location.search);
995
+ return params.get('result') || params.get('url') || '';
996
+ }
997
+
998
+ function rememberResultUrl(value) {
999
+ const url = new URL(window.location.href);
1000
+ url.searchParams.set('result', value);
1001
+ url.searchParams.delete('url');
1002
+ window.history.replaceState(null, '', url);
1003
+ }
1004
+
1005
+ function forgetResultUrl() {
1006
+ const url = new URL(window.location.href);
1007
+ url.searchParams.delete('result');
1008
+ url.searchParams.delete('url');
1009
+ window.history.replaceState(null, '', url);
1010
+ }
1011
+
1012
+ function renderLoadError(error, title = 'result.json not loaded') {
1013
+ if (data) return;
1014
+ els.selectedTitle.textContent = title;
1015
+ els.selectedDescription.textContent = error.message || 'Unknown load error';
1016
+ els.screenshot.innerHTML = '<div class="missing-shot">Load result.json to inspect blocks.</div>';
1017
+ els.info.innerHTML = '';
1018
+ els.raw.textContent = '';
1019
+ els.allBlocks.innerHTML = '';
1020
+ els.list.innerHTML = '';
1021
+ els.metrics.innerHTML = '';
1022
+ }
1023
+
1024
+ function initialize(nextData, sourceLabel, sourceUrl = '') {
835
1025
  data = nextData || {};
836
- blocks = data.analysis?.block_analysis?.blocks || [];
837
- screenshotRows = data.screenshots?.blocks || [];
838
- screenshotByBlockIdx = new Map(screenshotRows.map((item) => [Number(item.blockIdx), item]));
1026
+ resultSourceUrl = sourceUrl ? normalizeResultUrl(sourceUrl) : '';
1027
+ blocks = getBlocks(data);
1028
+ screenshotRows = getScreenshotRows(data);
1029
+ screenshotByBlockIdx = buildScreenshotMap(screenshotRows);
839
1030
  selectedIndex = 0;
840
1031
  query = '';
841
1032
  els.search.value = '';
842
1033
  els.pageTitle.textContent = data.title || 'Untitled page';
843
- setLoadState(sourceLabel + ' loaded. ' + blocks.length + ' blocks, ' + screenshotRows.length + ' screenshots.');
844
- els.fileLoader.hidden = true;
1034
+ setLoadState(sourceLabel + ' loaded. ' + blocks.length + ' blocks, ' + screenshotCount() + ' screenshots.');
845
1035
  renderMetrics();
846
1036
  renderList();
847
1037
  renderAllBlocks();
848
1038
  renderSelected();
849
1039
  }
850
1040
 
851
- async function loadResultJson() {
1041
+ async function loadResultFromUrl(rawUrl, updateAddress = false) {
1042
+ const resolvedUrl = normalizeResultUrl(rawUrl);
1043
+ els.resultUrl.value = rawUrl;
1044
+ setLoadState('Loading ' + rawUrl + '...');
852
1045
  try {
853
- const response = await fetch('./result.json', { cache: 'no-store' });
1046
+ const response = await fetch(resolvedUrl, { cache: 'no-store' });
854
1047
  if (!response.ok) {
855
- throw new Error('HTTP ' + response.status + ' while loading result.json');
1048
+ throw new Error('HTTP ' + response.status + ' while loading result JSON');
856
1049
  }
857
- initialize(await response.json(), 'result.json');
1050
+ initialize(await response.json(), sourceLabelForUrl(rawUrl), resolvedUrl);
1051
+ if (updateAddress) rememberResultUrl(rawUrl);
858
1052
  } catch (error) {
859
1053
  setLoadState(
860
- 'Could not load ./result.json automatically. If this page is opened with file://, your browser may block local JSON reads. Use the picker below or serve this folder locally.',
1054
+ 'Could not load ' + rawUrl + '. ' + (error.message || 'Unknown load error') + '. Remote URLs must allow browser CORS access.',
861
1055
  true
862
1056
  );
863
- els.fileLoader.hidden = false;
864
- els.selectedTitle.textContent = 'result.json not loaded';
865
- els.selectedDescription.textContent = error.message || 'Unknown load error';
866
- els.screenshot.innerHTML = '<div class="missing-shot">Load result.json to inspect blocks.</div>';
867
- els.info.innerHTML = '';
868
- els.raw.textContent = '';
1057
+ renderLoadError(error);
869
1058
  }
870
1059
  }
871
1060
 
1061
+ async function loadResultJson() {
1062
+ const initialUrl = getInitialResultUrl() || DEFAULT_RESULT_URL;
1063
+ els.resultUrl.value = initialUrl;
1064
+ await loadResultFromUrl(initialUrl);
1065
+ }
1066
+
1067
+ els.urlLoader.addEventListener('submit', async (event) => {
1068
+ event.preventDefault();
1069
+ await loadResultFromUrl(els.resultUrl.value, true);
1070
+ });
1071
+
872
1072
  els.resultFile.addEventListener('change', async () => {
873
1073
  const file = els.resultFile.files?.[0];
874
1074
  if (!file) {
@@ -876,6 +1076,8 @@ const html = `<!doctype html>
876
1076
  }
877
1077
  try {
878
1078
  initialize(JSON.parse(await file.text()), file.name);
1079
+ els.resultUrl.value = '';
1080
+ forgetResultUrl();
879
1081
  } catch (error) {
880
1082
  setLoadState('Could not parse selected JSON: ' + error.message, true);
881
1083
  }
@@ -1,8 +1,9 @@
1
1
  import assert from 'node:assert/strict';
2
2
  import { EventAnalyzer } from '../llm/analyzers/event-analyzer/event-analyzer.js';
3
3
  import { buildBlockAnalysisArtifact } from '../llm/analyzers/event-analyzer/event-analyzer-blocks.js';
4
+ import { OpenAiProvider } from '../llm/providers/openai-provider.js';
4
5
  import { PageExtractor } from '../page-extractor.js';
5
- import '../index.js';
6
+ import { analyzeUrl } from '../index.js';
6
7
 
7
8
  class FakeProvider {
8
9
  constructor() {
@@ -44,6 +45,7 @@ class FakeLocator {
44
45
  if (this.throwOnScreenshot) {
45
46
  throw new Error('selector screenshot failed');
46
47
  }
48
+ return Buffer.from(`locator screenshot:${options?.path || 'buffer'}`);
47
49
  }
48
50
  }
49
51
 
@@ -67,6 +69,28 @@ class FakePage {
67
69
 
68
70
  async screenshot(options) {
69
71
  this.pageScreenshots.push(options);
72
+ return Buffer.from(`page screenshot:${options?.path || 'buffer'}`);
73
+ }
74
+ }
75
+
76
+ class FakeS3Client {
77
+ constructor({ failPredicate = null } = {}) {
78
+ this.failPredicate = failPredicate;
79
+ this.commands = [];
80
+ this.attemptsByKey = new Map();
81
+ }
82
+
83
+ async send(command) {
84
+ const input = command.input;
85
+ this.commands.push(input);
86
+ const attempts = (this.attemptsByKey.get(input.Key) || 0) + 1;
87
+ this.attemptsByKey.set(input.Key, attempts);
88
+
89
+ if (this.failPredicate?.(input, attempts)) {
90
+ throw new Error(`s3 upload failed for ${input.Key}`);
91
+ }
92
+
93
+ return {};
70
94
  }
71
95
  }
72
96
 
@@ -117,6 +141,104 @@ async function analyzeWith(options = {}) {
117
141
  assert.equal(result.events_by_node[0].event_type, 'signup');
118
142
  }
119
143
 
144
+ {
145
+ const originalWithPreparedPage = PageExtractor.prototype.withPreparedPage;
146
+ const originalExtractPreparedPage = PageExtractor.prototype.extractPreparedPage;
147
+ const originalCaptureScreenshots = PageExtractor.prototype.captureScreenshots;
148
+ const originalCaptureUrlScreenshots = PageExtractor.prototype.captureUrlScreenshots;
149
+ const originalAnalyze = OpenAiProvider.prototype.analyze;
150
+ const calls = [];
151
+ const fakePage = { pageId: 'prepared-page' };
152
+
153
+ PageExtractor.prototype.withPreparedPage = async function withPreparedPage(inputUrl, callback) {
154
+ calls.push(['withPreparedPage', inputUrl]);
155
+ return await callback(fakePage, String(inputUrl || '').trim());
156
+ };
157
+ PageExtractor.prototype.extractPreparedPage = async function extractPreparedPage(page, targetUrl) {
158
+ calls.push(['extractPreparedPage', page, targetUrl]);
159
+ assert.equal(page, fakePage);
160
+ return {
161
+ html: [
162
+ '<!doctype html><html><head><title>Demo</title></head><body>',
163
+ '<main><section><a href="/signup">Sign up</a></section></main>',
164
+ '</body></html>'
165
+ ].join(''),
166
+ blocks: [{
167
+ blockIdx: 0,
168
+ blockCssPath: 'body > main:nth-of-type(1) > section:nth-of-type(1)',
169
+ top: 0,
170
+ left: 0,
171
+ width: 1000,
172
+ height: 200,
173
+ textPreview: 'Sign up'
174
+ }],
175
+ elementGeometries: [{
176
+ tag: 'a',
177
+ text: 'Sign up',
178
+ href: 'https://example.com/signup',
179
+ top: 0,
180
+ left: 0,
181
+ width: 80,
182
+ height: 24,
183
+ selectorNthOfType: 'body > main:nth-of-type(1) > section:nth-of-type(1) > a:nth-of-type(1)'
184
+ }],
185
+ screenshots: { fullPage: '/tmp/full-page.png' },
186
+ pageSize: { width: 1000, height: 800 }
187
+ };
188
+ };
189
+ PageExtractor.prototype.captureScreenshots = async function captureScreenshots(page, targetUrl, screenshotBlocks, options) {
190
+ calls.push(['captureScreenshots', page, targetUrl, screenshotBlocks.length, options]);
191
+ assert.equal(page, fakePage);
192
+ return {
193
+ blocks: screenshotBlocks.map((_block, index) => ({
194
+ blockIdx: index,
195
+ path: `/tmp/logical-block-${index}.png`
196
+ }))
197
+ };
198
+ };
199
+ PageExtractor.prototype.captureUrlScreenshots = async function captureUrlScreenshots() {
200
+ calls.push(['captureUrlScreenshots']);
201
+ throw new Error('captureUrlScreenshots should not be called by analyzeUrl block screenshots');
202
+ };
203
+ OpenAiProvider.prototype.analyze = async function analyze(prompt) {
204
+ calls.push(['llm', prompt.includes('DOM CSV') ? 'event' : 'special']);
205
+ return [
206
+ 'Demo page with a primary CTA',
207
+ 'blockIdxs,blockName,blockDescription,blockPossibleEvents',
208
+ '0,CTASection,Primary CTA section,signup.cta_click'
209
+ ].join('\n');
210
+ };
211
+
212
+ try {
213
+ const result = await analyzeUrl(' https://example.com/demo ', {
214
+ llm: {
215
+ apiKey: 'test-key',
216
+ apiEndpoint: 'https://llm.example.invalid/v1/chat/completions',
217
+ model: 'test-model'
218
+ },
219
+ fullPageScreenshot: true,
220
+ blockScreenshots: true,
221
+ showBlockIdx: true
222
+ });
223
+
224
+ assert.equal(calls.filter((call) => call[0] === 'withPreparedPage').length, 1);
225
+ assert.equal(calls.some((call) => call[0] === 'captureUrlScreenshots'), false);
226
+ assert.equal(calls.filter((call) => call[0] === 'captureScreenshots').length, 1);
227
+ assert.equal(result.screenshots.fullPage, '/tmp/full-page.png');
228
+ assert.equal(result.screenshots.blocks[0].path, '/tmp/logical-block-0.png');
229
+ assert.equal(
230
+ result.analysis.block_analysis.blocks[0].blockScreenshotPaths[0],
231
+ '/tmp/logical-block-0.png'
232
+ );
233
+ } finally {
234
+ PageExtractor.prototype.withPreparedPage = originalWithPreparedPage;
235
+ PageExtractor.prototype.extractPreparedPage = originalExtractPreparedPage;
236
+ PageExtractor.prototype.captureScreenshots = originalCaptureScreenshots;
237
+ PageExtractor.prototype.captureUrlScreenshots = originalCaptureUrlScreenshots;
238
+ OpenAiProvider.prototype.analyze = originalAnalyze;
239
+ }
240
+ }
241
+
120
242
  {
121
243
  const extractor = new PageExtractor();
122
244
  const locator = new FakeLocator({ count: 0 });
@@ -191,6 +313,123 @@ async function analyzeWith(options = {}) {
191
313
  assert.equal(page.pageScreenshots.length, 0);
192
314
  }
193
315
 
316
+ {
317
+ assert.throws(
318
+ () => new PageExtractor({ s3: { region: 'ap-northeast-1' } }),
319
+ /extractorConfig\.s3\.bucket is required/
320
+ );
321
+ assert.throws(
322
+ () => new PageExtractor({ s3: { bucket: 'page-analyzer-test' } }),
323
+ /extractorConfig\.s3\.region is required/
324
+ );
325
+ }
326
+
327
+ {
328
+ const s3Client = new FakeS3Client();
329
+ const extractor = new PageExtractor({
330
+ s3: {
331
+ bucket: 'page-analyzer-test',
332
+ region: 'ap-northeast-1',
333
+ prefix: '/page-analyzer/snapshots/',
334
+ publicBaseUrl: 'https://cdn.example.com/',
335
+ client: s3Client
336
+ }
337
+ });
338
+ const locator = new FakeLocator();
339
+ const page = new FakePage(locator);
340
+ const screenshots = await extractor.captureScreenshots(page, 'https://example.com/demo', [
341
+ { blockName: 'Hero', blockCssPath: '#hero' }
342
+ ], {
343
+ fullPageScreenshot: true,
344
+ blockScreenshots: true
345
+ });
346
+
347
+ assert.equal(page.pageScreenshots.length, 1);
348
+ assert.deepEqual(page.pageScreenshots[0], { fullPage: true });
349
+ assert.equal(locator.screenshots.length, 1);
350
+ assert.deepEqual(locator.screenshots[0], {});
351
+ assert.equal(s3Client.commands.length, 2);
352
+
353
+ const [fullPageUpload, blockUpload] = s3Client.commands;
354
+ assert.equal(fullPageUpload.Bucket, 'page-analyzer-test');
355
+ assert.equal(fullPageUpload.ContentType, 'image/png');
356
+ assert.equal(Buffer.isBuffer(fullPageUpload.Body), true);
357
+ assert.match(fullPageUpload.Key, /^page-analyzer\/snapshots\/example-com-demo-.*-full-page\.png$/);
358
+ assert.match(blockUpload.Key, /^page-analyzer\/snapshots\/example-com-demo-.*-block-000\.png$/);
359
+
360
+ assert.equal(
361
+ screenshots.fullPage,
362
+ `https://cdn.example.com/${fullPageUpload.Key}`
363
+ );
364
+ assert.equal(screenshots.blocks[0].path, `https://cdn.example.com/${blockUpload.Key}`);
365
+ }
366
+
367
+ {
368
+ const s3Client = new FakeS3Client();
369
+ const extractor = new PageExtractor({
370
+ s3: {
371
+ bucket: 'page-analyzer-test',
372
+ region: 'ap-northeast-1',
373
+ prefix: 'nested/prefix',
374
+ client: s3Client
375
+ }
376
+ });
377
+ const locator = new FakeLocator();
378
+ const page = new FakePage(locator);
379
+ const screenshots = await extractor.captureScreenshots(page, 'https://example.com/demo', [
380
+ { blockName: 'Hero', blockCssPath: '#hero' }
381
+ ], {
382
+ fullPageScreenshot: false,
383
+ blockScreenshots: true
384
+ });
385
+
386
+ const uploadedKey = s3Client.commands[0].Key;
387
+ assert.match(uploadedKey, /^nested\/prefix\/example-com-demo-.*-block-000\.png$/);
388
+ assert.equal(
389
+ screenshots.blocks[0].path,
390
+ `https://page-analyzer-test.s3.ap-northeast-1.amazonaws.com/${uploadedKey}`
391
+ );
392
+ }
393
+
394
+ {
395
+ const originalWarn = console.warn;
396
+ const warnings = [];
397
+ console.warn = (message) => warnings.push(message);
398
+
399
+ try {
400
+ const s3Client = new FakeS3Client({
401
+ failPredicate: (input) => input.Key.endsWith('-block-000.png')
402
+ });
403
+ const extractor = new PageExtractor({
404
+ s3: {
405
+ bucket: 'page-analyzer-test',
406
+ region: 'ap-northeast-1',
407
+ prefix: 'page-analyzer/snapshots',
408
+ publicBaseUrl: 'https://cdn.example.com',
409
+ client: s3Client
410
+ }
411
+ });
412
+ const locator = new FakeLocator();
413
+ const page = new FakePage(locator);
414
+ const screenshots = await extractor.captureScreenshots(page, 'https://example.com/demo', [
415
+ { blockName: 'Hero', blockCssPath: '#hero' },
416
+ { blockName: 'Footer', blockCssPath: '#footer' }
417
+ ], {
418
+ fullPageScreenshot: false,
419
+ blockScreenshots: true
420
+ });
421
+
422
+ assert.equal(screenshots.blocks.length, 1);
423
+ assert.equal(screenshots.blocks[0].blockIdx, 1);
424
+ assert.equal(s3Client.commands.filter((input) => input.Key.endsWith('-block-000.png')).length, 3);
425
+ assert.equal(s3Client.commands.filter((input) => input.Key.endsWith('-block-001.png')).length, 1);
426
+ assert.equal(warnings.some((message) => message.includes('retrying')), true);
427
+ assert.equal(warnings.some((message) => message.includes('Failed to capture/upload block 0')), true);
428
+ } finally {
429
+ console.warn = originalWarn;
430
+ }
431
+ }
432
+
194
433
  {
195
434
  const artifact = buildBlockAnalysisArtifact('Demo', [{
196
435
  blockIdx: 3,