textweb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,452 @@
1
+ /**
2
+ * TextWeb Text Grid Renderer
3
+ *
4
+ * Converts a rendered web page into a structured text grid with
5
+ * interactive element references. No screenshots, no vision models.
6
+ *
7
+ * Key design decisions:
8
+ * - Overflow > truncation (never lose information)
9
+ * - Measure actual font metrics from the page
10
+ * - Table-aware layout
11
+ * - Z-index compositing (back to front)
12
+ */
13
+
/**
 * Measure the average character cell size of the page's body font.
 *
 * A hidden, absolutely positioned <span> styled with the body's font family
 * and size is appended to <body>, measured, and removed again (also when a
 * measurement throws).
 *
 * @param {object} page - Object exposing `evaluate(fn)`, e.g. a Playwright page.
 * @returns {Promise<{charW: number, charH: number, lineH: number, font: string, fontSize: string}>}
 *   `charW` is the mean glyph width over a 62-character alphanumeric sample,
 *   `lineH` is the height of a single glyph with the body's line-height
 *   applied, and `charH` is the larger of the sample height and `lineH`.
 *   All three are guaranteed to be positive finite numbers: measurements that
 *   come back as 0 or non-finite (e.g. `body { font-size: 0 }`) are replaced
 *   by fixed fallbacks so callers can safely divide by them.
 */
async function measureCharSize(page) {
  return await page.evaluate(() => {
    // With a Playwright page this callback is serialized and run in the
    // browser, so every helper and constant it needs is declared in here.
    const SAMPLE = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
    const FALLBACK_CHAR_W = 8;
    const FALLBACK_CHAR_H = 16;
    const isUsable = (value) => Number.isFinite(value) && value > 0;

    // Create a test element using the page's default font
    const bodyStyle = getComputedStyle(document.body);
    const el = document.createElement('span');
    el.style.fontFamily = bodyStyle.fontFamily;
    el.style.fontSize = bodyStyle.fontSize;
    el.style.fontWeight = 'normal';
    el.style.position = 'absolute';
    el.style.visibility = 'hidden';
    el.style.whiteSpace = 'nowrap';

    // Proportional fonts vary per character, so average over a
    // representative sample instead of measuring a single glyph.
    el.textContent = SAMPLE;

    let avgW;
    let charH;
    let lineH;
    document.body.appendChild(el);
    try {
      const sampleRect = el.getBoundingClientRect();
      avgW = sampleRect.width / SAMPLE.length;
      charH = sampleRect.height;

      // Re-measure with the body's line-height for vertical spacing
      el.textContent = 'X';
      el.style.lineHeight = bodyStyle.lineHeight;
      lineH = el.getBoundingClientRect().height;
    } finally {
      // Never leave the probe element behind, even if measuring failed
      document.body.removeChild(el);
    }

    const tallest = Math.max(charH, lineH);
    const safeCharH = isUsable(tallest) ? tallest : FALLBACK_CHAR_H;

    return {
      charW: isUsable(avgW) ? avgW : FALLBACK_CHAR_W,
      charH: safeCharH,
      lineH: isUsable(lineH) ? lineH : safeCharH,
      font: bodyStyle.fontFamily,
      fontSize: bodyStyle.fontSize,
    };
  });
}
52
+
/**
 * Extract visible text nodes and form/media elements from a page, with
 * positions and metadata.
 *
 * Walks `document.body` with a TreeWalker. Non-blank text nodes are reported
 * against their parent element; `input`, `select`, `textarea`, `img`, `hr` and
 * `br` elements are reported directly. Subtrees whose root is hidden
 * (display none, visibility hidden, opacity 0, or a 0x0 box) are not visited.
 *
 * @param {object} page - Object exposing `evaluate(fn)`, e.g. a Playwright page.
 * @returns {Promise<Array<object>>} Records sorted by `z`, then `y`, then `x`.
 *   Each record has: `text`, `tag`, `semantic`, `headingLevel` (0 when not a
 *   heading), `interactive`, `checked`, `x`/`y`/`w`/`h` (as reported by
 *   `getBoundingClientRect()`), `z`, `href` (or null), `selector` (a CSS
 *   selector for the reported element) and `tableCell` (`{cellIndex, rowIndex,
 *   isHeader}` or null).
 */
async function extractElements(page) {
  return await page.evaluate(() => {
    const INTERACTIVE_SELECTOR = 'a[href], button, input, select, textarea, [onclick], [role="button"], [role="link"], [tabindex]:not([tabindex="-1"]), summary';
    const results = [];

    function isVisible(el) {
      const style = getComputedStyle(el);
      if (style.display === 'none' || style.visibility === 'hidden' || parseFloat(style.opacity) === 0) return false;
      // A `display: contents` element generates no box of its own, so its
      // rect is always 0x0 even though its children are rendered. Treating it
      // as invisible would drop the whole subtree.
      if (style.display === 'contents') return true;
      const rect = el.getBoundingClientRect();
      if (rect.width === 0 && rect.height === 0) return false;
      return true;
    }

    // Largest z-index found on the element or any ancestor below <body>;
    // fixed/sticky ancestors are lifted to at least 1000.
    function getZIndex(el) {
      let z = 0;
      let current = el;
      while (current && current !== document.body) {
        const style = getComputedStyle(current);
        const zi = parseInt(style.zIndex, 10);
        if (!isNaN(zi) && zi > z) z = zi;
        if (style.position === 'fixed' || style.position === 'sticky') z = Math.max(z, 1000);
        current = current.parentElement;
      }
      return z;
    }

    function isUniqueSelector(selector) {
      return document.querySelectorAll(selector).length === 1;
    }

    // Build a CSS selector that resolves to exactly this element. Attribute
    // shortcuts are only used when they match a single node: radio groups
    // share a `name`, and duplicated ids/test ids occur in real pages.
    function buildSelector(el) {
      if (el.id) {
        const byId = '#' + CSS.escape(el.id);
        if (isUniqueSelector(byId)) return byId;
      }

      const testId = el.getAttribute('data-testid');
      if (testId) {
        const byTestId = `[data-testid="${CSS.escape(testId)}"]`;
        if (isUniqueSelector(byTestId)) return byTestId;
      }

      const tag = el.tagName.toLowerCase();
      const name = el.getAttribute('name');
      if (name) {
        const byName = `${tag}[name="${CSS.escape(name)}"]`;
        if (isUniqueSelector(byName)) return byName;
      }

      // Fallback: positional selector relative to the parent's selector
      const parent = el.parentElement;
      if (!parent) return tag;
      const idx = Array.from(parent.children).indexOf(el) + 1;
      return `${buildSelector(parent)} > ${tag}:nth-child(${idx})`;
    }

    function isInteractive(el) {
      return el.matches(INTERACTIVE_SELECTOR);
    }

    // Walk the DOM tree
    const walker = document.createTreeWalker(
      document.body,
      NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT,
      {
        acceptNode(node) {
          if (node.nodeType === Node.TEXT_NODE) {
            return node.textContent.trim() ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_REJECT;
          }
          const el = node;
          // REJECT prunes the whole subtree of a hidden element
          if (!isVisible(el)) return NodeFilter.FILTER_REJECT;
          // Accept specific non-text elements
          if (el.matches('input, select, textarea, img, hr, br')) {
            return NodeFilter.FILTER_ACCEPT;
          }
          // SKIP: not reported itself, but its children are still visited
          return NodeFilter.FILTER_SKIP;
        },
      }
    );

    while (walker.nextNode()) {
      const node = walker.currentNode;
      const isText = node.nodeType === Node.TEXT_NODE;
      const el = isText ? node.parentElement : node;
      if (!el) continue;

      let rect;
      if (isText) {
        // Measure the text itself rather than its (possibly larger) parent
        const range = document.createRange();
        range.selectNodeContents(node);
        rect = range.getBoundingClientRect();
      } else {
        rect = el.getBoundingClientRect();
      }
      if (rect.width === 0 && rect.height === 0) continue;

      const tag = el.tagName.toLowerCase();
      const interactive = isInteractive(el);

      let text = '';
      if (isText) {
        text = node.textContent.trim();
      } else if (tag === 'input' || tag === 'textarea') {
        text = el.value || el.placeholder || '';
      } else if (tag === 'select') {
        const opt = el.options && el.options[el.selectedIndex];
        text = opt ? opt.text : '';
      } else if (tag === 'img') {
        text = el.alt || '[img]';
      } else if (tag === 'hr') {
        text = '---';
      }

      // Determine semantic type
      let semantic = 'text';
      const headingMatch = tag.match(/^h(\d)$/);
      if (headingMatch) semantic = 'heading';
      else if (tag === 'a' && el.href) semantic = 'link';
      else if (tag === 'button' || el.getAttribute('role') === 'button') semantic = 'button';
      else if (tag === 'input') {
        const type = (el.type || 'text').toLowerCase();
        if (type === 'checkbox') semantic = 'checkbox';
        else if (type === 'radio') semantic = 'radio';
        else if (type === 'submit' || type === 'button') semantic = 'button';
        else if (type === 'file') semantic = 'file';
        else semantic = 'input';
      }
      else if (tag === 'select') semantic = 'select';
      else if (tag === 'textarea') semantic = 'textarea';
      else if (tag === 'hr') semantic = 'separator';

      // List context: only content within 5px of the <li>'s top edge is
      // marked as a list item.
      const li = semantic === 'text' ? el.closest('li') : null;
      if (li && Math.abs(rect.y - li.getBoundingClientRect().y) < 5) {
        semantic = 'listitem';
      }

      // Check if inside a table cell
      const closestTd = el.closest('td, th');
      let tableCell = null;
      if (closestTd) {
        const tr = closestTd.closest('tr');
        const table = closestTd.closest('table');
        if (tr && table) {
          tableCell = {
            cellIndex: Array.from(tr.children).indexOf(closestTd),
            rowIndex: Array.from(table.querySelectorAll('tr')).indexOf(tr),
            isHeader: closestTd.tagName === 'TH',
          };
        }
      }

      results.push({
        text,
        tag,
        semantic,
        headingLevel: headingMatch ? parseInt(headingMatch[1], 10) : 0,
        interactive,
        checked: !!el.checked,
        x: rect.x,
        y: rect.y,
        w: rect.width,
        h: rect.height,
        z: getZIndex(el),
        href: el.href || null,
        selector: buildSelector(el),
        tableCell,
      });
    }

    // Sort by z-index (back to front), then by document position (y, x)
    results.sort((a, b) => a.z - b.z || a.y - b.y || a.x - b.x);
    return results;
  });
}
255
+
/**
 * Place text onto the grid, allowing overflow (never truncate).
 *
 * Characters are written left to right starting at (`row`, `col`). When the
 * cursor passes the right edge, or a '\n' is read, writing continues on the
 * next row at the wrap column. The wrap column is `col`, or 0 when `col`
 * itself lies at or beyond the right edge, so text that starts off-grid is
 * moved onto the next row instead of being dropped. Rows are appended to both
 * grids as needed. A cell is only overwritten when `z` is greater than or
 * equal to the z value already stored for it. Cells with a negative row or
 * column are skipped.
 *
 * @param {string[][]} grid - Character grid, mutated in place.
 * @param {number[][]} zGrid - Per-cell z values parallel to `grid`, mutated in place.
 * @param {number} z - Z value of the text being placed.
 * @param {number} row - Starting row.
 * @param {number} col - Starting column.
 * @param {string} text - Text to place; iterated by UTF-16 code unit.
 * @param {number} cols - Grid width in cells.
 * @param {number} rows - Unused; kept so existing call sites keep working.
 * @returns {number} The row the cursor is on after the last character.
 */
function placeText(grid, zGrid, z, row, col, text, cols, rows) {
  // Grow both grids so that row `index` exists (overflow — don't lose data)
  const ensureRow = (index) => {
    while (index >= grid.length) {
      grid.push(Array(cols).fill(' '));
      zGrid.push(Array(cols).fill(-1));
    }
  };

  // Resuming at an out-of-range column would wrap again on every character
  // and write nothing, so fall back to column 0 in that case.
  const wrapCol = col < cols ? col : 0;

  let r = row;
  let c = col;

  for (let i = 0; i < text.length; i++) {
    ensureRow(r);

    if (c >= cols) {
      // Wrap to next line at the wrap column
      r++;
      c = wrapCol;
      ensureRow(r);
    }

    const ch = text[i];
    if (ch === '\n') { r++; c = wrapCol; continue; }

    // Rows/columns left of or above the grid are clipped, not an error
    if (r >= 0 && c >= 0 && c < cols && z >= zGrid[r][c]) {
      grid[r][c] = ch;
      zGrid[r][c] = z;
    }
    c++;
  }

  return r;
}
293
+
/**
 * Split a sequence of elements into visual rows.
 *
 * Elements are consumed in the order given. An element stays in the current
 * row while its `y` is within 40% of `charH` of the element right before it;
 * otherwise it opens a new row. The comparison is always against the previous
 * element, not against the first element of the row.
 *
 * @param {Array<{y: number}>} elements - Elements in the order they should be scanned.
 * @param {number} charH - Character height used to derive the Y tolerance.
 * @returns {Array<Array<object>>} Rows in encounter order; no row is empty.
 */
function groupByRows(elements, charH) {
  const tolerance = charH * 0.4;
  const grouped = [];
  let previousY = -Infinity;

  for (const item of elements) {
    const startsNewRow = grouped.length === 0 || Math.abs(item.y - previousY) > tolerance;
    if (startsNewRow) {
      grouped.push([item]);
    } else {
      grouped[grouped.length - 1].push(item);
    }
    previousY = item.y;
  }

  return grouped;
}
315
+
/**
 * Build the display string for one element.
 *
 * When `ref` is a number it is embedded in the output (`[3]text`,
 * `[3 label]`, `[3:value]`, ...). When `ref` is null or undefined no
 * reference marker is emitted at all, for every semantic type, so the output
 * never contains a literal "[null]".
 *
 * @param {object} el - Element record; reads `semantic`, `text`, and depending
 *   on the type `headingLevel`, `checked` and `w`.
 * @param {?number} ref - Reference id, or null when the element has none.
 * @param {number} cols - Grid width in characters.
 * @param {number} startCol - Column the string will be placed at.
 * @param {number} charW - Character width in pixels, used to size fields/rules.
 * @returns {string} The text to place on the grid.
 */
function formatElement(el, ref, cols, startCol, charW) {
  const hasRef = ref !== null && ref !== undefined;
  const text = el.text === null || el.text === undefined ? '' : String(el.text);

  // "[ref]" marker placed in front of plain content
  const marker = hasRef ? `[${ref}]` : '';
  // Bracketed control: "[ref:body]" with a ref, "[body]" without
  const field = (body) => (hasRef ? `[${ref}:${body}]` : `[${body}]`);
  // Width of the blank shown for an empty text field, in characters
  const blankWidth = (max) => Math.min(max, Math.max(5, Math.round(el.w / charW) - 6));

  switch (el.semantic) {
    case 'heading': {
      // Levels 1-2 get a double rule, deeper levels a single rule
      const bar = el.headingLevel <= 2 ? '═' : '─';
      const title = text.toUpperCase();
      const tail = bar.repeat(Math.max(2, cols - startCol - title.length - 6));
      return `${marker}${bar.repeat(2)} ${title} ${tail}`;
    }
    case 'link':
      return `${marker}${text}`;
    case 'button':
      return hasRef ? `[${ref} ${text}]` : `[${text}]`;
    case 'input':
      return field(text || '_'.repeat(blankWidth(25)));
    case 'textarea':
      return field(text || '_'.repeat(blankWidth(40)));
    case 'checkbox':
      return `${field(el.checked ? 'X' : ' ')} ${text}`;
    case 'radio':
      return `${field(el.checked ? '●' : '○')} ${text}`;
    case 'select':
      return field(`▼ ${text}`);
    case 'file':
      return field(`📎 ${text || 'Choose file'}`);
    case 'separator': {
      const width = Math.min(cols - startCol, Math.round(el.w / charW));
      return '─'.repeat(Math.max(3, width));
    }
    case 'listitem':
      return `${marker}• ${text}`;
    default:
      return `${marker}${text}`;
  }
}
357
+
/**
 * Render extracted elements into lines of text.
 *
 * Strategy:
 * 1. Drop elements that end above the scroll offset; nothing is dropped at
 *    the bottom (overflow — never lose data).
 * 2. Group the remaining elements into visual rows with groupByRows().
 * 3. Within each row, sort by X and lay elements out left to right: each one
 *    is padded out to its pixel-derived column, or separated from the
 *    previous one by a single space when that column is already taken.
 * 4. Each visual row becomes one output line; lines are not clipped to `cols`.
 *
 * Interactive elements receive consecutive numeric refs, in layout order.
 *
 * @param {Array<object>} elements - Element records as produced by extractElements().
 * @param {number} cols - Nominal grid width, forwarded to formatElement().
 * @param {number} charW - Character width in pixels. A value that is not a
 *   positive finite number is replaced by 8, because dividing by it would
 *   otherwise produce an infinite column and make the padding step throw.
 * @param {number} charH - Character height in pixels, used for row grouping.
 * @param {number} [scrollY=0] - Vertical offset subtracted from element Y
 *   positions when filtering.
 * @returns {{view: string, elements: Object<number, object>, meta: object}}
 *   `view` is the rendered text, `elements` maps each ref to
 *   `{selector, tag, semantic, href, text, x, y}`, and `meta` holds
 *   `{cols, rows, scrollY, totalRefs, charW, charH}` where `charW` is the
 *   width actually used for layout.
 */
function renderGrid(elements, cols, charW, charH, scrollY = 0) {
  const FALLBACK_CHAR_W = 8;
  const cellW = Number.isFinite(charW) && charW > 0 ? charW : FALLBACK_CHAR_W;

  const elementMap = {};
  const lines = []; // output lines as strings
  let refId = 0;

  // Filter to viewport (vertically — allow overflow below)
  const visible = elements.filter((el) => el.y - scrollY + el.h >= 0);

  for (const rowElements of groupByRows(visible, charH)) {
    // Sort elements in this row by X position (left to right)
    rowElements.sort((a, b) => a.x - b.x);

    let line = '';
    let cursor = 0; // current character position in the line

    for (const el of rowElements) {
      // A non-finite X cannot be mapped to a column; pin it to the left edge
      const rawCol = Math.round(el.x / cellW);
      const targetCol = Number.isFinite(rawCol) ? Math.max(0, rawCol) : 0;

      // Assign ref for interactive elements
      let ref = null;
      if (el.interactive) {
        ref = refId++;
        elementMap[ref] = {
          selector: el.selector,
          tag: el.tag,
          semantic: el.semantic,
          href: el.href,
          text: el.text,
          x: el.x,
          y: el.y,
        };
      }

      const display = formatElement(el, ref, cols, targetCol, cellW);
      if (!display) continue;

      if (targetCol > cursor) {
        // Pad with spaces to reach the target column
        line += ' '.repeat(targetCol - cursor);
        cursor = targetCol;
      } else if (cursor > 0 && targetCol <= cursor) {
        // Elements overlap — add a single space separator
        line += ' ';
        cursor += 1;
      }

      line += display;
      cursor += display.length;
    }

    lines.push(line.trimEnd());
  }

  // Remove trailing empty lines
  while (lines.length > 0 && lines[lines.length - 1] === '') lines.pop();

  return {
    view: lines.join('\n'),
    elements: elementMap,
    meta: { cols, rows: lines.length, scrollY, totalRefs: refId, charW: cellW, charH },
  };
}
436
+
/**
 * Main render function: page → text grid.
 *
 * Measures the page's character metrics, extracts its elements and renders
 * them with renderGrid().
 *
 * @param {object} page - Object exposing `evaluate(fn)`, e.g. a Playwright page.
 * @param {?{cols?: number, scrollY?: number}} [options] - `cols` defaults to
 *   120 and `scrollY` to 0. `null` is accepted and treated like `{}`.
 * @returns {Promise<{view: string, elements: object, meta: object}>} The
 *   result of renderGrid().
 */
async function render(page, options = {}) {
  // The default parameter only covers `undefined`; guard `null` explicitly
  const { cols = 120, scrollY = 0 } = options || {};

  // The two page evaluations do not depend on each other's result, so issue
  // both before waiting. measureCharSize is started first.
  const [metrics, elements] = await Promise.all([
    measureCharSize(page),
    extractElements(page),
  ]);

  return renderGrid(elements, cols, metrics.charW, metrics.charH, scrollY);
}
451
+
452
+ module.exports = { render, extractElements, renderGrid, measureCharSize };