explorbot 0.1.11 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/README.md +12 -2
  2. package/bin/explorbot-cli.ts +21 -21
  3. package/dist/bin/explorbot-cli.js +3 -3
  4. package/dist/package.json +4 -3
  5. package/dist/rules/researcher/container-rules.md +2 -0
  6. package/dist/src/action-result.js +2 -1
  7. package/dist/src/action.js +5 -10
  8. package/dist/src/ai/captain.js +0 -2
  9. package/dist/src/ai/driller.js +1108 -0
  10. package/dist/src/ai/historian/codeceptjs.js +2 -2
  11. package/dist/src/ai/historian/experience.js +1 -0
  12. package/dist/src/ai/historian/playwright.js +4 -4
  13. package/dist/src/ai/historian/screencast.js +121 -0
  14. package/dist/src/ai/historian.js +5 -3
  15. package/dist/src/ai/pilot.js +31 -22
  16. package/dist/src/ai/rules.js +3 -5
  17. package/dist/src/ai/session-analyst.js +117 -0
  18. package/dist/src/ai/tester.js +13 -2
  19. package/dist/src/commands/base-command.js +6 -6
  20. package/dist/src/commands/drill-command.js +3 -2
  21. package/dist/src/commands/exit-command.js +1 -0
  22. package/dist/src/commands/explore-command.js +20 -3
  23. package/dist/src/components/AddRule.js +1 -1
  24. package/dist/src/explorbot.js +52 -9
  25. package/dist/src/explorer.js +11 -9
  26. package/dist/src/reporter.js +68 -4
  27. package/dist/src/state-manager.js +4 -3
  28. package/dist/src/stats.js +5 -0
  29. package/dist/src/utils/aria.js +354 -529
  30. package/dist/src/utils/hooks-runner.js +2 -8
  31. package/dist/src/utils/html.js +371 -0
  32. package/dist/src/utils/strings.js +15 -0
  33. package/dist/src/utils/unique-names.js +12 -1
  34. package/dist/src/utils/url-matcher.js +6 -1
  35. package/dist/src/utils/web-element.js +27 -24
  36. package/dist/src/utils/xpath.js +1 -1
  37. package/package.json +4 -3
  38. package/rules/researcher/container-rules.md +2 -0
  39. package/src/action-result.ts +2 -1
  40. package/src/action.ts +5 -12
  41. package/src/ai/captain.ts +0 -2
  42. package/src/ai/driller.ts +1194 -0
  43. package/src/ai/historian/codeceptjs.ts +2 -2
  44. package/src/ai/historian/experience.ts +3 -2
  45. package/src/ai/historian/playwright.ts +5 -5
  46. package/src/ai/historian/screencast.ts +133 -0
  47. package/src/ai/historian.ts +7 -5
  48. package/src/ai/pilot.ts +31 -21
  49. package/src/ai/rules.ts +3 -5
  50. package/src/ai/session-analyst.ts +133 -0
  51. package/src/ai/tester.ts +15 -2
  52. package/src/commands/base-command.ts +6 -6
  53. package/src/commands/drill-command.ts +3 -2
  54. package/src/commands/exit-command.ts +1 -0
  55. package/src/commands/explore-command.ts +22 -3
  56. package/src/components/AddRule.tsx +1 -1
  57. package/src/config.ts +10 -0
  58. package/src/explorbot.ts +59 -11
  59. package/src/explorer.ts +11 -9
  60. package/src/reporter.ts +68 -4
  61. package/src/state-manager.ts +4 -3
  62. package/src/stats.ts +7 -0
  63. package/src/utils/aria.ts +367 -537
  64. package/src/utils/hooks-runner.ts +2 -6
  65. package/src/utils/html.ts +381 -0
  66. package/src/utils/strings.ts +17 -0
  67. package/src/utils/unique-names.ts +13 -0
  68. package/src/utils/url-matcher.ts +5 -1
  69. package/src/utils/web-element.ts +31 -28
  70. package/src/utils/xpath.ts +1 -1
  71. package/dist/src/ai/bosun.js +0 -456
  72. package/src/ai/bosun.ts +0 -571
package/src/utils/hooks-runner.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  import type { ExplorbotConfig, Hook, HookConfig } from '../config.ts';
2
2
  import type Explorer from '../explorer.ts';
3
3
  import { createDebug } from './logger.ts';
4
+ import { extractStatePath } from './url-matcher.ts';
4
5
  import { matchesUrl } from './url-matcher.ts';
5
6
 
6
7
  const debugLog = createDebug('explorbot:hooks');
@@ -69,11 +70,6 @@ export class HooksRunner {
69
70
  }
70
71
 
71
72
  private extractPath(url: string): string {
72
- if (url.startsWith('/')) return url;
73
- try {
74
- return new URL(url).pathname;
75
- } catch {
76
- return url;
77
- }
73
+ return extractStatePath(url);
78
74
  }
79
75
  }
package/src/utils/html.ts CHANGED
@@ -83,6 +83,387 @@ const INTERACTIVE_EVENT_ATTRIBUTES = new Set(['onclick', 'onchange', 'onblur', '
83
83
 
84
84
  const HIDDEN_CLASSES = new Set(['hidden', 'invisible', 'd-none', 'hide', 'dn', 'u-hidden', 'is-hidden', 'visually-hidden', 'sr-only', 'screen-reader-only', 'visuallyhidden', 'opacity-0']);
85
85
 
86
+ export const EXPLORBOT_ATTRS = {
87
+ area: 'data-explorbot-area',
88
+ context: 'data-explorbot-context',
89
+ eidx: 'data-explorbot-eidx',
90
+ variant: 'data-explorbot-variant',
91
+ } as const;
92
+
93
+ export const HTML_SELECTORS = {
94
+ headingLabel: 'h1, h2, h3, h4, h5, h6, legend, caption, label, [role="heading"]',
95
+ interactiveContent: 'button, a[href], input, select, textarea, [role="button"], [role="link"], [role="option"], [role="menuitem"], [role="switch"], [role="checkbox"], [role="radio"], [aria-label], [tabindex]',
96
+ interactiveControl: 'button, a[href], input, select, textarea, [role="button"], [role="link"], [role="checkbox"], [role="radio"], [role="switch"], [role="tab"], [role="menuitem"]',
97
+ labelLike: 'h1, h2, h3, h4, h5, h6, legend, caption, label, [role="heading"], [class*="title"], [class*="label"], [class*="header"], [class*="name"]',
98
+ semanticContextContainer: 'section, article, form, fieldset, li, tr, td, th, [role="group"], [role="tabpanel"], [role="region"], [class*="card"], [class*="panel"], [class*="item"], [class*="usage"], [class*="group"]',
99
+ semanticOverlays: ['[role="dialog"]', '[role="listbox"]', '[role="menu"]', '[role="tooltip"]:not([style*="display: none"]):not([style*="visibility: hidden"])'],
100
+ } as const;
101
+
102
+ export const HTML_VISIBILITY_LIMITS = {
103
+ maxViewportOverlayRatio: 0.95,
104
+ minOpacity: 0.1,
105
+ minOverlayHeight: 40,
106
+ minOverlayWidth: 80,
107
+ } as const;
108
+
109
+ export const HTML_EXTRACTION_LIMITS = {
110
+ componentScopeHtmlLength: 8000,
111
+ maxOverlayCount: 3,
112
+ maxScopeInteractiveCount: 16,
113
+ overlayHtmlLength: 6000,
114
+ } as const;
115
+
116
+ export const CODE_EDITOR_MARKERS = ['monaco', 'codemirror', 'ace', 'ace_editor', 'code'] as const;
117
+
118
+ export const HTML_INTERACTIVE_ROLES = new Set(['button', 'link', 'checkbox', 'radio', 'switch', 'tab', 'combobox', 'iframe', 'code-editor', 'menuitem', 'menuitemcheckbox', 'menuitemradio', 'option', 'slider', 'spinbutton', 'textbox', 'searchbox', 'treeitem']);
119
+ export const HTML_FORM_CONTROL_ROLES = new Set(['checkbox', 'radio', 'switch', 'combobox', 'option', 'slider', 'spinbutton', 'textbox', 'searchbox']);
120
+ export const HTML_COMPOSITE_TARGET_ROLES = new Set(['tab', 'option', 'menuitem', 'menuitemcheckbox', 'menuitemradio', 'treeitem']);
121
+ export const HTML_COMPOSITE_AREA_HINTS = new Set(['role:tab', 'role:option', 'role:menuitem', 'role:menuitemcheckbox', 'role:menuitemradio', 'role:treeitem']);
122
+ export const HTML_FORM_CONTROL_TAGS = new Set(['input', 'select', 'textarea']);
123
+
124
+ export function inferHtmlRole(data: { attrs: Record<string, string>; role?: string; tag: string; variantHints?: string[] }): string {
125
+ if (data.tag === 'iframe' && data.variantHints?.includes('code-editor')) return 'code-editor';
126
+ if (data.role) return data.role.toLowerCase();
127
+ const explicitRole = data.attrs.role;
128
+ if (explicitRole) return explicitRole.toLowerCase();
129
+ if (data.tag === 'a' && data.attrs.href) return 'link';
130
+ if (data.tag === 'button') return 'button';
131
+ if (data.tag === 'iframe') return 'iframe';
132
+ if (data.tag === 'select') return 'combobox';
133
+ if (data.tag === 'textarea') return 'textbox';
134
+ if (data.tag === 'input') {
135
+ const type = (data.attrs.type || 'text').toLowerCase();
136
+ if (type === 'checkbox') return 'checkbox';
137
+ if (type === 'radio') return 'radio';
138
+ return 'textbox';
139
+ }
140
+ return data.tag;
141
+ }
142
+
143
+ export const ELEMENT_EXTRACTION_CONFIG = {
144
+ attrs: EXPLORBOT_ATTRS,
145
+ codeEditorMarkers: CODE_EDITOR_MARKERS,
146
+ maxAreaDepth: 5,
147
+ maxContextLength: 120,
148
+ maxOuterHTMLLength: 2000,
149
+ maxTextLength: 80,
150
+ minOpacity: HTML_VISIBILITY_LIMITS.minOpacity,
151
+ selectors: {
152
+ headingLabel: HTML_SELECTORS.headingLabel,
153
+ labelLike: HTML_SELECTORS.labelLike,
154
+ semanticContextContainer: HTML_SELECTORS.semanticContextContainer,
155
+ },
156
+ } as const;
157
+
158
+ export type ElementExtractionConfig = typeof ELEMENT_EXTRACTION_CONFIG;
159
+ export type RawElementData = NonNullable<ReturnType<typeof extractElementData>>;
160
+ export type VisibleOverlayExtractionConfig = {
161
+ interactiveContentSelector: string;
162
+ limits: typeof HTML_EXTRACTION_LIMITS;
163
+ overlaySelectors: readonly string[];
164
+ visibilityLimits: typeof HTML_VISIBILITY_LIMITS;
165
+ };
166
+ export type ComponentScopeExtractionConfig = {
167
+ eidxAttr: string;
168
+ interactiveControlSelector: string;
169
+ limits: typeof HTML_EXTRACTION_LIMITS;
170
+ };
171
+
172
+ export function extractElementData(el: Element, config?: ElementExtractionConfig) {
173
+ const cfg =
174
+ config ||
175
+ ({
176
+ attrs: {
177
+ area: 'data-explorbot-area',
178
+ context: 'data-explorbot-context',
179
+ eidx: 'data-explorbot-eidx',
180
+ variant: 'data-explorbot-variant',
181
+ },
182
+ codeEditorMarkers: ['monaco', 'codemirror', 'ace', 'ace_editor', 'code'],
183
+ maxAreaDepth: 5,
184
+ maxContextLength: 120,
185
+ maxOuterHTMLLength: 2000,
186
+ maxTextLength: 80,
187
+ minOpacity: 0.1,
188
+ selectors: {
189
+ headingLabel: 'h1, h2, h3, h4, h5, h6, legend, caption, label, [role="heading"]',
190
+ labelLike: 'h1, h2, h3, h4, h5, h6, legend, caption, label, [role="heading"], [class*="title"], [class*="label"], [class*="header"], [class*="name"]',
191
+ semanticContextContainer: 'section, article, form, fieldset, li, tr, td, th, [role="group"], [role="tabpanel"], [role="region"], [class*="card"], [class*="panel"], [class*="item"], [class*="usage"], [class*="group"]',
192
+ },
193
+ } as ElementExtractionConfig);
194
+
195
+ function normalizeText(value: string): string {
196
+ return value.replace(/\s+/g, ' ').trim();
197
+ }
198
+
199
+ function readText(node: Element | null): string {
200
+ if (!node) return '';
201
+ return normalizeText(node.textContent || '').slice(0, cfg.maxContextLength);
202
+ }
203
+
204
+ function getLabelLikeText(node: Element | null): string {
205
+ if (!node) return '';
206
+ const direct = readText(node);
207
+ if (direct) return direct;
208
+ const labelLike = node.querySelector(cfg.selectors.labelLike);
209
+ return readText(labelLike);
210
+ }
211
+
212
+ function collectVariantHints(target: Element): string[] {
213
+ const tokens = new Set<string>();
214
+ const className = target.getAttribute('class') || '';
215
+ const tagName = target.tagName.toLowerCase();
216
+
217
+ for (const cls of className.split(/\s+/).filter(Boolean)) {
218
+ const lower = cls.toLowerCase();
219
+ if (/^(xs|sm|md|lg|xl|xxl)$/.test(lower)) tokens.add(lower);
220
+ if (/^(mini|small|medium|large|xlarge|xl|compact|dense)$/.test(lower)) tokens.add(lower);
221
+ if (/(^|[-_])(xs|sm|md|lg|xl|xxl|mini|small|medium|large|compact|dense)([-_]|$)/.test(lower)) tokens.add(lower);
222
+ if (/(selected|disabled|primary|secondary|tertiary|danger|success|warning|outline|ghost|icon|dropdown)/.test(lower)) tokens.add(lower);
223
+ }
224
+
225
+ const type = (target.getAttribute('type') || '').toLowerCase();
226
+ if (type) tokens.add(type);
227
+ if (target.hasAttribute('disabled') || target.getAttribute('aria-disabled') === 'true') tokens.add('disabled');
228
+ if (className.toLowerCase().includes('selected') || target.getAttribute('aria-pressed') === 'true') tokens.add('selected');
229
+ if (tagName === 'iframe') tokens.add('iframe');
230
+ if (tagName === 'iframe' && isEmbeddedCodeEditorFrame(target)) tokens.add('code-editor');
231
+ const svgCount = target.querySelectorAll('svg').length;
232
+ if (svgCount > 0) tokens.add('has-icon');
233
+ if (svgCount > 1) tokens.add('double-icon');
234
+
235
+ const normalizedText = normalizeText(target.textContent || '');
236
+ if (!normalizedText && svgCount > 0) tokens.add('icon-only');
237
+ if (normalizedText && svgCount > 0) {
238
+ const first = target.firstElementChild?.tagName.toLowerCase();
239
+ const last = target.lastElementChild?.tagName.toLowerCase();
240
+ if (first === 'svg') tokens.add('leading-icon');
241
+ if (last === 'svg') tokens.add('trailing-icon');
242
+ }
243
+
244
+ if (tagName === 'a' && target.getAttribute('href')) tokens.add('navigates');
245
+
246
+ return Array.from(tokens).slice(0, 8);
247
+ }
248
+
249
+ function isEmbeddedCodeEditorFrame(target: Element): boolean {
250
+ const src = (target.getAttribute('src') || '').toLowerCase();
251
+ const markerSelector = cfg.codeEditorMarkers.map((marker) => `[class*="${marker}"]`).join(', ');
252
+ const ancestorClasses = (target.closest(markerSelector)?.getAttribute('class') || '').toLowerCase();
253
+ return cfg.codeEditorMarkers.some((marker) => src.includes(marker) || ancestorClasses.includes(marker));
254
+ }
255
+
256
+ function findContextLabel(target: Element): string {
257
+ const labelledby = target.getAttribute('aria-labelledby');
258
+ const candidates: string[] = [];
259
+ if (labelledby) {
260
+ for (const id of labelledby.split(/\s+/).filter(Boolean)) {
261
+ const ref = document.getElementById(id);
262
+ const text = readText(ref);
263
+ if (text) candidates.push(text);
264
+ }
265
+ }
266
+
267
+ const semanticContainer = target.closest(cfg.selectors.semanticContextContainer);
268
+ if (semanticContainer) {
269
+ const ownHeading = semanticContainer.querySelector(cfg.selectors.headingLabel);
270
+ const ownHeadingText = readText(ownHeading);
271
+ if (ownHeadingText) candidates.push(ownHeadingText);
272
+
273
+ let previous: Element | null = semanticContainer.previousElementSibling;
274
+ let hops = 0;
275
+ while (previous && hops < 3) {
276
+ const previousText = getLabelLikeText(previous);
277
+ if (previousText) {
278
+ candidates.push(previousText);
279
+ break;
280
+ }
281
+ previous = previous.previousElementSibling;
282
+ hops++;
283
+ }
284
+ }
285
+
286
+ let parent: Element | null = target.parentElement;
287
+ let depth = 0;
288
+ while (parent && depth < 4) {
289
+ let sibling: Element | null = parent.previousElementSibling;
290
+ let hops = 0;
291
+ while (sibling && hops < 2) {
292
+ const siblingText = getLabelLikeText(sibling);
293
+ if (siblingText) {
294
+ candidates.push(siblingText);
295
+ sibling = null;
296
+ break;
297
+ }
298
+ sibling = sibling.previousElementSibling;
299
+ hops++;
300
+ }
301
+ parent = parent.parentElement;
302
+ depth++;
303
+ }
304
+
305
+ const ownText = normalizeText(target.textContent || '');
306
+ for (const candidate of candidates) {
307
+ if (!candidate) continue;
308
+ if (candidate === ownText) continue;
309
+ if (candidate.toLowerCase().includes('title should not be empty')) continue;
310
+ return candidate.slice(0, cfg.maxContextLength);
311
+ }
312
+
313
+ return '';
314
+ }
315
+
316
+ const rect = el.getBoundingClientRect();
317
+ if (rect.width === 0 && rect.height === 0) return null;
318
+ const style = window.getComputedStyle(el);
319
+ if (style.display === 'none' || style.visibility === 'hidden') return null;
320
+ if (Number.parseFloat(style.opacity || '1') < cfg.minOpacity) return null;
321
+ if (el.getAttribute('aria-hidden') === 'true' || el.hasAttribute('hidden')) return null;
322
+ if ((el as HTMLElement).offsetParent === null && style.position !== 'fixed') return null;
323
+
324
+ const allAttrs: Record<string, string> = {};
325
+ for (let i = 0; i < el.attributes.length; i++) {
326
+ const attr = el.attributes[i];
327
+ allAttrs[attr.name] = attr.value;
328
+ }
329
+
330
+ const areaHints: string[] = [];
331
+ let current: Element | null = el;
332
+ let depth = 0;
333
+ while (current && depth < cfg.maxAreaDepth) {
334
+ const tag = current.tagName.toLowerCase();
335
+ areaHints.push(tag);
336
+
337
+ const role = current.getAttribute('role');
338
+ if (role) areaHints.push(`role:${role.toLowerCase()}`);
339
+
340
+ const id = current.getAttribute('id');
341
+ if (id) areaHints.push(`id:${id.toLowerCase()}`);
342
+
343
+ const className = current.getAttribute('class');
344
+ if (className) {
345
+ for (const cls of className.split(/\s+/).filter(Boolean)) {
346
+ areaHints.push(`class:${cls.toLowerCase()}`);
347
+ }
348
+ }
349
+
350
+ current = current.parentElement;
351
+ depth++;
352
+ }
353
+
354
+ allAttrs[cfg.attrs.area] = areaHints.join('|');
355
+ allAttrs[cfg.attrs.context] = findContextLabel(el);
356
+ allAttrs[cfg.attrs.variant] = collectVariantHints(el).join('|');
357
+
358
+ return {
359
+ tag: el.tagName.toLowerCase(),
360
+ text: normalizeText(el.textContent || '').slice(0, cfg.maxTextLength),
361
+ allAttrs,
362
+ outerHTML: el.outerHTML.slice(0, cfg.maxOuterHTMLLength),
363
+ x: Math.round(rect.x + rect.width / 2),
364
+ y: Math.round(rect.y + rect.height / 2),
365
+ };
366
+ }
367
+
368
+ export function getElementDataExtractorSource(): string {
369
+ return extractElementData.toString();
370
+ }
371
+
372
+ export function extractVisibleOverlayHtml(config: VisibleOverlayExtractionConfig): string {
373
+ function isVisible(element: Element): boolean {
374
+ const html = element as HTMLElement;
375
+ const style = window.getComputedStyle(html);
376
+ const rect = html.getBoundingClientRect();
377
+ if (rect.width === 0 && rect.height === 0) return false;
378
+ if (style.display === 'none' || style.visibility === 'hidden') return false;
379
+ if (Number.parseFloat(style.opacity || '1') < config.visibilityLimits.minOpacity) return false;
380
+ return true;
381
+ }
382
+
383
+ function getUsefulContent(element: Element): { interactiveCount: number; text: string } {
384
+ const text = (element.textContent || '').replace(/\s+/g, ' ').trim();
385
+ const interactiveCount = element.querySelectorAll(config.interactiveContentSelector).length;
386
+ return { interactiveCount, text };
387
+ }
388
+
389
+ function isLikelyFloatingOverlay(element: Element): boolean {
390
+ const html = element as HTMLElement;
391
+ const style = window.getComputedStyle(html);
392
+ const rect = html.getBoundingClientRect();
393
+ const zIndex = Number.parseInt(style.zIndex || '0', 10);
394
+ const isFloating = style.position === 'fixed' || style.position === 'absolute' || style.position === 'sticky' || zIndex > 0;
395
+ if (!isFloating) return false;
396
+ if (rect.width < config.visibilityLimits.minOverlayWidth || rect.height < config.visibilityLimits.minOverlayHeight) return false;
397
+ if (rect.bottom < 0 || rect.right < 0 || rect.top > window.innerHeight || rect.left > window.innerWidth) return false;
398
+ if (rect.width >= window.innerWidth * config.visibilityLimits.maxViewportOverlayRatio && rect.height >= window.innerHeight * config.visibilityLimits.maxViewportOverlayRatio) return false;
399
+ const { interactiveCount, text } = getUsefulContent(element);
400
+ return interactiveCount > 0 || text.length > 0;
401
+ }
402
+
403
+ const overlays: string[] = [];
404
+ const seen = new Set<Element>();
405
+ for (const selector of config.overlaySelectors) {
406
+ for (const element of Array.from(document.querySelectorAll(selector))) {
407
+ if (seen.has(element)) continue;
408
+ seen.add(element);
409
+ if (!isVisible(element)) continue;
410
+ const { interactiveCount, text } = getUsefulContent(element);
411
+ if (interactiveCount === 0 && text.length === 0) continue;
412
+ overlays.push((element as HTMLElement).outerHTML.slice(0, config.limits.overlayHtmlLength));
413
+ }
414
+ }
415
+
416
+ if (overlays.length === 0) {
417
+ const floatingCandidates = Array.from(document.body.querySelectorAll('*'))
418
+ .filter((element) => !seen.has(element) && isVisible(element) && isLikelyFloatingOverlay(element))
419
+ .sort((left, right) => {
420
+ const leftStyle = window.getComputedStyle(left as HTMLElement);
421
+ const rightStyle = window.getComputedStyle(right as HTMLElement);
422
+ const leftZ = Number.parseInt(leftStyle.zIndex || '0', 10) || 0;
423
+ const rightZ = Number.parseInt(rightStyle.zIndex || '0', 10) || 0;
424
+ if (leftZ !== rightZ) return rightZ - leftZ;
425
+ const leftRect = (left as HTMLElement).getBoundingClientRect();
426
+ const rightRect = (right as HTMLElement).getBoundingClientRect();
427
+ return leftRect.width * leftRect.height - rightRect.width * rightRect.height;
428
+ });
429
+
430
+ for (const element of floatingCandidates.slice(0, config.limits.maxOverlayCount)) {
431
+ overlays.push((element as HTMLElement).outerHTML.slice(0, config.limits.overlayHtmlLength));
432
+ }
433
+ }
434
+
435
+ return overlays.slice(0, config.limits.maxOverlayCount).join('\n\n--- overlay ---\n\n');
436
+ }
437
+
438
+ export function extractComponentScopeHtml(eidx: string, config: ComponentScopeExtractionConfig): string {
439
+ const element = document.querySelector(`[${config.eidxAttr}="${eidx}"]`);
440
+ if (!element) return '';
441
+
442
+ function countInteractive(node: Element): number {
443
+ return node.querySelectorAll(config.interactiveControlSelector).length;
444
+ }
445
+
446
+ let current = element.parentElement;
447
+ while (current) {
448
+ const count = countInteractive(current);
449
+ if (count > 0 && count <= config.limits.maxScopeInteractiveCount) {
450
+ return current.outerHTML.slice(0, config.limits.componentScopeHtmlLength);
451
+ }
452
+ current = current.parentElement;
453
+ }
454
+
455
+ if (element instanceof HTMLElement) return element.outerHTML.slice(0, config.limits.componentScopeHtmlLength);
456
+ return '';
457
+ }
458
+
459
+ export function getVisibleOverlayHtmlExtractorSource(): string {
460
+ return extractVisibleOverlayHtml.toString();
461
+ }
462
+
463
+ export function getComponentScopeHtmlExtractorSource(): string {
464
+ return extractComponentScopeHtml.toString();
465
+ }
466
+
86
467
  export const TRASH_HTML_CLASSES = /^(text-|color-|flex-|float-|v-|ember-|d-|border-)/;
87
468
 
88
469
  export const TAILWIND_CLASS_PATTERNS: RegExp[] = [
package/src/utils/strings.ts CHANGED
@@ -1,3 +1,5 @@
1
+ import { createHash } from 'node:crypto';
2
+
1
3
  export function truncateJson(input: any): string {
2
4
  if (!input) return '';
3
5
  const str = JSON.stringify(input);
@@ -11,3 +13,18 @@ export function sanitizeFilename(name: string): string {
11
13
  .replace(/^_+|_+$/g, '')
12
14
  .slice(0, 50);
13
15
  }
16
+
17
+ export function safeFilename(name: string, ext = '', maxBytes = 240): string {
18
+ const sanitized = name.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
19
+ const extBytes = Buffer.byteLength(ext, 'utf8');
20
+ const budget = maxBytes - extBytes;
21
+ if (Buffer.byteLength(sanitized, 'utf8') <= budget) return sanitized + ext;
22
+
23
+ const hash = createHash('sha1').update(name).digest('hex').slice(0, 8);
24
+ const suffix = `_${hash}`;
25
+ let truncated = sanitized;
26
+ while (Buffer.byteLength(truncated + suffix, 'utf8') > budget && truncated.length > 0) {
27
+ truncated = truncated.slice(0, -1);
28
+ }
29
+ return truncated + suffix + ext;
30
+ }
package/src/utils/unique-names.ts CHANGED
@@ -7,8 +7,21 @@ const nameConfig = {
7
7
  style: 'capital',
8
8
  };
9
9
 
10
+ const explorationConfig = {
11
+ dictionaries: [adjectives, animals],
12
+ separator: '',
13
+ length: 2,
14
+ style: 'capital',
15
+ };
16
+
10
17
  export function uniqSessionName(): string {
11
18
  const name = uniqueNamesGenerator(nameConfig);
12
19
  const randomNum = Math.floor(Math.random() * 999);
13
20
  return `${name}${randomNum}`;
14
21
  }
22
+
23
+ export function uniqExplorationName(): string {
24
+ const name = uniqueNamesGenerator(explorationConfig);
25
+ const randomNum = Math.floor(Math.random() * 999);
26
+ return `${name}${randomNum}`;
27
+ }
package/src/utils/url-matcher.ts CHANGED
@@ -45,6 +45,10 @@ export function generalizeUrl(url: string): string {
45
45
 
46
46
  export function matchesUrl(pattern: string, path: string): boolean {
47
47
  if (pattern === '*') return true;
48
+ if (!pattern.includes('?')) {
49
+ const queryIndex = path.indexOf('?');
50
+ if (queryIndex >= 0) path = path.slice(0, queryIndex);
51
+ }
48
52
  const norm = (s: string) => s?.replace(/\/+$/, '').toLowerCase();
49
53
  if (norm(pattern) === norm(path)) return true;
50
54
 
@@ -81,7 +85,7 @@ export function extractStatePath(url: string): string {
81
85
  if (url.startsWith('/')) return url;
82
86
  try {
83
87
  const urlObj = new URL(url);
84
- return urlObj.pathname + urlObj.hash;
88
+ return `${urlObj.pathname}${urlObj.search}${urlObj.hash}`;
85
89
  } catch {
86
90
  return url;
87
91
  }
package/src/utils/web-element.ts CHANGED
@@ -1,10 +1,11 @@
1
+ import { ELEMENT_EXTRACTION_CONFIG, EXPLORBOT_ATTRS, type ElementExtractionConfig, type RawElementData, extractElementData, getElementDataExtractorSource } from './html.ts';
1
2
  import { type XPathMatch, buildClickableXPath, evaluateXPath, isDynamicId, isGenericClass } from './xpath.ts';
2
3
 
4
+ export { extractElementData } from './html.ts';
5
+
3
6
  const KEY_DISPLAY_ATTRS = ['role', 'id', 'class', 'aria-label'];
4
7
  const KEY_ATTRS = ['role', 'aria-label', 'id', 'name', 'type', 'href'];
5
8
 
6
- type RawElementData = NonNullable<ReturnType<typeof extractElementData>>;
7
-
8
9
  export class WebElement {
9
10
  tag: string;
10
11
  role: string;
@@ -43,7 +44,7 @@ export class WebElement {
43
44
  }
44
45
 
45
46
  get eidx(): string | null {
46
- return this.attrs['data-explorbot-eidx'] || this.attrs.eidx || null;
47
+ return this.attrs[EXPLORBOT_ATTRS.eidx] || this.attrs.eidx || null;
47
48
  }
48
49
 
49
50
  get isNavigationLink(): boolean {
@@ -57,6 +58,26 @@ export class WebElement {
57
58
  return cls.split(/\s+/).filter((c) => c.length > 2 && !isDynamicId(c) && !isGenericClass(c));
58
59
  }
59
60
 
61
+ get areaHints(): string[] {
62
+ const raw = this.attrs[EXPLORBOT_ATTRS.area] || '';
63
+ return raw
64
+ .split('|')
65
+ .map((entry) => entry.trim().toLowerCase())
66
+ .filter(Boolean);
67
+ }
68
+
69
+ get contextLabel(): string {
70
+ return (this.attrs[EXPLORBOT_ATTRS.context] || '').trim();
71
+ }
72
+
73
+ get variantHints(): string[] {
74
+ const raw = this.attrs[EXPLORBOT_ATTRS.variant] || '';
75
+ return raw
76
+ .split('|')
77
+ .map((entry) => entry.trim().toLowerCase())
78
+ .filter(Boolean);
79
+ }
80
+
60
81
  static fromRawData(d: RawElementData, role?: string): WebElement {
61
82
  return new WebElement({
62
83
  tag: d.tag,
@@ -65,6 +86,7 @@ export class WebElement {
65
86
  clickXPath: buildClickableXPath({ tag: d.tag, allAttrs: d.allAttrs, text: d.text } as XPathMatch),
66
87
  attrs: d.allAttrs,
67
88
  text: d.text,
89
+ outerHTML: d.outerHTML,
68
90
  x: d.x,
69
91
  y: d.y,
70
92
  });
@@ -87,7 +109,7 @@ export class WebElement {
87
109
  try {
88
110
  const count = await locator.count();
89
111
  if (count === 0) return null;
90
- const data = await locator.first().evaluate(extractElementData);
112
+ const data = await locator.first().evaluate(extractElementData, ELEMENT_EXTRACTION_CONFIG);
91
113
  if (!data) return null;
92
114
  return WebElement.fromRawData(data);
93
115
  } catch {
@@ -96,25 +118,25 @@ export class WebElement {
96
118
  }
97
119
 
98
120
  static async fromEidx(page: any, eidx: string): Promise<WebElement | null> {
99
- return WebElement.fromPlaywrightLocator(page.locator(`[data-explorbot-eidx="${eidx}"]`));
121
+ return WebElement.fromPlaywrightLocator(page.locator(`[${EXPLORBOT_ATTRS.eidx}="${eidx}"]`));
100
122
  }
101
123
 
102
124
  static async fromEidxList(page: any, eidxList: string[]): Promise<WebElement[]> {
103
125
  if (eidxList.length === 0) return [];
104
126
 
105
127
  const rawList: RawElementData[] = await page.evaluate(
106
- ([list, extractFnStr]: [string[], string]) => {
128
+ ([list, extractFnStr, config]: [string[], string, ElementExtractionConfig]) => {
107
129
  const extract = new Function(`return ${extractFnStr}`)() as (el: Element) => any;
108
130
  const results: any[] = [];
109
131
  for (const eidx of list) {
110
- const el = document.querySelector(`[data-explorbot-eidx="${eidx}"]`);
132
+ const el = document.querySelector(`[${config.attrs.eidx}="${eidx}"]`);
111
133
  if (!el) continue;
112
- const data = extract(el);
134
+ const data = extract(el, config);
113
135
  if (data) results.push(data);
114
136
  }
115
137
  return results;
116
138
  },
117
- [eidxList, extractElementData.toString()] as [string[], string]
139
+ [eidxList, getElementDataExtractorSource(), ELEMENT_EXTRACTION_CONFIG] as [string[], string, ElementExtractionConfig]
118
140
  );
119
141
 
120
142
  return rawList.map((d) => WebElement.fromRawData(d));
@@ -126,22 +148,3 @@ export class WebElement {
126
148
  return { totalFound: result.totalFound, elements: result.matches.map((m) => WebElement.fromXPathMatch(m)) };
127
149
  }
128
150
  }
129
-
130
- export function extractElementData(el: Element) {
131
- const rect = el.getBoundingClientRect();
132
- if (rect.width === 0 && rect.height === 0) return null;
133
-
134
- const allAttrs: Record<string, string> = {};
135
- for (let i = 0; i < el.attributes.length; i++) {
136
- const attr = el.attributes[i];
137
- allAttrs[attr.name] = attr.value;
138
- }
139
-
140
- return {
141
- tag: el.tagName.toLowerCase(),
142
- text: (el.textContent || '').trim().slice(0, 80),
143
- allAttrs,
144
- x: Math.round(rect.x + rect.width / 2),
145
- y: Math.round(rect.y + rect.height / 2),
146
- };
147
- }
package/src/utils/xpath.ts CHANGED
@@ -48,7 +48,7 @@ function getAbsoluteXPath(el: Element): string {
48
48
  }
49
49
 
50
50
  export const isDynamicId = (id: string) => /^(ember|react|__next)\d|^\d+$/.test(id);
51
- export const isGenericClass = (cls: string) => /^ember-view$|^ember\d|^react-|^__next/.test(cls);
51
+ export const isGenericClass = (cls: string) => /^ember-view$|^ember\d|^ember-|^react-|^__next/.test(cls);
52
52
 
53
53
  export function buildClickableXPath(el: XPathMatch): string {
54
54
  const a = el.allAttrs;