explorbot 0.1.12 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/explorbot-cli.ts +21 -21
- package/dist/bin/explorbot-cli.js +3 -3
- package/dist/package.json +3 -2
- package/dist/rules/researcher/container-rules.md +2 -0
- package/dist/src/action-result.js +2 -1
- package/dist/src/action.js +0 -6
- package/dist/src/ai/captain.js +0 -2
- package/dist/src/ai/driller.js +1108 -0
- package/dist/src/ai/pilot.js +31 -22
- package/dist/src/ai/rules.js +3 -5
- package/dist/src/ai/session-analyst.js +117 -0
- package/dist/src/ai/tester.js +13 -2
- package/dist/src/commands/base-command.js +6 -6
- package/dist/src/commands/drill-command.js +3 -2
- package/dist/src/commands/exit-command.js +1 -0
- package/dist/src/commands/explore-command.js +1 -0
- package/dist/src/components/AddRule.js +1 -1
- package/dist/src/explorbot.js +48 -8
- package/dist/src/explorer.js +9 -8
- package/dist/src/reporter.js +64 -3
- package/dist/src/state-manager.js +4 -3
- package/dist/src/stats.js +5 -0
- package/dist/src/utils/aria.js +354 -529
- package/dist/src/utils/hooks-runner.js +2 -8
- package/dist/src/utils/html.js +371 -0
- package/dist/src/utils/unique-names.js +12 -1
- package/dist/src/utils/url-matcher.js +6 -1
- package/dist/src/utils/web-element.js +27 -24
- package/dist/src/utils/xpath.js +1 -1
- package/package.json +3 -2
- package/rules/researcher/container-rules.md +2 -0
- package/src/action-result.ts +2 -1
- package/src/action.ts +0 -8
- package/src/ai/captain.ts +0 -2
- package/src/ai/driller.ts +1194 -0
- package/src/ai/pilot.ts +31 -21
- package/src/ai/rules.ts +3 -5
- package/src/ai/session-analyst.ts +133 -0
- package/src/ai/tester.ts +15 -2
- package/src/commands/base-command.ts +6 -6
- package/src/commands/drill-command.ts +3 -2
- package/src/commands/exit-command.ts +1 -0
- package/src/commands/explore-command.ts +1 -0
- package/src/components/AddRule.tsx +1 -1
- package/src/config.ts +4 -0
- package/src/explorbot.ts +55 -10
- package/src/explorer.ts +9 -8
- package/src/reporter.ts +64 -3
- package/src/state-manager.ts +4 -3
- package/src/stats.ts +7 -0
- package/src/utils/aria.ts +367 -537
- package/src/utils/hooks-runner.ts +2 -6
- package/src/utils/html.ts +381 -0
- package/src/utils/unique-names.ts +13 -0
- package/src/utils/url-matcher.ts +5 -1
- package/src/utils/web-element.ts +31 -28
- package/src/utils/xpath.ts +1 -1
- package/dist/src/ai/bosun.js +0 -456
- package/src/ai/bosun.ts +0 -571
package/src/utils/hooks-runner.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { ExplorbotConfig, Hook, HookConfig } from '../config.ts';
|
|
2
2
|
import type Explorer from '../explorer.ts';
|
|
3
3
|
import { createDebug } from './logger.ts';
|
|
4
|
+
import { extractStatePath } from './url-matcher.ts';
|
|
4
5
|
import { matchesUrl } from './url-matcher.ts';
|
|
5
6
|
|
|
6
7
|
const debugLog = createDebug('explorbot:hooks');
|
|
@@ -69,11 +70,6 @@ export class HooksRunner {
|
|
|
69
70
|
}
|
|
70
71
|
|
|
71
72
|
private extractPath(url: string): string {
|
|
72
|
-
|
|
73
|
-
try {
|
|
74
|
-
return new URL(url).pathname;
|
|
75
|
-
} catch {
|
|
76
|
-
return url;
|
|
77
|
-
}
|
|
73
|
+
return extractStatePath(url);
|
|
78
74
|
}
|
|
79
75
|
}
|
package/src/utils/html.ts
CHANGED
|
@@ -83,6 +83,387 @@ const INTERACTIVE_EVENT_ATTRIBUTES = new Set(['onclick', 'onchange', 'onblur', '
|
|
|
83
83
|
|
|
84
84
|
const HIDDEN_CLASSES = new Set(['hidden', 'invisible', 'd-none', 'hide', 'dn', 'u-hidden', 'is-hidden', 'visually-hidden', 'sr-only', 'screen-reader-only', 'visuallyhidden', 'opacity-0']);
|
|
85
85
|
|
|
86
|
+
export const EXPLORBOT_ATTRS = {
|
|
87
|
+
area: 'data-explorbot-area',
|
|
88
|
+
context: 'data-explorbot-context',
|
|
89
|
+
eidx: 'data-explorbot-eidx',
|
|
90
|
+
variant: 'data-explorbot-variant',
|
|
91
|
+
} as const;
|
|
92
|
+
|
|
93
|
+
export const HTML_SELECTORS = {
|
|
94
|
+
headingLabel: 'h1, h2, h3, h4, h5, h6, legend, caption, label, [role="heading"]',
|
|
95
|
+
interactiveContent: 'button, a[href], input, select, textarea, [role="button"], [role="link"], [role="option"], [role="menuitem"], [role="switch"], [role="checkbox"], [role="radio"], [aria-label], [tabindex]',
|
|
96
|
+
interactiveControl: 'button, a[href], input, select, textarea, [role="button"], [role="link"], [role="checkbox"], [role="radio"], [role="switch"], [role="tab"], [role="menuitem"]',
|
|
97
|
+
labelLike: 'h1, h2, h3, h4, h5, h6, legend, caption, label, [role="heading"], [class*="title"], [class*="label"], [class*="header"], [class*="name"]',
|
|
98
|
+
semanticContextContainer: 'section, article, form, fieldset, li, tr, td, th, [role="group"], [role="tabpanel"], [role="region"], [class*="card"], [class*="panel"], [class*="item"], [class*="usage"], [class*="group"]',
|
|
99
|
+
semanticOverlays: ['[role="dialog"]', '[role="listbox"]', '[role="menu"]', '[role="tooltip"]:not([style*="display: none"]):not([style*="visibility: hidden"])'],
|
|
100
|
+
} as const;
|
|
101
|
+
|
|
102
|
+
export const HTML_VISIBILITY_LIMITS = {
|
|
103
|
+
maxViewportOverlayRatio: 0.95,
|
|
104
|
+
minOpacity: 0.1,
|
|
105
|
+
minOverlayHeight: 40,
|
|
106
|
+
minOverlayWidth: 80,
|
|
107
|
+
} as const;
|
|
108
|
+
|
|
109
|
+
export const HTML_EXTRACTION_LIMITS = {
|
|
110
|
+
componentScopeHtmlLength: 8000,
|
|
111
|
+
maxOverlayCount: 3,
|
|
112
|
+
maxScopeInteractiveCount: 16,
|
|
113
|
+
overlayHtmlLength: 6000,
|
|
114
|
+
} as const;
|
|
115
|
+
|
|
116
|
+
export const CODE_EDITOR_MARKERS = ['monaco', 'codemirror', 'ace', 'ace_editor', 'code'] as const;
|
|
117
|
+
|
|
118
|
+
export const HTML_INTERACTIVE_ROLES = new Set(['button', 'link', 'checkbox', 'radio', 'switch', 'tab', 'combobox', 'iframe', 'code-editor', 'menuitem', 'menuitemcheckbox', 'menuitemradio', 'option', 'slider', 'spinbutton', 'textbox', 'searchbox', 'treeitem']);
|
|
119
|
+
export const HTML_FORM_CONTROL_ROLES = new Set(['checkbox', 'radio', 'switch', 'combobox', 'option', 'slider', 'spinbutton', 'textbox', 'searchbox']);
|
|
120
|
+
export const HTML_COMPOSITE_TARGET_ROLES = new Set(['tab', 'option', 'menuitem', 'menuitemcheckbox', 'menuitemradio', 'treeitem']);
|
|
121
|
+
export const HTML_COMPOSITE_AREA_HINTS = new Set(['role:tab', 'role:option', 'role:menuitem', 'role:menuitemcheckbox', 'role:menuitemradio', 'role:treeitem']);
|
|
122
|
+
export const HTML_FORM_CONTROL_TAGS = new Set(['input', 'select', 'textarea']);
|
|
123
|
+
|
|
124
|
+
export function inferHtmlRole(data: { attrs: Record<string, string>; role?: string; tag: string; variantHints?: string[] }): string {
|
|
125
|
+
if (data.tag === 'iframe' && data.variantHints?.includes('code-editor')) return 'code-editor';
|
|
126
|
+
if (data.role) return data.role.toLowerCase();
|
|
127
|
+
const explicitRole = data.attrs.role;
|
|
128
|
+
if (explicitRole) return explicitRole.toLowerCase();
|
|
129
|
+
if (data.tag === 'a' && data.attrs.href) return 'link';
|
|
130
|
+
if (data.tag === 'button') return 'button';
|
|
131
|
+
if (data.tag === 'iframe') return 'iframe';
|
|
132
|
+
if (data.tag === 'select') return 'combobox';
|
|
133
|
+
if (data.tag === 'textarea') return 'textbox';
|
|
134
|
+
if (data.tag === 'input') {
|
|
135
|
+
const type = (data.attrs.type || 'text').toLowerCase();
|
|
136
|
+
if (type === 'checkbox') return 'checkbox';
|
|
137
|
+
if (type === 'radio') return 'radio';
|
|
138
|
+
return 'textbox';
|
|
139
|
+
}
|
|
140
|
+
return data.tag;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
export const ELEMENT_EXTRACTION_CONFIG = {
|
|
144
|
+
attrs: EXPLORBOT_ATTRS,
|
|
145
|
+
codeEditorMarkers: CODE_EDITOR_MARKERS,
|
|
146
|
+
maxAreaDepth: 5,
|
|
147
|
+
maxContextLength: 120,
|
|
148
|
+
maxOuterHTMLLength: 2000,
|
|
149
|
+
maxTextLength: 80,
|
|
150
|
+
minOpacity: HTML_VISIBILITY_LIMITS.minOpacity,
|
|
151
|
+
selectors: {
|
|
152
|
+
headingLabel: HTML_SELECTORS.headingLabel,
|
|
153
|
+
labelLike: HTML_SELECTORS.labelLike,
|
|
154
|
+
semanticContextContainer: HTML_SELECTORS.semanticContextContainer,
|
|
155
|
+
},
|
|
156
|
+
} as const;
|
|
157
|
+
|
|
158
|
+
export type ElementExtractionConfig = typeof ELEMENT_EXTRACTION_CONFIG;
|
|
159
|
+
export type RawElementData = NonNullable<ReturnType<typeof extractElementData>>;
|
|
160
|
+
export type VisibleOverlayExtractionConfig = {
|
|
161
|
+
interactiveContentSelector: string;
|
|
162
|
+
limits: typeof HTML_EXTRACTION_LIMITS;
|
|
163
|
+
overlaySelectors: readonly string[];
|
|
164
|
+
visibilityLimits: typeof HTML_VISIBILITY_LIMITS;
|
|
165
|
+
};
|
|
166
|
+
export type ComponentScopeExtractionConfig = {
|
|
167
|
+
eidxAttr: string;
|
|
168
|
+
interactiveControlSelector: string;
|
|
169
|
+
limits: typeof HTML_EXTRACTION_LIMITS;
|
|
170
|
+
};
|
|
171
|
+
|
|
172
|
+
export function extractElementData(el: Element, config?: ElementExtractionConfig) {
|
|
173
|
+
const cfg =
|
|
174
|
+
config ||
|
|
175
|
+
({
|
|
176
|
+
attrs: {
|
|
177
|
+
area: 'data-explorbot-area',
|
|
178
|
+
context: 'data-explorbot-context',
|
|
179
|
+
eidx: 'data-explorbot-eidx',
|
|
180
|
+
variant: 'data-explorbot-variant',
|
|
181
|
+
},
|
|
182
|
+
codeEditorMarkers: ['monaco', 'codemirror', 'ace', 'ace_editor', 'code'],
|
|
183
|
+
maxAreaDepth: 5,
|
|
184
|
+
maxContextLength: 120,
|
|
185
|
+
maxOuterHTMLLength: 2000,
|
|
186
|
+
maxTextLength: 80,
|
|
187
|
+
minOpacity: 0.1,
|
|
188
|
+
selectors: {
|
|
189
|
+
headingLabel: 'h1, h2, h3, h4, h5, h6, legend, caption, label, [role="heading"]',
|
|
190
|
+
labelLike: 'h1, h2, h3, h4, h5, h6, legend, caption, label, [role="heading"], [class*="title"], [class*="label"], [class*="header"], [class*="name"]',
|
|
191
|
+
semanticContextContainer: 'section, article, form, fieldset, li, tr, td, th, [role="group"], [role="tabpanel"], [role="region"], [class*="card"], [class*="panel"], [class*="item"], [class*="usage"], [class*="group"]',
|
|
192
|
+
},
|
|
193
|
+
} as ElementExtractionConfig);
|
|
194
|
+
|
|
195
|
+
function normalizeText(value: string): string {
|
|
196
|
+
return value.replace(/\s+/g, ' ').trim();
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
function readText(node: Element | null): string {
|
|
200
|
+
if (!node) return '';
|
|
201
|
+
return normalizeText(node.textContent || '').slice(0, cfg.maxContextLength);
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
function getLabelLikeText(node: Element | null): string {
|
|
205
|
+
if (!node) return '';
|
|
206
|
+
const direct = readText(node);
|
|
207
|
+
if (direct) return direct;
|
|
208
|
+
const labelLike = node.querySelector(cfg.selectors.labelLike);
|
|
209
|
+
return readText(labelLike);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
function collectVariantHints(target: Element): string[] {
|
|
213
|
+
const tokens = new Set<string>();
|
|
214
|
+
const className = target.getAttribute('class') || '';
|
|
215
|
+
const tagName = target.tagName.toLowerCase();
|
|
216
|
+
|
|
217
|
+
for (const cls of className.split(/\s+/).filter(Boolean)) {
|
|
218
|
+
const lower = cls.toLowerCase();
|
|
219
|
+
if (/^(xs|sm|md|lg|xl|xxl)$/.test(lower)) tokens.add(lower);
|
|
220
|
+
if (/^(mini|small|medium|large|xlarge|xl|compact|dense)$/.test(lower)) tokens.add(lower);
|
|
221
|
+
if (/(^|[-_])(xs|sm|md|lg|xl|xxl|mini|small|medium|large|compact|dense)([-_]|$)/.test(lower)) tokens.add(lower);
|
|
222
|
+
if (/(selected|disabled|primary|secondary|tertiary|danger|success|warning|outline|ghost|icon|dropdown)/.test(lower)) tokens.add(lower);
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
const type = (target.getAttribute('type') || '').toLowerCase();
|
|
226
|
+
if (type) tokens.add(type);
|
|
227
|
+
if (target.hasAttribute('disabled') || target.getAttribute('aria-disabled') === 'true') tokens.add('disabled');
|
|
228
|
+
if (className.toLowerCase().includes('selected') || target.getAttribute('aria-pressed') === 'true') tokens.add('selected');
|
|
229
|
+
if (tagName === 'iframe') tokens.add('iframe');
|
|
230
|
+
if (tagName === 'iframe' && isEmbeddedCodeEditorFrame(target)) tokens.add('code-editor');
|
|
231
|
+
const svgCount = target.querySelectorAll('svg').length;
|
|
232
|
+
if (svgCount > 0) tokens.add('has-icon');
|
|
233
|
+
if (svgCount > 1) tokens.add('double-icon');
|
|
234
|
+
|
|
235
|
+
const normalizedText = normalizeText(target.textContent || '');
|
|
236
|
+
if (!normalizedText && svgCount > 0) tokens.add('icon-only');
|
|
237
|
+
if (normalizedText && svgCount > 0) {
|
|
238
|
+
const first = target.firstElementChild?.tagName.toLowerCase();
|
|
239
|
+
const last = target.lastElementChild?.tagName.toLowerCase();
|
|
240
|
+
if (first === 'svg') tokens.add('leading-icon');
|
|
241
|
+
if (last === 'svg') tokens.add('trailing-icon');
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
if (tagName === 'a' && target.getAttribute('href')) tokens.add('navigates');
|
|
245
|
+
|
|
246
|
+
return Array.from(tokens).slice(0, 8);
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
function isEmbeddedCodeEditorFrame(target: Element): boolean {
|
|
250
|
+
const src = (target.getAttribute('src') || '').toLowerCase();
|
|
251
|
+
const markerSelector = cfg.codeEditorMarkers.map((marker) => `[class*="${marker}"]`).join(', ');
|
|
252
|
+
const ancestorClasses = (target.closest(markerSelector)?.getAttribute('class') || '').toLowerCase();
|
|
253
|
+
return cfg.codeEditorMarkers.some((marker) => src.includes(marker) || ancestorClasses.includes(marker));
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
function findContextLabel(target: Element): string {
|
|
257
|
+
const labelledby = target.getAttribute('aria-labelledby');
|
|
258
|
+
const candidates: string[] = [];
|
|
259
|
+
if (labelledby) {
|
|
260
|
+
for (const id of labelledby.split(/\s+/).filter(Boolean)) {
|
|
261
|
+
const ref = document.getElementById(id);
|
|
262
|
+
const text = readText(ref);
|
|
263
|
+
if (text) candidates.push(text);
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
const semanticContainer = target.closest(cfg.selectors.semanticContextContainer);
|
|
268
|
+
if (semanticContainer) {
|
|
269
|
+
const ownHeading = semanticContainer.querySelector(cfg.selectors.headingLabel);
|
|
270
|
+
const ownHeadingText = readText(ownHeading);
|
|
271
|
+
if (ownHeadingText) candidates.push(ownHeadingText);
|
|
272
|
+
|
|
273
|
+
let previous: Element | null = semanticContainer.previousElementSibling;
|
|
274
|
+
let hops = 0;
|
|
275
|
+
while (previous && hops < 3) {
|
|
276
|
+
const previousText = getLabelLikeText(previous);
|
|
277
|
+
if (previousText) {
|
|
278
|
+
candidates.push(previousText);
|
|
279
|
+
break;
|
|
280
|
+
}
|
|
281
|
+
previous = previous.previousElementSibling;
|
|
282
|
+
hops++;
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
let parent: Element | null = target.parentElement;
|
|
287
|
+
let depth = 0;
|
|
288
|
+
while (parent && depth < 4) {
|
|
289
|
+
let sibling: Element | null = parent.previousElementSibling;
|
|
290
|
+
let hops = 0;
|
|
291
|
+
while (sibling && hops < 2) {
|
|
292
|
+
const siblingText = getLabelLikeText(sibling);
|
|
293
|
+
if (siblingText) {
|
|
294
|
+
candidates.push(siblingText);
|
|
295
|
+
sibling = null;
|
|
296
|
+
break;
|
|
297
|
+
}
|
|
298
|
+
sibling = sibling.previousElementSibling;
|
|
299
|
+
hops++;
|
|
300
|
+
}
|
|
301
|
+
parent = parent.parentElement;
|
|
302
|
+
depth++;
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
const ownText = normalizeText(target.textContent || '');
|
|
306
|
+
for (const candidate of candidates) {
|
|
307
|
+
if (!candidate) continue;
|
|
308
|
+
if (candidate === ownText) continue;
|
|
309
|
+
if (candidate.toLowerCase().includes('title should not be empty')) continue;
|
|
310
|
+
return candidate.slice(0, cfg.maxContextLength);
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
return '';
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
const rect = el.getBoundingClientRect();
|
|
317
|
+
if (rect.width === 0 && rect.height === 0) return null;
|
|
318
|
+
const style = window.getComputedStyle(el);
|
|
319
|
+
if (style.display === 'none' || style.visibility === 'hidden') return null;
|
|
320
|
+
if (Number.parseFloat(style.opacity || '1') < cfg.minOpacity) return null;
|
|
321
|
+
if (el.getAttribute('aria-hidden') === 'true' || el.hasAttribute('hidden')) return null;
|
|
322
|
+
if ((el as HTMLElement).offsetParent === null && style.position !== 'fixed') return null;
|
|
323
|
+
|
|
324
|
+
const allAttrs: Record<string, string> = {};
|
|
325
|
+
for (let i = 0; i < el.attributes.length; i++) {
|
|
326
|
+
const attr = el.attributes[i];
|
|
327
|
+
allAttrs[attr.name] = attr.value;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
const areaHints: string[] = [];
|
|
331
|
+
let current: Element | null = el;
|
|
332
|
+
let depth = 0;
|
|
333
|
+
while (current && depth < cfg.maxAreaDepth) {
|
|
334
|
+
const tag = current.tagName.toLowerCase();
|
|
335
|
+
areaHints.push(tag);
|
|
336
|
+
|
|
337
|
+
const role = current.getAttribute('role');
|
|
338
|
+
if (role) areaHints.push(`role:${role.toLowerCase()}`);
|
|
339
|
+
|
|
340
|
+
const id = current.getAttribute('id');
|
|
341
|
+
if (id) areaHints.push(`id:${id.toLowerCase()}`);
|
|
342
|
+
|
|
343
|
+
const className = current.getAttribute('class');
|
|
344
|
+
if (className) {
|
|
345
|
+
for (const cls of className.split(/\s+/).filter(Boolean)) {
|
|
346
|
+
areaHints.push(`class:${cls.toLowerCase()}`);
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
current = current.parentElement;
|
|
351
|
+
depth++;
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
allAttrs[cfg.attrs.area] = areaHints.join('|');
|
|
355
|
+
allAttrs[cfg.attrs.context] = findContextLabel(el);
|
|
356
|
+
allAttrs[cfg.attrs.variant] = collectVariantHints(el).join('|');
|
|
357
|
+
|
|
358
|
+
return {
|
|
359
|
+
tag: el.tagName.toLowerCase(),
|
|
360
|
+
text: normalizeText(el.textContent || '').slice(0, cfg.maxTextLength),
|
|
361
|
+
allAttrs,
|
|
362
|
+
outerHTML: el.outerHTML.slice(0, cfg.maxOuterHTMLLength),
|
|
363
|
+
x: Math.round(rect.x + rect.width / 2),
|
|
364
|
+
y: Math.round(rect.y + rect.height / 2),
|
|
365
|
+
};
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
export function getElementDataExtractorSource(): string {
|
|
369
|
+
return extractElementData.toString();
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
export function extractVisibleOverlayHtml(config: VisibleOverlayExtractionConfig): string {
|
|
373
|
+
function isVisible(element: Element): boolean {
|
|
374
|
+
const html = element as HTMLElement;
|
|
375
|
+
const style = window.getComputedStyle(html);
|
|
376
|
+
const rect = html.getBoundingClientRect();
|
|
377
|
+
if (rect.width === 0 && rect.height === 0) return false;
|
|
378
|
+
if (style.display === 'none' || style.visibility === 'hidden') return false;
|
|
379
|
+
if (Number.parseFloat(style.opacity || '1') < config.visibilityLimits.minOpacity) return false;
|
|
380
|
+
return true;
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
function getUsefulContent(element: Element): { interactiveCount: number; text: string } {
|
|
384
|
+
const text = (element.textContent || '').replace(/\s+/g, ' ').trim();
|
|
385
|
+
const interactiveCount = element.querySelectorAll(config.interactiveContentSelector).length;
|
|
386
|
+
return { interactiveCount, text };
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
function isLikelyFloatingOverlay(element: Element): boolean {
|
|
390
|
+
const html = element as HTMLElement;
|
|
391
|
+
const style = window.getComputedStyle(html);
|
|
392
|
+
const rect = html.getBoundingClientRect();
|
|
393
|
+
const zIndex = Number.parseInt(style.zIndex || '0', 10);
|
|
394
|
+
const isFloating = style.position === 'fixed' || style.position === 'absolute' || style.position === 'sticky' || zIndex > 0;
|
|
395
|
+
if (!isFloating) return false;
|
|
396
|
+
if (rect.width < config.visibilityLimits.minOverlayWidth || rect.height < config.visibilityLimits.minOverlayHeight) return false;
|
|
397
|
+
if (rect.bottom < 0 || rect.right < 0 || rect.top > window.innerHeight || rect.left > window.innerWidth) return false;
|
|
398
|
+
if (rect.width >= window.innerWidth * config.visibilityLimits.maxViewportOverlayRatio && rect.height >= window.innerHeight * config.visibilityLimits.maxViewportOverlayRatio) return false;
|
|
399
|
+
const { interactiveCount, text } = getUsefulContent(element);
|
|
400
|
+
return interactiveCount > 0 || text.length > 0;
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
const overlays: string[] = [];
|
|
404
|
+
const seen = new Set<Element>();
|
|
405
|
+
for (const selector of config.overlaySelectors) {
|
|
406
|
+
for (const element of Array.from(document.querySelectorAll(selector))) {
|
|
407
|
+
if (seen.has(element)) continue;
|
|
408
|
+
seen.add(element);
|
|
409
|
+
if (!isVisible(element)) continue;
|
|
410
|
+
const { interactiveCount, text } = getUsefulContent(element);
|
|
411
|
+
if (interactiveCount === 0 && text.length === 0) continue;
|
|
412
|
+
overlays.push((element as HTMLElement).outerHTML.slice(0, config.limits.overlayHtmlLength));
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
if (overlays.length === 0) {
|
|
417
|
+
const floatingCandidates = Array.from(document.body.querySelectorAll('*'))
|
|
418
|
+
.filter((element) => !seen.has(element) && isVisible(element) && isLikelyFloatingOverlay(element))
|
|
419
|
+
.sort((left, right) => {
|
|
420
|
+
const leftStyle = window.getComputedStyle(left as HTMLElement);
|
|
421
|
+
const rightStyle = window.getComputedStyle(right as HTMLElement);
|
|
422
|
+
const leftZ = Number.parseInt(leftStyle.zIndex || '0', 10) || 0;
|
|
423
|
+
const rightZ = Number.parseInt(rightStyle.zIndex || '0', 10) || 0;
|
|
424
|
+
if (leftZ !== rightZ) return rightZ - leftZ;
|
|
425
|
+
const leftRect = (left as HTMLElement).getBoundingClientRect();
|
|
426
|
+
const rightRect = (right as HTMLElement).getBoundingClientRect();
|
|
427
|
+
return leftRect.width * leftRect.height - rightRect.width * rightRect.height;
|
|
428
|
+
});
|
|
429
|
+
|
|
430
|
+
for (const element of floatingCandidates.slice(0, config.limits.maxOverlayCount)) {
|
|
431
|
+
overlays.push((element as HTMLElement).outerHTML.slice(0, config.limits.overlayHtmlLength));
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
return overlays.slice(0, config.limits.maxOverlayCount).join('\n\n--- overlay ---\n\n');
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
export function extractComponentScopeHtml(eidx: string, config: ComponentScopeExtractionConfig): string {
|
|
439
|
+
const element = document.querySelector(`[${config.eidxAttr}="${eidx}"]`);
|
|
440
|
+
if (!element) return '';
|
|
441
|
+
|
|
442
|
+
function countInteractive(node: Element): number {
|
|
443
|
+
return node.querySelectorAll(config.interactiveControlSelector).length;
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
let current = element.parentElement;
|
|
447
|
+
while (current) {
|
|
448
|
+
const count = countInteractive(current);
|
|
449
|
+
if (count > 0 && count <= config.limits.maxScopeInteractiveCount) {
|
|
450
|
+
return current.outerHTML.slice(0, config.limits.componentScopeHtmlLength);
|
|
451
|
+
}
|
|
452
|
+
current = current.parentElement;
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
if (element instanceof HTMLElement) return element.outerHTML.slice(0, config.limits.componentScopeHtmlLength);
|
|
456
|
+
return '';
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
export function getVisibleOverlayHtmlExtractorSource(): string {
|
|
460
|
+
return extractVisibleOverlayHtml.toString();
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
export function getComponentScopeHtmlExtractorSource(): string {
|
|
464
|
+
return extractComponentScopeHtml.toString();
|
|
465
|
+
}
|
|
466
|
+
|
|
86
467
|
export const TRASH_HTML_CLASSES = /^(text-|color-|flex-|float-|v-|ember-|d-|border-)/;
|
|
87
468
|
|
|
88
469
|
export const TAILWIND_CLASS_PATTERNS: RegExp[] = [
|
package/src/utils/unique-names.ts
CHANGED
|
@@ -7,8 +7,21 @@ const nameConfig = {
|
|
|
7
7
|
style: 'capital',
|
|
8
8
|
};
|
|
9
9
|
|
|
10
|
+
const explorationConfig = {
|
|
11
|
+
dictionaries: [adjectives, animals],
|
|
12
|
+
separator: '',
|
|
13
|
+
length: 2,
|
|
14
|
+
style: 'capital',
|
|
15
|
+
};
|
|
16
|
+
|
|
10
17
|
export function uniqSessionName(): string {
|
|
11
18
|
const name = uniqueNamesGenerator(nameConfig);
|
|
12
19
|
const randomNum = Math.floor(Math.random() * 999);
|
|
13
20
|
return `${name}${randomNum}`;
|
|
14
21
|
}
|
|
22
|
+
|
|
23
|
+
export function uniqExplorationName(): string {
|
|
24
|
+
const name = uniqueNamesGenerator(explorationConfig);
|
|
25
|
+
const randomNum = Math.floor(Math.random() * 999);
|
|
26
|
+
return `${name}${randomNum}`;
|
|
27
|
+
}
|
package/src/utils/url-matcher.ts
CHANGED
|
@@ -45,6 +45,10 @@ export function generalizeUrl(url: string): string {
|
|
|
45
45
|
|
|
46
46
|
export function matchesUrl(pattern: string, path: string): boolean {
|
|
47
47
|
if (pattern === '*') return true;
|
|
48
|
+
if (!pattern.includes('?')) {
|
|
49
|
+
const queryIndex = path.indexOf('?');
|
|
50
|
+
if (queryIndex >= 0) path = path.slice(0, queryIndex);
|
|
51
|
+
}
|
|
48
52
|
const norm = (s: string) => s?.replace(/\/+$/, '').toLowerCase();
|
|
49
53
|
if (norm(pattern) === norm(path)) return true;
|
|
50
54
|
|
|
@@ -81,7 +85,7 @@ export function extractStatePath(url: string): string {
|
|
|
81
85
|
if (url.startsWith('/')) return url;
|
|
82
86
|
try {
|
|
83
87
|
const urlObj = new URL(url);
|
|
84
|
-
return urlObj.pathname
|
|
88
|
+
return `${urlObj.pathname}${urlObj.search}${urlObj.hash}`;
|
|
85
89
|
} catch {
|
|
86
90
|
return url;
|
|
87
91
|
}
|
package/src/utils/web-element.ts
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
|
+
import { ELEMENT_EXTRACTION_CONFIG, EXPLORBOT_ATTRS, type ElementExtractionConfig, type RawElementData, extractElementData, getElementDataExtractorSource } from './html.ts';
|
|
1
2
|
import { type XPathMatch, buildClickableXPath, evaluateXPath, isDynamicId, isGenericClass } from './xpath.ts';
|
|
2
3
|
|
|
4
|
+
export { extractElementData } from './html.ts';
|
|
5
|
+
|
|
3
6
|
const KEY_DISPLAY_ATTRS = ['role', 'id', 'class', 'aria-label'];
|
|
4
7
|
const KEY_ATTRS = ['role', 'aria-label', 'id', 'name', 'type', 'href'];
|
|
5
8
|
|
|
6
|
-
type RawElementData = NonNullable<ReturnType<typeof extractElementData>>;
|
|
7
|
-
|
|
8
9
|
export class WebElement {
|
|
9
10
|
tag: string;
|
|
10
11
|
role: string;
|
|
@@ -43,7 +44,7 @@ export class WebElement {
|
|
|
43
44
|
}
|
|
44
45
|
|
|
45
46
|
get eidx(): string | null {
|
|
46
|
-
return this.attrs[
|
|
47
|
+
return this.attrs[EXPLORBOT_ATTRS.eidx] || this.attrs.eidx || null;
|
|
47
48
|
}
|
|
48
49
|
|
|
49
50
|
get isNavigationLink(): boolean {
|
|
@@ -57,6 +58,26 @@ export class WebElement {
|
|
|
57
58
|
return cls.split(/\s+/).filter((c) => c.length > 2 && !isDynamicId(c) && !isGenericClass(c));
|
|
58
59
|
}
|
|
59
60
|
|
|
61
|
+
get areaHints(): string[] {
|
|
62
|
+
const raw = this.attrs[EXPLORBOT_ATTRS.area] || '';
|
|
63
|
+
return raw
|
|
64
|
+
.split('|')
|
|
65
|
+
.map((entry) => entry.trim().toLowerCase())
|
|
66
|
+
.filter(Boolean);
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
get contextLabel(): string {
|
|
70
|
+
return (this.attrs[EXPLORBOT_ATTRS.context] || '').trim();
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
get variantHints(): string[] {
|
|
74
|
+
const raw = this.attrs[EXPLORBOT_ATTRS.variant] || '';
|
|
75
|
+
return raw
|
|
76
|
+
.split('|')
|
|
77
|
+
.map((entry) => entry.trim().toLowerCase())
|
|
78
|
+
.filter(Boolean);
|
|
79
|
+
}
|
|
80
|
+
|
|
60
81
|
static fromRawData(d: RawElementData, role?: string): WebElement {
|
|
61
82
|
return new WebElement({
|
|
62
83
|
tag: d.tag,
|
|
@@ -65,6 +86,7 @@ export class WebElement {
|
|
|
65
86
|
clickXPath: buildClickableXPath({ tag: d.tag, allAttrs: d.allAttrs, text: d.text } as XPathMatch),
|
|
66
87
|
attrs: d.allAttrs,
|
|
67
88
|
text: d.text,
|
|
89
|
+
outerHTML: d.outerHTML,
|
|
68
90
|
x: d.x,
|
|
69
91
|
y: d.y,
|
|
70
92
|
});
|
|
@@ -87,7 +109,7 @@ export class WebElement {
|
|
|
87
109
|
try {
|
|
88
110
|
const count = await locator.count();
|
|
89
111
|
if (count === 0) return null;
|
|
90
|
-
const data = await locator.first().evaluate(extractElementData);
|
|
112
|
+
const data = await locator.first().evaluate(extractElementData, ELEMENT_EXTRACTION_CONFIG);
|
|
91
113
|
if (!data) return null;
|
|
92
114
|
return WebElement.fromRawData(data);
|
|
93
115
|
} catch {
|
|
@@ -96,25 +118,25 @@ export class WebElement {
|
|
|
96
118
|
}
|
|
97
119
|
|
|
98
120
|
static async fromEidx(page: any, eidx: string): Promise<WebElement | null> {
|
|
99
|
-
return WebElement.fromPlaywrightLocator(page.locator(`[
|
|
121
|
+
return WebElement.fromPlaywrightLocator(page.locator(`[${EXPLORBOT_ATTRS.eidx}="${eidx}"]`));
|
|
100
122
|
}
|
|
101
123
|
|
|
102
124
|
static async fromEidxList(page: any, eidxList: string[]): Promise<WebElement[]> {
|
|
103
125
|
if (eidxList.length === 0) return [];
|
|
104
126
|
|
|
105
127
|
const rawList: RawElementData[] = await page.evaluate(
|
|
106
|
-
([list, extractFnStr]: [string[], string]) => {
|
|
128
|
+
([list, extractFnStr, config]: [string[], string, ElementExtractionConfig]) => {
|
|
107
129
|
const extract = new Function(`return ${extractFnStr}`)() as (el: Element) => any;
|
|
108
130
|
const results: any[] = [];
|
|
109
131
|
for (const eidx of list) {
|
|
110
|
-
const el = document.querySelector(`[
|
|
132
|
+
const el = document.querySelector(`[${config.attrs.eidx}="${eidx}"]`);
|
|
111
133
|
if (!el) continue;
|
|
112
|
-
const data = extract(el);
|
|
134
|
+
const data = extract(el, config);
|
|
113
135
|
if (data) results.push(data);
|
|
114
136
|
}
|
|
115
137
|
return results;
|
|
116
138
|
},
|
|
117
|
-
[eidxList,
|
|
139
|
+
[eidxList, getElementDataExtractorSource(), ELEMENT_EXTRACTION_CONFIG] as [string[], string, ElementExtractionConfig]
|
|
118
140
|
);
|
|
119
141
|
|
|
120
142
|
return rawList.map((d) => WebElement.fromRawData(d));
|
|
@@ -126,22 +148,3 @@ export class WebElement {
|
|
|
126
148
|
return { totalFound: result.totalFound, elements: result.matches.map((m) => WebElement.fromXPathMatch(m)) };
|
|
127
149
|
}
|
|
128
150
|
}
|
|
129
|
-
|
|
130
|
-
export function extractElementData(el: Element) {
|
|
131
|
-
const rect = el.getBoundingClientRect();
|
|
132
|
-
if (rect.width === 0 && rect.height === 0) return null;
|
|
133
|
-
|
|
134
|
-
const allAttrs: Record<string, string> = {};
|
|
135
|
-
for (let i = 0; i < el.attributes.length; i++) {
|
|
136
|
-
const attr = el.attributes[i];
|
|
137
|
-
allAttrs[attr.name] = attr.value;
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
return {
|
|
141
|
-
tag: el.tagName.toLowerCase(),
|
|
142
|
-
text: (el.textContent || '').trim().slice(0, 80),
|
|
143
|
-
allAttrs,
|
|
144
|
-
x: Math.round(rect.x + rect.width / 2),
|
|
145
|
-
y: Math.round(rect.y + rect.height / 2),
|
|
146
|
-
};
|
|
147
|
-
}
|
package/src/utils/xpath.ts
CHANGED
|
@@ -48,7 +48,7 @@ function getAbsoluteXPath(el: Element): string {
|
|
|
48
48
|
}
|
|
49
49
|
|
|
50
50
|
export const isDynamicId = (id: string) => /^(ember|react|__next)\d|^\d+$/.test(id);
|
|
51
|
-
export const isGenericClass = (cls: string) => /^ember-view$|^ember\d|^react-|^__next/.test(cls);
|
|
51
|
+
export const isGenericClass = (cls: string) => /^ember-view$|^ember\d|^ember-|^react-|^__next/.test(cls);
|
|
52
52
|
|
|
53
53
|
export function buildClickableXPath(el: XPathMatch): string {
|
|
54
54
|
const a = el.allAttrs;
|