explorbot 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/rules/planner/styles/curious.md +18 -5
- package/dist/rules/planner/styles/normal.md +4 -4
- package/dist/rules/planner/styles/psycho.md +14 -11
- package/dist/src/ai/captain/web-mode.js +9 -1
- package/dist/src/ai/historian.js +6 -0
- package/dist/src/ai/pilot.js +23 -2
- package/dist/src/ai/researcher/deep-analysis.js +65 -9
- package/dist/src/ai/researcher/sections.js +103 -0
- package/dist/src/ai/researcher.js +9 -46
- package/dist/src/ai/rules.js +9 -1
- package/dist/src/ai/task-agent.js +7 -27
- package/dist/src/ai/tester.js +41 -5
- package/dist/src/ai/tools.js +27 -2
- package/dist/src/commands/explore-command.js +4 -9
- package/dist/src/experience-tracker.js +126 -1
- package/dist/src/explorbot.js +9 -2
- package/dist/src/utils/aria.js +39 -2
- package/package.json +1 -1
- package/rules/planner/styles/curious.md +18 -5
- package/rules/planner/styles/normal.md +4 -4
- package/rules/planner/styles/psycho.md +14 -11
- package/src/ai/captain/web-mode.ts +9 -1
- package/src/ai/historian.ts +7 -0
- package/src/ai/pilot.ts +23 -3
- package/src/ai/researcher/deep-analysis.ts +74 -9
- package/src/ai/researcher/sections.ts +122 -0
- package/src/ai/researcher.ts +9 -47
- package/src/ai/rules.ts +9 -1
- package/src/ai/task-agent.ts +7 -31
- package/src/ai/tester.ts +44 -6
- package/src/ai/tools.ts +33 -1
- package/src/commands/explore-command.ts +4 -9
- package/src/config.ts +1 -0
- package/src/experience-tracker.ts +136 -1
- package/src/explorbot.ts +9 -2
- package/src/utils/aria.ts +40 -2
package/dist/src/ai/tester.js
CHANGED
|
@@ -8,11 +8,11 @@ import { setActivity } from "../activity.js";
|
|
|
8
8
|
import { ConfigParser } from "../config.js";
|
|
9
9
|
import { Stats } from "../stats.js";
|
|
10
10
|
import { TestResult } from "../test-plan.js";
|
|
11
|
-
import { extractFocusedElement } from "../utils/aria.js";
|
|
11
|
+
import { detectFocusArea, extractFocusedElement } from "../utils/aria.js";
|
|
12
12
|
import { HooksRunner } from "../utils/hooks-runner.js";
|
|
13
13
|
import { createDebug, tag } from "../utils/logger.js";
|
|
14
14
|
import { loop } from "../utils/loop.js";
|
|
15
|
-
import { actionRule, focusedElementRule, locatorRule, multipleTabsRule, sectionContextRule } from "./rules.js";
|
|
15
|
+
import { actionRule, focusedElementRule, locatorRule, multipleTabsRule, protectionRule, sectionContextRule } from "./rules.js";
|
|
16
16
|
import { TaskAgent } from "./task-agent.js";
|
|
17
17
|
import { createCodeceptJSTools, createSpecialContextTools } from "./tools.js";
|
|
18
18
|
const debugLog = createDebug('explorbot:tester');
|
|
@@ -44,6 +44,8 @@ export class Tester extends TaskAgent {
|
|
|
44
44
|
executionLogFile = null;
|
|
45
45
|
previousUrl = null;
|
|
46
46
|
previousStateHash = null;
|
|
47
|
+
pageStateHash = null;
|
|
48
|
+
pageActionResult = null;
|
|
47
49
|
hooksRunner;
|
|
48
50
|
constructor(explorer, provider, researcher, navigator, agentTools) {
|
|
49
51
|
super();
|
|
@@ -90,9 +92,27 @@ export class Tester extends TaskAgent {
|
|
|
90
92
|
setActivity(`🧪 Testing: ${task.scenario}`, 'action');
|
|
91
93
|
this.previousUrl = null;
|
|
92
94
|
this.previousStateHash = null;
|
|
95
|
+
this.pageStateHash = null;
|
|
96
|
+
this.pageActionResult = null;
|
|
93
97
|
this.explorer.getStateManager().clearHistory();
|
|
94
98
|
this.resetFailureCount();
|
|
95
99
|
this.pilot?.reset();
|
|
100
|
+
const requestStore = this.explorer.getRequestStore();
|
|
101
|
+
requestStore?.clear();
|
|
102
|
+
const offFailedRequest = requestStore?.onFailedRequest((r) => {
|
|
103
|
+
task.addNote(`Network error: ${r.method} ${r.path} → ${r.status}`, TestResult.FAILED);
|
|
104
|
+
});
|
|
105
|
+
const page = this.explorer.playwrightHelper?.page;
|
|
106
|
+
const onPageError = (err) => {
|
|
107
|
+
task.addNote(`Console error: ${err.message}`, TestResult.FAILED);
|
|
108
|
+
};
|
|
109
|
+
const onConsoleMessage = (msg) => {
|
|
110
|
+
if (msg.type() !== 'error')
|
|
111
|
+
return;
|
|
112
|
+
task.addNote(`Console error: ${msg.text()}`, TestResult.FAILED);
|
|
113
|
+
};
|
|
114
|
+
page?.on('pageerror', onPageError);
|
|
115
|
+
page?.on('console', onConsoleMessage);
|
|
96
116
|
const initialState = ActionResult.fromState(state);
|
|
97
117
|
const conversation = this.provider.startConversation(this.getSystemMessage(), 'tester');
|
|
98
118
|
this.currentConversation = conversation;
|
|
@@ -291,6 +311,9 @@ export class Tester extends TaskAgent {
|
|
|
291
311
|
}
|
|
292
312
|
await this.getQuartermaster().analyzeSession(task, initialState, conversation);
|
|
293
313
|
offStateChange();
|
|
314
|
+
offFailedRequest?.();
|
|
315
|
+
page?.off('pageerror', onPageError);
|
|
316
|
+
page?.off('console', onConsoleMessage);
|
|
294
317
|
await this.finishTest(task);
|
|
295
318
|
await this.explorer.stopTest(task, {
|
|
296
319
|
startUrl: task.startUrl,
|
|
@@ -369,7 +392,8 @@ export class Tester extends TaskAgent {
|
|
|
369
392
|
}
|
|
370
393
|
if (isNewUrl) {
|
|
371
394
|
const research = await this.researcher.research(currentState);
|
|
372
|
-
|
|
395
|
+
this.pageStateHash = currentStateHash;
|
|
396
|
+
this.pageActionResult = currentState;
|
|
373
397
|
let uiMapSection = '';
|
|
374
398
|
if (research) {
|
|
375
399
|
uiMapSection = dedent `
|
|
@@ -394,8 +418,6 @@ export class Tester extends TaskAgent {
|
|
|
394
418
|
</page_aria>
|
|
395
419
|
${uiMapSection}
|
|
396
420
|
|
|
397
|
-
${experience}
|
|
398
|
-
|
|
399
421
|
Use <page_ui_map> to understand the page structure and its main elements.
|
|
400
422
|
However, <page_ui_map> is not always up to date, use <page_aria> and <page_html> to understand the ACTUAL state of the page
|
|
401
423
|
Do not interact with elements that are not listed in <page_aria> and <page_html>
|
|
@@ -403,6 +425,18 @@ export class Tester extends TaskAgent {
|
|
|
403
425
|
`;
|
|
404
426
|
return context;
|
|
405
427
|
}
|
|
428
|
+
const focusArea = detectFocusArea(currentState.ariaSnapshot);
|
|
429
|
+
if (focusArea.detected && focusArea.name && this.pageStateHash && this.pageActionResult) {
|
|
430
|
+
const overlaySection = await this.researcher.researchOverlay(currentState, this.pageActionResult, this.pageStateHash);
|
|
431
|
+
if (overlaySection) {
|
|
432
|
+
context += dedent `
|
|
433
|
+
|
|
434
|
+
<page_ui_map_overlay>
|
|
435
|
+
${overlaySection}
|
|
436
|
+
</page_ui_map_overlay>
|
|
437
|
+
`;
|
|
438
|
+
}
|
|
439
|
+
}
|
|
406
440
|
// if (isStateChanged) {
|
|
407
441
|
// const combinedHtml = await currentState.combinedHtml();
|
|
408
442
|
// context += dedent`
|
|
@@ -591,6 +625,8 @@ export class Tester extends TaskAgent {
|
|
|
591
625
|
When creating or editing items via form() or type() you should include ${task.sessionName} in the value (if it is not restricted by the application logic)
|
|
592
626
|
Initial page URL: ${actionResult.url}
|
|
593
627
|
|
|
628
|
+
${protectionRule}
|
|
629
|
+
|
|
594
630
|
${this.buildDeletionScope(task)}
|
|
595
631
|
|
|
596
632
|
${this.buildAvailableFiles()}
|
package/dist/src/ai/tools.js
CHANGED
|
@@ -389,9 +389,9 @@ export function createSpecialContextTools(explorer, context) {
|
|
|
389
389
|
}),
|
|
390
390
|
};
|
|
391
391
|
}
|
|
392
|
-
export function createAgentTools({ explorer, researcher, navigator, }) {
|
|
392
|
+
export function createAgentTools({ explorer, researcher, navigator, experienceTracker, getState, }) {
|
|
393
393
|
let visionDisabled = false;
|
|
394
|
-
|
|
394
|
+
const tools = {
|
|
395
395
|
see: tool({
|
|
396
396
|
description: dedent `
|
|
397
397
|
Check the page contents based on current page state and screenshot.
|
|
@@ -830,6 +830,31 @@ export function createAgentTools({ explorer, researcher, navigator, }) {
|
|
|
830
830
|
},
|
|
831
831
|
}),
|
|
832
832
|
};
|
|
833
|
+
if (experienceTracker && getState) {
|
|
834
|
+
tools.learn_experience = tool({
|
|
835
|
+
description: dedent `
|
|
836
|
+
Read the full body of a specific experience section listed in <experience>.
|
|
837
|
+
The TOC shows entries like "A.1 ## Successful Flow: ...". Pass the fileTag and sectionIndex.
|
|
838
|
+
Only call when a TOC entry looks directly relevant to the current step.
|
|
839
|
+
`,
|
|
840
|
+
inputSchema: z.object({
|
|
841
|
+
fileTag: z.string().describe('File tag from the TOC, e.g. "A", "B", "AA"'),
|
|
842
|
+
sectionIndex: z.number().int().positive().describe('1-based section index within that file'),
|
|
843
|
+
}),
|
|
844
|
+
execute: async ({ fileTag, sectionIndex }) => {
|
|
845
|
+
const state = getState();
|
|
846
|
+
if (!state) {
|
|
847
|
+
return { error: 'No current page state available.' };
|
|
848
|
+
}
|
|
849
|
+
const section = experienceTracker.getExperienceSection(fileTag, sectionIndex, state);
|
|
850
|
+
if (!section) {
|
|
851
|
+
return { error: 'Section not found. Experience may have been updated; re-read the latest TOC.' };
|
|
852
|
+
}
|
|
853
|
+
return section;
|
|
854
|
+
},
|
|
855
|
+
});
|
|
856
|
+
}
|
|
857
|
+
return tools;
|
|
833
858
|
}
|
|
834
859
|
const PAGE_DIFF_SUGGESTION = 'Analyze page diff. htmlParts shows what changed and WHERE — each part has a container selector. Use the container as context when clicking elements from the diff.';
|
|
835
860
|
function transformContainsCommand(command) {
|
|
package/dist/src/commands/explore-command.js
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
import { existsSync, readdirSync } from 'node:fs';
|
|
2
1
|
import figureSet from 'figures';
|
|
3
2
|
import path from 'node:path';
|
|
4
3
|
import { getStyles } from '../ai/planner/styles.js';
|
|
5
|
-
import { ConfigParser } from "../config.js";
|
|
6
4
|
import { getCliName } from "../utils/cli-name.js";
|
|
7
5
|
import { jsonToTable } from '../utils/markdown-parser.js';
|
|
8
6
|
import { tag } from '../utils/logger.js';
|
|
@@ -110,14 +108,11 @@ export class ExploreCommand extends BaseCommand {
|
|
|
110
108
|
}
|
|
111
109
|
}
|
|
112
110
|
printRerunSuggestions() {
|
|
113
|
-
const
|
|
114
|
-
if (
|
|
111
|
+
const savedFiles = this.explorBot.agentHistorian().getSavedFiles();
|
|
112
|
+
if (savedFiles.length === 0)
|
|
115
113
|
return;
|
|
116
|
-
const
|
|
117
|
-
|
|
118
|
-
return;
|
|
119
|
-
for (const file of testFiles) {
|
|
120
|
-
tag('info').log(`Generated: ${file}`);
|
|
114
|
+
for (const filePath of savedFiles) {
|
|
115
|
+
tag('info').log(`Generated: ${path.basename(filePath)}`);
|
|
121
116
|
}
|
|
122
117
|
tag('info').log(`List tests: ${getCliName()} runs`);
|
|
123
118
|
tag('info').log(`Re-run with healing: ${getCliName()} rerun <filename> [index]`);
|
|
package/dist/src/experience-tracker.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs';
|
|
2
|
-
import { dirname, join } from 'node:path';
|
|
2
|
+
import { basename, dirname, join } from 'node:path';
|
|
3
3
|
import matter from 'gray-matter';
|
|
4
|
+
import { marked } from 'marked';
|
|
4
5
|
import { ConfigParser } from './config.js';
|
|
5
6
|
import { KnowledgeTracker } from './knowledge-tracker.js';
|
|
6
7
|
import { createDebug, tag } from './utils/logger.js';
|
|
@@ -290,4 +291,128 @@ ${filteredCode}
|
|
|
290
291
|
}
|
|
291
292
|
return results;
|
|
292
293
|
}
|
|
294
|
+
getExperienceTableOfContents(state, options) {
|
|
295
|
+
const records = this.getRelevantExperience(state, options);
|
|
296
|
+
if (records.length === 0)
|
|
297
|
+
return [];
|
|
298
|
+
const sorted = [...records].sort((a, b) => {
|
|
299
|
+
const aHash = basename(a.filePath, '.md');
|
|
300
|
+
const bHash = basename(b.filePath, '.md');
|
|
301
|
+
return aHash.localeCompare(bHash);
|
|
302
|
+
});
|
|
303
|
+
const toc = [];
|
|
304
|
+
for (let i = 0; i < sorted.length; i++) {
|
|
305
|
+
const record = sorted[i];
|
|
306
|
+
const fileHash = basename(record.filePath, '.md');
|
|
307
|
+
const url = record.data?.url || '';
|
|
308
|
+
const sections = listTocHeadings(record.content);
|
|
309
|
+
if (sections.length === 0)
|
|
310
|
+
continue;
|
|
311
|
+
toc.push({
|
|
312
|
+
fileTag: indexToLetters(i),
|
|
313
|
+
fileHash,
|
|
314
|
+
url,
|
|
315
|
+
sections,
|
|
316
|
+
});
|
|
317
|
+
}
|
|
318
|
+
return toc;
|
|
319
|
+
}
|
|
320
|
+
getExperienceSection(fileTag, sectionIndex, state, options) {
|
|
321
|
+
const toc = this.getExperienceTableOfContents(state, options);
|
|
322
|
+
const entry = toc.find((e) => e.fileTag === fileTag);
|
|
323
|
+
if (!entry)
|
|
324
|
+
return null;
|
|
325
|
+
const filePath = this.findExperienceFileByHash(entry.fileHash);
|
|
326
|
+
if (!filePath)
|
|
327
|
+
return null;
|
|
328
|
+
const { content } = this.readExperienceFile(entry.fileHash);
|
|
329
|
+
const extracted = extractHeadingSection(content, sectionIndex);
|
|
330
|
+
if (!extracted)
|
|
331
|
+
return null;
|
|
332
|
+
return { title: extracted.title, url: entry.url, content: extracted.body };
|
|
333
|
+
}
|
|
334
|
+
findExperienceFileByHash(fileHash) {
|
|
335
|
+
for (const dir of this.getExperienceDirectories()) {
|
|
336
|
+
const candidate = join(dir, `${fileHash}.md`);
|
|
337
|
+
if (existsSync(candidate))
|
|
338
|
+
return candidate;
|
|
339
|
+
}
|
|
340
|
+
return null;
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
function listTocHeadings(content) {
|
|
344
|
+
const tokens = marked.lexer(content);
|
|
345
|
+
const result = [];
|
|
346
|
+
let index = 0;
|
|
347
|
+
for (const token of tokens) {
|
|
348
|
+
if (token.type !== 'heading')
|
|
349
|
+
continue;
|
|
350
|
+
const heading = token;
|
|
351
|
+
if (heading.depth !== 2 && heading.depth !== 3)
|
|
352
|
+
continue;
|
|
353
|
+
index++;
|
|
354
|
+
result.push({ index, level: heading.depth, title: heading.text });
|
|
355
|
+
}
|
|
356
|
+
return result;
|
|
357
|
+
}
|
|
358
|
+
function extractHeadingSection(content, sectionIndex) {
|
|
359
|
+
const tokens = marked.lexer(content);
|
|
360
|
+
const matching = [];
|
|
361
|
+
for (let i = 0; i < tokens.length; i++) {
|
|
362
|
+
const token = tokens[i];
|
|
363
|
+
if (token.type !== 'heading')
|
|
364
|
+
continue;
|
|
365
|
+
const heading = token;
|
|
366
|
+
if (heading.depth !== 2 && heading.depth !== 3)
|
|
367
|
+
continue;
|
|
368
|
+
matching.push({ tokenIdx: i, depth: heading.depth, text: heading.text });
|
|
369
|
+
}
|
|
370
|
+
if (sectionIndex < 1 || sectionIndex > matching.length)
|
|
371
|
+
return null;
|
|
372
|
+
const target = matching[sectionIndex - 1];
|
|
373
|
+
let endTokenIdx = tokens.length;
|
|
374
|
+
for (let j = target.tokenIdx + 1; j < tokens.length; j++) {
|
|
375
|
+
const token = tokens[j];
|
|
376
|
+
if (token.type !== 'heading')
|
|
377
|
+
continue;
|
|
378
|
+
if (token.depth <= target.depth) {
|
|
379
|
+
endTokenIdx = j;
|
|
380
|
+
break;
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
const body = tokens
|
|
384
|
+
.slice(target.tokenIdx, endTokenIdx)
|
|
385
|
+
.map((t) => t.raw || '')
|
|
386
|
+
.join('');
|
|
387
|
+
return { title: target.text, body };
|
|
388
|
+
}
|
|
389
|
+
function indexToLetters(index) {
|
|
390
|
+
let n = index;
|
|
391
|
+
let result = '';
|
|
392
|
+
while (true) {
|
|
393
|
+
result = String.fromCharCode(65 + (n % 26)) + result;
|
|
394
|
+
n = Math.floor(n / 26);
|
|
395
|
+
if (n === 0)
|
|
396
|
+
break;
|
|
397
|
+
n -= 1;
|
|
398
|
+
}
|
|
399
|
+
return result;
|
|
400
|
+
}
|
|
401
|
+
export function renderExperienceToc(toc) {
|
|
402
|
+
if (toc.length === 0)
|
|
403
|
+
return '';
|
|
404
|
+
const lines = [];
|
|
405
|
+
lines.push('<experience>');
|
|
406
|
+
lines.push('Past experience for this page. Call learn_experience({ fileTag, sectionIndex }) to read a section.');
|
|
407
|
+
lines.push('');
|
|
408
|
+
for (const entry of toc) {
|
|
409
|
+
lines.push(`File ${entry.fileTag} ${entry.url}:`);
|
|
410
|
+
for (const section of entry.sections) {
|
|
411
|
+
const prefix = '#'.repeat(section.level);
|
|
412
|
+
lines.push(` ${entry.fileTag}.${section.index} ${prefix} ${section.title}`);
|
|
413
|
+
}
|
|
414
|
+
lines.push('');
|
|
415
|
+
}
|
|
416
|
+
lines.push('</experience>');
|
|
417
|
+
return lines.join('\n');
|
|
293
418
|
}
|
package/dist/src/explorbot.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { existsSync, mkdirSync } from 'node:fs';
|
|
2
2
|
import path from 'node:path';
|
|
3
|
+
import { ActionResult } from "./action-result.js";
|
|
3
4
|
import { ApiClient } from "./api/api-client.js";
|
|
4
5
|
import { RequestStore } from "./api/request-store.js";
|
|
5
6
|
import { loadSpec } from "./api/spec-reader.js";
|
|
@@ -144,8 +145,14 @@ export class ExplorBot {
|
|
|
144
145
|
return (this.agents.pilot ||= this.createAgent(({ ai, explorer }) => {
|
|
145
146
|
const researcher = this.agentResearcher();
|
|
146
147
|
const navigator = this.agentNavigator();
|
|
147
|
-
const
|
|
148
|
-
|
|
148
|
+
const stateManager = explorer.getStateManager();
|
|
149
|
+
const experienceTracker = stateManager.getExperienceTracker();
|
|
150
|
+
const getState = () => {
|
|
151
|
+
const state = stateManager.getCurrentState();
|
|
152
|
+
return state ? ActionResult.fromState(state) : null;
|
|
153
|
+
};
|
|
154
|
+
const tools = createAgentTools({ explorer, researcher, navigator, experienceTracker, getState });
|
|
155
|
+
return new Pilot(ai, tools, researcher, explorer, experienceTracker);
|
|
149
156
|
}));
|
|
150
157
|
}
|
|
151
158
|
agentTester() {
|
package/dist/src/utils/aria.js
CHANGED
|
@@ -333,8 +333,40 @@ const parseAriaSnapshot = (snapshot, keepNamed = false) => {
|
|
|
333
333
|
}
|
|
334
334
|
return pruneNodes(roots, keepNamed);
|
|
335
335
|
};
|
|
336
|
+
const CLOSE_OVERLAY_BUTTON_RE = /^close\s+(modal|dialog|popup|drawer|panel|sheet)\b/i;
|
|
337
|
+
const findOverlayByCloseButton = (nodeList) => {
|
|
338
|
+
const closeIdx = nodeList.findIndex((n) => n.role === 'button' && CLOSE_OVERLAY_BUTTON_RE.test(n.name || ''));
|
|
339
|
+
if (closeIdx !== -1) {
|
|
340
|
+
let heading;
|
|
341
|
+
for (let i = closeIdx - 1; i >= 0; i--) {
|
|
342
|
+
if (nodeList[i].role === 'heading' && nodeList[i].name) {
|
|
343
|
+
heading = nodeList[i];
|
|
344
|
+
break;
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
if (!heading) {
|
|
348
|
+
for (let i = closeIdx + 1; i < nodeList.length; i++) {
|
|
349
|
+
if (nodeList[i].role === 'heading' && nodeList[i].name) {
|
|
350
|
+
heading = nodeList[i];
|
|
351
|
+
break;
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
return {
|
|
356
|
+
detected: true,
|
|
357
|
+
type: 'dialog',
|
|
358
|
+
name: heading?.name || null,
|
|
359
|
+
};
|
|
360
|
+
}
|
|
361
|
+
for (const node of nodeList) {
|
|
362
|
+
const inner = findOverlayByCloseButton(node.children);
|
|
363
|
+
if (inner)
|
|
364
|
+
return inner;
|
|
365
|
+
}
|
|
366
|
+
return null;
|
|
367
|
+
};
|
|
336
368
|
export const detectFocusArea = (snapshot) => {
|
|
337
|
-
const nodes = parseAriaSnapshot(snapshot);
|
|
369
|
+
const nodes = parseAriaSnapshot(snapshot, true);
|
|
338
370
|
const findFocusArea = (nodeList) => {
|
|
339
371
|
for (const node of nodeList) {
|
|
340
372
|
if (node.role === 'dialog' || node.role === 'alertdialog') {
|
|
@@ -359,7 +391,12 @@ export const detectFocusArea = (snapshot) => {
|
|
|
359
391
|
return null;
|
|
360
392
|
};
|
|
361
393
|
const result = findFocusArea(nodes);
|
|
362
|
-
|
|
394
|
+
if (result)
|
|
395
|
+
return result;
|
|
396
|
+
const fallback = findOverlayByCloseButton(nodes);
|
|
397
|
+
if (fallback && fallback.name)
|
|
398
|
+
return fallback;
|
|
399
|
+
return { detected: false, type: null, name: null };
|
|
363
400
|
};
|
|
364
401
|
export const collectInteractiveNodes = (snapshot) => {
|
|
365
402
|
const nodes = parseAriaSnapshot(snapshot);
|
package/package.json
CHANGED
|
package/rules/planner/styles/curious.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
Detect new valid paths that previous tests missed. Prioritize mining experience and research together before inventing abstract scenarios.
|
|
2
2
|
|
|
3
|
+
Rank every scenario you build by the **strength of its outcome**, from strongest to weakest:
|
|
4
|
+
1. **Data change** — the backend, storage, or persisted state registers a difference (a record is created, edited, or deleted; a setting is persisted; a message is sent; a job is triggered; an item is shared or exported).
|
|
5
|
+
2. **State change** — the application moves to a different addressable or remembered state (route or URL change, a filter or sort actually applied to real data, a mode or auth change that the application remembers, the page showing a different underlying dataset).
|
|
6
|
+
3. **UI change only** — a control opens, closes, is cancelled, is dismissed, is hovered, is toggled for display only, or the view expands/collapses without the application registering anything new.
|
|
7
|
+
|
|
8
|
+
Prefer scenarios whose ending falls into category 1. Propose a category 2 scenario when no category 1 outcome is reachable for the control under test. Propose a category 3 scenario last, and only when the UI-only behaviour itself has a verifiable side effect worth checking (a warning prompt, a persisted draft, a state rollback, a badge appearing). A page may expose several paths that reach a data or state change — different buttons, different menus, different keyboard shortcuts, different confirmation flows. Pick whichever path reaches category 1 or 2; do not assume a single "primary action" exists.
|
|
9
|
+
|
|
3
10
|
When <previously_tested_flows> is present, treat it as the ground truth for what already worked:
|
|
4
11
|
- List items under Successful Flow describe the path that was executed
|
|
5
12
|
- Lines in blockquotes (lines starting with >) are discoveries: extra fields, side panels, conditional UI, inputs called out during that run
|
|
@@ -11,7 +18,7 @@ When <previously_tested_flows> is NOT present, use <tested_scenarios> as the gro
|
|
|
11
18
|
Read the step lines for each test to understand which controls were actually interacted with.
|
|
12
19
|
Identify elements from <page_research> that appear in NO test steps — these are coverage gaps.
|
|
13
20
|
|
|
14
|
-
Cross-read with <page_research>: for each
|
|
21
|
+
Cross-read with <page_research>: for each section and Extended Research subsection, compare against those flows. Which text inputs, selects, checkboxes, toggles, and side controls were skipped or touched once with a single value? Prefer filling those gaps over repeating the same path.
|
|
15
22
|
|
|
16
23
|
The Type column in <page_research> tables shows the ARIA role of each element.
|
|
17
24
|
Cross-reference these types with the steps listed in <tested_scenarios> or <previously_tested_flows>:
|
|
@@ -24,16 +31,22 @@ Coverage gaps to look for:
|
|
|
24
31
|
- Action buttons that were never clicked as part of a complete workflow
|
|
25
32
|
- Dependent UI: controls that appear or change based on another control's value
|
|
26
33
|
|
|
27
|
-
|
|
34
|
+
A coverage gap for an untested control is only **closed** when the scenario built around it reaches a data change or state change. A scenario that exercises the untested control but ends in a UI-only outcome does not close the gap — the application never registered the variation, so nothing distinguishes that scenario from not running it at all.
|
|
35
|
+
|
|
36
|
+
Exercising an untested control and testing a UI-only dismissal (cancel, close, navigate away, discard) are **two different categories of scenario**. Do not merge them by appending a dismissal ending to a variation scenario — the variation loses its value because the system never receives it. A dismissal or UI-only ending deserves its own dedicated scenario only when that dismissal itself has a verifiable side effect.
|
|
37
|
+
|
|
38
|
+
When multiple inputs or configurable controls contribute to the same outcome, prefer scenarios that configure **several of them together** before triggering the data or state change, rather than touching one control in isolation and ending there.
|
|
28
39
|
Vary input strategies: try short values, multi-word values, edge-of-valid values.
|
|
29
|
-
When
|
|
30
|
-
|
|
40
|
+
When sections, tabs, or conditional panels exist, exercise each section.
|
|
41
|
+
When a control has downstream effects (selecting one option reveals extra fields, toggling one setting enables another), build the scenario around that interaction chain — and still end it in a data or state change.
|
|
31
42
|
|
|
32
43
|
Combinatorial coverage (valid data only):
|
|
33
44
|
- For each select or equivalent, ensure each option is exercised in at least one scenario, or one scenario whose steps walk through distinct options in sequence if that fits the task constraints better
|
|
34
45
|
- Exercise each checkbox or binary control in both states when behavior can differ
|
|
35
46
|
- Combine checkboxes and related toggles in small sets (pairs or triples) when they plausibly change validation, visible sections, or outcomes — avoid exploding into huge Cartesian products
|
|
36
47
|
|
|
37
|
-
|
|
48
|
+
Each proposed combination must be exercised in a scenario that reaches a data change or state change. Combinations that only change the UI and never reach a registerable outcome do not count as coverage — the system never distinguishes them from each other.
|
|
49
|
+
|
|
50
|
+
When the page is not heavy on inputs, still pursue: unvisited state transitions, follow-ups after data-changing operations (share, export, duplicate, re-open), alternative paths to the same data change, preconditions that unlock new data-changing actions, and visible controls never clicked. Again, prioritise scenarios whose ending falls into category 1 or 2.
|
|
38
51
|
|
|
39
52
|
Skip the Menu/Navigation section — we are testing THIS page.
|
|
package/rules/planner/styles/normal.md
CHANGED
|
@@ -2,18 +2,18 @@ Study the page and figure out its business purpose. What is this page FOR? What
|
|
|
2
2
|
|
|
3
3
|
Based on the page type, propose tests for COMPLETE user workflows:
|
|
4
4
|
- If this is a data page (lists, tables): test CRUD operations end-to-end (create item → verify in list, edit item → verify changes saved, delete item → verify removed)
|
|
5
|
-
- If
|
|
5
|
+
- If the page has inputs to fill in: test the full commit flow, not just that the controls render
|
|
6
6
|
- If this has filters and search: test filtering AND verify results change, not just "filter tab clicked"
|
|
7
7
|
- If this has modals/dropdowns: test the ACTION inside them, not just opening/closing them
|
|
8
8
|
|
|
9
|
-
Each test should end
|
|
9
|
+
Each test should end in a **data change** (record created/edited/deleted, setting persisted, message sent) or a **state change** (route change, filter applied to real data, mode change the app remembers). Tests ending in UI-only outcomes (open, close, hover, expand) are the weakest and should be rare.
|
|
10
10
|
|
|
11
11
|
IMPORTANT: Distribute tests across DIFFERENT feature areas from the research.
|
|
12
12
|
Do not propose more than 2 tests for the same feature area.
|
|
13
13
|
Every Extended Research section (modal, dropdown, panel) with actionable features deserves at least one test.
|
|
14
|
-
Tests that change application data MUST come first — create, update, delete records before testing filters, search, or pagination.
|
|
14
|
+
Tests that change application data MUST come first — create, update, delete records before testing filters, search, or pagination.
|
|
15
15
|
If the research shows multiple ways to create or modify data (different types, forms, or options), propose a separate test for each.
|
|
16
|
-
|
|
16
|
+
UI-only tests (tab switching, pagination, view toggles) should be proposed only after data-changing and state-changing interactions are covered.
|
|
17
17
|
|
|
18
18
|
Skip the Menu/Navigation section — we are testing THIS page.
|
|
19
19
|
|
|
package/rules/planner/styles/psycho.md
CHANGED
|
@@ -1,14 +1,17 @@
|
|
|
1
|
-
Stress-test the page by
|
|
1
|
+
Stress-test the page by feeding invalid, empty, or extreme values to its controls and committing.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
- Empty states: submit forms with no data, clear required fields, remove default values
|
|
5
|
-
- Long values: paste 10000 characters into inputs, use extremely long names and descriptions
|
|
6
|
-
- Boundary values: zero, negative numbers, special characters, unicode, HTML tags in text fields
|
|
7
|
-
- Invalid formats: wrong email formats, letters in number fields, SQL injection strings, script tags
|
|
8
|
-
- Invalid combinations: select incompatible options, mix conflicting settings
|
|
9
|
-
- Combining states: apply multiple filters at once, use conflicting form values together
|
|
10
|
-
- Out-of-range values: dates in the past/future, quantities beyond limits, prices with too many decimals
|
|
3
|
+
**Match attack breadth to controls reachable.** If only ONE control is reachable, attack it alone. If several are reachable, attack **all of them in the same scenario** — each with a different strange value. Never stress one while leaving the rest untouched: attacking one-at-a-time hides interaction bugs and wastes plan budget.
|
|
11
4
|
|
|
12
|
-
|
|
5
|
+
Do not produce multiple scenarios that each isolate one control of the same section. Fold those attacks into fewer scenarios that push every reachable control strangely at once. Vary the **mix** between scenarios — which control receives SQL, which receives 10000 chars, which receives unicode, which receives a conflicting combination — not the single control under attack.
|
|
13
6
|
|
|
14
|
-
|
|
7
|
+
**Attack categories** (combine across controls, not one-per-scenario):
|
|
8
|
+
empty • very long (10000+ chars) • boundary (zero, negative, unicode, HTML, special chars) • invalid formats (malformed email/url/number, SQL, script tags) • invalid combinations (mutually exclusive toggles together, conflicting modes) • out-of-range (far dates, quantities beyond limits, excess decimals) • dependent-UI stress (flip a control that reveals more, attack those too).
|
|
9
|
+
|
|
10
|
+
**Prefer scenarios that:**
|
|
11
|
+
- Push every reachable control to a different bad-data category, then commit
|
|
12
|
+
- Trigger a conditional section, attack revealed and base controls together, then commit
|
|
13
|
+
- Combine mutually exclusive control states with invalid values, then commit
|
|
14
|
+
|
|
15
|
+
End each scenario with the state **committed** (saved, applied, sent, triggered). A scenario that enters bad data then cancels or navigates away reveals nothing — the application never received the payload.
|
|
16
|
+
|
|
17
|
+
Skip the Menu/Navigation section — we are testing THIS page.
|
|
package/src/ai/captain/web-mode.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { tool } from 'ai';
|
|
2
2
|
import dedent from 'dedent';
|
|
3
3
|
import { z } from 'zod';
|
|
4
|
+
import { ActionResult } from '../../action-result.ts';
|
|
4
5
|
import { actionRule, locatorRule, sectionContextRule } from '../rules.ts';
|
|
5
6
|
import { createAgentTools, createCodeceptJSTools } from '../tools.ts';
|
|
6
7
|
import { type Constructor, type ModeContext, debugLog } from './mixin.ts';
|
|
@@ -9,13 +10,19 @@ export function WithWebMode<T extends Constructor>(Base: T) {
|
|
|
9
10
|
return class extends Base {
|
|
10
11
|
webModeTools(ctx: ModeContext): Record<string, any> {
|
|
11
12
|
const explorer = ctx.explorBot.getExplorer();
|
|
13
|
+
const stateManager = explorer.getStateManager();
|
|
12
14
|
const codeceptTools = createCodeceptJSTools(explorer, ctx.task);
|
|
13
15
|
const agentTools = createAgentTools({
|
|
14
16
|
explorer,
|
|
15
17
|
researcher: ctx.explorBot.agentResearcher(),
|
|
16
18
|
navigator: ctx.explorBot.agentNavigator(),
|
|
19
|
+
experienceTracker: stateManager.getExperienceTracker(),
|
|
20
|
+
getState: () => {
|
|
21
|
+
const state = stateManager.getCurrentState();
|
|
22
|
+
return state ? ActionResult.fromState(state) : null;
|
|
23
|
+
},
|
|
17
24
|
});
|
|
18
|
-
const { see, context, visualClick } = agentTools;
|
|
25
|
+
const { see, context, visualClick, learn_experience } = agentTools;
|
|
19
26
|
|
|
20
27
|
return {
|
|
21
28
|
navigate: tool({
|
|
@@ -96,6 +103,7 @@ export function WithWebMode<T extends Constructor>(Base: T) {
|
|
|
96
103
|
see,
|
|
97
104
|
context,
|
|
98
105
|
visualClick,
|
|
106
|
+
learn_experience,
|
|
99
107
|
};
|
|
100
108
|
}
|
|
101
109
|
|
package/src/ai/historian.ts
CHANGED
|
@@ -22,6 +22,7 @@ export class Historian {
|
|
|
22
22
|
private experienceTracker: ExperienceTracker;
|
|
23
23
|
private reporter?: Reporter;
|
|
24
24
|
private stateManager?: StateManager;
|
|
25
|
+
private savedFiles = new Set<string>();
|
|
25
26
|
|
|
26
27
|
constructor(provider: Provider, experienceTracker?: ExperienceTracker, reporter?: Reporter, stateManager?: StateManager) {
|
|
27
28
|
this.provider = provider;
|
|
@@ -30,6 +31,10 @@ export class Historian {
|
|
|
30
31
|
this.stateManager = stateManager;
|
|
31
32
|
}
|
|
32
33
|
|
|
34
|
+
getSavedFiles(): string[] {
|
|
35
|
+
return [...this.savedFiles];
|
|
36
|
+
}
|
|
37
|
+
|
|
33
38
|
async saveSession(task: Task, initialState: ActionResult, conversation: Conversation): Promise<void> {
|
|
34
39
|
debugLog('Saving session experience');
|
|
35
40
|
|
|
@@ -433,6 +438,7 @@ export class Historian {
|
|
|
433
438
|
const filename = plan.title.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
|
|
434
439
|
const filePath = join(testsDir, `${filename}.js`);
|
|
435
440
|
writeFileSync(filePath, lines.join('\n'));
|
|
441
|
+
this.savedFiles.add(filePath);
|
|
436
442
|
|
|
437
443
|
tag('substep').log(`Saved plan tests to: ${filePath}`);
|
|
438
444
|
return filePath;
|
|
@@ -447,6 +453,7 @@ export class Historian {
|
|
|
447
453
|
}
|
|
448
454
|
|
|
449
455
|
writeFileSync(filePath, content);
|
|
456
|
+
this.savedFiles.add(filePath);
|
|
450
457
|
tag('substep').log(`Updated test file with healed steps: ${filePath}`);
|
|
451
458
|
}
|
|
452
459
|
|
package/src/ai/pilot.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { tool } from 'ai';
|
|
2
2
|
import dedent from 'dedent';
|
|
3
3
|
import { z } from 'zod';
|
|
4
|
-
import type { ActionResult } from '../action-result.ts';
|
|
4
|
+
import { ActionResult } from '../action-result.ts';
|
|
5
5
|
import { ConfigParser } from '../config.ts';
|
|
6
|
+
import { type ExperienceTracker, renderExperienceToc } from '../experience-tracker.ts';
|
|
6
7
|
import type Explorer from '../explorer.ts';
|
|
7
8
|
import { type Test, TestResult } from '../test-plan.ts';
|
|
8
9
|
import { collectInteractiveNodes, detectFocusArea, extractFocusedElement } from '../utils/aria.ts';
|
|
@@ -28,12 +29,14 @@ export class Pilot implements Agent {
|
|
|
28
29
|
private researcher: Researcher;
|
|
29
30
|
private explorer: Explorer;
|
|
30
31
|
private fisherman: Fisherman | null = null;
|
|
32
|
+
private experienceTracker: ExperienceTracker | null;
|
|
31
33
|
|
|
32
|
-
constructor(provider: Provider, agentTools: any, researcher: Researcher, explorer: Explorer) {
|
|
34
|
+
constructor(provider: Provider, agentTools: any, researcher: Researcher, explorer: Explorer, experienceTracker?: ExperienceTracker) {
|
|
33
35
|
this.provider = provider;
|
|
34
36
|
this.agentTools = agentTools;
|
|
35
37
|
this.researcher = researcher;
|
|
36
38
|
this.explorer = explorer;
|
|
39
|
+
this.experienceTracker = experienceTracker || null;
|
|
37
40
|
}
|
|
38
41
|
|
|
39
42
|
setFisherman(fisherman: Fisherman): void {
|
|
@@ -376,7 +379,15 @@ export class Pilot implements Agent {
|
|
|
376
379
|
|
|
377
380
|
private async sendToPilot(userText: string, functionId: string, opts: { tools?: boolean; maxToolRoundtrips?: number; task?: Test } = {}): Promise<string> {
|
|
378
381
|
debugLog(`sendToPilot: ${functionId}, tools: ${!!opts.tools}, roundtrips: ${opts.maxToolRoundtrips ?? 0}`);
|
|
379
|
-
|
|
382
|
+
|
|
383
|
+
let finalUserText = userText;
|
|
384
|
+
if (opts.tools) {
|
|
385
|
+
const tocBlock = this.getExperienceToc();
|
|
386
|
+
if (tocBlock) {
|
|
387
|
+
finalUserText = `${tocBlock}\n\n${userText}`;
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
this.conversation!.addUserText(finalUserText);
|
|
380
391
|
let tools = opts.tools ? this.agentTools : undefined;
|
|
381
392
|
|
|
382
393
|
if (opts.tools && opts.task) {
|
|
@@ -391,6 +402,15 @@ export class Pilot implements Agent {
|
|
|
391
402
|
return result?.response?.text || '';
|
|
392
403
|
}
|
|
393
404
|
|
|
405
|
+
private getExperienceToc(): string {
|
|
406
|
+
if (!this.experienceTracker) return '';
|
|
407
|
+
const state = this.explorer.getStateManager().getCurrentState();
|
|
408
|
+
if (!state) return '';
|
|
409
|
+
const actionResult = ActionResult.fromState(state);
|
|
410
|
+
const toc = this.experienceTracker.getExperienceTableOfContents(actionResult);
|
|
411
|
+
return renderExperienceToc(toc);
|
|
412
|
+
}
|
|
413
|
+
|
|
394
414
|
private buildPreconditionTool(task: Test) {
|
|
395
415
|
return {
|
|
396
416
|
precondition: tool({
|