explorbot 0.1.12 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/explorbot-cli.ts +21 -21
- package/dist/bin/explorbot-cli.js +3 -3
- package/dist/package.json +4 -2
- package/dist/rules/researcher/container-rules.md +2 -0
- package/dist/src/action-result.js +2 -1
- package/dist/src/action.js +3 -8
- package/dist/src/ai/captain.js +0 -2
- package/dist/src/ai/conversation.js +20 -4
- package/dist/src/ai/driller.js +1108 -0
- package/dist/src/ai/historian/utils.js +8 -1
- package/dist/src/ai/pilot.js +214 -267
- package/dist/src/ai/provider.js +25 -12
- package/dist/src/ai/quartermaster.js +2 -2
- package/dist/src/ai/rules.js +5 -5
- package/dist/src/ai/session-analyst.js +122 -0
- package/dist/src/ai/tester.js +69 -22
- package/dist/src/ai/tools.js +19 -4
- package/dist/src/commands/base-command.js +6 -6
- package/dist/src/commands/drill-command.js +3 -2
- package/dist/src/commands/exit-command.js +1 -0
- package/dist/src/commands/explore-command.js +9 -2
- package/dist/src/components/AddRule.js +1 -1
- package/dist/src/components/StatusPane.js +6 -1
- package/dist/src/experience-tracker.js +9 -0
- package/dist/src/explorbot.js +48 -8
- package/dist/src/explorer.js +11 -13
- package/dist/src/reporter.js +105 -4
- package/dist/src/state-manager.js +4 -3
- package/dist/src/stats.js +7 -1
- package/dist/src/test-plan.js +47 -3
- package/dist/src/utils/aria.js +354 -529
- package/dist/src/utils/hooks-runner.js +2 -8
- package/dist/src/utils/html.js +371 -0
- package/dist/src/utils/unique-names.js +12 -1
- package/dist/src/utils/url-matcher.js +6 -1
- package/dist/src/utils/web-element.js +27 -24
- package/dist/src/utils/xpath.js +1 -1
- package/package.json +4 -2
- package/rules/researcher/container-rules.md +2 -0
- package/src/action-result.ts +2 -1
- package/src/action.ts +3 -10
- package/src/ai/captain.ts +0 -2
- package/src/ai/conversation.ts +21 -4
- package/src/ai/driller.ts +1194 -0
- package/src/ai/historian/utils.ts +8 -1
- package/src/ai/pilot.ts +215 -265
- package/src/ai/provider.ts +24 -12
- package/src/ai/quartermaster.ts +2 -2
- package/src/ai/rules.ts +5 -5
- package/src/ai/session-analyst.ts +139 -0
- package/src/ai/tester.ts +63 -20
- package/src/ai/tools.ts +18 -4
- package/src/commands/base-command.ts +6 -6
- package/src/commands/drill-command.ts +3 -2
- package/src/commands/exit-command.ts +1 -0
- package/src/commands/explore-command.ts +10 -2
- package/src/components/AddRule.tsx +1 -1
- package/src/components/StatusPane.tsx +6 -3
- package/src/config.ts +4 -0
- package/src/experience-tracker.ts +9 -0
- package/src/explorbot.ts +55 -10
- package/src/explorer.ts +10 -12
- package/src/reporter.ts +108 -4
- package/src/state-manager.ts +4 -3
- package/src/stats.ts +10 -1
- package/src/test-plan.ts +62 -3
- package/src/utils/aria.ts +367 -537
- package/src/utils/hooks-runner.ts +2 -6
- package/src/utils/html.ts +381 -0
- package/src/utils/unique-names.ts +13 -0
- package/src/utils/url-matcher.ts +5 -1
- package/src/utils/web-element.ts +31 -28
- package/src/utils/xpath.ts +1 -1
- package/dist/src/ai/bosun.js +0 -456
- package/src/ai/bosun.ts +0 -571
package/dist/src/ai/provider.js
CHANGED
|
@@ -16,6 +16,16 @@ class AiError extends Error {
|
|
|
16
16
|
}
|
|
17
17
|
export class ContextLengthError extends Error {
|
|
18
18
|
}
|
|
19
|
+
/**
 * Read the number of cached prompt tokens from a usage record.
 *
 * Lookup order:
 *   1. `usage.cachedInputTokens`, falling back to `usage.inputTokenDetails.cacheReadTokens`
 *   2. `usage.raw.prompt_tokens_details.cached_tokens`, falling back to
 *      `usage.raw.promptTokensDetails.cachedTokens`
 *
 * Only finite numbers are accepted. NaN or Infinity would otherwise be added
 * into the running token statistics and corrupt every later total.
 *
 * @param {object|null|undefined} usage - usage object attached to a model response
 * @returns {number} cached token count, or 0 when none is reported
 */
function extractCachedTokens(usage) {
    if (!usage) {
        return 0;
    }
    const direct = usage.cachedInputTokens ?? usage.inputTokenDetails?.cacheReadTokens;
    if (Number.isFinite(direct)) {
        return direct;
    }
    // The raw payload is consulted under both snake_case and camelCase spellings.
    const raw = usage.raw;
    const fromRaw = raw?.prompt_tokens_details?.cached_tokens ?? raw?.promptTokensDetails?.cachedTokens;
    return Number.isFinite(fromRaw) ? fromRaw : 0;
}
|
|
19
29
|
function rejectAfterIdle(ms, signal) {
|
|
20
30
|
return new Promise((_, reject) => {
|
|
21
31
|
const tick = () => {
|
|
@@ -227,9 +237,10 @@ export class Provider {
|
|
|
227
237
|
responseLog(response.text);
|
|
228
238
|
if (response.usage) {
|
|
229
239
|
Stats.recordTokens(options.agentName || 'unknown', modelName, {
|
|
230
|
-
input: response.usage.promptTokens
|
|
231
|
-
output: response.usage.completionTokens
|
|
232
|
-
total: response.usage.totalTokens
|
|
240
|
+
input: response.usage.inputTokens ?? response.usage.promptTokens ?? 0,
|
|
241
|
+
output: response.usage.outputTokens ?? response.usage.completionTokens ?? 0,
|
|
242
|
+
total: response.usage.totalTokens ?? 0,
|
|
243
|
+
cached: extractCachedTokens(response.usage),
|
|
233
244
|
});
|
|
234
245
|
}
|
|
235
246
|
return response;
|
|
@@ -311,9 +322,10 @@ export class Provider {
|
|
|
311
322
|
responseLog(response.text);
|
|
312
323
|
if (response.usage) {
|
|
313
324
|
Stats.recordTokens(options.agentName || 'unknown', modelName, {
|
|
314
|
-
input: response.usage.promptTokens
|
|
315
|
-
output: response.usage.completionTokens
|
|
316
|
-
total: response.usage.totalTokens
|
|
325
|
+
input: response.usage.inputTokens ?? response.usage.promptTokens ?? 0,
|
|
326
|
+
output: response.usage.outputTokens ?? response.usage.completionTokens ?? 0,
|
|
327
|
+
total: response.usage.totalTokens ?? 0,
|
|
328
|
+
cached: extractCachedTokens(response.usage),
|
|
317
329
|
});
|
|
318
330
|
}
|
|
319
331
|
return response;
|
|
@@ -379,9 +391,10 @@ export class Provider {
|
|
|
379
391
|
responseLog(response.object);
|
|
380
392
|
if (response.usage) {
|
|
381
393
|
Stats.recordTokens(options.agentName || 'unknown', modelName, {
|
|
382
|
-
input: response.usage.promptTokens
|
|
383
|
-
output: response.usage.completionTokens
|
|
384
|
-
total: response.usage.totalTokens
|
|
394
|
+
input: response.usage.inputTokens ?? response.usage.promptTokens ?? 0,
|
|
395
|
+
output: response.usage.outputTokens ?? response.usage.completionTokens ?? 0,
|
|
396
|
+
total: response.usage.totalTokens ?? 0,
|
|
397
|
+
cached: extractCachedTokens(response.usage),
|
|
385
398
|
});
|
|
386
399
|
}
|
|
387
400
|
return response;
|
|
@@ -555,9 +568,9 @@ export class Provider {
|
|
|
555
568
|
responseLog(response.text);
|
|
556
569
|
if (response.usage) {
|
|
557
570
|
Stats.recordTokens('vision', this.getModelName(this.config.visionModel), {
|
|
558
|
-
input: response.usage.promptTokens
|
|
559
|
-
output: response.usage.completionTokens
|
|
560
|
-
total: response.usage.totalTokens
|
|
571
|
+
input: response.usage.inputTokens ?? response.usage.promptTokens ?? 0,
|
|
572
|
+
output: response.usage.outputTokens ?? response.usage.completionTokens ?? 0,
|
|
573
|
+
total: response.usage.totalTokens ?? 0,
|
|
561
574
|
});
|
|
562
575
|
}
|
|
563
576
|
return response;
|
|
@@ -169,10 +169,10 @@ Focus on what would confuse a real user or caused the agent to make mistakes.`;
|
|
|
169
169
|
const criticalViolations = report.axeViolations.filter((v) => v.impact === 'critical' || v.impact === 'serious');
|
|
170
170
|
for (const v of criticalViolations.slice(0, 3)) {
|
|
171
171
|
const nodeHtml = v.nodes[0]?.html.slice(0, 100) || '';
|
|
172
|
-
task.
|
|
172
|
+
task.addVerificationDetail(`🔴 A11Y [${v.impact}] ${v.id}: ${v.description} — ${nodeHtml}`);
|
|
173
173
|
}
|
|
174
174
|
for (const issue of report.semanticIssues.slice(0, 3)) {
|
|
175
|
-
task.
|
|
175
|
+
task.addVerificationDetail(`💡 UX [${issue.type}] ${issue.element}: ${issue.suggestion}`);
|
|
176
176
|
}
|
|
177
177
|
}
|
|
178
178
|
saveReport(stateHash, report) {
|
package/dist/src/ai/rules.js
CHANGED
|
@@ -231,6 +231,8 @@ export function multipleTabsRule(tabs) {
|
|
|
231
231
|
}
|
|
232
232
|
export const actionRule = dedent `
|
|
233
233
|
<actions>
|
|
234
|
+
\`faker\` (from @faker-js/faker) is available inside I.* calls for generating data, e.g. I.fillField('Bio', faker.lorem.paragraphs(5)).
|
|
235
|
+
|
|
234
236
|
### I.click
|
|
235
237
|
|
|
236
238
|
clicks on the element by its locator
|
|
@@ -272,11 +274,9 @@ export const actionRule = dedent `
|
|
|
272
274
|
I.fillField('Description', 'Hello world', '.editor'); // works for rich text / code editors too
|
|
273
275
|
</example>
|
|
274
276
|
|
|
275
|
-
I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
Do NOT open the editor with raw JS (executeScript, page.evaluate), do NOT dispatch synthetic events,
|
|
279
|
-
do NOT call the editor's own API (monaco.editor.setValue, view.dispatch, etc.) to write text.
|
|
277
|
+
I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors transparently.
|
|
278
|
+
ALWAYS use I.fillField for rich text / code editors — target the editor container or its nearest label/heading with a normal locator.
|
|
279
|
+
If I.fillField does not work, I.type into the focused element is the fallback.
|
|
280
280
|
|
|
281
281
|
### I.type
|
|
282
282
|
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import dedent from 'dedent';
|
|
4
|
+
import { outputPath } from "../config.js";
|
|
5
|
+
import { Stats } from "../stats.js";
|
|
6
|
+
export class SessionAnalyst {
|
|
7
|
+
emoji = '🧐';
|
|
8
|
+
provider;
|
|
9
|
+
constructor(provider) {
|
|
10
|
+
this.provider = provider;
|
|
11
|
+
}
|
|
12
|
+
async analyze(tests) {
|
|
13
|
+
const eligible = tests.filter((t) => t.startTime != null);
|
|
14
|
+
if (eligible.length === 0)
|
|
15
|
+
return '';
|
|
16
|
+
const model = this.provider.getAgenticModel('analyst');
|
|
17
|
+
const customPrompt = this.provider.getSystemPromptForAgent('analyst', undefined);
|
|
18
|
+
const systemPrompt = dedent `
|
|
19
|
+
You write a TERSE end-of-session report. Reader is a developer who wants to UNDERSTAND THE FEATURE — what works, what is broken, what is unclear. Every word must earn its place.
|
|
20
|
+
|
|
21
|
+
Output MARKDOWN. No JSON, no preamble, no closing summary.
|
|
22
|
+
|
|
23
|
+
NO EMOJI. No 🔴 🟡 🟢 ✅, no escape sequences like \\u2705. Use plain text severity tags: [High], [Medium], [Low] for defects.
|
|
24
|
+
|
|
25
|
+
## Reporting unit
|
|
26
|
+
|
|
27
|
+
Report at the level of FEATURES / FLOWS / PAGES. Tests are evidence, not the unit. Several tests covering the same flow → ONE entry citing all of them.
|
|
28
|
+
|
|
29
|
+
## Walk every test
|
|
30
|
+
|
|
31
|
+
PASSED test: did all steps run, was the goal actually verified, did the user-visible goal happen? All yes → contributes to What works. Any no → Execution issue (false positive).
|
|
32
|
+
|
|
33
|
+
FAILED test, first match wins: (1) goal achieved but mis-verified → Execution. (2) automation failure (locator/timeout/loop/modal/a11y) → Execution. (3) bad preconditions or data → Execution. (4) wrong URL/environment → Execution. (5) app contradicted expected outcome → Defect.
|
|
34
|
+
|
|
35
|
+
Crucial distinction: "the app misbehaved" vs "the automation could not interact with the app". ONLY the first is a Defect. If the automation gives up before the app responds — timeout, retries exhausted, dead loop / loop detected, could not click or find an element — that is an Execution issue regardless of what the log calls it. Failure inside the automation ≠ failure inside the product.
|
|
36
|
+
|
|
37
|
+
A solitary failure where adjacent tests on the same feature passed → Execution, not Defect.
|
|
38
|
+
|
|
39
|
+
## Severity (defects only)
|
|
40
|
+
[High] blocks a core flow · [Medium] degrades a flow but workaround exists · [Low] cosmetic / edge case
|
|
41
|
+
|
|
42
|
+
## Format
|
|
43
|
+
|
|
44
|
+
# Session Analysis
|
|
45
|
+
|
|
46
|
+
<ONE or TWO sentences describing the FEATURE STATE — what was explored, whether the core flow holds, what the standout problem is. NO test counts, NO "N tests run". Talk about the product, not the run.>
|
|
47
|
+
|
|
48
|
+
## Coverage
|
|
49
|
+
- Pages: <paths>
|
|
50
|
+
- Features: <capabilities>
|
|
51
|
+
|
|
52
|
+
## What works
|
|
53
|
+
- **<feature>** — #2, #7, #8
|
|
54
|
+
|
|
55
|
+
## Defects
|
|
56
|
+
|
|
57
|
+
### [Medium] <plain-English bug title>
|
|
58
|
+
Affects: #3, #5
|
|
59
|
+
Reproduce:
|
|
60
|
+
1. <concrete UI step>
|
|
61
|
+
2. <next>
|
|
62
|
+
Evidence: <one short observation>
|
|
63
|
+
|
|
64
|
+
## UX issues
|
|
65
|
+
- **<feature>** — <what's confusing> (#7)
|
|
66
|
+
|
|
67
|
+
## Execution Issues
|
|
68
|
+
- **#2 <scenario>** — <≤10 words, what was unreliable>
|
|
69
|
+
|
|
70
|
+
## Brevity rules
|
|
71
|
+
|
|
72
|
+
- Headline: 2 sentences MAX. About the FEATURE, not the run. No counts, no "N tests", no "this session". Banned words: "exercised", "comprehensive", "notably", "this session", "module", "targeted", "covered creation".
|
|
73
|
+
- What works: feature name + test refs. NO parentheticals, NO caveats. If there's a caveat, the entry doesn't belong here.
|
|
74
|
+
- Defect title is the BUG ("Search returns non-matching results"), never the scenario name.
|
|
75
|
+
- Reproduce steps are imperative one-liners drawn from the log.
|
|
76
|
+
- Evidence is one short factual observation. Never quote the \`result\` field.
|
|
77
|
+
- Execution Issues: ONE line per test, ≤10 words, plain. Examples: "passed vacuously, no list assertion", "no file upload step in log", "dead loop on Save click". No prefixes, no nested explanation.
|
|
78
|
+
- Omit any empty section.
|
|
79
|
+
- Section order: Coverage → What works → Defects (severity desc) → UX issues → Execution Issues.
|
|
80
|
+
|
|
81
|
+
${customPrompt || ''}
|
|
82
|
+
`;
|
|
83
|
+
const userPayload = dedent `
|
|
84
|
+
${eligible.length} tests were executed in this session.
|
|
85
|
+
|
|
86
|
+
${eligible.map((t, i) => this.serializeTest(t, i + 1)).join('\n\n')}
|
|
87
|
+
`;
|
|
88
|
+
const response = await this.provider.chat([
|
|
89
|
+
{ role: 'system', content: systemPrompt },
|
|
90
|
+
{ role: 'user', content: userPayload },
|
|
91
|
+
], model, { agentName: 'analyst' });
|
|
92
|
+
return decodeEscapes((response?.text || '').trim());
|
|
93
|
+
}
|
|
94
|
+
writeReport(markdown) {
|
|
95
|
+
const filePath = outputPath('reports', `${Stats.sessionLabel()}.md`);
|
|
96
|
+
const dir = path.dirname(filePath);
|
|
97
|
+
if (!existsSync(dir))
|
|
98
|
+
mkdirSync(dir, { recursive: true });
|
|
99
|
+
writeFileSync(filePath, markdown);
|
|
100
|
+
return filePath;
|
|
101
|
+
}
|
|
102
|
+
serializeTest(test, ref) {
|
|
103
|
+
const log = test
|
|
104
|
+
.getLog()
|
|
105
|
+
.slice(-30)
|
|
106
|
+
.map((entry) => ` - [${entry.type}] ${entry.content}`)
|
|
107
|
+
.join('\n');
|
|
108
|
+
return dedent `
|
|
109
|
+
<test ref="#${ref}">
|
|
110
|
+
url: ${test.startUrl || '/'}
|
|
111
|
+
scenario: ${test.scenario}
|
|
112
|
+
result: ${test.result || 'unknown'}
|
|
113
|
+
expected: ${test.expected.join(' | ') || '(none)'}
|
|
114
|
+
log:
|
|
115
|
+
${log}
|
|
116
|
+
</test>
|
|
117
|
+
`;
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
/**
 * Replace literal `\u{HEX}` and `\uXXXX` escape sequences in `text` with the
 * characters they denote.
 *
 * Braced escapes are decoded first, then four-digit escapes. Two adjacent
 * four-digit surrogate escapes therefore combine into a single astral character.
 * Escapes that name a value above U+10FFFF are left unchanged, because
 * `String.fromCodePoint` would throw a RangeError for them.
 *
 * @param {string} text - text that may contain literal unicode escapes
 * @returns {string} text with valid escapes decoded
 */
function decodeEscapes(text) {
    const MAX_CODE_POINT = 0x10ffff;
    const toChar = (match, hex) => {
        const codePoint = Number.parseInt(hex, 16);
        // Out-of-range values (including Infinity from very long digit runs) stay as written.
        return codePoint <= MAX_CODE_POINT ? String.fromCodePoint(codePoint) : match;
    };
    return text.replace(/\\u\{([0-9a-fA-F]+)\}/g, toChar).replace(/\\u([0-9a-fA-F]{4})/g, toChar);
}
|
package/dist/src/ai/tester.js
CHANGED
|
@@ -49,6 +49,8 @@ export class Tester extends TaskAgent {
|
|
|
49
49
|
pageStateHash = null;
|
|
50
50
|
pageActionResult = null;
|
|
51
51
|
hooksRunner;
|
|
52
|
+
seenUiMapUrls = new Set();
|
|
53
|
+
lastAnalyzedStateHash = null;
|
|
52
54
|
constructor(explorer, provider, researcher, navigator, agentTools) {
|
|
53
55
|
super();
|
|
54
56
|
this.explorer = explorer;
|
|
@@ -80,7 +82,7 @@ export class Tester extends TaskAgent {
|
|
|
80
82
|
return ActionResult.fromState(this.explorer.getStateManager().getCurrentState());
|
|
81
83
|
}
|
|
82
84
|
get progressCheckInterval() {
|
|
83
|
-
return this.explorer.getConfig().ai?.agents?.tester?.progressCheckInterval ??
|
|
85
|
+
return this.explorer.getConfig().ai?.agents?.tester?.progressCheckInterval ?? 3;
|
|
84
86
|
}
|
|
85
87
|
getConversation() {
|
|
86
88
|
return this.currentConversation;
|
|
@@ -96,6 +98,8 @@ export class Tester extends TaskAgent {
|
|
|
96
98
|
this.previousStateHash = null;
|
|
97
99
|
this.pageStateHash = null;
|
|
98
100
|
this.pageActionResult = null;
|
|
101
|
+
this.seenUiMapUrls.clear();
|
|
102
|
+
this.lastAnalyzedStateHash = null;
|
|
99
103
|
this.explorer.getStateManager().clearHistory();
|
|
100
104
|
this.resetFailureCount();
|
|
101
105
|
this.pilot?.reset();
|
|
@@ -117,12 +121,18 @@ export class Tester extends TaskAgent {
|
|
|
117
121
|
page?.on('console', onConsoleMessage);
|
|
118
122
|
const initialState = ActionResult.fromState(state);
|
|
119
123
|
const conversation = this.provider.startConversation(this.getSystemMessage(), 'tester');
|
|
124
|
+
conversation.markLastMessageCacheable();
|
|
120
125
|
this.currentConversation = conversation;
|
|
121
126
|
const outputDir = ConfigParser.getInstance().getOutputDir();
|
|
122
127
|
this.executionLogFile = join(outputDir, `tester_${task.sessionName}.md`);
|
|
123
128
|
// Note: Markdown saving functionality removed from Conversation class
|
|
124
|
-
const
|
|
125
|
-
conversation.addUserText(
|
|
129
|
+
const scenarioBlock = this.buildScenarioBlock(task, initialState);
|
|
130
|
+
conversation.addUserText(scenarioBlock);
|
|
131
|
+
conversation.markLastMessageCacheable();
|
|
132
|
+
conversation.protectPrefix(conversation.messages.length);
|
|
133
|
+
const pageContext = await this.reinjectContextIfNeeded(1, initialState);
|
|
134
|
+
if (pageContext)
|
|
135
|
+
conversation.addUserText(pageContext);
|
|
126
136
|
return await Observability.run(`test: ${task.scenario}`, {
|
|
127
137
|
sessionId: task.sessionName,
|
|
128
138
|
tags: ['tester'],
|
|
@@ -138,6 +148,12 @@ export class Tester extends TaskAgent {
|
|
|
138
148
|
if (this.pilot) {
|
|
139
149
|
try {
|
|
140
150
|
const plan = await this.pilot.planTest(task, initialState);
|
|
151
|
+
if (task.hasFinished) {
|
|
152
|
+
offFailedRequest?.();
|
|
153
|
+
page?.off('pageerror', onPageError);
|
|
154
|
+
page?.off('console', onConsoleMessage);
|
|
155
|
+
return { success: task.isSuccessful };
|
|
156
|
+
}
|
|
141
157
|
if (plan) {
|
|
142
158
|
conversation.addUserText(`Pilot's test plan:\n${plan}\n\nFollow this plan while executing the test.`);
|
|
143
159
|
}
|
|
@@ -158,14 +174,18 @@ export class Tester extends TaskAgent {
|
|
|
158
174
|
await this.explorer.startTest(task);
|
|
159
175
|
debugLog(`Navigating to ${task.startUrl}`);
|
|
160
176
|
await this.explorer.visit(task.startUrl);
|
|
161
|
-
const
|
|
177
|
+
const startState = this.explorer.getStateManager().getCurrentState();
|
|
178
|
+
if (startState)
|
|
179
|
+
task.addUrlNote(startState);
|
|
180
|
+
const currentUrl = startState?.url || task.startUrl || '';
|
|
162
181
|
await this.hooksRunner.runBeforeHook('tester', currentUrl);
|
|
163
182
|
const offStateChange = this.explorer.getStateManager().onStateChange((event) => {
|
|
164
183
|
if (task.hasFinished)
|
|
165
184
|
return;
|
|
166
185
|
if (event.toState?.url === event.fromState?.url)
|
|
167
186
|
return;
|
|
168
|
-
|
|
187
|
+
if (event.toState)
|
|
188
|
+
task.addUrlNote(event.toState, event.fromState || undefined);
|
|
169
189
|
task.states.push(event.toState);
|
|
170
190
|
});
|
|
171
191
|
const codeceptjsTools = createCodeceptJSTools(this.explorer, task);
|
|
@@ -203,33 +223,34 @@ export class Tester extends TaskAgent {
|
|
|
203
223
|
The user has interrupted and wants to change direction. Follow the new instruction.
|
|
204
224
|
`);
|
|
205
225
|
}
|
|
206
|
-
conversation.cleanupTag('page_aria', '...cleaned aria snapshot...',
|
|
226
|
+
conversation.cleanupTag('page_aria', '...cleaned aria snapshot...', 1);
|
|
207
227
|
conversation.cleanupTag('page_html', '...cleaned HTML snapshot...', 1);
|
|
208
228
|
conversation.cleanupTag('experience', '...cleaned experience...', 1);
|
|
209
229
|
conversation.cleanupTag('applied_experience', '...cleaned past experience...', 1);
|
|
210
230
|
conversation.cleanupTag('page_ui_map', '...cleaned UI map...', 1);
|
|
211
231
|
conversation.cleanupTag('page_ui_map_overlay', '...cleaned UI overlay...', 1);
|
|
212
|
-
conversation.compactToolResults(
|
|
232
|
+
conversation.compactToolResults(2);
|
|
213
233
|
if (iteration > 1) {
|
|
214
234
|
const isNewPage = this.previousUrl !== null && this.previousUrl !== currentState.url;
|
|
215
235
|
let nextStep = '';
|
|
216
236
|
nextStep += await this.reinjectContextIfNeeded(iteration, currentState);
|
|
217
237
|
nextStep += await this.prepareInstructionsForNextStep(task);
|
|
218
238
|
if (isNewPage && this.pilot) {
|
|
219
|
-
const guidance = await this.pilot.reviewNewPage(task, currentState);
|
|
239
|
+
const guidance = await this.pilot.reviewNewPage(task, currentState, conversation);
|
|
220
240
|
if (guidance)
|
|
221
241
|
nextStep += `\n\n${guidance}`;
|
|
222
242
|
}
|
|
223
|
-
else if ((iteration
|
|
243
|
+
else if (this.shouldAnalyzeProgress(iteration, currentState) && this.pilot) {
|
|
224
244
|
const guidance = await this.pilot.analyzeProgress(task, currentState, conversation);
|
|
225
245
|
if (guidance)
|
|
226
246
|
nextStep += `\n\n${guidance}`;
|
|
227
247
|
this.consecutiveFailures = 0;
|
|
248
|
+
this.lastAnalyzedStateHash = currentState.hash;
|
|
228
249
|
}
|
|
229
250
|
conversation.addUserText(nextStep);
|
|
230
251
|
}
|
|
231
252
|
const result = await this.provider.invokeConversation(conversation, tools, {
|
|
232
|
-
maxToolRoundtrips:
|
|
253
|
+
maxToolRoundtrips: 3,
|
|
233
254
|
toolChoice: 'required',
|
|
234
255
|
stopWhen: () => task.hasFinished,
|
|
235
256
|
});
|
|
@@ -354,6 +375,17 @@ export class Tester extends TaskAgent {
|
|
|
354
375
|
...task,
|
|
355
376
|
};
|
|
356
377
|
}
|
|
378
|
+
shouldAnalyzeProgress(iteration, currentState) {
|
|
379
|
+
if (this.consecutiveFailures >= 3)
|
|
380
|
+
return true;
|
|
381
|
+
if (this.consecutiveEmptyResults >= 2)
|
|
382
|
+
return true;
|
|
383
|
+
if (iteration % this.progressCheckInterval !== 0)
|
|
384
|
+
return false;
|
|
385
|
+
if (this.lastAnalyzedStateHash === currentState.hash)
|
|
386
|
+
return false;
|
|
387
|
+
return true;
|
|
388
|
+
}
|
|
357
389
|
async prepareInstructionsForNextStep(task) {
|
|
358
390
|
let outcomeStatus = dedent `
|
|
359
391
|
<task>
|
|
@@ -388,6 +420,7 @@ export class Tester extends TaskAgent {
|
|
|
388
420
|
this.previousUrl = currentUrl;
|
|
389
421
|
this.previousStateHash = currentStateHash;
|
|
390
422
|
let context = '';
|
|
423
|
+
const focusArea = detectFocusArea(currentState.ariaSnapshot);
|
|
391
424
|
const focusedElement = extractFocusedElement(currentState.ariaSnapshot);
|
|
392
425
|
if (focusedElement) {
|
|
393
426
|
const isTextInput = ['textbox', 'combobox', 'searchbox'].includes(focusedElement.role);
|
|
@@ -403,6 +436,17 @@ export class Tester extends TaskAgent {
|
|
|
403
436
|
<no_focus>
|
|
404
437
|
No element is focused
|
|
405
438
|
</no_focus>
|
|
439
|
+
`;
|
|
440
|
+
}
|
|
441
|
+
if (focusArea.detected) {
|
|
442
|
+
const areaName = focusArea.name ? ` "${focusArea.name}"` : '';
|
|
443
|
+
context += dedent `
|
|
444
|
+
<focus_scope>
|
|
445
|
+
A ${focusArea.type}${areaName} is currently open above the page.
|
|
446
|
+
Scope all interactions to elements inside this ${focusArea.type}.
|
|
447
|
+
Page navigation, filters, and tabs that exist outside it are not actionable while it is open and may share names or roles with elements inside it — prefer the locator inside the ${focusArea.type}.
|
|
448
|
+
Use <page_aria> to confirm the element you target is actually inside the ${focusArea.type}.
|
|
449
|
+
</focus_scope>
|
|
406
450
|
`;
|
|
407
451
|
}
|
|
408
452
|
if (currentState.isInsideIframe) {
|
|
@@ -420,19 +464,23 @@ export class Tester extends TaskAgent {
|
|
|
420
464
|
this.explorer.clearOtherTabsInfo();
|
|
421
465
|
}
|
|
422
466
|
if (isNewUrl) {
|
|
467
|
+
const alreadySeenUiMap = this.seenUiMapUrls.has(currentUrl);
|
|
423
468
|
let research = '';
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
469
|
+
if (!alreadySeenUiMap) {
|
|
470
|
+
try {
|
|
471
|
+
research = await this.researcher.research(currentState);
|
|
472
|
+
}
|
|
473
|
+
catch (err) {
|
|
474
|
+
if (!(err instanceof ErrorPageError))
|
|
475
|
+
throw err;
|
|
476
|
+
tag('warning').log(`Research skipped: ${err.message}`);
|
|
477
|
+
}
|
|
431
478
|
}
|
|
432
479
|
this.pageStateHash = currentStateHash;
|
|
433
480
|
this.pageActionResult = currentState;
|
|
434
481
|
let uiMapSection = '';
|
|
435
482
|
if (research) {
|
|
483
|
+
this.seenUiMapUrls.add(currentUrl);
|
|
436
484
|
uiMapSection = dedent `
|
|
437
485
|
|
|
438
486
|
Page UI Map
|
|
@@ -442,6 +490,9 @@ export class Tester extends TaskAgent {
|
|
|
442
490
|
</page_ui_map>
|
|
443
491
|
`;
|
|
444
492
|
}
|
|
493
|
+
else if (alreadySeenUiMap) {
|
|
494
|
+
uiMapSection = `\n\n<page_ui_map>UI map for ${currentUrl} was shown earlier in this session — refer to it above.</page_ui_map>`;
|
|
495
|
+
}
|
|
445
496
|
context += dedent `
|
|
446
497
|
Context:
|
|
447
498
|
|
|
@@ -462,7 +513,6 @@ export class Tester extends TaskAgent {
|
|
|
462
513
|
`;
|
|
463
514
|
return context;
|
|
464
515
|
}
|
|
465
|
-
const focusArea = detectFocusArea(currentState.ariaSnapshot);
|
|
466
516
|
if (focusArea.detected && focusArea.name && this.pageStateHash && this.pageActionResult) {
|
|
467
517
|
const overlaySection = await this.researcher.researchOverlay(currentState, this.pageActionResult, this.pageStateHash);
|
|
468
518
|
if (overlaySection) {
|
|
@@ -640,9 +690,8 @@ export class Tester extends TaskAgent {
|
|
|
640
690
|
${this.provider.getSystemPromptForAgent('tester', this.explorer.getStateManager().getCurrentState()?.url) || ''}
|
|
641
691
|
`;
|
|
642
692
|
}
|
|
643
|
-
|
|
693
|
+
buildScenarioBlock(task, actionResult) {
|
|
644
694
|
const knowledge = this.getKnowledge(actionResult);
|
|
645
|
-
const pageContext = await this.reinjectContextIfNeeded(1, actionResult);
|
|
646
695
|
return dedent `
|
|
647
696
|
<task>
|
|
648
697
|
SCENARIO GOAL: ${task.scenario}
|
|
@@ -669,8 +718,6 @@ export class Tester extends TaskAgent {
|
|
|
669
718
|
${this.buildAvailableFiles()}
|
|
670
719
|
|
|
671
720
|
${knowledge}
|
|
672
|
-
|
|
673
|
-
${pageContext}
|
|
674
721
|
`;
|
|
675
722
|
}
|
|
676
723
|
getDeletableSessionNames(task) {
|
package/dist/src/ai/tools.js
CHANGED
|
@@ -423,7 +423,7 @@ export function createAgentTools({ explorer, researcher, navigator, experienceTr
|
|
|
423
423
|
return failedToolResult('see', 'AI analysis failed to process the screenshot');
|
|
424
424
|
}
|
|
425
425
|
return successToolResult('see', {
|
|
426
|
-
analysis: analysisResult,
|
|
426
|
+
analysis: cap(analysisResult, ANALYSIS_OUTPUT_CAP),
|
|
427
427
|
message: `Successfully analyzed screenshot for: ${request}`,
|
|
428
428
|
suggestion: 'Visual confirmation is valid evidence for test results. Use record() to note the visual findings.',
|
|
429
429
|
});
|
|
@@ -469,8 +469,8 @@ export function createAgentTools({ explorer, researcher, navigator, experienceTr
|
|
|
469
469
|
url: currentState.url,
|
|
470
470
|
title: currentState.title,
|
|
471
471
|
suggestion: 'If not enough context received, call see() to visually identify elements in page contents',
|
|
472
|
-
aria,
|
|
473
|
-
html,
|
|
472
|
+
aria: cap(aria, ARIA_OUTPUT_CAP),
|
|
473
|
+
html: cap(html, HTML_OUTPUT_CAP),
|
|
474
474
|
reminder: 'Context provided. Do not call context() again until you perform actions or suspect page changed.',
|
|
475
475
|
});
|
|
476
476
|
}
|
|
@@ -556,7 +556,7 @@ export function createAgentTools({ explorer, researcher, navigator, experienceTr
|
|
|
556
556
|
const researchResult = await researcher.research(currentState, { screenshot: true, data: true });
|
|
557
557
|
return successToolResult('research', {
|
|
558
558
|
analysis: researchResult,
|
|
559
|
-
aria: ActionResult.fromState(currentState).getInteractiveARIA(),
|
|
559
|
+
aria: cap(ActionResult.fromState(currentState).getInteractiveARIA(), ARIA_OUTPUT_CAP),
|
|
560
560
|
message: `Successfully researched page: ${currentState.url}.`,
|
|
561
561
|
suggestion: dedent `
|
|
562
562
|
You received comprehensive UI map report. Use it to understand the page structure and navigate to the elements.
|
|
@@ -859,6 +859,16 @@ export function createAgentTools({ explorer, researcher, navigator, experienceTr
|
|
|
859
859
|
return tools;
|
|
860
860
|
}
|
|
861
861
|
const PAGE_DIFF_SUGGESTION = 'Analyze page diff. htmlParts shows what changed and WHERE — each part has a container selector. Use the container as context when clicking elements from the diff.';
|
|
862
|
+
const ARIA_OUTPUT_CAP = 4000;
|
|
863
|
+
const HTML_OUTPUT_CAP = 6000;
|
|
864
|
+
const ANALYSIS_OUTPUT_CAP = 2000;
|
|
865
|
+
/**
 * Limit `text` to at most `max` UTF-16 code units, appending a marker that
 * reports how many code units were dropped.
 *
 * If the cut would fall between the two halves of a surrogate pair, it moves
 * back one unit so the result never ends in a lone high surrogate.
 *
 * @param {string|null|undefined} text - value to limit; falsy values yield ''
 * @param {number} max - maximum number of code units kept from `text`
 * @returns {string} `text` unchanged when it fits, otherwise the truncated text plus marker
 */
function cap(text, max) {
    if (!text) {
        return '';
    }
    if (text.length <= max) {
        return text;
    }
    let end = max;
    if (end > 0) {
        const last = text.charCodeAt(end - 1);
        const next = text.charCodeAt(end);
        const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
        if (splitsPair) {
            end -= 1;
        }
    }
    return `${text.slice(0, end)}\n[...truncated; ${text.length - end} chars omitted...]`;
}
|
|
862
872
|
function transformContainsCommand(command) {
|
|
863
873
|
if (!command.includes(':contains('))
|
|
864
874
|
return command;
|
|
@@ -897,9 +907,14 @@ function successToolResult(action, data, source) {
|
|
|
897
907
|
if (data?.pageDiff) {
|
|
898
908
|
let suggestion = PAGE_DIFF_SUGGESTION;
|
|
899
909
|
const ariaChanges = data.pageDiff.ariaChanges || '';
|
|
910
|
+
const urlChanged = data.pageDiff.urlChanged === true;
|
|
911
|
+
const hasHtmlParts = Array.isArray(data.pageDiff.htmlParts) && data.pageDiff.htmlParts.length > 0;
|
|
900
912
|
if (countAriaChanges(ariaChanges) >= 50) {
|
|
901
913
|
suggestion = `MAJOR PAGE CHANGE. Page entered a different mode. Check htmlParts and iframes in pageDiff before next action. ${suggestion}`;
|
|
902
914
|
}
|
|
915
|
+
else if (!urlChanged && !ariaChanges && !hasHtmlParts) {
|
|
916
|
+
suggestion = 'Action ran without error but produced no observable change (URL, ARIA and HTML all unchanged). The locator likely matched a non-interactive ancestor or an element outside the intended control. Re-locate via xpathCheck() or verify with see() before treating this as success.';
|
|
917
|
+
}
|
|
903
918
|
else if (ariaChanges.includes('heading') && ariaChanges.includes('added')) {
|
|
904
919
|
suggestion += ' WARNING: A new panel or modal may have appeared. If this was not the intended action, close it and try a different element.';
|
|
905
920
|
}
|
|
@@ -19,17 +19,17 @@ export class BaseCommand {
|
|
|
19
19
|
if (this.suggestions.length === 0)
|
|
20
20
|
return;
|
|
21
21
|
const prefix = isInteractive() ? '/' : `${getCliName()} `;
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
const commandWidth = this.suggestions.reduce((max, s) => (s.command ? Math.max(max, prefix.length + s.command.length) : max), 0);
|
|
23
|
+
const lines = [chalk.bold('Suggested:')];
|
|
24
24
|
for (const { command, hint } of this.suggestions) {
|
|
25
|
-
tag('info').log('');
|
|
26
25
|
if (!command) {
|
|
27
|
-
|
|
26
|
+
lines.push(` ${chalk.dim(hint)}`);
|
|
28
27
|
continue;
|
|
29
28
|
}
|
|
30
|
-
|
|
31
|
-
|
|
29
|
+
const cmd = `${prefix}${command}`.padEnd(commandWidth);
|
|
30
|
+
lines.push(` ${chalk.yellow(cmd)} ${chalk.dim(hint)}`);
|
|
32
31
|
}
|
|
32
|
+
tag('info').log(lines.join('\n'));
|
|
33
33
|
}
|
|
34
34
|
parseArgs(args) {
|
|
35
35
|
const cmd = new Command();
|
|
@@ -2,6 +2,7 @@ import { BaseCommand } from './base-command.js';
|
|
|
2
2
|
export class DrillCommand extends BaseCommand {
|
|
3
3
|
name = 'drill';
|
|
4
4
|
description = 'Drill all components on current page to learn interactions';
|
|
5
|
+
aliases = ['driller'];
|
|
5
6
|
suggestions = [
|
|
6
7
|
{ command: 'research', hint: 'see UI map first' },
|
|
7
8
|
{ command: 'navigate <page>', hint: 'go to another page' },
|
|
@@ -13,7 +14,7 @@ export class DrillCommand extends BaseCommand {
|
|
|
13
14
|
if (!state) {
|
|
14
15
|
throw new Error('No active page to drill');
|
|
15
16
|
}
|
|
16
|
-
await this.explorBot.
|
|
17
|
+
await this.explorBot.agentDriller().drill({
|
|
17
18
|
knowledgePath,
|
|
18
19
|
maxComponents,
|
|
19
20
|
interactive: true,
|
|
@@ -24,7 +25,7 @@ export class DrillCommand extends BaseCommand {
|
|
|
24
25
|
return match ? match[1] : undefined;
|
|
25
26
|
}
|
|
26
27
|
parseMaxArg(args) {
|
|
27
|
-
const match = args.match(/--max\s+(\d+)/);
|
|
28
|
+
const match = args.match(/--max-components\s+(\d+)/);
|
|
28
29
|
return match ? Number.parseInt(match[1], 10) : undefined;
|
|
29
30
|
}
|
|
30
31
|
}
|
|
@@ -8,6 +8,7 @@ export class ExitCommand extends BaseCommand {
|
|
|
8
8
|
description = 'Exit the application';
|
|
9
9
|
aliases = ['quit'];
|
|
10
10
|
async execute(_args) {
|
|
11
|
+
await this.explorBot.printSessionAnalysis();
|
|
11
12
|
await this.explorBot.getExplorer().stop();
|
|
12
13
|
if (Stats.hasActivity()) {
|
|
13
14
|
await new Promise((resolve) => {
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import figureSet from 'figures';
|
|
2
2
|
import { getStyles } from '../ai/planner/styles.js';
|
|
3
3
|
import { outputPath } from '../config.js';
|
|
4
|
+
import { normalizeUrl } from '../state-manager.js';
|
|
4
5
|
import { Stats } from '../stats.js';
|
|
5
6
|
import { getCliName } from "../utils/cli-name.js";
|
|
6
7
|
import { ErrorPageError } from "../utils/error-page.js";
|
|
@@ -9,6 +10,7 @@ import { jsonToTable } from '../utils/markdown-parser.js';
|
|
|
9
10
|
import { printNextSteps, relativeToCwd } from "../utils/next-steps.js";
|
|
10
11
|
import { safeFilename } from "../utils/strings.js";
|
|
11
12
|
import { BaseCommand } from './base-command.js';
|
|
13
|
+
const MAX_SUB_PAGE_ATTEMPTS = 30;
|
|
12
14
|
export class ExploreCommand extends BaseCommand {
|
|
13
15
|
name = 'explore';
|
|
14
16
|
description = 'Start web exploration';
|
|
@@ -24,6 +26,7 @@ export class ExploreCommand extends BaseCommand {
|
|
|
24
26
|
maxTests;
|
|
25
27
|
testsRun = 0;
|
|
26
28
|
completedPlans = [];
|
|
29
|
+
failedSubPages = new Set();
|
|
27
30
|
async execute(args) {
|
|
28
31
|
const { opts, args: remaining } = this.parseArgs(args);
|
|
29
32
|
if (opts.maxTests) {
|
|
@@ -40,10 +43,12 @@ export class ExploreCommand extends BaseCommand {
|
|
|
40
43
|
this.completedPlans.push(mainPlan);
|
|
41
44
|
if (!feature && !this.isLimitReached()) {
|
|
42
45
|
const planner = this.explorBot.agentPlanner();
|
|
43
|
-
|
|
46
|
+
let attempts = 0;
|
|
47
|
+
while (attempts < MAX_SUB_PAGE_ATTEMPTS) {
|
|
48
|
+
attempts++;
|
|
44
49
|
if (this.isLimitReached())
|
|
45
50
|
break;
|
|
46
|
-
const candidates = planner.collectSubPageCandidates(mainPlan, mainUrl || '/');
|
|
51
|
+
const candidates = planner.collectSubPageCandidates(mainPlan, mainUrl || '/').filter((c) => !this.failedSubPages.has(normalizeUrl(c.url)));
|
|
47
52
|
if (candidates.length === 0)
|
|
48
53
|
break;
|
|
49
54
|
const pick = await planner.pickNextSubPage(candidates);
|
|
@@ -59,6 +64,7 @@ export class ExploreCommand extends BaseCommand {
|
|
|
59
64
|
}
|
|
60
65
|
}
|
|
61
66
|
catch (err) {
|
|
67
|
+
this.failedSubPages.add(normalizeUrl(pick.url));
|
|
62
68
|
tag('warning').log(`Sub-page exploration failed: ${err instanceof Error ? err.message : err}`);
|
|
63
69
|
}
|
|
64
70
|
}
|
|
@@ -68,6 +74,7 @@ export class ExploreCommand extends BaseCommand {
|
|
|
68
74
|
await this.explorBot.visit(mainUrl);
|
|
69
75
|
const savedPath = this.explorBot.savePlans(this.completedPlans);
|
|
70
76
|
this.printResults();
|
|
77
|
+
await this.explorBot.printSessionAnalysis();
|
|
71
78
|
this.printNextSteps(savedPath);
|
|
72
79
|
}
|
|
73
80
|
async runAllStyles(pageUrl, feature, parentPlan, completedPlans) {
|
|
@@ -4,7 +4,7 @@ import { Box, Text, useInput } from 'ink';
|
|
|
4
4
|
import React, { useEffect, useState } from 'react';
|
|
5
5
|
import { AddRuleCommand } from '../commands/add-rule-command.js';
|
|
6
6
|
import InputReadline from './InputReadline.js';
|
|
7
|
-
const KNOWN_AGENTS = ['researcher', 'tester', 'planner', 'pilot', 'captain', '
|
|
7
|
+
const KNOWN_AGENTS = ['researcher', 'tester', 'planner', 'pilot', 'captain', 'driller', 'navigator'];
|
|
8
8
|
const AddRule = ({ initialAgent = '', initialName = '', onComplete, onCancel }) => {
|
|
9
9
|
const [agent, setAgent] = useState(initialAgent);
|
|
10
10
|
const [ruleName, setRuleName] = useState(initialName);
|