explorbot 0.1.12 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/explorbot-cli.ts +21 -21
- package/dist/bin/explorbot-cli.js +3 -3
- package/dist/package.json +3 -2
- package/dist/rules/researcher/container-rules.md +2 -0
- package/dist/src/action-result.js +2 -1
- package/dist/src/action.js +0 -6
- package/dist/src/ai/captain.js +0 -2
- package/dist/src/ai/driller.js +1108 -0
- package/dist/src/ai/pilot.js +31 -22
- package/dist/src/ai/rules.js +3 -5
- package/dist/src/ai/session-analyst.js +117 -0
- package/dist/src/ai/tester.js +13 -2
- package/dist/src/commands/base-command.js +6 -6
- package/dist/src/commands/drill-command.js +3 -2
- package/dist/src/commands/exit-command.js +1 -0
- package/dist/src/commands/explore-command.js +1 -0
- package/dist/src/components/AddRule.js +1 -1
- package/dist/src/explorbot.js +48 -8
- package/dist/src/explorer.js +9 -8
- package/dist/src/reporter.js +64 -3
- package/dist/src/state-manager.js +4 -3
- package/dist/src/stats.js +5 -0
- package/dist/src/utils/aria.js +354 -529
- package/dist/src/utils/hooks-runner.js +2 -8
- package/dist/src/utils/html.js +371 -0
- package/dist/src/utils/unique-names.js +12 -1
- package/dist/src/utils/url-matcher.js +6 -1
- package/dist/src/utils/web-element.js +27 -24
- package/dist/src/utils/xpath.js +1 -1
- package/package.json +3 -2
- package/rules/researcher/container-rules.md +2 -0
- package/src/action-result.ts +2 -1
- package/src/action.ts +0 -8
- package/src/ai/captain.ts +0 -2
- package/src/ai/driller.ts +1194 -0
- package/src/ai/pilot.ts +31 -21
- package/src/ai/rules.ts +3 -5
- package/src/ai/session-analyst.ts +133 -0
- package/src/ai/tester.ts +15 -2
- package/src/commands/base-command.ts +6 -6
- package/src/commands/drill-command.ts +3 -2
- package/src/commands/exit-command.ts +1 -0
- package/src/commands/explore-command.ts +1 -0
- package/src/components/AddRule.tsx +1 -1
- package/src/config.ts +4 -0
- package/src/explorbot.ts +55 -10
- package/src/explorer.ts +9 -8
- package/src/reporter.ts +64 -3
- package/src/state-manager.ts +4 -3
- package/src/stats.ts +7 -0
- package/src/utils/aria.ts +367 -537
- package/src/utils/hooks-runner.ts +2 -6
- package/src/utils/html.ts +381 -0
- package/src/utils/unique-names.ts +13 -0
- package/src/utils/url-matcher.ts +5 -1
- package/src/utils/web-element.ts +31 -28
- package/src/utils/xpath.ts +1 -1
- package/dist/src/ai/bosun.js +0 -456
- package/src/ai/bosun.ts +0 -571
package/src/ai/pilot.ts
CHANGED
|
@@ -89,15 +89,16 @@ export class Pilot implements Agent {
|
|
|
89
89
|
const notes = task.notesToString() || 'No notes recorded.';
|
|
90
90
|
|
|
91
91
|
let visualAnalysis = '';
|
|
92
|
+
let screenshotState: ActionResult | null = null;
|
|
92
93
|
if (this.provider.hasVision()) {
|
|
93
94
|
try {
|
|
94
95
|
const action = this.explorer.createAction();
|
|
95
|
-
|
|
96
|
+
screenshotState = await action.caputrePageWithScreenshot();
|
|
96
97
|
if (screenshotState.screenshot) {
|
|
97
98
|
visualAnalysis = (await this.researcher.answerQuestionAboutScreenshot(screenshotState, `Describe current page state relevant to: ${task.scenario}`)) || '';
|
|
98
99
|
}
|
|
99
100
|
} catch {
|
|
100
|
-
|
|
101
|
+
screenshotState = null;
|
|
101
102
|
}
|
|
102
103
|
}
|
|
103
104
|
|
|
@@ -167,26 +168,23 @@ export class Pilot implements Agent {
|
|
|
167
168
|
return false;
|
|
168
169
|
}
|
|
169
170
|
|
|
170
|
-
if (result.requestVerification && navigator) {
|
|
171
|
+
if (result.decision === 'pass' && result.requestVerification && navigator) {
|
|
171
172
|
tag('substep').log(`Pilot requesting verification: ${result.requestVerification}`);
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
if (verifyResult.
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
}
|
|
173
|
+
const verifyResult = await navigator.verifyState(result.requestVerification, currentState).catch(() => null);
|
|
174
|
+
if (verifyResult?.verified) {
|
|
175
|
+
if (verifyResult.assertionSteps?.length) {
|
|
176
|
+
this.explorer.getPlaywrightRecorder().recordVerification(verifyResult.assertionSteps);
|
|
177
|
+
}
|
|
178
|
+
} else {
|
|
179
|
+
let answer: string | null = null;
|
|
180
|
+
if (screenshotState?.screenshot) {
|
|
181
|
+
answer = await this.researcher.answerQuestionAboutScreenshot(screenshotState, `Does the screen confirm: "${result.requestVerification}"? Answer YES or NO only.`);
|
|
182
|
+
}
|
|
183
|
+
if (!(answer || '').trim().toUpperCase().startsWith('YES')) {
|
|
184
|
+
task.addNote(`Pilot: verification failed — ${result.requestVerification}`, TestResult.FAILED);
|
|
185
|
+
task.finish(TestResult.FAILED);
|
|
186
|
+
return false;
|
|
187
187
|
}
|
|
188
|
-
} catch (verifyErr: any) {
|
|
189
|
-
tag('warning').log(`Pilot verification errored: ${verifyErr.message}`);
|
|
190
188
|
}
|
|
191
189
|
}
|
|
192
190
|
|
|
@@ -389,6 +387,8 @@ export class Pilot implements Agent {
|
|
|
389
387
|
- If no verification was done → prefer "continue" with guidance telling tester what to verify.
|
|
390
388
|
- If verify assertion describes a state that was ALREADY TRUE before the test started, the verification proves nothing — reject with "continue".
|
|
391
389
|
|
|
390
|
+
requestVerification — pick assertions DOM can actually express. Some content is not assertable via DOM (iframe text, canvas, custom widgets, Monaco/CodeMirror editors). When the scenario goal lives in such a region, target a STABLE LANDMARK (container element, ARIA role, the parent that wraps the widget) rather than literal text inside it. Your "pass" verdict is honored even if the DOM assertion can't be made — pick the strongest landmark you can.
|
|
391
|
+
|
|
392
392
|
GUIDANCE FIELD: When decision is "continue", you MUST provide "guidance" — a specific actionable instruction:
|
|
393
393
|
- If evidence is insufficient: tell tester to verify with see()/verify(), specify WHAT to check
|
|
394
394
|
- If approach was wrong: tell tester to try a different method, suggest which one
|
|
@@ -470,7 +470,7 @@ export class Pilot implements Agent {
|
|
|
470
470
|
);
|
|
471
471
|
}
|
|
472
472
|
|
|
473
|
-
async reviewNewPage(task: Test, currentState: ActionResult): Promise<string> {
|
|
473
|
+
async reviewNewPage(task: Test, currentState: ActionResult, testerConversation: Conversation): Promise<string> {
|
|
474
474
|
if (!this.conversation) return '';
|
|
475
475
|
|
|
476
476
|
tag('substep').log('Pilot reviewing new page...');
|
|
@@ -481,8 +481,14 @@ export class Pilot implements Agent {
|
|
|
481
481
|
if (!pageSummary) return '';
|
|
482
482
|
|
|
483
483
|
const stateContext = this.buildStateContext(currentState);
|
|
484
|
+
const toolCalls = testerConversation
|
|
485
|
+
.getToolExecutions()
|
|
486
|
+
.filter((t: any) => t.wasSuccessful)
|
|
487
|
+
.slice(-this.stepsToReview);
|
|
488
|
+
const actionsContext = this.formatActions(toolCalls);
|
|
484
489
|
|
|
485
490
|
this.conversation.cleanupTag('page_summary', '...trimmed...', 1);
|
|
491
|
+
this.conversation.cleanupTag('recent_actions', '...trimmed...', 2);
|
|
486
492
|
|
|
487
493
|
return this.sendToPilot(
|
|
488
494
|
dedent`
|
|
@@ -497,6 +503,10 @@ export class Pilot implements Agent {
|
|
|
497
503
|
${pageSummary}
|
|
498
504
|
</page_summary>
|
|
499
505
|
|
|
506
|
+
<recent_actions>
|
|
507
|
+
${actionsContext || 'None'}
|
|
508
|
+
</recent_actions>
|
|
509
|
+
|
|
500
510
|
${this.formatExpectations(task)}
|
|
501
511
|
|
|
502
512
|
First: evaluate whether this navigation makes sense for the scenario goal. If the page is unrelated, instruct Tester to back() or reset(). Then plan next steps.
|
package/src/ai/rules.ts
CHANGED
|
@@ -282,11 +282,9 @@ export const actionRule = dedent`
|
|
|
282
282
|
I.fillField('Description', 'Hello world', '.editor'); // works for rich text / code editors too
|
|
283
283
|
</example>
|
|
284
284
|
|
|
285
|
-
I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
Do NOT open the editor with raw JS (executeScript, page.evaluate), do NOT dispatch synthetic events,
|
|
289
|
-
do NOT call the editor's own API (monaco.editor.setValue, view.dispatch, etc.) to write text.
|
|
285
|
+
I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors transparently.
|
|
286
|
+
ALWAYS use I.fillField for rich text / code editors — target the editor container or its nearest label/heading with a normal locator.
|
|
287
|
+
If I.fillField does not work, I.type into the focused element is the fallback.
|
|
290
288
|
|
|
291
289
|
### I.type
|
|
292
290
|
|
|
package/src/ai/session-analyst.ts
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import dedent from 'dedent';
|
|
4
|
+
import { outputPath } from '../config.ts';
|
|
5
|
+
import { Stats } from '../stats.ts';
|
|
6
|
+
import type { Test } from '../test-plan.ts';
|
|
7
|
+
import type { Agent } from './agent.ts';
|
|
8
|
+
import type { Provider } from './provider.ts';
|
|
9
|
+
|
|
10
|
+
export class SessionAnalyst implements Agent {
|
|
11
|
+
emoji = '🧐';
|
|
12
|
+
private provider: Provider;
|
|
13
|
+
|
|
14
|
+
constructor(provider: Provider) {
|
|
15
|
+
this.provider = provider;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
async analyze(tests: Test[]): Promise<string> {
|
|
19
|
+
const eligible = tests.filter((t) => t.startTime != null);
|
|
20
|
+
if (eligible.length === 0) return '';
|
|
21
|
+
|
|
22
|
+
const model = this.provider.getModelForAgent('analyst');
|
|
23
|
+
const customPrompt = this.provider.getSystemPromptForAgent('analyst', undefined);
|
|
24
|
+
|
|
25
|
+
const systemPrompt = dedent`
|
|
26
|
+
You write a brief end-of-session report after autonomous exploratory testing. Your reader is a developer who needs to know in seconds: what is broken, how to reproduce it, and which results were inconclusive.
|
|
27
|
+
|
|
28
|
+
Output MARKDOWN. No JSON, no preamble, no closing remarks. Start with the heading.
|
|
29
|
+
|
|
30
|
+
## Clustering
|
|
31
|
+
Group by ROOT CAUSE, not by scenario. If three tests fail for the same dropdown, that is ONE defect listing all three test refs (#3, #5, #7). Do not produce one cluster per test.
|
|
32
|
+
|
|
33
|
+
## Bucketing
|
|
34
|
+
Use the FINAL verdict (the test's \`result\` field) as the starting point. Mid-test errors that the automation recovered from do NOT make a passed test unreliable.
|
|
35
|
+
|
|
36
|
+
- **Defect** — real product bug. \`result: failed\` AND the failure reflects the app misbehaving (not the automation). The automation completed its interactions, the app contradicted the expected outcome. Severity required.
|
|
37
|
+
- **UX issue** — app works but the UI is ambiguous, controls are hidden, or labels are unclear. Worth flagging to design.
|
|
38
|
+
- **Execution issue** — the FINAL verdict is unreliable. Only two cases:
|
|
39
|
+
1. \`result: failed\` AND the failure was automation, environment, or UI/UX (locator missing, timeout, AI loop, navigation stuck, modal trapped focus, no accessible label) — i.e. the test could not conclude whether the app works.
|
|
40
|
+
2. \`result: passed\` AND clear evidence in the log shows the user-visible goal was NOT achieved (no confirmation visible, no state change verified, the assertion was vacuous).
|
|
41
|
+
|
|
42
|
+
A test that passed and shows no contrary evidence belongs in NO section. Do not list passed tests just because the log contains intermediate retries or recovered failures.
|
|
43
|
+
|
|
44
|
+
## Severity emoji (defects only)
|
|
45
|
+
- 🔴 critical or high — core flow blocked, data loss, security
|
|
46
|
+
- 🟡 medium — partial breakage with workaround
|
|
47
|
+
- 🟢 low — cosmetic
|
|
48
|
+
|
|
49
|
+
## Required format
|
|
50
|
+
|
|
51
|
+
# Session Analysis
|
|
52
|
+
|
|
53
|
+
<one sentence: total tests, defect count, headline finding>
|
|
54
|
+
|
|
55
|
+
## Defects
|
|
56
|
+
|
|
57
|
+
### 🔴 <plain-English title of the BUG, not the scenario name>
|
|
58
|
+
Affects: #3, #5, #7
|
|
59
|
+
Reproduce:
|
|
60
|
+
1. <concrete UI step a person can replay>
|
|
61
|
+
2. <next step>
|
|
62
|
+
Evidence: <one short observation from the test log>
|
|
63
|
+
|
|
64
|
+
### 🟡 <next defect>
|
|
65
|
+
...
|
|
66
|
+
|
|
67
|
+
## UX issues
|
|
68
|
+
|
|
69
|
+
- **<title>** — #4
|
|
70
|
+
<one short evidence line>
|
|
71
|
+
|
|
72
|
+
## Execution Issues
|
|
73
|
+
|
|
74
|
+
- **<short test name or scenario phrase>** — <plain-English one-liner: what made the result unreliable>
|
|
75
|
+
- **<…>** — <…>
|
|
76
|
+
|
|
77
|
+
## Rules
|
|
78
|
+
- Defects first, sorted by severity descending. Omit any section that has zero entries.
|
|
79
|
+
- Defect title describes the BUG ("Run-type dropdown does not filter"), never the scenario name.
|
|
80
|
+
- Reproduce steps are concrete UI actions derived from the log: URL + clicks + inputs. Imperative, one short line each.
|
|
81
|
+
- Evidence is the smallest factual observation from notes/steps that supports the claim — what was OBSERVED in the page (HTML, message, missing element). Never quote the test's \`result\` field as evidence; that is a tautology.
|
|
82
|
+
- **Execution Issues** entries must explain what actually went wrong in concrete terms a human understands: "could not find a Submit button after navigation", "page reloaded before the assertion ran", "passed without ever seeing a confirmation message", "marked failed but the new item appears in the list", "modal trapped focus and tests could not click outside", "ARIA tree had no labelled controls". Avoid jargon like "locator failed" without context. Never write category prefixes ("execution:", "false-positive:") — the section header already says it. No emoji on these entries.
|
|
83
|
+
- Do NOT include a passed test in any section unless evidence proves its goal was not achieved. Intermediate retries or recovered errors in the log are not grounds for listing a passed test.
|
|
84
|
+
- No editorialising, no restating the scenario verbatim, no closing summary.
|
|
85
|
+
|
|
86
|
+
${customPrompt || ''}
|
|
87
|
+
`;
|
|
88
|
+
|
|
89
|
+
const userPayload = dedent`
|
|
90
|
+
${eligible.length} tests were executed in this session.
|
|
91
|
+
|
|
92
|
+
${eligible.map((t, i) => this.serializeTest(t, i + 1)).join('\n\n')}
|
|
93
|
+
`;
|
|
94
|
+
|
|
95
|
+
const response = await this.provider.chat(
|
|
96
|
+
[
|
|
97
|
+
{ role: 'system', content: systemPrompt },
|
|
98
|
+
{ role: 'user', content: userPayload },
|
|
99
|
+
],
|
|
100
|
+
model,
|
|
101
|
+
{ agentName: 'analyst' }
|
|
102
|
+
);
|
|
103
|
+
|
|
104
|
+
return (response?.text || '').trim();
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
writeReport(markdown: string): string {
|
|
108
|
+
const filePath = outputPath('reports', `${Stats.sessionLabel()}.md`);
|
|
109
|
+
const dir = path.dirname(filePath);
|
|
110
|
+
if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
|
|
111
|
+
writeFileSync(filePath, markdown);
|
|
112
|
+
return filePath;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
private serializeTest(test: Test, ref: number): string {
|
|
116
|
+
const log = test
|
|
117
|
+
.getLog()
|
|
118
|
+
.slice(-30)
|
|
119
|
+
.map((entry) => ` - [${entry.type}] ${entry.content}`)
|
|
120
|
+
.join('\n');
|
|
121
|
+
|
|
122
|
+
return dedent`
|
|
123
|
+
<test ref="#${ref}">
|
|
124
|
+
url: ${test.startUrl || '/'}
|
|
125
|
+
scenario: ${test.scenario}
|
|
126
|
+
result: ${test.result || 'unknown'}
|
|
127
|
+
expected: ${test.expected.join(' | ') || '(none)'}
|
|
128
|
+
log:
|
|
129
|
+
${log}
|
|
130
|
+
</test>
|
|
131
|
+
`;
|
|
132
|
+
}
|
|
133
|
+
}
|
package/src/ai/tester.ts
CHANGED
|
@@ -268,7 +268,7 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
268
268
|
nextStep += await this.prepareInstructionsForNextStep(task);
|
|
269
269
|
|
|
270
270
|
if (isNewPage && this.pilot) {
|
|
271
|
-
const guidance = await this.pilot.reviewNewPage(task, currentState);
|
|
271
|
+
const guidance = await this.pilot.reviewNewPage(task, currentState, conversation);
|
|
272
272
|
if (guidance) nextStep += `\n\n${guidance}`;
|
|
273
273
|
} else if ((iteration % this.progressCheckInterval === 0 || this.consecutiveFailures >= 3 || this.consecutiveEmptyResults >= 2) && this.pilot) {
|
|
274
274
|
const guidance = await this.pilot.analyzeProgress(task, currentState, conversation);
|
|
@@ -463,6 +463,8 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
463
463
|
|
|
464
464
|
let context = '';
|
|
465
465
|
|
|
466
|
+
const focusArea = detectFocusArea(currentState.ariaSnapshot);
|
|
467
|
+
|
|
466
468
|
const focusedElement = extractFocusedElement(currentState.ariaSnapshot);
|
|
467
469
|
if (focusedElement) {
|
|
468
470
|
const isTextInput = ['textbox', 'combobox', 'searchbox'].includes(focusedElement.role);
|
|
@@ -480,6 +482,18 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
480
482
|
`;
|
|
481
483
|
}
|
|
482
484
|
|
|
485
|
+
if (focusArea.detected) {
|
|
486
|
+
const areaName = focusArea.name ? ` "${focusArea.name}"` : '';
|
|
487
|
+
context += dedent`
|
|
488
|
+
<focus_scope>
|
|
489
|
+
A ${focusArea.type}${areaName} is currently open above the page.
|
|
490
|
+
Scope all interactions to elements inside this ${focusArea.type}.
|
|
491
|
+
Page navigation, filters, and tabs that exist outside it are not actionable while it is open and may share names or roles with elements inside it — prefer the locator inside the ${focusArea.type}.
|
|
492
|
+
Use <page_aria> to confirm the element you target is actually inside the ${focusArea.type}.
|
|
493
|
+
</focus_scope>
|
|
494
|
+
`;
|
|
495
|
+
}
|
|
496
|
+
|
|
483
497
|
if (currentState.isInsideIframe) {
|
|
484
498
|
const iframeInfo = currentState.iframeURL || this.explorer.getCurrentIframeInfo() || 'iframe context active';
|
|
485
499
|
context += dedent`
|
|
@@ -539,7 +553,6 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
539
553
|
return context;
|
|
540
554
|
}
|
|
541
555
|
|
|
542
|
-
const focusArea = detectFocusArea(currentState.ariaSnapshot);
|
|
543
556
|
if (focusArea.detected && focusArea.name && this.pageStateHash && this.pageActionResult) {
|
|
544
557
|
const overlaySection = await this.researcher.researchOverlay(currentState, this.pageActionResult, this.pageStateHash);
|
|
545
558
|
if (overlaySection) {
|
|
package/src/commands/base-command.ts
CHANGED
|
@@ -38,17 +38,17 @@ export abstract class BaseCommand {
|
|
|
38
38
|
printSuggestions(): void {
|
|
39
39
|
if (this.suggestions.length === 0) return;
|
|
40
40
|
const prefix = isInteractive() ? '/' : `${getCliName()} `;
|
|
41
|
-
|
|
42
|
-
|
|
41
|
+
const commandWidth = this.suggestions.reduce((max, s) => (s.command ? Math.max(max, prefix.length + s.command.length) : max), 0);
|
|
42
|
+
const lines = [chalk.bold('Suggested:')];
|
|
43
43
|
for (const { command, hint } of this.suggestions) {
|
|
44
|
-
tag('info').log('');
|
|
45
44
|
if (!command) {
|
|
46
|
-
|
|
45
|
+
lines.push(` ${chalk.dim(hint)}`);
|
|
47
46
|
continue;
|
|
48
47
|
}
|
|
49
|
-
|
|
50
|
-
|
|
48
|
+
const cmd = `${prefix}${command}`.padEnd(commandWidth);
|
|
49
|
+
lines.push(` ${chalk.yellow(cmd)} ${chalk.dim(hint)}`);
|
|
51
50
|
}
|
|
51
|
+
tag('info').log(lines.join('\n'));
|
|
52
52
|
}
|
|
53
53
|
|
|
54
54
|
protected parseArgs(args: string): { opts: Record<string, string | boolean>; args: string[] } {
|
|
package/src/commands/drill-command.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { BaseCommand, type Suggestion } from './base-command.js';
|
|
|
3
3
|
export class DrillCommand extends BaseCommand {
|
|
4
4
|
name = 'drill';
|
|
5
5
|
description = 'Drill all components on current page to learn interactions';
|
|
6
|
+
aliases = ['driller'];
|
|
6
7
|
suggestions: Suggestion[] = [
|
|
7
8
|
{ command: 'research', hint: 'see UI map first' },
|
|
8
9
|
{ command: 'navigate <page>', hint: 'go to another page' },
|
|
@@ -17,7 +18,7 @@ export class DrillCommand extends BaseCommand {
|
|
|
17
18
|
throw new Error('No active page to drill');
|
|
18
19
|
}
|
|
19
20
|
|
|
20
|
-
await this.explorBot.
|
|
21
|
+
await this.explorBot.agentDriller().drill({
|
|
21
22
|
knowledgePath,
|
|
22
23
|
maxComponents,
|
|
23
24
|
interactive: true,
|
|
@@ -30,7 +31,7 @@ export class DrillCommand extends BaseCommand {
|
|
|
30
31
|
}
|
|
31
32
|
|
|
32
33
|
private parseMaxArg(args: string): number | undefined {
|
|
33
|
-
const match = args.match(/--max\s+(\d+)/);
|
|
34
|
+
const match = args.match(/--max-components\s+(\d+)/);
|
|
34
35
|
return match ? Number.parseInt(match[1], 10) : undefined;
|
|
35
36
|
}
|
|
36
37
|
}
|
|
package/src/commands/explore-command.ts
CHANGED
|
@@ -73,6 +73,7 @@ export class ExploreCommand extends BaseCommand {
|
|
|
73
73
|
if (mainUrl) await this.explorBot.visit(mainUrl);
|
|
74
74
|
const savedPath = this.explorBot.savePlans(this.completedPlans);
|
|
75
75
|
this.printResults();
|
|
76
|
+
await this.explorBot.printSessionAnalysis();
|
|
76
77
|
this.printNextSteps(savedPath);
|
|
77
78
|
}
|
|
78
79
|
|
|
package/src/components/AddRule.tsx
CHANGED
|
@@ -5,7 +5,7 @@ import React, { useEffect, useState } from 'react';
|
|
|
5
5
|
import { AddRuleCommand } from '../commands/add-rule-command.js';
|
|
6
6
|
import InputReadline from './InputReadline.js';
|
|
7
7
|
|
|
8
|
-
const KNOWN_AGENTS = ['researcher', 'tester', 'planner', 'pilot', 'captain', '
|
|
8
|
+
const KNOWN_AGENTS = ['researcher', 'tester', 'planner', 'pilot', 'captain', 'driller', 'navigator'];
|
|
9
9
|
|
|
10
10
|
interface AddRuleProps {
|
|
11
11
|
initialAgent?: string;
|
package/src/config.ts
CHANGED
|
@@ -123,6 +123,7 @@ interface AgentsConfig {
|
|
|
123
123
|
researcher?: ResearcherAgentConfig;
|
|
124
124
|
planner?: PlannerAgentConfig;
|
|
125
125
|
pilot?: PilotAgentConfig;
|
|
126
|
+
driller?: AgentConfig;
|
|
126
127
|
'experience-compactor'?: AgentConfig;
|
|
127
128
|
captain?: AgentConfig;
|
|
128
129
|
quartermaster?: AgentConfig;
|
|
@@ -131,6 +132,7 @@ interface AgentsConfig {
|
|
|
131
132
|
chief?: AgentConfig;
|
|
132
133
|
curler?: AgentConfig;
|
|
133
134
|
rerunner?: RerunnerAgentConfig;
|
|
135
|
+
analyst?: AgentConfig;
|
|
134
136
|
}
|
|
135
137
|
|
|
136
138
|
interface AIConfig {
|
|
@@ -179,6 +181,8 @@ interface ActionConfig {
|
|
|
179
181
|
interface ReporterConfig {
|
|
180
182
|
enabled?: boolean;
|
|
181
183
|
html?: boolean;
|
|
184
|
+
markdown?: boolean;
|
|
185
|
+
runGroup?: string | null;
|
|
182
186
|
}
|
|
183
187
|
|
|
184
188
|
type ApiHookFn = (ctx: { headers: Record<string, string>; baseEndpoint: string }) => Promise<Record<string, string> | undefined> | Record<string, string> | undefined;
|
package/src/explorbot.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { existsSync, mkdirSync } from 'node:fs';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { ActionResult } from './action-result.ts';
|
|
4
|
-
import { Bosun } from './ai/bosun.ts';
|
|
5
4
|
import { Captain } from './ai/captain.ts';
|
|
5
|
+
import { Driller } from './ai/driller.ts';
|
|
6
6
|
import { ExperienceCompactor } from './ai/experience-compactor.ts';
|
|
7
7
|
import { Fisherman } from './ai/fisherman.ts';
|
|
8
8
|
import { Historian } from './ai/historian.ts';
|
|
@@ -13,6 +13,7 @@ import { AIProvider } from './ai/provider.ts';
|
|
|
13
13
|
import { Quartermaster } from './ai/quartermaster.ts';
|
|
14
14
|
import { Rerunner } from './ai/rerunner.ts';
|
|
15
15
|
import { Researcher } from './ai/researcher.ts';
|
|
16
|
+
import { SessionAnalyst } from './ai/session-analyst.ts';
|
|
16
17
|
import { Tester } from './ai/tester.ts';
|
|
17
18
|
import { createAgentTools } from './ai/tools.ts';
|
|
18
19
|
import { ApiClient } from './api/api-client.ts';
|
|
@@ -25,8 +26,9 @@ import Explorer from './explorer.ts';
|
|
|
25
26
|
import { KnowledgeTracker } from './knowledge-tracker.ts';
|
|
26
27
|
import { WebPageState } from './state-manager.ts';
|
|
27
28
|
import type { Suite } from './suite.ts';
|
|
28
|
-
import { Plan } from './test-plan.ts';
|
|
29
|
+
import { Plan, type Test } from './test-plan.ts';
|
|
29
30
|
import { setVerboseMode, tag } from './utils/logger.ts';
|
|
31
|
+
import { relativeToCwd } from './utils/next-steps.ts';
|
|
30
32
|
import { sanitizeFilename } from './utils/strings.ts';
|
|
31
33
|
|
|
32
34
|
export interface ExplorBotOptions {
|
|
@@ -55,6 +57,8 @@ export class ExplorBot {
|
|
|
55
57
|
lastPlanError: Error | null = null;
|
|
56
58
|
lastSavedPlanPath: string | null = null;
|
|
57
59
|
private agents: Record<string, any> = {};
|
|
60
|
+
private sessionPlans: Plan[] = [];
|
|
61
|
+
private lastReportedTestCount = 0;
|
|
58
62
|
|
|
59
63
|
constructor(options: ExplorBotOptions = {}) {
|
|
60
64
|
this.options = options;
|
|
@@ -284,15 +288,17 @@ export class ExplorBot {
|
|
|
284
288
|
return this.agents.rerunner;
|
|
285
289
|
}
|
|
286
290
|
|
|
287
|
-
|
|
288
|
-
return (this.agents.
|
|
289
|
-
const researcher = this.agentResearcher();
|
|
291
|
+
agentDriller(): Driller {
|
|
292
|
+
return (this.agents.driller ||= this.createAgent(({ ai, explorer }) => {
|
|
290
293
|
const navigator = this.agentNavigator();
|
|
291
|
-
|
|
292
|
-
return new Bosun(explorer, ai, researcher, navigator, tools);
|
|
294
|
+
return new Driller(explorer, ai, navigator);
|
|
293
295
|
}));
|
|
294
296
|
}
|
|
295
297
|
|
|
298
|
+
agentSessionAnalyst(): SessionAnalyst {
|
|
299
|
+
return (this.agents.sessionAnalyst ||= this.createAgent(({ ai }) => new SessionAnalyst(ai)));
|
|
300
|
+
}
|
|
301
|
+
|
|
296
302
|
agentFisherman(): Fisherman | null {
|
|
297
303
|
const fishermanConfig = this.config.ai?.agents?.fisherman;
|
|
298
304
|
const hasApiConfig = !!this.config.api;
|
|
@@ -365,7 +371,7 @@ export class ExplorBot {
|
|
|
365
371
|
}
|
|
366
372
|
this.lastPlanError = null;
|
|
367
373
|
try {
|
|
368
|
-
this.
|
|
374
|
+
this.setCurrentPlan(await planner.plan(feature, opts.style, opts.extend, opts.completedPlans));
|
|
369
375
|
} catch (err) {
|
|
370
376
|
this.lastPlanError = err instanceof Error ? err : new Error(String(err));
|
|
371
377
|
tag('warning').log(`Planning failed: ${this.lastPlanError.message}`);
|
|
@@ -436,11 +442,50 @@ export class ExplorBot {
|
|
|
436
442
|
throw new Error(`Plan file not found: ${planPath}`);
|
|
437
443
|
}
|
|
438
444
|
|
|
439
|
-
this.
|
|
440
|
-
return this.currentPlan
|
|
445
|
+
this.setCurrentPlan(Plan.fromMarkdown(planPath));
|
|
446
|
+
return this.currentPlan!;
|
|
441
447
|
}
|
|
442
448
|
|
|
443
449
|
setCurrentPlan(plan?: Plan): void {
|
|
444
450
|
this.currentPlan = plan;
|
|
451
|
+
if (plan && !this.sessionPlans.includes(plan)) {
|
|
452
|
+
this.sessionPlans.push(plan);
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
getSessionTests(): Test[] {
|
|
457
|
+
return this.sessionPlans.flatMap((p) => p.tests.filter((t) => t.startTime != null));
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
async printSessionAnalysis(): Promise<void> {
|
|
461
|
+
const analystConfig = this.config.ai?.agents?.analyst;
|
|
462
|
+
if (analystConfig?.enabled === false) return;
|
|
463
|
+
|
|
464
|
+
const tests = this.getSessionTests();
|
|
465
|
+
if (tests.length === 0) return;
|
|
466
|
+
if (tests.length === this.lastReportedTestCount) return;
|
|
467
|
+
|
|
468
|
+
try {
|
|
469
|
+
const markdown = await this.agentSessionAnalyst().analyze(tests);
|
|
470
|
+
if (!markdown) {
|
|
471
|
+
this.lastReportedTestCount = tests.length;
|
|
472
|
+
return;
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
tag('multiline').log(markdown);
|
|
476
|
+
|
|
477
|
+
const filePath = this.agentSessionAnalyst().writeReport(markdown);
|
|
478
|
+
tag('info').log(`Session report saved: ${relativeToCwd(filePath)}`);
|
|
479
|
+
|
|
480
|
+
const reporter = this.explorer?.getReporter();
|
|
481
|
+
if (reporter?.isEnabled()) {
|
|
482
|
+
await reporter.setRunDescription(markdown);
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
this.lastReportedTestCount = tests.length;
|
|
486
|
+
} catch (error) {
|
|
487
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
488
|
+
tag('warning').log(`Session analysis failed: ${message}`);
|
|
489
|
+
}
|
|
445
490
|
}
|
|
446
491
|
}
|
package/src/explorer.ts
CHANGED
|
@@ -19,8 +19,9 @@ import { PlaywrightRecorder } from './playwright-recorder.ts';
|
|
|
19
19
|
import { Reporter } from './reporter.ts';
|
|
20
20
|
import { StateManager } from './state-manager.js';
|
|
21
21
|
import { Test } from './test-plan.ts';
|
|
22
|
+
import { ELEMENT_EXTRACTION_CONFIG, getElementDataExtractorSource } from './utils/html.ts';
|
|
22
23
|
import { createDebug, log, tag } from './utils/logger.js';
|
|
23
|
-
import { WebElement
|
|
24
|
+
import { WebElement } from './utils/web-element.ts';
|
|
24
25
|
|
|
25
26
|
declare global {
|
|
26
27
|
namespace NodeJS {
|
|
@@ -337,11 +338,11 @@ class Explorer {
|
|
|
337
338
|
async getEidxInContainer(containerCss: string | null): Promise<string[]> {
|
|
338
339
|
const page = this.playwrightHelper.page;
|
|
339
340
|
try {
|
|
340
|
-
const selector = containerCss ? `${containerCss} [
|
|
341
|
+
const selector = containerCss ? `${containerCss} [${ELEMENT_EXTRACTION_CONFIG.attrs.eidx}]` : `[${ELEMENT_EXTRACTION_CONFIG.attrs.eidx}]`;
|
|
341
342
|
const elements = await page.locator(selector).all();
|
|
342
343
|
const result: string[] = [];
|
|
343
344
|
for (const el of elements) {
|
|
344
|
-
const attr = await el.getAttribute(
|
|
345
|
+
const attr = await el.getAttribute(ELEMENT_EXTRACTION_CONFIG.attrs.eidx);
|
|
345
346
|
if (attr) result.push(attr);
|
|
346
347
|
}
|
|
347
348
|
return result;
|
|
@@ -359,7 +360,7 @@ class Explorer {
|
|
|
359
360
|
const page = this.playwrightHelper.page;
|
|
360
361
|
const base = container ? page.locator(container) : page;
|
|
361
362
|
const el = locator.startsWith('//') ? base.locator(`xpath=${locator}`) : base.locator(locator);
|
|
362
|
-
return await el.first().getAttribute(
|
|
363
|
+
return await el.first().getAttribute(ELEMENT_EXTRACTION_CONFIG.attrs.eidx);
|
|
363
364
|
} catch (error) {
|
|
364
365
|
if (this.isFatalBrowserError(error)) {
|
|
365
366
|
tag('warning').log(`getEidxByLocator: ${error instanceof Error ? error.message : error}`);
|
|
@@ -751,20 +752,20 @@ export async function annotatePageElements(page: any): Promise<{ ariaSnapshot: s
|
|
|
751
752
|
for (const [role, entries] of byRole) {
|
|
752
753
|
try {
|
|
753
754
|
const rawList = await page.getByRole(role).evaluateAll(
|
|
754
|
-
(domElements: Element[], [data, extractFnStr]: [Array<{ name: string; ref: string }>, string]) => {
|
|
755
|
+
(domElements: Element[], [data, extractFnStr, config]: [Array<{ name: string; ref: string }>, string, typeof ELEMENT_EXTRACTION_CONFIG]) => {
|
|
755
756
|
const extract = new Function(`return ${extractFnStr}`)() as (el: Element) => any;
|
|
756
757
|
const results: any[] = [];
|
|
757
758
|
let ariaIdx = 0;
|
|
758
759
|
for (const el of domElements) {
|
|
759
760
|
if (ariaIdx >= data.length) break;
|
|
760
|
-
el.setAttribute(
|
|
761
|
-
const elData = extract(el);
|
|
761
|
+
el.setAttribute(config.attrs.eidx, data[ariaIdx].ref);
|
|
762
|
+
const elData = extract(el, config);
|
|
762
763
|
if (elData) results.push(elData);
|
|
763
764
|
ariaIdx++;
|
|
764
765
|
}
|
|
765
766
|
return results;
|
|
766
767
|
},
|
|
767
|
-
[entries,
|
|
768
|
+
[entries, getElementDataExtractorSource(), ELEMENT_EXTRACTION_CONFIG]
|
|
768
769
|
);
|
|
769
770
|
for (const raw of rawList) {
|
|
770
771
|
elements.push(WebElement.fromRawData(raw, role));
|