explorbot 0.1.12 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/bin/explorbot-cli.ts +21 -21
  2. package/dist/bin/explorbot-cli.js +3 -3
  3. package/dist/package.json +3 -2
  4. package/dist/rules/researcher/container-rules.md +2 -0
  5. package/dist/src/action-result.js +2 -1
  6. package/dist/src/action.js +0 -6
  7. package/dist/src/ai/captain.js +0 -2
  8. package/dist/src/ai/driller.js +1108 -0
  9. package/dist/src/ai/pilot.js +31 -22
  10. package/dist/src/ai/rules.js +3 -5
  11. package/dist/src/ai/session-analyst.js +117 -0
  12. package/dist/src/ai/tester.js +13 -2
  13. package/dist/src/commands/base-command.js +6 -6
  14. package/dist/src/commands/drill-command.js +3 -2
  15. package/dist/src/commands/exit-command.js +1 -0
  16. package/dist/src/commands/explore-command.js +1 -0
  17. package/dist/src/components/AddRule.js +1 -1
  18. package/dist/src/explorbot.js +48 -8
  19. package/dist/src/explorer.js +9 -8
  20. package/dist/src/reporter.js +64 -3
  21. package/dist/src/state-manager.js +4 -3
  22. package/dist/src/stats.js +5 -0
  23. package/dist/src/utils/aria.js +354 -529
  24. package/dist/src/utils/hooks-runner.js +2 -8
  25. package/dist/src/utils/html.js +371 -0
  26. package/dist/src/utils/unique-names.js +12 -1
  27. package/dist/src/utils/url-matcher.js +6 -1
  28. package/dist/src/utils/web-element.js +27 -24
  29. package/dist/src/utils/xpath.js +1 -1
  30. package/package.json +3 -2
  31. package/rules/researcher/container-rules.md +2 -0
  32. package/src/action-result.ts +2 -1
  33. package/src/action.ts +0 -8
  34. package/src/ai/captain.ts +0 -2
  35. package/src/ai/driller.ts +1194 -0
  36. package/src/ai/pilot.ts +31 -21
  37. package/src/ai/rules.ts +3 -5
  38. package/src/ai/session-analyst.ts +133 -0
  39. package/src/ai/tester.ts +15 -2
  40. package/src/commands/base-command.ts +6 -6
  41. package/src/commands/drill-command.ts +3 -2
  42. package/src/commands/exit-command.ts +1 -0
  43. package/src/commands/explore-command.ts +1 -0
  44. package/src/components/AddRule.tsx +1 -1
  45. package/src/config.ts +4 -0
  46. package/src/explorbot.ts +55 -10
  47. package/src/explorer.ts +9 -8
  48. package/src/reporter.ts +64 -3
  49. package/src/state-manager.ts +4 -3
  50. package/src/stats.ts +7 -0
  51. package/src/utils/aria.ts +367 -537
  52. package/src/utils/hooks-runner.ts +2 -6
  53. package/src/utils/html.ts +381 -0
  54. package/src/utils/unique-names.ts +13 -0
  55. package/src/utils/url-matcher.ts +5 -1
  56. package/src/utils/web-element.ts +31 -28
  57. package/src/utils/xpath.ts +1 -1
  58. package/dist/src/ai/bosun.js +0 -456
  59. package/src/ai/bosun.ts +0 -571
package/src/ai/pilot.ts CHANGED
@@ -89,15 +89,16 @@ export class Pilot implements Agent {
89
89
  const notes = task.notesToString() || 'No notes recorded.';
90
90
 
91
91
  let visualAnalysis = '';
92
+ let screenshotState: ActionResult | null = null;
92
93
  if (this.provider.hasVision()) {
93
94
  try {
94
95
  const action = this.explorer.createAction();
95
- const screenshotState = await action.caputrePageWithScreenshot();
96
+ screenshotState = await action.caputrePageWithScreenshot();
96
97
  if (screenshotState.screenshot) {
97
98
  visualAnalysis = (await this.researcher.answerQuestionAboutScreenshot(screenshotState, `Describe current page state relevant to: ${task.scenario}`)) || '';
98
99
  }
99
100
  } catch {
100
- // vision not available, continue without
101
+ screenshotState = null;
101
102
  }
102
103
  }
103
104
 
@@ -167,26 +168,23 @@ export class Pilot implements Agent {
167
168
  return false;
168
169
  }
169
170
 
170
- if (result.requestVerification && navigator) {
171
+ if (result.decision === 'pass' && result.requestVerification && navigator) {
171
172
  tag('substep').log(`Pilot requesting verification: ${result.requestVerification}`);
172
- try {
173
- const verifyResult = await navigator.verifyState(result.requestVerification, currentState);
174
- if (verifyResult.verified) {
175
- if (verifyResult.assertionSteps?.length) {
176
- this.explorer.getPlaywrightRecorder().recordVerification(verifyResult.assertionSteps);
177
- }
178
- tag('substep').log(`Pilot verified: ${result.requestVerification}`);
179
- } else {
180
- tag('substep').log(`Pilot verification failed: ${result.requestVerification}`);
181
- if (result.decision === 'pass') {
182
- const flipMessage = `Verification "${result.requestVerification}" did not match the page. Adjust approach and re-verify before finishing.`;
183
- result.decision = 'continue';
184
- result.reason = flipMessage;
185
- result.guidance = result.guidance ?? flipMessage;
186
- }
173
+ const verifyResult = await navigator.verifyState(result.requestVerification, currentState).catch(() => null);
174
+ if (verifyResult?.verified) {
175
+ if (verifyResult.assertionSteps?.length) {
176
+ this.explorer.getPlaywrightRecorder().recordVerification(verifyResult.assertionSteps);
177
+ }
178
+ } else {
179
+ let answer: string | null = null;
180
+ if (screenshotState?.screenshot) {
181
+ answer = await this.researcher.answerQuestionAboutScreenshot(screenshotState, `Does the screen confirm: "${result.requestVerification}"? Answer YES or NO only.`);
182
+ }
183
+ if (!(answer || '').trim().toUpperCase().startsWith('YES')) {
184
+ task.addNote(`Pilot: verification failed — ${result.requestVerification}`, TestResult.FAILED);
185
+ task.finish(TestResult.FAILED);
186
+ return false;
187
187
  }
188
- } catch (verifyErr: any) {
189
- tag('warning').log(`Pilot verification errored: ${verifyErr.message}`);
190
188
  }
191
189
  }
192
190
 
@@ -389,6 +387,8 @@ export class Pilot implements Agent {
389
387
  - If no verification was done → prefer "continue" with guidance telling tester what to verify.
390
388
  - If verify assertion describes a state that was ALREADY TRUE before the test started, the verification proves nothing — reject with "continue".
391
389
 
390
+ requestVerification — pick assertions DOM can actually express. Some content is not assertable via DOM (iframe text, canvas, custom widgets, Monaco/CodeMirror editors). When the scenario goal lives in such a region, target a STABLE LANDMARK (container element, ARIA role, the parent that wraps the widget) rather than literal text inside it. Your "pass" verdict is honored even if the DOM assertion can't be made — pick the strongest landmark you can.
391
+
392
392
  GUIDANCE FIELD: When decision is "continue", you MUST provide "guidance" — a specific actionable instruction:
393
393
  - If evidence is insufficient: tell tester to verify with see()/verify(), specify WHAT to check
394
394
  - If approach was wrong: tell tester to try a different method, suggest which one
@@ -470,7 +470,7 @@ export class Pilot implements Agent {
470
470
  );
471
471
  }
472
472
 
473
- async reviewNewPage(task: Test, currentState: ActionResult): Promise<string> {
473
+ async reviewNewPage(task: Test, currentState: ActionResult, testerConversation: Conversation): Promise<string> {
474
474
  if (!this.conversation) return '';
475
475
 
476
476
  tag('substep').log('Pilot reviewing new page...');
@@ -481,8 +481,14 @@ export class Pilot implements Agent {
481
481
  if (!pageSummary) return '';
482
482
 
483
483
  const stateContext = this.buildStateContext(currentState);
484
+ const toolCalls = testerConversation
485
+ .getToolExecutions()
486
+ .filter((t: any) => t.wasSuccessful)
487
+ .slice(-this.stepsToReview);
488
+ const actionsContext = this.formatActions(toolCalls);
484
489
 
485
490
  this.conversation.cleanupTag('page_summary', '...trimmed...', 1);
491
+ this.conversation.cleanupTag('recent_actions', '...trimmed...', 2);
486
492
 
487
493
  return this.sendToPilot(
488
494
  dedent`
@@ -497,6 +503,10 @@ export class Pilot implements Agent {
497
503
  ${pageSummary}
498
504
  </page_summary>
499
505
 
506
+ <recent_actions>
507
+ ${actionsContext || 'None'}
508
+ </recent_actions>
509
+
500
510
  ${this.formatExpectations(task)}
501
511
 
502
512
  First: evaluate whether this navigation makes sense for the scenario goal. If the page is unrelated, instruct Tester to back() or reset(). Then plan next steps.
package/src/ai/rules.ts CHANGED
@@ -282,11 +282,9 @@ export const actionRule = dedent`
282
282
  I.fillField('Description', 'Hello world', '.editor'); // works for rich text / code editors too
283
283
  </example>
284
284
 
285
- I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors
286
- (Monaco, ProseMirror, CodeMirror, TipTap, Quill, Draft.js, Slate, etc.) transparently.
287
- ALWAYS use I.fillField for rich editors target the editor container or its nearest label/heading with a normal locator.
288
- Do NOT open the editor with raw JS (executeScript, page.evaluate), do NOT dispatch synthetic events,
289
- do NOT call the editor's own API (monaco.editor.setValue, view.dispatch, etc.) to write text.
285
+ I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors transparently.
286
+ ALWAYS use I.fillField for rich text / code editors — target the editor container or its nearest label/heading with a normal locator.
287
+ If I.fillField does not work, I.type into the focused element is the fallback.
290
288
 
291
289
  ### I.type
292
290
 
package/src/ai/session-analyst.ts ADDED
@@ -0,0 +1,133 @@
1
+ import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
2
+ import path from 'node:path';
3
+ import dedent from 'dedent';
4
+ import { outputPath } from '../config.ts';
5
+ import { Stats } from '../stats.ts';
6
+ import type { Test } from '../test-plan.ts';
7
+ import type { Agent } from './agent.ts';
8
+ import type { Provider } from './provider.ts';
9
+
10
+ export class SessionAnalyst implements Agent {
11
+ emoji = '🧐';
12
+ private provider: Provider;
13
+
14
+ constructor(provider: Provider) {
15
+ this.provider = provider;
16
+ }
17
+
18
+ async analyze(tests: Test[]): Promise<string> {
19
+ const eligible = tests.filter((t) => t.startTime != null);
20
+ if (eligible.length === 0) return '';
21
+
22
+ const model = this.provider.getModelForAgent('analyst');
23
+ const customPrompt = this.provider.getSystemPromptForAgent('analyst', undefined);
24
+
25
+ const systemPrompt = dedent`
26
+ You write a brief end-of-session report after autonomous exploratory testing. Your reader is a developer who needs to know in seconds: what is broken, how to reproduce it, and which results were inconclusive.
27
+
28
+ Output MARKDOWN. No JSON, no preamble, no closing remarks. Start with the heading.
29
+
30
+ ## Clustering
31
+ Group by ROOT CAUSE, not by scenario. If three tests fail for the same dropdown, that is ONE defect listing all three test refs (#3, #5, #7). Do not produce one cluster per test.
32
+
33
+ ## Bucketing
34
+ Use the FINAL verdict (the test's \`result\` field) as the starting point. Mid-test errors that the automation recovered from do NOT make a passed test unreliable.
35
+
36
+ - **Defect** — real product bug. \`result: failed\` AND the failure reflects the app misbehaving (not the automation). The automation completed its interactions, the app contradicted the expected outcome. Severity required.
37
+ - **UX issue** — app works but the UI is ambiguous, controls are hidden, or labels are unclear. Worth flagging to design.
38
+ - **Execution issue** — the FINAL verdict is unreliable. Only two cases:
39
+ 1. \`result: failed\` AND the failure was automation, environment, or UI/UX (locator missing, timeout, AI loop, navigation stuck, modal trapped focus, no accessible label) — i.e. the test could not conclude whether the app works.
40
+ 2. \`result: passed\` AND clear evidence in the log shows the user-visible goal was NOT achieved (no confirmation visible, no state change verified, the assertion was vacuous).
41
+
42
+ A test that passed and shows no contrary evidence belongs in NO section. Do not list passed tests just because the log contains intermediate retries or recovered failures.
43
+
44
+ ## Severity emoji (defects only)
45
+ - 🔴 critical or high — core flow blocked, data loss, security
46
+ - 🟡 medium — partial breakage with workaround
47
+ - 🟢 low — cosmetic
48
+
49
+ ## Required format
50
+
51
+ # Session Analysis
52
+
53
+ <one sentence: total tests, defect count, headline finding>
54
+
55
+ ## Defects
56
+
57
+ ### 🔴 <plain-English title of the BUG, not the scenario name>
58
+ Affects: #3, #5, #7
59
+ Reproduce:
60
+ 1. <concrete UI step a person can replay>
61
+ 2. <next step>
62
+ Evidence: <one short observation from the test log>
63
+
64
+ ### 🟡 <next defect>
65
+ ...
66
+
67
+ ## UX issues
68
+
69
+ - **<title>** — #4
70
+ <one short evidence line>
71
+
72
+ ## Execution Issues
73
+
74
+ - **<short test name or scenario phrase>** — <plain-English one-liner: what made the result unreliable>
75
+ - **<…>** — <…>
76
+
77
+ ## Rules
78
+ - Defects first, sorted by severity descending. Omit any section that has zero entries.
79
+ - Defect title describes the BUG ("Run-type dropdown does not filter"), never the scenario name.
80
+ - Reproduce steps are concrete UI actions derived from the log: URL + clicks + inputs. Imperative, one short line each.
81
+ - Evidence is the smallest factual observation from notes/steps that supports the claim — what was OBSERVED in the page (HTML, message, missing element). Never quote the test's \`result\` field as evidence; that is a tautology.
82
+ - **Execution Issues** entries must explain what actually went wrong in concrete terms a human understands: "could not find a Submit button after navigation", "page reloaded before the assertion ran", "passed without ever seeing a confirmation message", "marked failed but the new item appears in the list", "modal trapped focus and tests could not click outside", "ARIA tree had no labelled controls". Avoid jargon like "locator failed" without context. Never write category prefixes ("execution:", "false-positive:") — the section header already says it. No emoji on these entries.
83
+ - Do NOT include a passed test in any section unless evidence proves its goal was not achieved. Intermediate retries or recovered errors in the log are not grounds for listing a passed test.
84
+ - No editorialising, no restating the scenario verbatim, no closing summary.
85
+
86
+ ${customPrompt || ''}
87
+ `;
88
+
89
+ const userPayload = dedent`
90
+ ${eligible.length} tests were executed in this session.
91
+
92
+ ${eligible.map((t, i) => this.serializeTest(t, i + 1)).join('\n\n')}
93
+ `;
94
+
95
+ const response = await this.provider.chat(
96
+ [
97
+ { role: 'system', content: systemPrompt },
98
+ { role: 'user', content: userPayload },
99
+ ],
100
+ model,
101
+ { agentName: 'analyst' }
102
+ );
103
+
104
+ return (response?.text || '').trim();
105
+ }
106
+
107
+ writeReport(markdown: string): string {
108
+ const filePath = outputPath('reports', `${Stats.sessionLabel()}.md`);
109
+ const dir = path.dirname(filePath);
110
+ if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
111
+ writeFileSync(filePath, markdown);
112
+ return filePath;
113
+ }
114
+
115
+ private serializeTest(test: Test, ref: number): string {
116
+ const log = test
117
+ .getLog()
118
+ .slice(-30)
119
+ .map((entry) => ` - [${entry.type}] ${entry.content}`)
120
+ .join('\n');
121
+
122
+ return dedent`
123
+ <test ref="#${ref}">
124
+ url: ${test.startUrl || '/'}
125
+ scenario: ${test.scenario}
126
+ result: ${test.result || 'unknown'}
127
+ expected: ${test.expected.join(' | ') || '(none)'}
128
+ log:
129
+ ${log}
130
+ </test>
131
+ `;
132
+ }
133
+ }
package/src/ai/tester.ts CHANGED
@@ -268,7 +268,7 @@ export class Tester extends TaskAgent implements Agent {
268
268
  nextStep += await this.prepareInstructionsForNextStep(task);
269
269
 
270
270
  if (isNewPage && this.pilot) {
271
- const guidance = await this.pilot.reviewNewPage(task, currentState);
271
+ const guidance = await this.pilot.reviewNewPage(task, currentState, conversation);
272
272
  if (guidance) nextStep += `\n\n${guidance}`;
273
273
  } else if ((iteration % this.progressCheckInterval === 0 || this.consecutiveFailures >= 3 || this.consecutiveEmptyResults >= 2) && this.pilot) {
274
274
  const guidance = await this.pilot.analyzeProgress(task, currentState, conversation);
@@ -463,6 +463,8 @@ export class Tester extends TaskAgent implements Agent {
463
463
 
464
464
  let context = '';
465
465
 
466
+ const focusArea = detectFocusArea(currentState.ariaSnapshot);
467
+
466
468
  const focusedElement = extractFocusedElement(currentState.ariaSnapshot);
467
469
  if (focusedElement) {
468
470
  const isTextInput = ['textbox', 'combobox', 'searchbox'].includes(focusedElement.role);
@@ -480,6 +482,18 @@ export class Tester extends TaskAgent implements Agent {
480
482
  `;
481
483
  }
482
484
 
485
+ if (focusArea.detected) {
486
+ const areaName = focusArea.name ? ` "${focusArea.name}"` : '';
487
+ context += dedent`
488
+ <focus_scope>
489
+ A ${focusArea.type}${areaName} is currently open above the page.
490
+ Scope all interactions to elements inside this ${focusArea.type}.
491
+ Page navigation, filters, and tabs that exist outside it are not actionable while it is open and may share names or roles with elements inside it — prefer the locator inside the ${focusArea.type}.
492
+ Use <page_aria> to confirm the element you target is actually inside the ${focusArea.type}.
493
+ </focus_scope>
494
+ `;
495
+ }
496
+
483
497
  if (currentState.isInsideIframe) {
484
498
  const iframeInfo = currentState.iframeURL || this.explorer.getCurrentIframeInfo() || 'iframe context active';
485
499
  context += dedent`
@@ -539,7 +553,6 @@ export class Tester extends TaskAgent implements Agent {
539
553
  return context;
540
554
  }
541
555
 
542
- const focusArea = detectFocusArea(currentState.ariaSnapshot);
543
556
  if (focusArea.detected && focusArea.name && this.pageStateHash && this.pageActionResult) {
544
557
  const overlaySection = await this.researcher.researchOverlay(currentState, this.pageActionResult, this.pageStateHash);
545
558
  if (overlaySection) {
package/src/commands/base-command.ts CHANGED
@@ -38,17 +38,17 @@ export abstract class BaseCommand {
38
38
  printSuggestions(): void {
39
39
  if (this.suggestions.length === 0) return;
40
40
  const prefix = isInteractive() ? '/' : `${getCliName()} `;
41
- tag('info').log('');
42
- tag('info').log(chalk.bold('Suggested:'));
41
+ const commandWidth = this.suggestions.reduce((max, s) => (s.command ? Math.max(max, prefix.length + s.command.length) : max), 0);
42
+ const lines = [chalk.bold('Suggested:')];
43
43
  for (const { command, hint } of this.suggestions) {
44
- tag('info').log('');
45
44
  if (!command) {
46
- tag('info').log(chalk.dim(hint));
45
+ lines.push(` ${chalk.dim(hint)}`);
47
46
  continue;
48
47
  }
49
- tag('info').log(chalk.dim(`${hint}:`));
50
- tag('info').log(` ${chalk.yellow(`${prefix}${command}`)}`);
48
+ const cmd = `${prefix}${command}`.padEnd(commandWidth);
49
+ lines.push(` ${chalk.yellow(cmd)} ${chalk.dim(hint)}`);
51
50
  }
51
+ tag('info').log(lines.join('\n'));
52
52
  }
53
53
 
54
54
  protected parseArgs(args: string): { opts: Record<string, string | boolean>; args: string[] } {
package/src/commands/drill-command.ts CHANGED
@@ -3,6 +3,7 @@ import { BaseCommand, type Suggestion } from './base-command.js';
3
3
  export class DrillCommand extends BaseCommand {
4
4
  name = 'drill';
5
5
  description = 'Drill all components on current page to learn interactions';
6
+ aliases = ['driller'];
6
7
  suggestions: Suggestion[] = [
7
8
  { command: 'research', hint: 'see UI map first' },
8
9
  { command: 'navigate <page>', hint: 'go to another page' },
@@ -17,7 +18,7 @@ export class DrillCommand extends BaseCommand {
17
18
  throw new Error('No active page to drill');
18
19
  }
19
20
 
20
- await this.explorBot.agentBosun().drill({
21
+ await this.explorBot.agentDriller().drill({
21
22
  knowledgePath,
22
23
  maxComponents,
23
24
  interactive: true,
@@ -30,7 +31,7 @@ export class DrillCommand extends BaseCommand {
30
31
  }
31
32
 
32
33
  private parseMaxArg(args: string): number | undefined {
33
- const match = args.match(/--max\s+(\d+)/);
34
+ const match = args.match(/--max-components\s+(\d+)/);
34
35
  return match ? Number.parseInt(match[1], 10) : undefined;
35
36
  }
36
37
  }
package/src/commands/exit-command.ts CHANGED
@@ -10,6 +10,7 @@ export class ExitCommand extends BaseCommand {
10
10
  aliases = ['quit'];
11
11
 
12
12
  async execute(_args: string): Promise<void> {
13
+ await this.explorBot.printSessionAnalysis();
13
14
  await this.explorBot.getExplorer().stop();
14
15
 
15
16
  if (Stats.hasActivity()) {
package/src/commands/explore-command.ts CHANGED
@@ -73,6 +73,7 @@ export class ExploreCommand extends BaseCommand {
73
73
  if (mainUrl) await this.explorBot.visit(mainUrl);
74
74
  const savedPath = this.explorBot.savePlans(this.completedPlans);
75
75
  this.printResults();
76
+ await this.explorBot.printSessionAnalysis();
76
77
  this.printNextSteps(savedPath);
77
78
  }
78
79
 
package/src/components/AddRule.tsx CHANGED
@@ -5,7 +5,7 @@ import React, { useEffect, useState } from 'react';
5
5
  import { AddRuleCommand } from '../commands/add-rule-command.js';
6
6
  import InputReadline from './InputReadline.js';
7
7
 
8
- const KNOWN_AGENTS = ['researcher', 'tester', 'planner', 'pilot', 'captain', 'bosun', 'navigator'];
8
+ const KNOWN_AGENTS = ['researcher', 'tester', 'planner', 'pilot', 'captain', 'driller', 'navigator'];
9
9
 
10
10
  interface AddRuleProps {
11
11
  initialAgent?: string;
package/src/config.ts CHANGED
@@ -123,6 +123,7 @@ interface AgentsConfig {
123
123
  researcher?: ResearcherAgentConfig;
124
124
  planner?: PlannerAgentConfig;
125
125
  pilot?: PilotAgentConfig;
126
+ driller?: AgentConfig;
126
127
  'experience-compactor'?: AgentConfig;
127
128
  captain?: AgentConfig;
128
129
  quartermaster?: AgentConfig;
@@ -131,6 +132,7 @@ interface AgentsConfig {
131
132
  chief?: AgentConfig;
132
133
  curler?: AgentConfig;
133
134
  rerunner?: RerunnerAgentConfig;
135
+ analyst?: AgentConfig;
134
136
  }
135
137
 
136
138
  interface AIConfig {
@@ -179,6 +181,8 @@ interface ActionConfig {
179
181
  interface ReporterConfig {
180
182
  enabled?: boolean;
181
183
  html?: boolean;
184
+ markdown?: boolean;
185
+ runGroup?: string | null;
182
186
  }
183
187
 
184
188
  type ApiHookFn = (ctx: { headers: Record<string, string>; baseEndpoint: string }) => Promise<Record<string, string> | undefined> | Record<string, string> | undefined;
package/src/explorbot.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  import { existsSync, mkdirSync } from 'node:fs';
2
2
  import path from 'node:path';
3
3
  import { ActionResult } from './action-result.ts';
4
- import { Bosun } from './ai/bosun.ts';
5
4
  import { Captain } from './ai/captain.ts';
5
+ import { Driller } from './ai/driller.ts';
6
6
  import { ExperienceCompactor } from './ai/experience-compactor.ts';
7
7
  import { Fisherman } from './ai/fisherman.ts';
8
8
  import { Historian } from './ai/historian.ts';
@@ -13,6 +13,7 @@ import { AIProvider } from './ai/provider.ts';
13
13
  import { Quartermaster } from './ai/quartermaster.ts';
14
14
  import { Rerunner } from './ai/rerunner.ts';
15
15
  import { Researcher } from './ai/researcher.ts';
16
+ import { SessionAnalyst } from './ai/session-analyst.ts';
16
17
  import { Tester } from './ai/tester.ts';
17
18
  import { createAgentTools } from './ai/tools.ts';
18
19
  import { ApiClient } from './api/api-client.ts';
@@ -25,8 +26,9 @@ import Explorer from './explorer.ts';
25
26
  import { KnowledgeTracker } from './knowledge-tracker.ts';
26
27
  import { WebPageState } from './state-manager.ts';
27
28
  import type { Suite } from './suite.ts';
28
- import { Plan } from './test-plan.ts';
29
+ import { Plan, type Test } from './test-plan.ts';
29
30
  import { setVerboseMode, tag } from './utils/logger.ts';
31
+ import { relativeToCwd } from './utils/next-steps.ts';
30
32
  import { sanitizeFilename } from './utils/strings.ts';
31
33
 
32
34
  export interface ExplorBotOptions {
@@ -55,6 +57,8 @@ export class ExplorBot {
55
57
  lastPlanError: Error | null = null;
56
58
  lastSavedPlanPath: string | null = null;
57
59
  private agents: Record<string, any> = {};
60
+ private sessionPlans: Plan[] = [];
61
+ private lastReportedTestCount = 0;
58
62
 
59
63
  constructor(options: ExplorBotOptions = {}) {
60
64
  this.options = options;
@@ -284,15 +288,17 @@ export class ExplorBot {
284
288
  return this.agents.rerunner;
285
289
  }
286
290
 
287
- agentBosun(): Bosun {
288
- return (this.agents.bosun ||= this.createAgent(({ ai, explorer }) => {
289
- const researcher = this.agentResearcher();
291
+ agentDriller(): Driller {
292
+ return (this.agents.driller ||= this.createAgent(({ ai, explorer }) => {
290
293
  const navigator = this.agentNavigator();
291
- const tools = createAgentTools({ explorer, researcher, navigator });
292
- return new Bosun(explorer, ai, researcher, navigator, tools);
294
+ return new Driller(explorer, ai, navigator);
293
295
  }));
294
296
  }
295
297
 
298
+ agentSessionAnalyst(): SessionAnalyst {
299
+ return (this.agents.sessionAnalyst ||= this.createAgent(({ ai }) => new SessionAnalyst(ai)));
300
+ }
301
+
296
302
  agentFisherman(): Fisherman | null {
297
303
  const fishermanConfig = this.config.ai?.agents?.fisherman;
298
304
  const hasApiConfig = !!this.config.api;
@@ -365,7 +371,7 @@ export class ExplorBot {
365
371
  }
366
372
  this.lastPlanError = null;
367
373
  try {
368
- this.currentPlan = await planner.plan(feature, opts.style, opts.extend, opts.completedPlans);
374
+ this.setCurrentPlan(await planner.plan(feature, opts.style, opts.extend, opts.completedPlans));
369
375
  } catch (err) {
370
376
  this.lastPlanError = err instanceof Error ? err : new Error(String(err));
371
377
  tag('warning').log(`Planning failed: ${this.lastPlanError.message}`);
@@ -436,11 +442,50 @@ export class ExplorBot {
436
442
  throw new Error(`Plan file not found: ${planPath}`);
437
443
  }
438
444
 
439
- this.currentPlan = Plan.fromMarkdown(planPath);
440
- return this.currentPlan;
445
+ this.setCurrentPlan(Plan.fromMarkdown(planPath));
446
+ return this.currentPlan!;
441
447
  }
442
448
 
443
449
  setCurrentPlan(plan?: Plan): void {
444
450
  this.currentPlan = plan;
451
+ if (plan && !this.sessionPlans.includes(plan)) {
452
+ this.sessionPlans.push(plan);
453
+ }
454
+ }
455
+
456
+ getSessionTests(): Test[] {
457
+ return this.sessionPlans.flatMap((p) => p.tests.filter((t) => t.startTime != null));
458
+ }
459
+
460
+ async printSessionAnalysis(): Promise<void> {
461
+ const analystConfig = this.config.ai?.agents?.analyst;
462
+ if (analystConfig?.enabled === false) return;
463
+
464
+ const tests = this.getSessionTests();
465
+ if (tests.length === 0) return;
466
+ if (tests.length === this.lastReportedTestCount) return;
467
+
468
+ try {
469
+ const markdown = await this.agentSessionAnalyst().analyze(tests);
470
+ if (!markdown) {
471
+ this.lastReportedTestCount = tests.length;
472
+ return;
473
+ }
474
+
475
+ tag('multiline').log(markdown);
476
+
477
+ const filePath = this.agentSessionAnalyst().writeReport(markdown);
478
+ tag('info').log(`Session report saved: ${relativeToCwd(filePath)}`);
479
+
480
+ const reporter = this.explorer?.getReporter();
481
+ if (reporter?.isEnabled()) {
482
+ await reporter.setRunDescription(markdown);
483
+ }
484
+
485
+ this.lastReportedTestCount = tests.length;
486
+ } catch (error) {
487
+ const message = error instanceof Error ? error.message : String(error);
488
+ tag('warning').log(`Session analysis failed: ${message}`);
489
+ }
445
490
  }
446
491
  }
package/src/explorer.ts CHANGED
@@ -19,8 +19,9 @@ import { PlaywrightRecorder } from './playwright-recorder.ts';
19
19
  import { Reporter } from './reporter.ts';
20
20
  import { StateManager } from './state-manager.js';
21
21
  import { Test } from './test-plan.ts';
22
+ import { ELEMENT_EXTRACTION_CONFIG, getElementDataExtractorSource } from './utils/html.ts';
22
23
  import { createDebug, log, tag } from './utils/logger.js';
23
- import { WebElement, extractElementData } from './utils/web-element.ts';
24
+ import { WebElement } from './utils/web-element.ts';
24
25
 
25
26
  declare global {
26
27
  namespace NodeJS {
@@ -337,11 +338,11 @@ class Explorer {
337
338
  async getEidxInContainer(containerCss: string | null): Promise<string[]> {
338
339
  const page = this.playwrightHelper.page;
339
340
  try {
340
- const selector = containerCss ? `${containerCss} [data-explorbot-eidx]` : '[data-explorbot-eidx]';
341
+ const selector = containerCss ? `${containerCss} [${ELEMENT_EXTRACTION_CONFIG.attrs.eidx}]` : `[${ELEMENT_EXTRACTION_CONFIG.attrs.eidx}]`;
341
342
  const elements = await page.locator(selector).all();
342
343
  const result: string[] = [];
343
344
  for (const el of elements) {
344
- const attr = await el.getAttribute('data-explorbot-eidx');
345
+ const attr = await el.getAttribute(ELEMENT_EXTRACTION_CONFIG.attrs.eidx);
345
346
  if (attr) result.push(attr);
346
347
  }
347
348
  return result;
@@ -359,7 +360,7 @@ class Explorer {
359
360
  const page = this.playwrightHelper.page;
360
361
  const base = container ? page.locator(container) : page;
361
362
  const el = locator.startsWith('//') ? base.locator(`xpath=${locator}`) : base.locator(locator);
362
- return await el.first().getAttribute('data-explorbot-eidx');
363
+ return await el.first().getAttribute(ELEMENT_EXTRACTION_CONFIG.attrs.eidx);
363
364
  } catch (error) {
364
365
  if (this.isFatalBrowserError(error)) {
365
366
  tag('warning').log(`getEidxByLocator: ${error instanceof Error ? error.message : error}`);
@@ -751,20 +752,20 @@ export async function annotatePageElements(page: any): Promise<{ ariaSnapshot: s
751
752
  for (const [role, entries] of byRole) {
752
753
  try {
753
754
  const rawList = await page.getByRole(role).evaluateAll(
754
- (domElements: Element[], [data, extractFnStr]: [Array<{ name: string; ref: string }>, string]) => {
755
+ (domElements: Element[], [data, extractFnStr, config]: [Array<{ name: string; ref: string }>, string, typeof ELEMENT_EXTRACTION_CONFIG]) => {
755
756
  const extract = new Function(`return ${extractFnStr}`)() as (el: Element) => any;
756
757
  const results: any[] = [];
757
758
  let ariaIdx = 0;
758
759
  for (const el of domElements) {
759
760
  if (ariaIdx >= data.length) break;
760
- el.setAttribute('data-explorbot-eidx', data[ariaIdx].ref);
761
- const elData = extract(el);
761
+ el.setAttribute(config.attrs.eidx, data[ariaIdx].ref);
762
+ const elData = extract(el, config);
762
763
  if (elData) results.push(elData);
763
764
  ariaIdx++;
764
765
  }
765
766
  return results;
766
767
  },
767
- [entries, extractElementData.toString()]
768
+ [entries, getElementDataExtractorSource(), ELEMENT_EXTRACTION_CONFIG]
768
769
  );
769
770
  for (const raw of rawList) {
770
771
  elements.push(WebElement.fromRawData(raw, role));