npm - explorbot - Versions diffs - 0.1.12 → 0.1.15 - Mend

explorbot 0.1.12 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75)

package/bin/explorbot-cli.ts +21 -21
package/dist/bin/explorbot-cli.js +3 -3
package/dist/package.json +4 -2
package/dist/rules/researcher/container-rules.md +2 -0
package/dist/src/action-result.js +2 -1
package/dist/src/action.js +3 -8
package/dist/src/ai/captain.js +0 -2
package/dist/src/ai/conversation.js +20 -4
package/dist/src/ai/driller.js +1108 -0
package/dist/src/ai/historian/utils.js +8 -1
package/dist/src/ai/pilot.js +214 -267
package/dist/src/ai/provider.js +25 -12
package/dist/src/ai/quartermaster.js +2 -2
package/dist/src/ai/rules.js +5 -5
package/dist/src/ai/session-analyst.js +122 -0
package/dist/src/ai/tester.js +69 -22
package/dist/src/ai/tools.js +19 -4
package/dist/src/commands/base-command.js +6 -6
package/dist/src/commands/drill-command.js +3 -2
package/dist/src/commands/exit-command.js +1 -0
package/dist/src/commands/explore-command.js +9 -2
package/dist/src/components/AddRule.js +1 -1
package/dist/src/components/StatusPane.js +6 -1
package/dist/src/experience-tracker.js +9 -0
package/dist/src/explorbot.js +48 -8
package/dist/src/explorer.js +11 -13
package/dist/src/reporter.js +105 -4
package/dist/src/state-manager.js +4 -3
package/dist/src/stats.js +7 -1
package/dist/src/test-plan.js +47 -3
package/dist/src/utils/aria.js +354 -529
package/dist/src/utils/hooks-runner.js +2 -8
package/dist/src/utils/html.js +371 -0
package/dist/src/utils/unique-names.js +12 -1
package/dist/src/utils/url-matcher.js +6 -1
package/dist/src/utils/web-element.js +27 -24
package/dist/src/utils/xpath.js +1 -1
package/package.json +4 -2
package/rules/researcher/container-rules.md +2 -0
package/src/action-result.ts +2 -1
package/src/action.ts +3 -10
package/src/ai/captain.ts +0 -2
package/src/ai/conversation.ts +21 -4
package/src/ai/driller.ts +1194 -0
package/src/ai/historian/utils.ts +8 -1
package/src/ai/pilot.ts +215 -265
package/src/ai/provider.ts +24 -12
package/src/ai/quartermaster.ts +2 -2
package/src/ai/rules.ts +5 -5
package/src/ai/session-analyst.ts +139 -0
package/src/ai/tester.ts +63 -20
package/src/ai/tools.ts +18 -4
package/src/commands/base-command.ts +6 -6
package/src/commands/drill-command.ts +3 -2
package/src/commands/exit-command.ts +1 -0
package/src/commands/explore-command.ts +10 -2
package/src/components/AddRule.tsx +1 -1
package/src/components/StatusPane.tsx +6 -3
package/src/config.ts +4 -0
package/src/experience-tracker.ts +9 -0
package/src/explorbot.ts +55 -10
package/src/explorer.ts +10 -12
package/src/reporter.ts +108 -4
package/src/state-manager.ts +4 -3
package/src/stats.ts +10 -1
package/src/test-plan.ts +62 -3
package/src/utils/aria.ts +367 -537
package/src/utils/hooks-runner.ts +2 -6
package/src/utils/html.ts +381 -0
package/src/utils/unique-names.ts +13 -0
package/src/utils/url-matcher.ts +5 -1
package/src/utils/web-element.ts +31 -28
package/src/utils/xpath.ts +1 -1
package/dist/src/ai/bosun.js +0 -456
package/src/ai/bosun.ts +0 -571

package/src/ai/provider.ts CHANGED Viewed

@@ -19,6 +19,15 @@ const responseLog = createDebug('explorbot:provider:in');
 class AiError extends Error {}
 export class ContextLengthError extends Error {}
+function extractCachedTokens(usage: any): number { // Returns the cached-input token count found on a usage object, or 0 when none is present.
+  if (!usage) return 0; // a missing usage object counts as zero cached tokens
+  const direct = usage.cachedInputTokens ?? usage.inputTokenDetails?.cacheReadTokens; // top-level fields are checked first
+  if (typeof direct === 'number') return direct; // anything non-numeric falls through to the raw payload
+  const raw = usage.raw; // presumably the provider's unnormalized usage payload; verify against the SDK in use
+  const fromRaw = raw?.prompt_tokens_details?.cached_tokens ?? raw?.promptTokensDetails?.cachedTokens; // accepts both snake_case and camelCase spellings
+  return typeof fromRaw === 'number' ? fromRaw : 0;
+}
 function rejectAfterIdle(ms: number, signal: { cancelled: boolean }): Promise<never> {
   return new Promise((_, reject) => {
     const tick = () => {
@@ -265,9 +274,10 @@ export class Provider {
       if (response.usage) {
         Stats.recordTokens(options.agentName || 'unknown', modelName, {
-          input: response.usage.promptTokens || 0,
-          output: response.usage.completionTokens || 0,
-          total: response.usage.totalTokens || 0,
+          input: response.usage.inputTokens ?? response.usage.promptTokens ?? 0,
+          output: response.usage.outputTokens ?? response.usage.completionTokens ?? 0,
+          total: response.usage.totalTokens ?? 0,
+          cached: extractCachedTokens(response.usage),
         });
       }
@@ -355,9 +365,10 @@ export class Provider {
       if (response.usage) {
         Stats.recordTokens(options.agentName || 'unknown', modelName, {
-          input: response.usage.promptTokens || 0,
-          output: response.usage.completionTokens || 0,
-          total: response.usage.totalTokens || 0,
+          input: response.usage.inputTokens ?? response.usage.promptTokens ?? 0,
+          output: response.usage.outputTokens ?? response.usage.completionTokens ?? 0,
+          total: response.usage.totalTokens ?? 0,
+          cached: extractCachedTokens(response.usage),
         });
       }
@@ -428,9 +439,10 @@ export class Provider {
       if (response.usage) {
         Stats.recordTokens(options.agentName || 'unknown', modelName, {
-          input: response.usage.promptTokens || 0,
-          output: response.usage.completionTokens || 0,
-          total: response.usage.totalTokens || 0,
+          input: response.usage.inputTokens ?? response.usage.promptTokens ?? 0,
+          output: response.usage.outputTokens ?? response.usage.completionTokens ?? 0,
+          total: response.usage.totalTokens ?? 0,
+          cached: extractCachedTokens(response.usage),
         });
       }
@@ -625,9 +637,9 @@ export class Provider {
       if (response.usage) {
         Stats.recordTokens('vision', this.getModelName(this.config.visionModel), {
-          input: response.usage.promptTokens || 0,
-          output: response.usage.completionTokens || 0,
-          total: response.usage.totalTokens || 0,
+          input: response.usage.inputTokens ?? response.usage.promptTokens ?? 0,
+          output: response.usage.outputTokens ?? response.usage.completionTokens ?? 0,
+          total: response.usage.totalTokens ?? 0,
         });
       }

package/src/ai/quartermaster.ts CHANGED Viewed

@@ -240,11 +240,11 @@ Focus on what would confuse a real user or caused the agent to make mistakes.`;
     const criticalViolations = report.axeViolations.filter((v) => v.impact === 'critical' || v.impact === 'serious');
     for (const v of criticalViolations.slice(0, 3)) {
       const nodeHtml = v.nodes[0]?.html.slice(0, 100) || '';
-      task.addNote(`🔴 A11Y [${v.impact}] ${v.id}: ${v.description} — ${nodeHtml}`);
+      task.addVerificationDetail(`🔴 A11Y [${v.impact}] ${v.id}: ${v.description} — ${nodeHtml}`);
     }
     for (const issue of report.semanticIssues.slice(0, 3)) {
-      task.addNote(`💡 UX [${issue.type}] ${issue.element}: ${issue.suggestion}`);
+      task.addVerificationDetail(`💡 UX [${issue.type}] ${issue.element}: ${issue.suggestion}`);
     }
   }

package/src/ai/rules.ts CHANGED Viewed

@@ -241,6 +241,8 @@ export function multipleTabsRule(tabs: Array<{ url: string; title: string }>): s
 export const actionRule = dedent`
   <actions>
+  \`faker\` (from @faker-js/faker) is available inside I.* calls for generating data, e.g. I.fillField('Bio', faker.lorem.paragraphs(5)).
   ### I.click
   clicks on the element by its locator
@@ -282,11 +284,9 @@ export const actionRule = dedent`
     I.fillField('Description', 'Hello world', '.editor'); // works for rich text / code editors too
   </example>
-  I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors
-  (Monaco, ProseMirror, CodeMirror, TipTap, Quill, Draft.js, Slate, etc.) transparently.
-  ALWAYS use I.fillField for rich editors — target the editor container or its nearest label/heading with a normal locator.
-  Do NOT open the editor with raw JS (executeScript, page.evaluate), do NOT dispatch synthetic events,
-  do NOT call the editor's own API (monaco.editor.setValue, view.dispatch, etc.) to write text.
+  I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors transparently.
+  ALWAYS use I.fillField for rich text / code editors — target the editor container or its nearest label/heading with a normal locator.
+  If I.fillField does not work, I.type into the focused element is the fallback.
   ### I.type

package/src/ai/session-analyst.ts ADDED Viewed

@@ -0,0 +1,139 @@
+import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
+import path from 'node:path';
+import dedent from 'dedent';
+import { outputPath } from '../config.ts';
+import { Stats } from '../stats.ts';
+import type { Test } from '../test-plan.ts';
+import type { Agent } from './agent.ts';
+import type { Provider } from './provider.ts';
+export class SessionAnalyst implements Agent { // Builds a Markdown end-of-session report from executed tests via one provider chat call, and can write it to disk.
+  emoji = '🧐';
+  private provider: Provider;
+  constructor(provider: Provider) {
+    this.provider = provider;
+  }
+  async analyze(tests: Test[]): Promise<string> { // Returns the model's report text, or '' when no test has a startTime.
+    const eligible = tests.filter((t) => t.startTime != null); // only tests with a startTime are reported on
+    if (eligible.length === 0) return ''; // nothing to report; no model call is made
+    const model = this.provider.getAgenticModel('analyst');
+    const customPrompt = this.provider.getSystemPromptForAgent('analyst', undefined); // optional extra text; appended at the end of the system prompt when non-empty
+    const systemPrompt = dedent`
+      You write a TERSE end-of-session report. Reader is a developer who wants to UNDERSTAND THE FEATURE — what works, what is broken, what is unclear. Every word must earn its place.
+      Output MARKDOWN. No JSON, no preamble, no closing summary.
+      NO EMOJI. No 🔴 🟡 🟢 ✅, no escape sequences like \\u2705. Use plain text severity tags: [High], [Medium], [Low] for defects.
+      ## Reporting unit
+      Report at the level of FEATURES / FLOWS / PAGES. Tests are evidence, not the unit. Several tests covering the same flow → ONE entry citing all of them.
+      ## Walk every test
+      PASSED test: did all steps run, was the goal actually verified, did the user-visible goal happen? All yes → contributes to What works. Any no → Execution issue (false positive).
+      FAILED test, first match wins: (1) goal achieved but mis-verified → Execution. (2) automation failure (locator/timeout/loop/modal/a11y) → Execution. (3) bad preconditions or data → Execution. (4) wrong URL/environment → Execution. (5) app contradicted expected outcome → Defect.
+      Crucial distinction: "the app misbehaved" vs "the automation could not interact with the app". ONLY the first is a Defect. If the automation gives up before the app responds — timeout, retries exhausted, dead loop / loop detected, could not click or find an element — that is an Execution issue regardless of what the log calls it. Failure inside the automation ≠ failure inside the product.
+      A solitary failure where adjacent tests on the same feature passed → Execution, not Defect.
+      ## Severity (defects only)
+      [High] blocks a core flow · [Medium] degrades a flow but workaround exists · [Low] cosmetic / edge case
+      ## Format
+      # Session Analysis
+      <ONE or TWO sentences describing the FEATURE STATE — what was explored, whether the core flow holds, what the standout problem is. NO test counts, NO "N tests run". Talk about the product, not the run.>
+      ## Coverage
+      - Pages: <paths>
+      - Features: <capabilities>
+      ## What works
+      - **<feature>** — #2, #7, #8
+      ## Defects
+      ### [Medium] <plain-English bug title>
+      Affects: #3, #5
+      Reproduce:
+        1. <concrete UI step>
+        2. <next>
+      Evidence: <one short observation>
+      ## UX issues
+      - **<feature>** — <what's confusing> (#7)
+      ## Execution Issues
+      - **#2 <scenario>** — <≤10 words, what was unreliable>
+      ## Brevity rules
+      - Headline: 2 sentences MAX. About the FEATURE, not the run. No counts, no "N tests", no "this session". Banned words: "exercised", "comprehensive", "notably", "this session", "module", "targeted", "covered creation".
+      - What works: feature name + test refs. NO parentheticals, NO caveats. If there's a caveat, the entry doesn't belong here.
+      - Defect title is the BUG ("Search returns non-matching results"), never the scenario name.
+      - Reproduce steps are imperative one-liners drawn from the log.
+      - Evidence is one short factual observation. Never quote the \`result\` field.
+      - Execution Issues: ONE line per test, ≤10 words, plain. Examples: "passed vacuously, no list assertion", "no file upload step in log", "dead loop on Save click". No prefixes, no nested explanation.
+      - Omit any empty section.
+      - Section order: Coverage → What works → Defects (severity desc) → UX issues → Execution Issues.
+      ${customPrompt || ''}
+    `;
+    const userPayload = dedent`
+      ${eligible.length} tests were executed in this session.
+      ${eligible.map((t, i) => this.serializeTest(t, i + 1)).join('\n\n')}
+    `;
+    const response = await this.provider.chat( // system prompt plus serialized tests; refs #1..#N follow the order of the eligible list
+      [
+        { role: 'system', content: systemPrompt },
+        { role: 'user', content: userPayload },
+      ],
+      model,
+      { agentName: 'analyst' }
+    );
+    return decodeEscapes((response?.text || '').trim()); // a missing response or text yields ''; literal backslash-u escapes in the reply are decoded
+  }
+  writeReport(markdown: string): string { // Writes markdown to the path built by outputPath('reports', '<session label>.md') and returns that path.
+    const filePath = outputPath('reports', `${Stats.sessionLabel()}.md`);
+    const dir = path.dirname(filePath);
+    if (!existsSync(dir)) mkdirSync(dir, { recursive: true }); // create missing parent directories before writing
+    writeFileSync(filePath, markdown);
+    return filePath;
+  }
+  private serializeTest(test: Test, ref: number): string { // Renders one test (url, scenario, result, expected, recent log) as a <test ref="#N"> block.
+    const log = test
+      .getLog()
+      .slice(-30) // keep only the last 30 log entries
+      .map((entry) => `  - [${entry.type}] ${entry.content}`)
+      .join('\n');
+    return dedent`
+      <test ref="#${ref}">
+      url: ${test.startUrl || '/'}
+      scenario: ${test.scenario}
+      result: ${test.result || 'unknown'}
+      expected: ${test.expected.join(' | ') || '(none)'}
+      log:
+      ${log}
+      </test>
+    `;
+  }
+}
+function decodeEscapes(text: string): string { // Replaces literal \u{HEX} and \uXXXX escape sequences in text with the characters they denote.
+  return text.replace(/\\u\{([0-9a-fA-F]+)\}/g, (_, hex) => String.fromCodePoint(Number.parseInt(hex, 16))).replace(/\\u([0-9a-fA-F]{4})/g, (_, hex) => String.fromCodePoint(Number.parseInt(hex, 16))); // NOTE(review): String.fromCodePoint throws RangeError above 0x10FFFF, so an out-of-range \u{...} escape would make this throw
+}

package/src/ai/tester.ts CHANGED Viewed

@@ -64,6 +64,8 @@ export class Tester extends TaskAgent implements Agent {
   private pageStateHash: string | null = null;
   private pageActionResult: ActionResult | null = null;
   private hooksRunner: HooksRunner;
+  private seenUiMapUrls = new Set<string>();
+  private lastAnalyzedStateHash: string | null = null;
   constructor(explorer: Explorer, provider: Provider, researcher: Researcher, navigator: Navigator, agentTools?: any) {
     super();
@@ -104,7 +106,7 @@ export class Tester extends TaskAgent implements Agent {
   }
   private get progressCheckInterval(): number { // Reads progressCheckInterval from the tester agent config, with a fallback when it is unset.
-    return (this.explorer.getConfig().ai?.agents?.tester as any)?.progressCheckInterval ?? 5;
+    return (this.explorer.getConfig().ai?.agents?.tester as any)?.progressCheckInterval ?? 3; // fallback is 3 when the config value is null or undefined
   }
   getConversation(): Conversation | null {
@@ -123,6 +125,8 @@ export class Tester extends TaskAgent implements Agent {
     this.previousStateHash = null;
     this.pageStateHash = null;
     this.pageActionResult = null;
+    this.seenUiMapUrls.clear();
+    this.lastAnalyzedStateHash = null;
     this.explorer.getStateManager().clearHistory();
     this.resetFailureCount();
     this.pilot?.reset();
@@ -147,14 +151,20 @@ export class Tester extends TaskAgent implements Agent {
     const initialState = ActionResult.fromState(state);
     const conversation = this.provider.startConversation(this.getSystemMessage(), 'tester');
+    conversation.markLastMessageCacheable();
     this.currentConversation = conversation;
     const outputDir = ConfigParser.getInstance().getOutputDir();
     this.executionLogFile = join(outputDir, `tester_${task.sessionName}.md`);
     // Note: Markdown saving functionality removed from Conversation class
-    const initialPrompt = await this.buildTestPrompt(task, initialState);
-    conversation.addUserText(initialPrompt);
+    const scenarioBlock = this.buildScenarioBlock(task, initialState);
+    conversation.addUserText(scenarioBlock);
+    conversation.markLastMessageCacheable();
+    conversation.protectPrefix(conversation.messages.length);
+    const pageContext = await this.reinjectContextIfNeeded(1, initialState);
+    if (pageContext) conversation.addUserText(pageContext);
     return await Observability.run(
       `test: ${task.scenario}`,
@@ -177,6 +187,12 @@ export class Tester extends TaskAgent implements Agent {
     if (this.pilot) {
       try {
         const plan = await this.pilot.planTest(task, initialState);
+        if (task.hasFinished) {
+          offFailedRequest?.();
+          page?.off('pageerror', onPageError);
+          page?.off('console', onConsoleMessage);
+          return { success: task.isSuccessful };
+        }
         if (plan) {
           conversation.addUserText(`Pilot's test plan:\n${plan}\n\nFollow this plan while executing the test.`);
         }
@@ -200,13 +216,15 @@ export class Tester extends TaskAgent implements Agent {
     debugLog(`Navigating to ${task.startUrl}`);
     await this.explorer.visit(task.startUrl!);
-    const currentUrl = this.explorer.getStateManager().getCurrentState()?.url || task.startUrl || '';
+    const startState = this.explorer.getStateManager().getCurrentState();
+    if (startState) task.addUrlNote(startState);
+    const currentUrl = startState?.url || task.startUrl || '';
     await this.hooksRunner.runBeforeHook('tester', currentUrl);
     const offStateChange = this.explorer.getStateManager().onStateChange((event: StateTransition) => {
       if (task.hasFinished) return;
       if (event.toState?.url === event.fromState?.url) return;
-      task.addNote(`Navigated to ${event.toState?.url}`, TestResult.PASSED);
+      if (event.toState) task.addUrlNote(event.toState, event.fromState || undefined);
       task.states.push(event.toState);
     });
@@ -253,13 +271,13 @@ export class Tester extends TaskAgent implements Agent {
           `);
           }
-          conversation.cleanupTag('page_aria', '...cleaned aria snapshot...', 2);
+          conversation.cleanupTag('page_aria', '...cleaned aria snapshot...', 1);
           conversation.cleanupTag('page_html', '...cleaned HTML snapshot...', 1);
           conversation.cleanupTag('experience', '...cleaned experience...', 1);
           conversation.cleanupTag('applied_experience', '...cleaned past experience...', 1);
           conversation.cleanupTag('page_ui_map', '...cleaned UI map...', 1);
           conversation.cleanupTag('page_ui_map_overlay', '...cleaned UI overlay...', 1);
-          conversation.compactToolResults(3);
+          conversation.compactToolResults(2);
           if (iteration > 1) {
             const isNewPage = this.previousUrl !== null && this.previousUrl !== currentState.url;
@@ -268,18 +286,19 @@ export class Tester extends TaskAgent implements Agent {
             nextStep += await this.prepareInstructionsForNextStep(task);
             if (isNewPage && this.pilot) {
-              const guidance = await this.pilot.reviewNewPage(task, currentState);
+              const guidance = await this.pilot.reviewNewPage(task, currentState, conversation);
               if (guidance) nextStep += `\n\n${guidance}`;
-            } else if ((iteration % this.progressCheckInterval === 0 || this.consecutiveFailures >= 3 || this.consecutiveEmptyResults >= 2) && this.pilot) {
+            } else if (this.shouldAnalyzeProgress(iteration, currentState) && this.pilot) {
               const guidance = await this.pilot.analyzeProgress(task, currentState, conversation);
               if (guidance) nextStep += `\n\n${guidance}`;
               this.consecutiveFailures = 0;
+              this.lastAnalyzedStateHash = currentState.hash;
             }
             conversation.addUserText(nextStep);
           }
           const result = await this.provider.invokeConversation(conversation, tools, {
-            maxToolRoundtrips: 5,
+            maxToolRoundtrips: 3,
             toolChoice: 'required',
             stopWhen: () => task.hasFinished,
           });
@@ -421,6 +440,14 @@ export class Tester extends TaskAgent implements Agent {
     };
   }
+  private shouldAnalyzeProgress(iteration: number, currentState: ActionResult): boolean { // Decides whether a progress analysis should run on this iteration.
+    if (this.consecutiveFailures >= 3) return true; // repeated failures force an analysis regardless of the interval
+    if (this.consecutiveEmptyResults >= 2) return true; // so do repeated empty results
+    if (iteration % this.progressCheckInterval !== 0) return false; // otherwise only on every progressCheckInterval-th iteration
+    if (this.lastAnalyzedStateHash === currentState.hash) return false; // skip when the state hash equals the one last analyzed
+    return true;
+  }
   private async prepareInstructionsForNextStep(task: Test): Promise<string> {
     let outcomeStatus = dedent`
       <task>
@@ -463,6 +490,8 @@ export class Tester extends TaskAgent implements Agent {
     let context = '';
+    const focusArea = detectFocusArea(currentState.ariaSnapshot);
     const focusedElement = extractFocusedElement(currentState.ariaSnapshot);
     if (focusedElement) {
       const isTextInput = ['textbox', 'combobox', 'searchbox'].includes(focusedElement.role);
@@ -480,6 +509,18 @@ export class Tester extends TaskAgent implements Agent {
       `;
     }
+    if (focusArea.detected) {
+      const areaName = focusArea.name ? ` "${focusArea.name}"` : '';
+      context += dedent`
+        <focus_scope>
+        A ${focusArea.type}${areaName} is currently open above the page.
+        Scope all interactions to elements inside this ${focusArea.type}.
+        Page navigation, filters, and tabs that exist outside it are not actionable while it is open and may share names or roles with elements inside it — prefer the locator inside the ${focusArea.type}.
+        Use <page_aria> to confirm the element you target is actually inside the ${focusArea.type}.
+        </focus_scope>
+      `;
+    }
     if (currentState.isInsideIframe) {
       const iframeInfo = currentState.iframeURL || this.explorer.getCurrentIframeInfo() || 'iframe context active';
       context += dedent`
@@ -497,17 +538,21 @@ export class Tester extends TaskAgent implements Agent {
     }
     if (isNewUrl) {
+      const alreadySeenUiMap = this.seenUiMapUrls.has(currentUrl);
       let research = '';
-      try {
-        research = await this.researcher.research(currentState);
-      } catch (err) {
-        if (!(err instanceof ErrorPageError)) throw err;
-        tag('warning').log(`Research skipped: ${err.message}`);
+      if (!alreadySeenUiMap) {
+        try {
+          research = await this.researcher.research(currentState);
+        } catch (err) {
+          if (!(err instanceof ErrorPageError)) throw err;
+          tag('warning').log(`Research skipped: ${err.message}`);
+        }
       }
       this.pageStateHash = currentStateHash;
       this.pageActionResult = currentState;
       let uiMapSection = '';
       if (research) {
+        this.seenUiMapUrls.add(currentUrl);
         uiMapSection = dedent`
           Page UI Map
@@ -516,6 +561,8 @@ export class Tester extends TaskAgent implements Agent {
           ${research}
           </page_ui_map>
         `;
+      } else if (alreadySeenUiMap) {
+        uiMapSection = `\n\n<page_ui_map>UI map for ${currentUrl} was shown earlier in this session — refer to it above.</page_ui_map>`;
       }
       context += dedent`
@@ -539,7 +586,6 @@ export class Tester extends TaskAgent implements Agent {
       return context;
     }
-    const focusArea = detectFocusArea(currentState.ariaSnapshot);
     if (focusArea.detected && focusArea.name && this.pageStateHash && this.pageActionResult) {
       const overlaySection = await this.researcher.researchOverlay(currentState, this.pageActionResult, this.pageStateHash);
       if (overlaySection) {
@@ -727,9 +773,8 @@ export class Tester extends TaskAgent implements Agent {
     `;
   }
-  private async buildTestPrompt(task: Test, actionResult: ActionResult): Promise<string> {
+  private buildScenarioBlock(task: Test, actionResult: ActionResult): string {
     const knowledge = this.getKnowledge(actionResult);
-    const pageContext = await this.reinjectContextIfNeeded(1, actionResult);
     return dedent`
       <task>
@@ -757,8 +802,6 @@ export class Tester extends TaskAgent implements Agent {
       ${this.buildAvailableFiles()}
       ${knowledge}
-      ${pageContext}
     `;
   }

package/src/ai/tools.ts CHANGED Viewed

@@ -510,7 +510,7 @@ export function createAgentTools({
           }
           return successToolResult('see', {
-            analysis: analysisResult,
+            analysis: cap(analysisResult, ANALYSIS_OUTPUT_CAP),
             message: `Successfully analyzed screenshot for: ${request}`,
             suggestion: 'Visual confirmation is valid evidence for test results. Use record() to note the visual findings.',
           });
@@ -559,8 +559,8 @@ export function createAgentTools({
             url: currentState.url,
             title: currentState.title,
             suggestion: 'If not enough context received, call see() to visually identify elements in page contents',
-            aria,
-            html,
+            aria: cap(aria, ARIA_OUTPUT_CAP),
+            html: cap(html, HTML_OUTPUT_CAP),
             reminder: 'Context provided. Do not call context() again until you perform actions or suspect page changed.',
           });
         } catch (error) {
@@ -657,7 +657,7 @@ export function createAgentTools({
           return successToolResult('research', {
             analysis: researchResult,
-            aria: ActionResult.fromState(currentState).getInteractiveARIA(),
+            aria: cap(ActionResult.fromState(currentState).getInteractiveARIA(), ARIA_OUTPUT_CAP),
             message: `Successfully researched page: ${currentState.url}.`,
             suggestion: dedent`
               You received comprehensive UI map report. Use it to understand the page structure and navigate to the elements.
@@ -1001,6 +1001,16 @@ export function createAgentTools({
 const PAGE_DIFF_SUGGESTION = 'Analyze page diff. htmlParts shows what changed and WHERE — each part has a container selector. Use the container as context when clicking elements from the diff.';
+const ARIA_OUTPUT_CAP = 4000; // length limits passed to cap() for tool-result fields
+const HTML_OUTPUT_CAP = 6000;
+const ANALYSIS_OUTPUT_CAP = 2000;
+function cap(text: string | undefined | null, max: number): string { // Keeps the first max characters of text and appends a marker stating how many were omitted.
+  if (!text) return ''; // null, undefined and '' all become ''
+  if (text.length <= max) return text; // short enough: returned unchanged
+  return `${text.slice(0, max)}\n[...truncated; ${text.length - max} chars omitted...]`; // length counts UTF-16 code units, so the cut can fall inside a surrogate pair
+}
 function transformContainsCommand(command: string): string {
   if (!command.includes(':contains(')) return command;
@@ -1044,8 +1054,12 @@ function successToolResult(action: string, data?: Record<string, any>, source?:
   if (data?.pageDiff) {
     let suggestion = PAGE_DIFF_SUGGESTION;
     const ariaChanges = data.pageDiff.ariaChanges || '';
+    const urlChanged = data.pageDiff.urlChanged === true;
+    const hasHtmlParts = Array.isArray(data.pageDiff.htmlParts) && data.pageDiff.htmlParts.length > 0;
     if (countAriaChanges(ariaChanges) >= 50) {
       suggestion = `MAJOR PAGE CHANGE. Page entered a different mode. Check htmlParts and iframes in pageDiff before next action. ${suggestion}`;
+    } else if (!urlChanged && !ariaChanges && !hasHtmlParts) {
+      suggestion = 'Action ran without error but produced no observable change (URL, ARIA and HTML all unchanged). The locator likely matched a non-interactive ancestor or an element outside the intended control. Re-locate via xpathCheck() or verify with see() before treating this as success.';
     } else if (ariaChanges.includes('heading') && ariaChanges.includes('added')) {
       suggestion += ' WARNING: A new panel or modal may have appeared. If this was not the intended action, close it and try a different element.';
     }

package/src/commands/base-command.ts CHANGED Viewed

@@ -38,17 +38,17 @@ export abstract class BaseCommand {
   printSuggestions(): void { // Prints the suggested follow-up commands; produces no output when there are none.
     if (this.suggestions.length === 0) return;
     const prefix = isInteractive() ? '/' : `${getCliName()} `; // '/' in interactive mode, otherwise the CLI name plus a space
-    tag('info').log('');
-    tag('info').log(chalk.bold('Suggested:'));
+    const commandWidth = this.suggestions.reduce((max, s) => (s.command ? Math.max(max, prefix.length + s.command.length) : max), 0); // widest prefix+command among entries that have a command
+    const lines = [chalk.bold('Suggested:')];
     for (const { command, hint } of this.suggestions) {
-      tag('info').log('');
       if (!command) {
-        tag('info').log(chalk.dim(hint));
+        lines.push(`  ${chalk.dim(hint)}`); // hint-only entry: no command column
         continue;
       }
-      tag('info').log(chalk.dim(`${hint}:`));
-      tag('info').log(`  ${chalk.yellow(`${prefix}${command}`)}`);
+      const cmd = `${prefix}${command}`.padEnd(commandWidth); // pad before coloring so hints line up in one column
+      lines.push(`  ${chalk.yellow(cmd)}  ${chalk.dim(hint)}`);
     }
+    tag('info').log(lines.join('\n')); // everything is emitted in a single log call
   }
   protected parseArgs(args: string): { opts: Record<string, string | boolean>; args: string[] } {

package/src/commands/drill-command.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import { BaseCommand, type Suggestion } from './base-command.js';
 export class DrillCommand extends BaseCommand {
   name = 'drill';
   description = 'Drill all components on current page to learn interactions';
+  aliases = ['driller'];
   suggestions: Suggestion[] = [
     { command: 'research', hint: 'see UI map first' },
     { command: 'navigate <page>', hint: 'go to another page' },
@@ -17,7 +18,7 @@ export class DrillCommand extends BaseCommand {
       throw new Error('No active page to drill');
     }
-    await this.explorBot.agentBosun().drill({
+    await this.explorBot.agentDriller().drill({
       knowledgePath,
       maxComponents,
       interactive: true,
@@ -30,7 +31,7 @@ export class DrillCommand extends BaseCommand {
   }
   private parseMaxArg(args: string): number | undefined { // Extracts the integer that follows the max flag in args, or undefined when the flag is not found.
-    const match = args.match(/--max\s+(\d+)/);
+    const match = args.match(/--max-components\s+(\d+)/); // requires whitespace between flag and digits; `--max-components=5` does not match
     return match ? Number.parseInt(match[1], 10) : undefined;
   }
 }

package/src/commands/exit-command.ts CHANGED Viewed

@@ -10,6 +10,7 @@ export class ExitCommand extends BaseCommand {
   aliases = ['quit'];
   async execute(_args: string): Promise<void> {
+    await this.explorBot.printSessionAnalysis();
     await this.explorBot.getExplorer().stop();
     if (Stats.hasActivity()) {

package/src/commands/explore-command.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import figureSet from 'figures';
 import { getStyles } from '../ai/planner/styles.js';
 import { outputPath } from '../config.js';
+import { normalizeUrl } from '../state-manager.js';
 import { Stats } from '../stats.js';
 import type { Plan } from '../test-plan.js';
 import { getCliName } from '../utils/cli-name.ts';
@@ -11,6 +12,8 @@ import { type NextStepSection, printNextSteps, relativeToCwd } from '../utils/ne
 import { safeFilename } from '../utils/strings.ts';
 import { BaseCommand, type Suggestion } from './base-command.js';
+const MAX_SUB_PAGE_ATTEMPTS = 30;
 export class ExploreCommand extends BaseCommand {
   name = 'explore';
   description = 'Start web exploration';
@@ -27,6 +30,7 @@ export class ExploreCommand extends BaseCommand {
   maxTests?: number;
   private testsRun = 0;
   private completedPlans: Plan[] = [];
+  private failedSubPages = new Set<string>();
   async execute(args: string): Promise<void> {
     const { opts, args: remaining } = this.parseArgs(args);
@@ -46,10 +50,12 @@ export class ExploreCommand extends BaseCommand {
     if (!feature && !this.isLimitReached()) {
       const planner = this.explorBot.agentPlanner();
-      while (true) {
+      let attempts = 0;
+      while (attempts < MAX_SUB_PAGE_ATTEMPTS) {
+        attempts++;
         if (this.isLimitReached()) break;
-        const candidates = planner.collectSubPageCandidates(mainPlan, mainUrl || '/');
+        const candidates = planner.collectSubPageCandidates(mainPlan, mainUrl || '/').filter((c) => !this.failedSubPages.has(normalizeUrl(c.url)));
         if (candidates.length === 0) break;
         const pick = await planner.pickNextSubPage(candidates);
@@ -64,6 +70,7 @@ export class ExploreCommand extends BaseCommand {
             this.completedPlans.push(subPlan);
           }
         } catch (err) {
+          this.failedSubPages.add(normalizeUrl(pick.url));
           tag('warning').log(`Sub-page exploration failed: ${err instanceof Error ? err.message : err}`);
         }
       }
@@ -73,6 +80,7 @@ export class ExploreCommand extends BaseCommand {
     if (mainUrl) await this.explorBot.visit(mainUrl);
     const savedPath = this.explorBot.savePlans(this.completedPlans);
     this.printResults();
+    await this.explorBot.printSessionAnalysis();
     this.printNextSteps(savedPath);
   }

package/src/components/AddRule.tsx CHANGED Viewed

@@ -5,7 +5,7 @@ import React, { useEffect, useState } from 'react';
 import { AddRuleCommand } from '../commands/add-rule-command.js';
 import InputReadline from './InputReadline.js';
-const KNOWN_AGENTS = ['researcher', 'tester', 'planner', 'pilot', 'captain', 'bosun', 'navigator'];
+const KNOWN_AGENTS = ['researcher', 'tester', 'planner', 'pilot', 'captain', 'driller', 'navigator'];
 interface AddRuleProps {
   initialAgent?: string;