npm - explorbot - Versions diffs - 0.1.11 → 0.1.13 - Mend

explorbot 0.1.11 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72)

package/README.md +12 -2
package/bin/explorbot-cli.ts +21 -21
package/dist/bin/explorbot-cli.js +3 -3
package/dist/package.json +4 -3
package/dist/rules/researcher/container-rules.md +2 -0
package/dist/src/action-result.js +2 -1
package/dist/src/action.js +5 -10
package/dist/src/ai/captain.js +0 -2
package/dist/src/ai/driller.js +1108 -0
package/dist/src/ai/historian/codeceptjs.js +2 -2
package/dist/src/ai/historian/experience.js +1 -0
package/dist/src/ai/historian/playwright.js +4 -4
package/dist/src/ai/historian/screencast.js +121 -0
package/dist/src/ai/historian.js +5 -3
package/dist/src/ai/pilot.js +31 -22
package/dist/src/ai/rules.js +3 -5
package/dist/src/ai/session-analyst.js +117 -0
package/dist/src/ai/tester.js +13 -2
package/dist/src/commands/base-command.js +6 -6
package/dist/src/commands/drill-command.js +3 -2
package/dist/src/commands/exit-command.js +1 -0
package/dist/src/commands/explore-command.js +20 -3
package/dist/src/components/AddRule.js +1 -1
package/dist/src/explorbot.js +52 -9
package/dist/src/explorer.js +11 -9
package/dist/src/reporter.js +68 -4
package/dist/src/state-manager.js +4 -3
package/dist/src/stats.js +5 -0
package/dist/src/utils/aria.js +354 -529
package/dist/src/utils/hooks-runner.js +2 -8
package/dist/src/utils/html.js +371 -0
package/dist/src/utils/strings.js +15 -0
package/dist/src/utils/unique-names.js +12 -1
package/dist/src/utils/url-matcher.js +6 -1
package/dist/src/utils/web-element.js +27 -24
package/dist/src/utils/xpath.js +1 -1
package/package.json +4 -3
package/rules/researcher/container-rules.md +2 -0
package/src/action-result.ts +2 -1
package/src/action.ts +5 -12
package/src/ai/captain.ts +0 -2
package/src/ai/driller.ts +1194 -0
package/src/ai/historian/codeceptjs.ts +2 -2
package/src/ai/historian/experience.ts +3 -2
package/src/ai/historian/playwright.ts +5 -5
package/src/ai/historian/screencast.ts +133 -0
package/src/ai/historian.ts +7 -5
package/src/ai/pilot.ts +31 -21
package/src/ai/rules.ts +3 -5
package/src/ai/session-analyst.ts +133 -0
package/src/ai/tester.ts +15 -2
package/src/commands/base-command.ts +6 -6
package/src/commands/drill-command.ts +3 -2
package/src/commands/exit-command.ts +1 -0
package/src/commands/explore-command.ts +22 -3
package/src/components/AddRule.tsx +1 -1
package/src/config.ts +10 -0
package/src/explorbot.ts +59 -11
package/src/explorer.ts +11 -9
package/src/reporter.ts +68 -4
package/src/state-manager.ts +4 -3
package/src/stats.ts +7 -0
package/src/utils/aria.ts +367 -537
package/src/utils/hooks-runner.ts +2 -6
package/src/utils/html.ts +381 -0
package/src/utils/strings.ts +17 -0
package/src/utils/unique-names.ts +13 -0
package/src/utils/url-matcher.ts +5 -1
package/src/utils/web-element.ts +31 -28
package/src/utils/xpath.ts +1 -1
package/dist/src/ai/bosun.js +0 -456
package/src/ai/bosun.ts +0 -571

package/dist/src/ai/historian/codeceptjs.js CHANGED Viewed

@@ -5,6 +5,7 @@ import { ConfigParser } from "../../config.js";
 import { KnowledgeTracker } from "../../knowledge-tracker.js";
 import { tag } from "../../utils/logger.js";
 import { relativeToCwd } from "../../utils/next-steps.js";
+import { safeFilename } from "../../utils/strings.js";
 import { ASSERTION_TOOLS, CODECEPT_TOOLS } from "../tools.js";
 import { escapeString, getExecutionLabel, isNonReusableCode, stripComments } from "./utils.js";
 export function WithCodeceptJS(Base) {
@@ -78,8 +79,7 @@ export function WithCodeceptJS(Base) {
             }
             const testsDir = ConfigParser.getInstance().getTestsDir();
             mkdirSync(testsDir, { recursive: true });
-            const filename = plan.title.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
-            const filePath = join(testsDir, `${filename}.js`);
+            const filePath = join(testsDir, safeFilename(plan.title, '.js'));
             writeFileSync(filePath, lines.join('\n'));
             this.savedFiles.add(filePath);
             tag('substep').log(`Saved plan tests to: ${relativeToCwd(filePath)}`);

package/dist/src/ai/historian/experience.js CHANGED Viewed

@@ -29,6 +29,7 @@ export function WithExperience(Base) {
             if (task instanceof Test && result !== 'failed') {
                 await this.reportSession(task, steps);
             }
+            await this.stopScreencast();
             tag('substep').log(`Historian saved session for: ${task.description}`);
         }
         async reportSession(test, steps) {

package/dist/src/ai/historian/playwright.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { KnowledgeTracker } from "../../knowledge-tracker.js";
 import { renderAssertion, renderCall } from "../../playwright-recorder.js";
 import { tag } from "../../utils/logger.js";
 import { relativeToCwd } from "../../utils/next-steps.js";
+import { safeFilename } from "../../utils/strings.js";
 import { ASSERTION_TOOLS, CODECEPT_TOOLS } from "../tools.js";
 import { escapeString, getExecutionLabel } from "./utils.js";
 const PLAYWRIGHT_EMITTED_TOOLS = [...CODECEPT_TOOLS, ...ASSERTION_TOOLS];
@@ -14,7 +15,7 @@ export function WithPlaywright(Base) {
         async toPlaywrightCode(conversation, scenario) {
             const toolExecutions = conversation.getToolExecutions();
             const successfulSteps = toolExecutions.filter((exec) => exec.wasSuccessful && PLAYWRIGHT_EMITTED_TOOLS.includes(exec.toolName));
-            const callsByGroup = this.recorder ? await this.recorder.exportChunk() : new Map();
+            const callsByGroup = this.playwright?.recorder ? await this.playwright.recorder.exportChunk() : new Map();
             const stepLines = [];
             for (const exec of successfulSteps) {
                 const explanation = getExecutionLabel(exec);
@@ -46,7 +47,7 @@ export function WithPlaywright(Base) {
                     }
                 }
             }
-            const pilotVerifications = this.recorder ? this.recorder.drainVerifications() : [];
+            const pilotVerifications = this.playwright?.recorder ? this.playwright.recorder.drainVerifications() : [];
             if (pilotVerifications.length > 0) {
                 const assertionLines = [];
                 for (const step of pilotVerifications) {
@@ -115,8 +116,7 @@ export function WithPlaywright(Base) {
             lines.push('});');
             const testsDir = ConfigParser.getInstance().getTestsDir();
             mkdirSync(testsDir, { recursive: true });
-            const filename = plan.title.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
-            const filePath = join(testsDir, `${filename}.spec.ts`);
+            const filePath = join(testsDir, safeFilename(plan.title, '.spec.ts'));
             writeFileSync(filePath, lines.join('\n'));
             this.savedFiles.add(filePath);
             tag('substep').log(`Saved plan tests to: ${relativeToCwd(filePath)}`);

package/dist/src/ai/historian/screencast.js ADDED Viewed

@@ -0,0 +1,121 @@
+import { mkdirSync } from 'node:fs';
+import { join } from 'node:path';
+// @ts-ignore
+import * as codeceptjs from 'codeceptjs';
+import { outputPath } from "../../config.js";
+import { tag } from "../../utils/logger.js";
+import { relativeToCwd } from "../../utils/next-steps.js";
+import { safeFilename } from "../../utils/strings.js";
+import { debugLog } from "./mixin.js";
+/**
+ * Mixin that adds per-test screencast recording to a Historian-like base class.
+ *
+ * The resulting class reads `this.config`, `this.playwright` and `this.savedFiles`,
+ * which are expected to be assigned by the concrete class that uses the mixin.
+ * Recording is driven by CodeceptJS `test.before` / `step.passed` / `test.after`
+ * events once `attachScreencast()` has been called.
+ *
+ * @param {Function} Base - class to extend
+ * @returns {Function} subclass of `Base` with screencast support
+ */
+export function WithScreencast(Base) {
+    return class extends Base {
+        screencastPage = null;
+        screencastActive = false;
+        screencastPath = null;
+        screencastListenersInstalled = false;
+        screencastTask = null;
+        screencastLastChapter = null;
+        // In-flight stop operation; lets overlapping stopScreencast() calls share one stop.
+        screencastStopping = null;
+        onTestBefore;
+        onStepPassed;
+        onTestAfter;
+        /**
+         * @returns {boolean} true while a recording is in progress
+         */
+        isScreencastActive() {
+            return this.screencastActive;
+        }
+        /**
+         * Subscribes to CodeceptJS test/step events so recordings start and stop
+         * automatically. Does nothing when listeners are already installed, when
+         * `config.ai.agents.historian.screencast` is falsy, or when no Playwright
+         * helper is available.
+         */
+        attachScreencast() {
+            if (this.screencastListenersInstalled)
+                return;
+            if (!this.config?.ai?.agents?.historian?.screencast)
+                return;
+            if (!this.playwright?.helper)
+                return;
+            // Handlers are fire-and-forget: the async methods log their own failures.
+            this.onTestBefore = (test) => {
+                void this.startScreencast(test);
+            };
+            this.onStepPassed = (step) => {
+                void this.emitChapter(step);
+            };
+            this.onTestAfter = () => {
+                void this.stopScreencast();
+            };
+            codeceptjs.event.dispatcher.on('test.before', this.onTestBefore);
+            codeceptjs.event.dispatcher.on('step.passed', this.onStepPassed);
+            codeceptjs.event.dispatcher.on('test.after', this.onTestAfter);
+            this.screencastListenersInstalled = true;
+        }
+        /**
+         * Starts recording the current page into `<output>/screencasts/<name>.webm`.
+         * The file name is built from the plan title (if any), the 1-based position
+         * of the test inside its plan (if found), and the scenario name.
+         *
+         * @param {object} test - CodeceptJS test; `test._explorbotTest` is used when present
+         * @returns {Promise<void>}
+         */
+        async startScreencast(test) {
+            if (this.screencastActive)
+                return;
+            const page = this.playwright?.helper?.page;
+            if (!page?.screencast?.start)
+                return;
+            const task = test?._explorbotTest;
+            const scenarioName = task?.scenario || test?.title || 'scenario';
+            const planTitle = task?.plan?.title;
+            const planTests = task?.plan?.tests;
+            const index = planTests && task ? planTests.indexOf(task) + 1 : 0;
+            const parts = [];
+            if (planTitle)
+                parts.push(safeFilename(planTitle));
+            if (index > 0)
+                parts.push(String(index));
+            parts.push(safeFilename(scenarioName));
+            const dir = outputPath('screencasts');
+            mkdirSync(dir, { recursive: true });
+            const filePath = join(dir, `${parts.join('-')}.webm`);
+            const screencastConfig = this.config?.ai?.agents?.historian?.screencast;
+            // The config value may be `true` or an options object; only objects carry options.
+            const screencastOpts = typeof screencastConfig === 'object' ? screencastConfig : {};
+            const size = screencastOpts.size ?? page.viewportSize?.() ?? undefined;
+            const quality = screencastOpts.quality ?? 95;
+            try {
+                await page.screencast.start({ path: filePath, quality, size });
+            }
+            catch (err) {
+                tag('substep').log(`Screencast start failed: ${err?.message ?? err}`);
+                return;
+            }
+            // The recording is live from here on: track it before anything else can
+            // throw, otherwise stopScreencast() would never finalize or save the file.
+            this.screencastPage = page;
+            this.screencastPath = filePath;
+            this.screencastActive = true;
+            this.screencastTask = task || null;
+            this.screencastLastChapter = null;
+            try {
+                // The action overlay is cosmetic; a failure must not orphan the recording.
+                await page.screencast.showActions({ position: 'top-left' });
+            }
+            catch (err) {
+                debugLog('screencast.showActions failed:', err);
+            }
+        }
+        /**
+         * Shows the active note of the recorded task as a chapter marker.
+         * Skips when nothing is recording, when there is no note message, or when
+         * the message equals the previously emitted chapter.
+         *
+         * @param {object} _step - passed step (unused)
+         * @returns {Promise<void>}
+         */
+        async emitChapter(_step) {
+            if (!this.screencastActive)
+                return;
+            const explanation = this.screencastTask?.activeNote?.getMessage?.();
+            if (!explanation)
+                return;
+            if (explanation === this.screencastLastChapter)
+                return;
+            this.screencastLastChapter = explanation;
+            try {
+                await this.screencastPage.screencast.showChapter(explanation);
+            }
+            catch (err) {
+                debugLog('screencast.showChapter failed:', err);
+            }
+        }
+        /**
+         * Stops the active recording, resets the recording state, registers the file
+         * in `savedFiles` and attaches it to the recorded task as an artifact.
+         *
+         * Safe to call concurrently: if a stop is already in flight, later callers
+         * wait for it instead of issuing a second `stop()` and re-registering the file.
+         *
+         * @returns {Promise<void>}
+         */
+        async stopScreencast() {
+            if (!this.screencastActive)
+                return;
+            if (this.screencastStopping) {
+                await this.screencastStopping;
+                return;
+            }
+            const page = this.screencastPage;
+            const path = this.screencastPath;
+            const task = this.screencastTask;
+            // The async wrapper never rejects, so every waiter can await it safely.
+            this.screencastStopping = (async () => {
+                try {
+                    await page.screencast.stop();
+                }
+                catch (err) {
+                    tag('substep').log(`Screencast stop failed: ${err?.message ?? err}`);
+                }
+            })();
+            await this.screencastStopping;
+            this.screencastStopping = null;
+            this.screencastActive = false;
+            this.screencastPage = null;
+            this.screencastPath = null;
+            this.screencastTask = null;
+            this.screencastLastChapter = null;
+            if (path) {
+                this.savedFiles.add(path);
+                task?.addArtifact?.(path);
+                tag('substep').log(`Saved screencast: ${relativeToCwd(path)}`);
+            }
+        }
+    };
+}

package/dist/src/ai/historian.js CHANGED Viewed

@@ -5,18 +5,20 @@ import { relativeToCwd } from "../utils/next-steps.js";
 import { WithCodeceptJS } from "./historian/codeceptjs.js";
 import { WithExperience } from "./historian/experience.js";
 import { WithPlaywright } from "./historian/playwright.js";
+import { WithScreencast } from "./historian/screencast.js";
 export { isNonReusableCode } from "./historian/utils.js";
-const HistorianBase = WithPlaywright(WithCodeceptJS(WithExperience(Object)));
+const HistorianBase = WithScreencast(WithPlaywright(WithCodeceptJS(WithExperience(Object))));
 export class Historian extends HistorianBase {
-    constructor(provider, experienceTracker, reporter, stateManager, config, recorder) {
+    constructor(provider, experienceTracker, reporter, stateManager, config, playwright) {
         super();
         this.provider = provider;
         this.experienceTracker = experienceTracker || new ExperienceTracker();
         this.reporter = reporter;
         this.stateManager = stateManager;
         this.config = config;
-        this.recorder = recorder;
+        this.playwright = playwright;
         this.savedFiles = new Set();
+        this.attachScreencast();
     }
     isPlaywrightFramework() {
         return this.config?.ai?.agents?.historian?.framework === 'playwright';

package/dist/src/ai/pilot.js CHANGED Viewed

@@ -69,16 +69,17 @@ export class Pilot {
         const stateContext = this.buildStateContext(currentState);
         const notes = task.notesToString() || 'No notes recorded.';
         let visualAnalysis = '';
+        let screenshotState = null;
         if (this.provider.hasVision()) {
             try {
                 const action = this.explorer.createAction();
-                const screenshotState = await action.caputrePageWithScreenshot();
+                screenshotState = await action.caputrePageWithScreenshot();
                 if (screenshotState.screenshot) {
                     visualAnalysis = (await this.researcher.answerQuestionAboutScreenshot(screenshotState, `Describe current page state relevant to: ${task.scenario}`)) || '';
                 }
             }
             catch {
-                // vision not available, continue without
+                screenshotState = null;
             }
         }
         const schema = z.object({
@@ -140,28 +141,24 @@ export class Pilot {
                 task.finish(TestResult.FAILED);
                 return false;
             }
-            if (result.requestVerification && navigator) {
+            if (result.decision === 'pass' && result.requestVerification && navigator) {
                 tag('substep').log(`Pilot requesting verification: ${result.requestVerification}`);
-                try {
-                    const verifyResult = await navigator.verifyState(result.requestVerification, currentState);
-                    if (verifyResult.verified) {
-                        if (verifyResult.assertionSteps?.length) {
-                            this.explorer.getPlaywrightRecorder().recordVerification(verifyResult.assertionSteps);
-                        }
-                        tag('substep').log(`Pilot verified: ${result.requestVerification}`);
-                    }
-                    else {
-                        tag('substep').log(`Pilot verification failed: ${result.requestVerification}`);
-                        if (result.decision === 'pass') {
-                            const flipMessage = `Verification "${result.requestVerification}" did not match the page. Adjust approach and re-verify before finishing.`;
-                            result.decision = 'continue';
-                            result.reason = flipMessage;
-                            result.guidance = result.guidance ?? flipMessage;
-                        }
+                const verifyResult = await navigator.verifyState(result.requestVerification, currentState).catch(() => null);
+                if (verifyResult?.verified) {
+                    if (verifyResult.assertionSteps?.length) {
+                        this.explorer.getPlaywrightRecorder().recordVerification(verifyResult.assertionSteps);
                     }
                 }
-                catch (verifyErr) {
-                    tag('warning').log(`Pilot verification errored: ${verifyErr.message}`);
+                else {
+                    let answer = null;
+                    if (screenshotState?.screenshot) {
+                        answer = await this.researcher.answerQuestionAboutScreenshot(screenshotState, `Does the screen confirm: "${result.requestVerification}"? Answer YES or NO only.`);
+                    }
+                    if (!(answer || '').trim().toUpperCase().startsWith('YES')) {
+                        task.addNote(`Pilot: verification failed — ${result.requestVerification}`, TestResult.FAILED);
+                        task.finish(TestResult.FAILED);
+                        return false;
+                    }
                 }
             }
             tag('info').log(`Pilot: ${result.decision} — ${result.reason}`);
@@ -348,6 +345,8 @@ export class Pilot {
       - If no verification was done → prefer "continue" with guidance telling tester what to verify.
       - If verify assertion describes a state that was ALREADY TRUE before the test started, the verification proves nothing — reject with "continue".
+      requestVerification — pick assertions DOM can actually express. Some content is not assertable via DOM (iframe text, canvas, custom widgets, Monaco/CodeMirror editors). When the scenario goal lives in such a region, target a STABLE LANDMARK (container element, ARIA role, the parent that wraps the widget) rather than literal text inside it. Your "pass" verdict is honored even if the DOM assertion can't be made — pick the strongest landmark you can.
       GUIDANCE FIELD: When decision is "continue", you MUST provide "guidance" — a specific actionable instruction:
       - If evidence is insufficient: tell tester to verify with see()/verify(), specify WHAT to check
       - If approach was wrong: tell tester to try a different method, suggest which one
@@ -420,7 +419,7 @@ export class Pilot {
         Be concise and specific. Tester will follow your plan.
       `, 'pilot.planTest', { tools: true, planningOnly: true, maxToolRoundtrips: 3, task });
     }
-    async reviewNewPage(task, currentState) {
+    async reviewNewPage(task, currentState, testerConversation) {
         if (!this.conversation)
             return '';
         tag('substep').log('Pilot reviewing new page...');
@@ -430,7 +429,13 @@ export class Pilot {
         if (!pageSummary)
             return '';
         const stateContext = this.buildStateContext(currentState);
+        const toolCalls = testerConversation
+            .getToolExecutions()
+            .filter((t) => t.wasSuccessful)
+            .slice(-this.stepsToReview);
+        const actionsContext = this.formatActions(toolCalls);
         this.conversation.cleanupTag('page_summary', '...trimmed...', 1);
+        this.conversation.cleanupTag('recent_actions', '...trimmed...', 2);
         return this.sendToPilot(dedent `
         Navigated to new page.
         START URL: ${task.startUrl}
@@ -443,6 +448,10 @@ export class Pilot {
         ${pageSummary}
         </page_summary>
+        <recent_actions>
+        ${actionsContext || 'None'}
+        </recent_actions>
         ${this.formatExpectations(task)}
         First: evaluate whether this navigation makes sense for the scenario goal. If the page is unrelated, instruct Tester to back() or reset(). Then plan next steps.

package/dist/src/ai/rules.js CHANGED Viewed

@@ -272,11 +272,9 @@ export const actionRule = dedent `
     I.fillField('Description', 'Hello world', '.editor'); // works for rich text / code editors too
   </example>
-  I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors
-  (Monaco, ProseMirror, CodeMirror, TipTap, Quill, Draft.js, Slate, etc.) transparently.
-  ALWAYS use I.fillField for rich editors — target the editor container or its nearest label/heading with a normal locator.
-  Do NOT open the editor with raw JS (executeScript, page.evaluate), do NOT dispatch synthetic events,
-  do NOT call the editor's own API (monaco.editor.setValue, view.dispatch, etc.) to write text.
+  I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors transparently.
+  ALWAYS use I.fillField for rich text / code editors — target the editor container or its nearest label/heading with a normal locator.
+  If I.fillField does not work, I.type into the focused element is the fallback.
   ### I.type

package/dist/src/ai/session-analyst.js ADDED Viewed

@@ -0,0 +1,117 @@
+import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
+import path from 'node:path';
+import dedent from 'dedent';
+import { outputPath } from "../config.js";
+import { Stats } from "../stats.js";
+/**
+ * Produces an end-of-session markdown report by sending the executed tests
+ * (scenario, result, expectations and recent log entries) to the AI provider.
+ */
+export class SessionAnalyst {
+    /** Number of trailing log entries per test that are included in the prompt. */
+    static MAX_LOG_ENTRIES = 30;
+    emoji = '🧐';
+    provider;
+    /**
+     * @param {object} provider - AI provider exposing `getModelForAgent`,
+     *   `getSystemPromptForAgent` and `chat`
+     */
+    constructor(provider) {
+        this.provider = provider;
+    }
+    /**
+     * Asks the model for a session report covering every test that has a start time.
+     *
+     * @param {Array<object>} tests - tests from the session
+     * @returns {Promise<string>} trimmed markdown, or '' when no test has a
+     *   `startTime` or the response carries no text
+     */
+    async analyze(tests) {
+        const eligible = tests.filter((t) => t.startTime != null);
+        if (eligible.length === 0)
+            return '';
+        const model = this.provider.getModelForAgent('analyst');
+        const customPrompt = this.provider.getSystemPromptForAgent('analyst', undefined);
+        const systemPrompt = dedent `
+      You write a brief end-of-session report after autonomous exploratory testing. Your reader is a developer who needs to know in seconds: what is broken, how to reproduce it, and which results were inconclusive.
+      Output MARKDOWN. No JSON, no preamble, no closing remarks. Start with the heading.
+      ## Clustering
+      Group by ROOT CAUSE, not by scenario. If three tests fail for the same dropdown, that is ONE defect listing all three test refs (#3, #5, #7). Do not produce one cluster per test.
+      ## Bucketing
+      Use the FINAL verdict (the test's \`result\` field) as the starting point. Mid-test errors that the automation recovered from do NOT make a passed test unreliable.
+      - **Defect** — real product bug. \`result: failed\` AND the failure reflects the app misbehaving (not the automation). The automation completed its interactions, the app contradicted the expected outcome. Severity required.
+      - **UX issue** — app works but the UI is ambiguous, controls are hidden, or labels are unclear. Worth flagging to design.
+      - **Execution issue** — the FINAL verdict is unreliable. Only two cases:
+        1. \`result: failed\` AND the failure was automation, environment, or UI/UX (locator missing, timeout, AI loop, navigation stuck, modal trapped focus, no accessible label) — i.e. the test could not conclude whether the app works.
+        2. \`result: passed\` AND clear evidence in the log shows the user-visible goal was NOT achieved (no confirmation visible, no state change verified, the assertion was vacuous).
+      A test that passed and shows no contrary evidence belongs in NO section. Do not list passed tests just because the log contains intermediate retries or recovered failures.
+      ## Severity emoji (defects only)
+      - 🔴 critical or high — core flow blocked, data loss, security
+      - 🟡 medium — partial breakage with workaround
+      - 🟢 low — cosmetic
+      ## Required format
+      # Session Analysis
+      <one sentence: total tests, defect count, headline finding>
+      ## Defects
+      ### 🔴 <plain-English title of the BUG, not the scenario name>
+      Affects: #3, #5, #7
+      Reproduce:
+        1. <concrete UI step a person can replay>
+        2. <next step>
+      Evidence: <one short observation from the test log>
+      ### 🟡 <next defect>
+      ...
+      ## UX issues
+      - **<title>** — #4
+        <one short evidence line>
+      ## Execution Issues
+      - **<short test name or scenario phrase>** — <plain-English one-liner: what made the result unreliable>
+      - **<…>** — <…>
+      ## Rules
+      - Defects first, sorted by severity descending. Omit any section that has zero entries.
+      - Defect title describes the BUG ("Run-type dropdown does not filter"), never the scenario name.
+      - Reproduce steps are concrete UI actions derived from the log: URL + clicks + inputs. Imperative, one short line each.
+      - Evidence is the smallest factual observation from notes/steps that supports the claim — what was OBSERVED in the page (HTML, message, missing element). Never quote the test's \`result\` field as evidence; that is a tautology.
+      - **Execution Issues** entries must explain what actually went wrong in concrete terms a human understands: "could not find a Submit button after navigation", "page reloaded before the assertion ran", "passed without ever seeing a confirmation message", "marked failed but the new item appears in the list", "modal trapped focus and tests could not click outside", "ARIA tree had no labelled controls". Avoid jargon like "locator failed" without context. Never write category prefixes ("execution:", "false-positive:") — the section header already says it. No emoji on these entries.
+      - Do NOT include a passed test in any section unless evidence proves its goal was not achieved. Intermediate retries or recovered errors in the log are not grounds for listing a passed test.
+      - No editorialising, no restating the scenario verbatim, no closing summary.
+      ${customPrompt || ''}
+    `;
+        const userPayload = dedent `
+      ${eligible.length} tests were executed in this session.
+      ${eligible.map((t, i) => this.serializeTest(t, i + 1)).join('\n\n')}
+    `;
+        const response = await this.provider.chat([
+            { role: 'system', content: systemPrompt },
+            { role: 'user', content: userPayload },
+        ], model, { agentName: 'analyst' });
+        return (response?.text || '').trim();
+    }
+    /**
+     * Writes the report to `<output>/reports/<session label>.md`.
+     *
+     * @param {string} markdown - report body
+     * @returns {string} path of the written file
+     */
+    writeReport(markdown) {
+        const filePath = outputPath('reports', `${Stats.sessionLabel()}.md`);
+        // A recursive mkdir is a no-op for an existing directory, so no prior
+        // existence check is needed (and no check-then-create race is possible).
+        mkdirSync(path.dirname(filePath), { recursive: true });
+        writeFileSync(filePath, markdown);
+        return filePath;
+    }
+    /**
+     * Renders one test as a `<test ref="#N">` block for the user prompt.
+     * Only the last `SessionAnalyst.MAX_LOG_ENTRIES` log entries are included.
+     *
+     * @param {object} test - test exposing `getLog()`, `startUrl`, `scenario`,
+     *   `result` and `expected`
+     * @param {number} ref - 1-based reference number used in the report
+     * @returns {string} serialized test block
+     */
+    serializeTest(test, ref) {
+        const log = test
+            .getLog()
+            .slice(-SessionAnalyst.MAX_LOG_ENTRIES)
+            .map((entry) => `  - [${entry.type}] ${entry.content}`)
+            .join('\n');
+        // Tolerate a missing or non-array `expected` instead of throwing mid-report.
+        const expected = Array.isArray(test.expected) ? test.expected.join(' | ') : '';
+        return dedent `
+      <test ref="#${ref}">
+      url: ${test.startUrl || '/'}
+      scenario: ${test.scenario}
+      result: ${test.result || 'unknown'}
+      expected: ${expected || '(none)'}
+      log:
+      ${log}
+      </test>
+    `;
+    }
+}

package/dist/src/ai/tester.js CHANGED Viewed

@@ -216,7 +216,7 @@ export class Tester extends TaskAgent {
                     nextStep += await this.reinjectContextIfNeeded(iteration, currentState);
                     nextStep += await this.prepareInstructionsForNextStep(task);
                     if (isNewPage && this.pilot) {
-                        const guidance = await this.pilot.reviewNewPage(task, currentState);
+                        const guidance = await this.pilot.reviewNewPage(task, currentState, conversation);
                         if (guidance)
                             nextStep += `\n\n${guidance}`;
                     }
@@ -388,6 +388,7 @@ export class Tester extends TaskAgent {
         this.previousUrl = currentUrl;
         this.previousStateHash = currentStateHash;
         let context = '';
+        const focusArea = detectFocusArea(currentState.ariaSnapshot);
         const focusedElement = extractFocusedElement(currentState.ariaSnapshot);
         if (focusedElement) {
             const isTextInput = ['textbox', 'combobox', 'searchbox'].includes(focusedElement.role);
@@ -403,6 +404,17 @@ export class Tester extends TaskAgent {
         <no_focus>
         No element is focused
         </no_focus>
+      `;
+        }
+        if (focusArea.detected) {
+            const areaName = focusArea.name ? ` "${focusArea.name}"` : '';
+            context += dedent `
+        <focus_scope>
+        A ${focusArea.type}${areaName} is currently open above the page.
+        Scope all interactions to elements inside this ${focusArea.type}.
+        Page navigation, filters, and tabs that exist outside it are not actionable while it is open and may share names or roles with elements inside it — prefer the locator inside the ${focusArea.type}.
+        Use <page_aria> to confirm the element you target is actually inside the ${focusArea.type}.
+        </focus_scope>
       `;
         }
         if (currentState.isInsideIframe) {
@@ -462,7 +474,6 @@ export class Tester extends TaskAgent {
       `;
             return context;
         }
-        const focusArea = detectFocusArea(currentState.ariaSnapshot);
         if (focusArea.detected && focusArea.name && this.pageStateHash && this.pageActionResult) {
             const overlaySection = await this.researcher.researchOverlay(currentState, this.pageActionResult, this.pageStateHash);
             if (overlaySection) {

package/dist/src/commands/base-command.js CHANGED Viewed

@@ -19,17 +19,17 @@ export class BaseCommand {
         if (this.suggestions.length === 0)
             return;
         const prefix = isInteractive() ? '/' : `${getCliName()} `;
-        tag('info').log('');
-        tag('info').log(chalk.bold('Suggested:'));
+        const commandWidth = this.suggestions.reduce((max, s) => (s.command ? Math.max(max, prefix.length + s.command.length) : max), 0);
+        const lines = [chalk.bold('Suggested:')];
         for (const { command, hint } of this.suggestions) {
-            tag('info').log('');
             if (!command) {
-                tag('info').log(chalk.dim(hint));
+                lines.push(`  ${chalk.dim(hint)}`);
                 continue;
             }
-            tag('info').log(chalk.dim(`${hint}:`));
-            tag('info').log(`  ${chalk.yellow(`${prefix}${command}`)}`);
+            const cmd = `${prefix}${command}`.padEnd(commandWidth);
+            lines.push(`  ${chalk.yellow(cmd)}  ${chalk.dim(hint)}`);
         }
+        tag('info').log(lines.join('\n'));
     }
     parseArgs(args) {
         const cmd = new Command();

package/dist/src/commands/drill-command.js CHANGED Viewed

@@ -2,6 +2,7 @@ import { BaseCommand } from './base-command.js';
 export class DrillCommand extends BaseCommand {
     name = 'drill';
     description = 'Drill all components on current page to learn interactions';
+    aliases = ['driller'];
     suggestions = [
         { command: 'research', hint: 'see UI map first' },
         { command: 'navigate <page>', hint: 'go to another page' },
@@ -13,7 +14,7 @@ export class DrillCommand extends BaseCommand {
         if (!state) {
             throw new Error('No active page to drill');
         }
-        await this.explorBot.agentBosun().drill({
+        await this.explorBot.agentDriller().drill({
             knowledgePath,
             maxComponents,
             interactive: true,
@@ -24,7 +25,7 @@ export class DrillCommand extends BaseCommand {
         return match ? match[1] : undefined;
     }
     parseMaxArg(args) {
-        const match = args.match(/--max\s+(\d+)/);
+        const match = args.match(/--max-components\s+(\d+)/);
         return match ? Number.parseInt(match[1], 10) : undefined;
     }
 }

package/dist/src/commands/exit-command.js CHANGED Viewed

@@ -8,6 +8,7 @@ export class ExitCommand extends BaseCommand {
     description = 'Exit the application';
     aliases = ['quit'];
     async execute(_args) {
+        await this.explorBot.printSessionAnalysis();
         await this.explorBot.getExplorer().stop();
         if (Stats.hasActivity()) {
             await new Promise((resolve) => {

package/dist/src/commands/explore-command.js CHANGED Viewed

@@ -1,11 +1,13 @@
 import figureSet from 'figures';
 import { getStyles } from '../ai/planner/styles.js';
+import { outputPath } from '../config.js';
 import { Stats } from '../stats.js';
 import { getCliName } from "../utils/cli-name.js";
 import { ErrorPageError } from "../utils/error-page.js";
 import { tag } from '../utils/logger.js';
 import { jsonToTable } from '../utils/markdown-parser.js';
 import { printNextSteps, relativeToCwd } from "../utils/next-steps.js";
+import { safeFilename } from "../utils/strings.js";
 import { BaseCommand } from './base-command.js';
 export class ExploreCommand extends BaseCommand {
     name = 'explore';
@@ -66,6 +68,7 @@ export class ExploreCommand extends BaseCommand {
             await this.explorBot.visit(mainUrl);
         const savedPath = this.explorBot.savePlans(this.completedPlans);
         this.printResults();
+        await this.explorBot.printSessionAnalysis();
         this.printNextSteps(savedPath);
     }
     async runAllStyles(pageUrl, feature, parentPlan, completedPlans) {
@@ -145,11 +148,25 @@ export class ExploreCommand extends BaseCommand {
             });
         }
         const savedFiles = this.explorBot.agentHistorian().getSavedFiles();
-        if (savedFiles.length > 0) {
-            const commands = savedFiles.map((f) => ({ label: '', command: `${cli} rerun ${relativeToCwd(f)}` }));
+        const screencasts = savedFiles.filter((f) => f.endsWith('.webm'));
+        const testFiles = savedFiles.filter((f) => !f.endsWith('.webm'));
+        if (testFiles.length > 0) {
+            const commands = testFiles.map((f) => ({ label: '', command: `${cli} rerun ${relativeToCwd(f)}` }));
             commands.push({ label: 'List tests', command: `${cli} runs` });
             sections.push({
-                label: `Generated tests (${savedFiles.length})`,
+                label: `Generated tests (${testFiles.length})`,
+                commands,
+            });
+        }
+        if (screencasts.length > 0) {
+            const commands = screencasts.map((f) => ({ label: '', command: relativeToCwd(f) }));
+            const screencastDir = relativeToCwd(outputPath('screencasts'));
+            const planSlugs = [...new Set(this.completedPlans.map((p) => safeFilename(p.title)).filter(Boolean))];
+            for (const slug of planSlugs) {
+                commands.push({ label: 'Browse plan', command: `ls ${screencastDir}/${slug}-*` });
+            }
+            sections.push({
+                label: `Screencasts (${screencasts.length})`,
                 commands,
             });
         }

package/dist/src/components/AddRule.js CHANGED Viewed

@@ -4,7 +4,7 @@ import { Box, Text, useInput } from 'ink';
 import React, { useEffect, useState } from 'react';
 import { AddRuleCommand } from '../commands/add-rule-command.js';
 import InputReadline from './InputReadline.js';
-const KNOWN_AGENTS = ['researcher', 'tester', 'planner', 'pilot', 'captain', 'bosun', 'navigator'];
+const KNOWN_AGENTS = ['researcher', 'tester', 'planner', 'pilot', 'captain', 'driller', 'navigator'];
 const AddRule = ({ initialAgent = '', initialName = '', onComplete, onCancel }) => {
     const [agent, setAgent] = useState(initialAgent);
     const [ruleName, setRuleName] = useState(initialName);