npm - explorbot - Versions diffs - 0.1.10 → 0.1.12 - Mend

explorbot 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

package/README.md +37 -1
package/bin/explorbot-cli.ts +27 -18
package/dist/bin/explorbot-cli.js +26 -18
package/dist/package.json +3 -3
package/dist/rules/navigator/output.md +9 -0
package/dist/rules/navigator/verification-actions.md +2 -0
package/dist/src/action-result.js +23 -1
package/dist/src/action.js +51 -42
package/dist/src/ai/bosun.js +11 -1
package/dist/src/ai/conversation.js +39 -0
package/dist/src/ai/historian/codeceptjs.js +109 -0
package/dist/src/ai/historian/experience.js +321 -0
package/dist/src/ai/historian/mixin.js +2 -0
package/dist/src/ai/historian/playwright.js +145 -0
package/dist/src/ai/historian/screencast.js +121 -0
package/dist/src/ai/historian/utils.js +18 -0
package/dist/src/ai/historian.js +21 -405
package/dist/src/ai/navigator.js +82 -29
package/dist/src/ai/pilot.js +232 -13
package/dist/src/ai/planner.js +29 -9
package/dist/src/ai/provider.js +54 -17
package/dist/src/ai/researcher.js +41 -32
package/dist/src/ai/rules.js +26 -14
package/dist/src/ai/tester.js +90 -26
package/dist/src/ai/tools.js +13 -7
package/dist/src/browser-server.js +16 -3
package/dist/src/commands/add-rule-command.js +11 -8
package/dist/src/commands/clean-command.js +2 -1
package/dist/src/commands/explore-command.js +43 -15
package/dist/src/commands/init-command.js +9 -8
package/dist/src/commands/plan-command.js +32 -0
package/dist/src/commands/plan-save-command.js +19 -7
package/dist/src/commands/rerun-command.js +4 -0
package/dist/src/components/App.js +15 -5
package/dist/src/execution-controller.js +13 -2
package/dist/src/experience-tracker.js +20 -64
package/dist/src/explorbot.js +8 -8
package/dist/src/explorer.js +11 -3
package/dist/src/observability.js +50 -99
package/dist/src/playwright-recorder.js +309 -0
package/dist/src/reporter.js +4 -1
package/dist/src/test-plan.js +12 -0
package/dist/src/utils/aria.js +37 -1
package/dist/src/utils/error-page.js +20 -7
package/dist/src/utils/next-steps.js +37 -0
package/dist/src/utils/strings.js +15 -0
package/package.json +3 -3
package/rules/navigator/output.md +9 -0
package/rules/navigator/verification-actions.md +2 -0
package/src/action-result.ts +26 -1
package/src/action.ts +49 -41
package/src/ai/bosun.ts +11 -1
package/src/ai/conversation.ts +37 -0
package/src/ai/historian/codeceptjs.ts +130 -0
package/src/ai/historian/experience.ts +384 -0
package/src/ai/historian/mixin.ts +4 -0
package/src/ai/historian/playwright.ts +169 -0
package/src/ai/historian/screencast.ts +133 -0
package/src/ai/historian/utils.ts +23 -0
package/src/ai/historian.ts +37 -473
package/src/ai/navigator.ts +82 -29
package/src/ai/pilot.ts +237 -14
package/src/ai/planner.ts +29 -9
package/src/ai/provider.ts +51 -17
package/src/ai/researcher.ts +45 -33
package/src/ai/rules.ts +27 -14
package/src/ai/tester.ts +94 -26
package/src/ai/tools.ts +47 -25
package/src/browser-server.ts +17 -3
package/src/commands/add-rule-command.ts +11 -7
package/src/commands/clean-command.ts +2 -1
package/src/commands/explore-command.ts +46 -14
package/src/commands/init-command.ts +9 -8
package/src/commands/plan-command.ts +35 -0
package/src/commands/plan-save-command.ts +18 -7
package/src/commands/rerun-command.ts +5 -0
package/src/components/App.tsx +16 -5
package/src/config.ts +12 -1
package/src/execution-controller.ts +14 -3
package/src/experience-tracker.ts +21 -72
package/src/explorbot.ts +8 -8
package/src/explorer.ts +13 -3
package/src/observability.ts +50 -109
package/src/playwright-recorder.ts +305 -0
package/src/reporter.ts +4 -1
package/src/test-plan.ts +12 -0
package/src/utils/aria.ts +38 -1
package/src/utils/error-page.ts +22 -7
package/src/utils/next-steps.ts +51 -0
package/src/utils/strings.ts +17 -0

package/dist/src/ai/provider.js CHANGED

@@ -1,6 +1,6 @@
 import { LangfuseSpanProcessor } from '@langfuse/otel';
 import { NodeSDK } from '@opentelemetry/sdk-node';
-import { generateObject, generateText } from 'ai';
+import { generateObject, generateText, stepCountIs } from 'ai';
 import { clearActivity, setActivity } from "../activity.js";
 import { executionController } from "../execution-controller.js";
 import { Observability } from "../observability.js";
@@ -16,6 +16,20 @@ class AiError extends Error {
 }
 export class ContextLengthError extends Error {
 }
+function rejectAfterIdle(ms, signal) {
+    return new Promise((_, reject) => {
+        const tick = () => {
+            if (signal.cancelled)
+                return;
+            if (executionController.isAwaitingInput()) {
+                setTimeout(tick, ms);
+                return;
+            }
+            reject(new Error('AI request timeout'));
+        };
+        setTimeout(tick, ms);
+    });
+}
 export class Provider {
     config;
     telemetryEnabled = false;
@@ -247,13 +261,19 @@ export class Provider {
         promptLog('Available tools:', toolNames);
         promptLog(messages[messages.length - 1].content);
         const telemetry = this.getTelemetry(options);
+        const maxRoundtrips = options.maxToolRoundtrips ?? 5;
+        const extraStop = options.stopWhen;
+        const stopConditions = [stepCountIs(maxRoundtrips)];
+        if (extraStop)
+            stopConditions.push(extraStop);
+        const { stopWhen: _ignoredStopWhen, ...optionsWithoutStop } = options;
         const config = this.mergeProviderOptions({
             tools,
             maxTokens: 16384,
-            maxToolRoundtrips: options.maxToolRoundtrips ?? 5,
             toolChoice: 'auto',
             ...(this.config.config || {}),
-            ...options,
+            ...optionsWithoutStop,
+            stopWhen: stopConditions,
             ...(telemetry ? { experimental_telemetry: telemetry } : {}),
             model,
             abortSignal: executionController.getAbortSignal(),
@@ -261,13 +281,24 @@ export class Provider {
         try {
             const response = await withRetry(async () => {
                 const timeout = config.timeout || 30000;
-                return (await Promise.race([
-                    generateText({
-                        messages,
-                        ...config,
-                    }),
-                    new Promise((_, reject) => setTimeout(() => reject(new Error('AI request timeout')), timeout)),
-                ]));
+                const cancel = { cancelled: false };
+                try {
+                    const result = (await Promise.race([
+                        generateText({
+                            messages,
+                            ...config,
+                        }),
+                        rejectAfterIdle(timeout, cancel),
+                    ]));
+                    const hasToolCall = (result.toolCalls?.length || 0) > 0;
+                    if (!result.text && !hasToolCall && result.finishReason === 'length') {
+                        throw new ContextLengthError('AI response empty: output truncated at maxTokens. Increase maxTokens in config or use a model with higher output capacity.');
+                    }
+                    return result;
+                }
+                finally {
+                    cancel.cancelled = true;
+                }
             }, this.getRetryOptions(options));
             clearActivity();
             // Log tool usage summary
@@ -330,13 +361,19 @@ export class Provider {
             promptLog(messages[messages.length - 1].content);
             const response = await withRetry(async () => {
                 const timeout = config.timeout || 30000;
-                return (await Promise.race([
-                    generateObject({
-                        messages,
-                        ...config,
-                    }),
-                    new Promise((_, reject) => setTimeout(() => reject(new Error('AI request timeout')), timeout)),
-                ]));
+                const cancel = { cancelled: false };
+                try {
+                    return (await Promise.race([
+                        generateObject({
+                            messages,
+                            ...config,
+                        }),
+                        rejectAfterIdle(timeout, cancel),
+                    ]));
+                }
+                finally {
+                    cancel.cancelled = true;
+                }
             }, this.getRetryOptions(options));
             clearActivity();
             responseLog(response.object);

package/dist/src/ai/researcher.js CHANGED

@@ -6,12 +6,11 @@ import { executionController } from "../execution-controller.js";
 import { Observability } from "../observability.js";
 import { Stats } from "../stats.js";
 import { diffAriaSnapshots } from "../utils/aria.js";
-import { ErrorPageError, isErrorPage } from "../utils/error-page.js";
+import { ErrorPageError, detectPageCondition } from "../utils/error-page.js";
 import { HooksRunner } from "../utils/hooks-runner.js";
 import { isBodyEmpty } from "../utils/html.js";
 import { createDebug, pluralize, tag } from '../utils/logger.js';
 import { mdq } from "../utils/markdown-query.js";
-import { withRetry } from "../utils/retry.js";
 import { RulesLoader } from "../utils/rules-loader.js";
 import { ContextLengthError } from './provider.js';
 import { findSimilarResearch, getCachedResearch, saveResearch } from "./researcher/cache.js";
@@ -98,11 +97,15 @@ export class Researcher extends ResearcherBase {
             const annotatedElements = await this.explorer.annotateElements();
             debugLog(`Annotated ${annotatedElements.length} interactive elements with eidx`);
             this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot && this.provider.hasVision() });
-            if (isErrorPage(this.actionResult)) {
-                const recovered = await this.waitForPageLoad(screenshot);
-                if (!recovered) {
-                    tag('warning').log(`Detected error page at ${state.url}`);
-                    throw new ErrorPageError(state.url, this.actionResult.title);
+            const condition = detectPageCondition(this.actionResult);
+            if (condition === 'error') {
+                tag('warning').log(`Detected error page at ${state.url}`);
+                throw new ErrorPageError(state.url, this.actionResult.title);
+            }
+            if (condition === 'loading') {
+                const settled = await this.waitUntilSettled(screenshot);
+                if (!settled) {
+                    tag('warning').log(`Page at ${state.url} did not finish loading within timeout, continuing with best-effort research`);
                 }
             }
             debugLog('Researching web page:', this.actionResult.url);
@@ -285,41 +288,47 @@ export class Researcher extends ResearcherBase {
             }
             return;
         }
-        if (isEmpty) {
-            debugLog('HTML body is empty, refreshing page');
-            tag('step').log('Page body is empty, refreshing...');
-        }
-        else {
-            debugLog('Not on current state, navigating to URL');
-            tag('step').log('Navigating to URL...');
+        if (isEmpty && isOnCurrentState) {
+            debugLog('HTML body empty on current URL, waiting for content');
+            tag('step').log('Page body is empty, waiting for content...');
+            await this.waitUntilSettled(screenshot ?? false);
+            return;
         }
+        debugLog('Not on current state, navigating to URL');
+        tag('step').log('Navigating to URL...');
         await this.explorer.visit(url);
         this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot ?? false });
     }
-    async waitForPageLoad(screenshot) {
+    async waitUntilSettled(screenshot) {
         const errorPageTimeout = this.explorer.getConfig().ai?.agents?.researcher?.errorPageTimeout ?? 10;
         if (errorPageTimeout <= 0)
             return false;
+        const page = this.explorer.playwrightHelper.page;
+        const includeScreenshot = screenshot && this.provider.hasVision();
         try {
-            await withRetry(async () => {
-                await this.explorer.annotateElements();
-                this.actionResult = await this.explorer.createAction().capturePageState({
-                    includeScreenshot: screenshot && this.provider.hasVision(),
-                });
-                if (isErrorPage(this.actionResult))
-                    throw new Error('Error page detected');
-            }, {
-                maxAttempts: Math.ceil(errorPageTimeout / 3) + 1,
-                baseDelay: 1000,
-                maxDelay: 5000,
-                backoffMultiplier: 2,
-                retryCondition: (e) => e.message === 'Error page detected',
-            });
-            return true;
+            await page?.waitForLoadState('networkidle', { timeout: errorPageTimeout * 1000 });
         }
-        catch {
-            return false;
+        catch { }
+        await this.explorer.annotateElements();
+        this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
+        let condition = detectPageCondition(this.actionResult);
+        if (condition === 'error') {
+            throw new ErrorPageError(this.actionResult.url, this.actionResult.title);
+        }
+        if (condition === 'ok')
+            return true;
+        for (let i = 0; i < 3; i++) {
+            await new Promise((r) => setTimeout(r, 1000));
+            await this.explorer.annotateElements();
+            this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
+            condition = detectPageCondition(this.actionResult);
+            if (condition === 'error') {
+                throw new ErrorPageError(this.actionResult.url, this.actionResult.title);
+            }
+            if (condition === 'ok')
+                return true;
         }
+        return false;
     }
     getConfiguredSections() {
         const configSections = this.explorer.getConfig().ai?.agents?.researcher?.sections;

package/dist/src/ai/rules.js CHANGED

@@ -161,6 +161,20 @@ export const focusedElementRule = dedent `
   If focus is on wrong element, click the correct field first.
   </focused_element_actions>
 `;
+export const unexpectedPopupRule = dedent `
+  <unexpected_popup_rule>
+  If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
+  If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
+  If a click error mentions "intercepts pointer events", another element is covering the target — dismiss it first.
+  If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
+  Dismiss strategy (try in order):
+  1. I.clickXY(0, 0) — click outside the popup to close it
+  2. I.pressKey('Escape') — press Escape to dismiss
+  3. I.click('Cancel') — click Cancel button if present
+  4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
+  </unexpected_popup_rule>
+`;
 export const sectionContextRule = dedent `
   <section_context_rule>
   Context parameter is DEFAULT for all interactions. ALWAYS use container from UI map sections unless locator is XPath or unique ID.
@@ -186,17 +200,7 @@ export const sectionContextRule = dedent `
   - Locator is a unique ID (#specific-element)
   </section_context_rule>
-  <unexpected_popup_rule>
-  If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
-  If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
-  If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
-  Dismiss strategy (try in order):
-  1. I.clickXY(0, 0) — click outside the popup to close it
-  2. I.pressKey('Escape') — press Escape to dismiss
-  3. I.click('Cancel') — click Cancel button if present
-  4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
-  </unexpected_popup_rule>
+  ${unexpectedPopupRule}
 `;
 export function multipleTabsRule(tabs) {
     const tabsList = tabs.map((tab, i) => `  ${i + 1}. ${tab.title} - ${tab.url}`).join('\n');
@@ -265,12 +269,19 @@ export const actionRule = dedent `
     I.fillField('Username', 'John', '.login-form'); // fills Username inside .login-form
     I.fillField('Username', 'John'); // fills the field located by name or placeholder or label "Username" with the text "John"
     I.fillField('//user/input', 'John'); // fills the field located by XPath "//user/input" with the text "John"
-  </example>
+    I.fillField('Description', 'Hello world', '.editor'); // works for rich text / code editors too
+  </example>
+  I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors
+  (Monaco, ProseMirror, CodeMirror, TipTap, Quill, Draft.js, Slate, etc.) transparently.
+  ALWAYS use I.fillField for rich editors — target the editor container or its nearest label/heading with a normal locator.
+  Do NOT open the editor with raw JS (executeScript, page.evaluate), do NOT dispatch synthetic events,
+  do NOT call the editor's own API (monaco.editor.setValue, view.dispatch, etc.) to write text.
   ### I.type
-  Types text into the currently focused element. Use when fillField doesn't work,
-  for instance, for highly customized input fields like Monaco editors or rich text editors.
+  Types text into the currently focused element. Use only when there is no locator you can pass to I.fillField —
+  e.g. the target is implicit (a just-opened command palette, an autocomplete that steals focus, a canvas-based surface).
   I.type(<text>)
@@ -282,6 +293,7 @@ export const actionRule = dedent `
   DOES NOT receive any locator, just text to type.
   NEVER write: I.type('text', locator) or I.type('text', {locator: '...'}) — this is INVALID.
   To type into a specific field: use I.fillField(locator, text) or I.click(locator) then I.type(text).
+  Do NOT reach for I.type just because the target looks like a rich editor — I.fillField handles those.
   ### I.pressKey

package/dist/src/ai/tester.js CHANGED

@@ -6,9 +6,11 @@ import { z } from 'zod';
 import { ActionResult } from "../action-result.js";
 import { setActivity } from "../activity.js";
 import { ConfigParser } from "../config.js";
+import { Observability } from "../observability.js";
 import { Stats } from "../stats.js";
 import { TestResult } from "../test-plan.js";
 import { detectFocusArea, extractFocusedElement } from "../utils/aria.js";
+import { ErrorPageError } from "../utils/error-page.js";
 import { HooksRunner } from "../utils/hooks-runner.js";
 import { createDebug, tag } from "../utils/logger.js";
 import { loop } from "../utils/loop.js";
@@ -121,10 +123,34 @@ export class Tester extends TaskAgent {
         // Note: Markdown saving functionality removed from Conversation class
         const initialPrompt = await this.buildTestPrompt(task, initialState);
         conversation.addUserText(initialPrompt);
+        return await Observability.run(`test: ${task.scenario}`, {
+            sessionId: task.sessionName,
+            tags: ['tester'],
+            input: {
+                scenario: task.scenario,
+                startUrl: task.startUrl,
+                expected: task.expected,
+            },
+        }, async () => this.runTestSession(task, initialState, conversation, { offFailedRequest, page, onPageError, onConsoleMessage }));
+    }
+    async runTestSession(task, initialState, conversation, handlers) {
+        const { offFailedRequest, page, onPageError, onConsoleMessage } = handlers;
         if (this.pilot) {
-            const plan = await this.pilot.planTest(task, initialState);
-            if (plan) {
-                conversation.addUserText(`Pilot's test plan:\n${plan}\n\nFollow this plan while executing the test.`);
+            try {
+                const plan = await this.pilot.planTest(task, initialState);
+                if (plan) {
+                    conversation.addUserText(`Pilot's test plan:\n${plan}\n\nFollow this plan while executing the test.`);
+                }
+            }
+            catch (err) {
+                const message = err instanceof Error ? err.message : String(err);
+                tag('error').log(`Pilot planning failed: ${message}`);
+                task.addNote(`Planning failed: ${message}`, TestResult.FAILED);
+                task.finish(TestResult.FAILED);
+                offFailedRequest?.();
+                page?.off('pageerror', onPageError);
+                page?.off('console', onConsoleMessage);
+                return { success: false };
             }
         }
         debugLog('Starting test execution with tools');
@@ -135,6 +161,8 @@ export class Tester extends TaskAgent {
         const currentUrl = this.explorer.getStateManager().getCurrentState()?.url || task.startUrl || '';
         await this.hooksRunner.runBeforeHook('tester', currentUrl);
         const offStateChange = this.explorer.getStateManager().onStateChange((event) => {
+            if (task.hasFinished)
+                return;
             if (event.toState?.url === event.fromState?.url)
                 return;
             task.addNote(`Navigated to ${event.toState?.url}`, TestResult.PASSED);
@@ -178,6 +206,10 @@ export class Tester extends TaskAgent {
                 conversation.cleanupTag('page_aria', '...cleaned aria snapshot...', 2);
                 conversation.cleanupTag('page_html', '...cleaned HTML snapshot...', 1);
                 conversation.cleanupTag('experience', '...cleaned experience...', 1);
+                conversation.cleanupTag('applied_experience', '...cleaned past experience...', 1);
+                conversation.cleanupTag('page_ui_map', '...cleaned UI map...', 1);
+                conversation.cleanupTag('page_ui_map_overlay', '...cleaned UI overlay...', 1);
+                conversation.compactToolResults(3);
                 if (iteration > 1) {
                     const isNewPage = this.previousUrl !== null && this.previousUrl !== currentState.url;
                     let nextStep = '';
@@ -199,6 +231,7 @@ export class Tester extends TaskAgent {
                 const result = await this.provider.invokeConversation(conversation, tools, {
                     maxToolRoundtrips: 5,
                     toolChoice: 'required',
+                    stopWhen: () => task.hasFinished,
                 });
                 if (!result)
                     throw new Error('Failed to get response from provider');
@@ -273,34 +306,30 @@ export class Tester extends TaskAgent {
                         context.setUserInput(result.message);
                     }
                     : undefined,
-                observability: {
-                    name: `test: ${task.scenario}`,
-                    agent: 'tester',
-                    sessionId: task.sessionName,
-                    metadata: {
-                        input: {
-                            scenario: task.scenario,
-                            startUrl: task.startUrl,
-                            expected: task.expected,
-                        },
-                    },
-                },
                 catch: async ({ error, stop }) => {
                     tag('error').log(`Test execution error: ${error}`);
-                    task.addNote(`Execution error: ${error instanceof Error ? error.message : String(error)}`);
+                    if (!task.hasFinished) {
+                        task.addNote(`Execution error: ${error instanceof Error ? error.message : String(error)}`);
+                    }
                     stop();
                 },
             });
             if (task.hasFinished)
                 break;
             const finalState = this.getCurrentState();
-            const wantsContinue = await this.pilot.finalReview(task, finalState, conversation);
+            const wantsContinue = await this.pilot.finalReview(task, finalState, conversation, this.navigator);
             if (!wantsContinue || task.hasFinished)
                 break;
             if (extensions >= this.MAX_EXTENSIONS)
                 break;
             extensions++;
             tag('info').log(`Pilot extending test (${extensions}/${this.MAX_EXTENSIONS})`);
+            conversation.cleanupTag('page_aria', '...trimmed...', 1);
+            conversation.cleanupTag('page_html', '...trimmed...', 0);
+            conversation.cleanupTag('experience', '...trimmed...', 0);
+            conversation.cleanupTag('page_ui_map', '...trimmed...', 0);
+            conversation.cleanupTag('page_ui_map_overlay', '...trimmed...', 0);
+            conversation.compactToolResults(1);
             shouldContinue = true;
         }
         const finalUrl = this.explorer.getStateManager().getCurrentState()?.url || currentUrl;
@@ -391,7 +420,15 @@ export class Tester extends TaskAgent {
             this.explorer.clearOtherTabsInfo();
         }
         if (isNewUrl) {
-            const research = await this.researcher.research(currentState);
+            let research = '';
+            try {
+                research = await this.researcher.research(currentState);
+            }
+            catch (err) {
+                if (!(err instanceof ErrorPageError))
+                    throw err;
+                tag('warning').log(`Research skipped: ${err.message}`);
+            }
             this.pageStateHash = currentStateHash;
             this.pageActionResult = currentState;
             let uiMapSection = '';
@@ -562,7 +599,7 @@ export class Tester extends TaskAgent {
     - Use finish() to complete the test, not record(). record() is for intermediate notes.
     - Call finish(verify) when all goals are achieved — provide an assertion to verify
     - ONLY call stop() if the scenario itself is completely irrelevant to this page and no expectations can be achieved
-    - Use reset() to navigate back to the initial page if needed. Do not call it if you are already on the initial page
+    - Use reset() ONLY as a last resort when the current page cannot host the scenario. Never reset after a successful flow just because an assertion or milestone did not match — verify differently or record() the finding instead. Reset is destructive and does not undo server-side side effects.
     - Be precise with locators (CSS or XPath)
     - Each click/type call returns the new page state automatically
     - Check for success messages from tool calls to verify if expected outcomes are achieved
@@ -678,13 +715,25 @@ export class Tester extends TaskAgent {
         return {
             reset: tool({
                 description: dedent `
-          Reset the testing flow by navigating back to the original page.
-          Use this when navigated too far from the desired state and
-          there's no clear path to achieve the expected result. This restarts the
-          testing flow from a known good state.
+          Navigate back to the start URL and discard progress in this iteration.
+          Reset is a LAST RESORT. It is destructive — any side effects already produced on the
+          server (records created, forms submitted) persist and cannot be undone by resetting.
+          Use reset ONLY for:
+          - navigation dead-ends where the current page cannot host the scenario
+          - irrecoverable errors that leave no actionable path forward
+          Do NOT use reset when:
+          - the previous action already succeeded (URL changed, record visible, confirmation shown)
+            and an assertion did not match — verify differently, record(), or finish() instead
+          - an expectation/milestone does not match app behavior but the flow worked — the work is
+            done; resetting just creates duplicates
+          - you want to "try again" after submitting a form — submitting again creates a duplicate
+          Pilot will review every reset and may veto it.
         `,
                 inputSchema: z.object({
-                    reason: z.string().optional().describe('Explanation why you need to navigate'),
+                    reason: z.string().optional().describe('Explanation why reset is the only option'),
                 }),
                 execute: async ({ reason }) => {
                     if (this.getCurrentState().isInsideIframe) {
@@ -698,6 +747,18 @@ export class Tester extends TaskAgent {
                             action: 'reset',
                         };
                     }
+                    task.resetCount += 1;
+                    if (this.pilot) {
+                        const currentStateForReview = this.getCurrentState();
+                        const allowed = await this.pilot.reviewReset(task, currentStateForReview, reason ?? '', conversation);
+                        if (!allowed) {
+                            return {
+                                success: false,
+                                action: 'reset',
+                                message: 'Reset rejected by Pilot; Continue execution',
+                            };
+                        }
+                    }
                     const explanation = reason ? `${reason} (RESET)` : 'Resetting to initial page';
                     const targetUrl = resetUrl;
                     task.addNote(explanation);
@@ -776,10 +837,13 @@ export class Tester extends TaskAgent {
                     verify: z.string().describe('Specific assertion to verify on the page before finishing (e.g., "New item appears in the list")'),
                 }),
                 execute: async ({ verify }) => {
+                    if (task.hasFinished) {
+                        return { success: true, action: 'finish', message: 'already finished' };
+                    }
                     task.addNote(`Finish requested: ${verify}`);
                     if (this.pilot) {
                         const currentState = this.getCurrentState();
-                        await this.pilot.reviewFinish(task, currentState, conversation);
+                        await this.pilot.reviewFinish(task, currentState, conversation, this.navigator);
                         if (!task.hasFinished) {
                             return {
                                 success: false,
@@ -852,7 +916,7 @@ export class Tester extends TaskAgent {
                     if (input.status !== null && task.isComplete()) {
                         if (this.pilot) {
                             const currentState = this.getCurrentState();
-                            await this.pilot.reviewCompletion(task, currentState, conversation);
+                            await this.pilot.reviewCompletion(task, currentState, conversation, this.navigator);
                         }
                         else {
                             const hasPassed = task.hasAchievedAny();

package/dist/src/ai/tools.js CHANGED

@@ -84,7 +84,7 @@ export function createCodeceptJSTools(explorer, task) {
                             activeNote.screenshot = await action.saveScreenshot();
                         }
                         activeNote.commit(TestResult.PASSED);
-                        return successToolResult('click', { ...toolResult, attempts, code: command });
+                        return successToolResult('click', { ...toolResult, attempts, code: command }, action);
                     }
                 }
                 let disambiguated = null;
@@ -109,7 +109,7 @@ export function createCodeceptJSTools(explorer, task) {
                             activeNote.screenshot = await action.saveScreenshot();
                         }
                         activeNote.commit(TestResult.PASSED);
-                        return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true });
+                        return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true }, action);
                     }
                 }
                 const toolResult = await ActionResult.fromState(stateManager.getCurrentState()).toToolResult(previousState, commands[0]);
@@ -183,7 +183,7 @@ export function createCodeceptJSTools(explorer, task) {
                                 message: `Automatically used type() for "${key}" (not a standard key press)`,
                                 code: typeCommand,
                                 fallback: true,
-                            });
+                            }, action);
                         }
                         const errorMsg = `pressKey fallback to type() failed: ${action.lastError?.toString()}`;
                         if (toolResult?.pageDiff?.ariaChanges || toolResult?.pageDiff?.urlChanged) {
@@ -229,7 +229,7 @@ export function createCodeceptJSTools(explorer, task) {
                             ...toolResult,
                             message: `Pressed key: ${key}${modifier ? ` with modifier(s): ${Array.isArray(modifier) ? modifier.join('+') : modifier}` : ''}`,
                             code: pressKeyCommand,
-                        });
+                        }, action);
                     }
                     const errorMsg = `pressKey() failed: ${action.lastError?.toString()}`;
                     if (toolResult?.pageDiff?.ariaChanges || toolResult?.pageDiff?.urlChanged) {
@@ -338,7 +338,7 @@ export function createCodeceptJSTools(explorer, task) {
                         commandsExecuted: lines.length,
                         code: codeBlock,
                         suggestion: 'Verify the form was filled in correctly using see() tool. If needed to submit: try click() tool or form() with I.pressKey("Enter").',
-                    });
+                    }, action);
                 }
                 catch (error) {
                     activeNote.commit(TestResult.FAILED);
@@ -511,7 +511,7 @@ export function createAgentTools({ explorer, researcher, navigator, experienceTr
                         return successToolResult('verify', {
                             message: `Verification passed: ${assertion}`,
                             code: result.successfulCodes.join('\n'),
-                        });
+                        }, { assertionSteps: result.assertionSteps });
                     }
                     return failedToolResult('verify', `Verification failed: ${assertion}`, {
                         suggestion: 'The assertion could not be verified. Check if the condition is actually present on the page or try a different assertion.',
@@ -886,8 +886,14 @@ function countAriaChanges(ariaChanges) {
     const removedCount = removedMatch ? Number.parseInt(removedMatch[1]) : 0;
     return addedCount + removedCount;
 }
-function successToolResult(action, data) {
+function successToolResult(action, data, source) {
     const result = { success: true, action, ...data };
+    if (source?.playwrightGroupId) {
+        result.playwrightGroupId = source.playwrightGroupId;
+    }
+    if (source?.assertionSteps?.length) {
+        result.assertionSteps = source.assertionSteps;
+    }
     if (data?.pageDiff) {
         let suggestion = PAGE_DIFF_SUGGESTION;
         const ariaChanges = data.pageDiff.ariaChanges || '';

package/dist/src/browser-server.js CHANGED Viewed

@@ -2,7 +2,9 @@ import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from '
 import path from 'node:path';
 import { chromium, firefox, webkit } from 'playwright-core';
 import { ConfigParser } from './config.js';
-import { log, tag } from './utils/logger.js';
+import { getCliName } from "./utils/cli-name.js";
+import { log } from './utils/logger.js';
+import { printNextSteps } from "./utils/next-steps.js";
 const ENDPOINT_FILENAME = '.browser-endpoint';
 function getEndpointFilePath() {
     const configParser = ConfigParser.getInstance();
@@ -50,8 +52,19 @@ async function launchServer(opts) {
     const wsEndpoint = server.wsEndpoint();
     writeEndpoint(wsEndpoint);
     log(`Browser server started: ${browserName} (${opts.show ? 'headed' : 'headless'})`);
-    tag('info').log(`WebSocket endpoint: ${wsEndpoint}`);
-    tag('info').log(`Endpoint saved to: ${getEndpointFilePath()}`);
+    const cli = getCliName();
+    const sections = [
+        {
+            label: 'Browser server',
+            path: getEndpointFilePath(),
+            commands: [
+                { label: 'Endpoint', command: wsEndpoint },
+                { label: 'Status', command: `${cli} browser status` },
+                { label: 'Stop', command: `${cli} browser stop` },
+            ],
+        },
+    ];
+    printNextSteps(sections);
     return server;
 }
 async function getAliveEndpoint() {

package/dist/src/commands/add-rule-command.js CHANGED Viewed

@@ -3,6 +3,7 @@ import { join } from 'node:path';
 import { render } from 'ink';
 import React from 'react';
 import { tag } from '../utils/logger.js';
+import { printNextSteps, relativeToCwd } from "../utils/next-steps.js";
 import { BaseCommand } from './base-command.js';
 export class AddRuleCommand extends BaseCommand {
     name = 'add-rule';
@@ -33,18 +34,20 @@ export class AddRuleCommand extends BaseCommand {
         mkdirSync(rulesDir, { recursive: true });
         const filePath = join(rulesDir, `${ruleName}.md`);
         if (existsSync(filePath)) {
-            tag('warning').log(`Rule file already exists: ${filePath}`);
+            tag('warning').log(`Rule file already exists: ${relativeToCwd(filePath)}`);
             return null;
         }
         const content = opts?.content || `Instructions for ${agentName} agent.`;
         writeFileSync(filePath, `${content.trim()}\n`);
-        tag('success').log(`Rule created: ${filePath}`);
-        if (opts?.urlPattern) {
-            tag('info').log(`Add to config: ai.agents.${agentName}.rules: [{ '${opts.urlPattern}': '${ruleName}' }]`);
-        }
-        else {
-            tag('info').log(`Add to config: ai.agents.${agentName}.rules: ['${ruleName}']`);
-        }
+        const configLine = opts?.urlPattern ? `ai.agents.${agentName}.rules: [{ '${opts.urlPattern}': '${ruleName}' }]` : `ai.agents.${agentName}.rules: ['${ruleName}']`;
+        const sections = [
+            {
+                label: 'Rule',
+                path: filePath,
+                commands: [{ label: 'Add to config', command: configLine }],
+            },
+        ];
+        printNextSteps(sections);
         return filePath;
     }
 }