npm - explorbot - Versions diffs - 0.1.9 → 0.1.11 - Mend

explorbot 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (157)

package/README.md +27 -1
package/bin/explorbot-cli.ts +86 -15
package/boat/api-tester/src/ai/curler-tools.ts +3 -3
package/boat/api-tester/src/ai/curler.ts +1 -1
package/boat/api-tester/src/apibot.ts +2 -2
package/boat/api-tester/src/config.ts +1 -1
package/dist/bin/explorbot-cli.js +85 -14
package/dist/boat/api-tester/src/ai/curler-tools.js +2 -2
package/dist/boat/api-tester/src/apibot.js +2 -2
package/dist/package.json +2 -2
package/dist/rules/navigator/output.md +9 -0
package/dist/rules/navigator/verification-actions.md +2 -0
package/dist/src/action-result.js +23 -1
package/dist/src/action.js +46 -38
package/dist/src/ai/bosun.js +16 -2
package/dist/src/ai/conversation.js +39 -0
package/dist/src/ai/experience-compactor.js +235 -50
package/dist/src/ai/historian/codeceptjs.js +109 -0
package/dist/src/ai/historian/experience.js +320 -0
package/dist/src/ai/historian/mixin.js +2 -0
package/dist/src/ai/historian/playwright.js +145 -0
package/dist/src/ai/historian/utils.js +18 -0
package/dist/src/ai/historian.js +19 -398
package/dist/src/ai/navigator.js +133 -80
package/dist/src/ai/pilot.js +254 -13
package/dist/src/ai/planner/subpages.js +1 -30
package/dist/src/ai/planner.js +33 -13
package/dist/src/ai/provider.js +55 -18
package/dist/src/ai/rerunner.js +3 -3
package/dist/src/ai/researcher/deep-analysis.js +1 -1
package/dist/src/ai/researcher/fingerprint-worker.js +1 -1
package/dist/src/ai/researcher/locators.js +1 -1
package/dist/src/ai/researcher/sections.js +8 -1
package/dist/src/ai/researcher.js +43 -41
package/dist/src/ai/rules.js +26 -14
package/dist/src/ai/tester.js +90 -26
package/dist/src/ai/tools.js +18 -10
package/dist/src/api/request-store.js +20 -0
package/dist/src/api/xhr-capture.js +19 -3
package/dist/src/browser-server.js +16 -3
package/dist/src/command-handler.js +1 -1
package/dist/src/commands/add-rule-command.js +12 -9
package/dist/src/commands/base-command.js +20 -0
package/dist/src/commands/clean-command.js +3 -2
package/dist/src/commands/compact-command.js +138 -0
package/dist/src/commands/context-command.js +7 -1
package/dist/src/commands/drill-command.js +4 -1
package/dist/src/commands/experience-command.js +104 -0
package/dist/src/commands/explore-command.js +54 -19
package/dist/src/commands/freesail-command.js +2 -0
package/dist/src/commands/index.js +7 -3
package/dist/src/commands/init-command.js +11 -10
package/dist/src/commands/learn-command.js +1 -1
package/dist/src/commands/navigate-command.js +4 -1
package/dist/src/commands/plan-clear-command.js +4 -1
package/dist/src/commands/plan-command.js +43 -4
package/dist/src/commands/plan-edit-command.js +1 -1
package/dist/src/commands/plan-load-command.js +4 -1
package/dist/src/commands/plan-reload-command.js +4 -1
package/dist/src/commands/plan-save-command.js +20 -8
package/dist/src/commands/rerun-command.js +4 -0
package/dist/src/commands/research-command.js +5 -2
package/dist/src/commands/start-command.js +5 -1
package/dist/src/commands/test-command.js +7 -1
package/dist/src/components/App.js +15 -5
package/dist/src/execution-controller.js +13 -2
package/dist/src/experience-tracker.js +174 -83
package/dist/src/explorbot.js +31 -22
package/dist/src/explorer.js +12 -5
package/dist/src/observability.js +50 -99
package/dist/src/playwright-recorder.js +309 -0
package/dist/src/reporter.js +17 -2
package/dist/src/stats.js +2 -0
package/dist/src/suite.js +1 -1
package/dist/src/test-plan.js +12 -0
package/dist/src/utils/aria.js +37 -1
package/dist/src/utils/error-page.js +30 -7
package/dist/src/utils/logger.js +1 -1
package/dist/src/utils/next-steps.js +37 -0
package/dist/src/utils/rules-loader.js +1 -1
package/dist/src/utils/test-files.js +1 -1
package/dist/src/utils/url-matcher.js +50 -0
package/package.json +2 -2
package/rules/navigator/output.md +9 -0
package/rules/navigator/verification-actions.md +2 -0
package/src/action-result.ts +26 -1
package/src/action.ts +44 -37
package/src/ai/bosun.ts +16 -2
package/src/ai/conversation.ts +37 -0
package/src/ai/experience-compactor.ts +270 -63
package/src/ai/historian/codeceptjs.ts +130 -0
package/src/ai/historian/experience.ts +383 -0
package/src/ai/historian/mixin.ts +4 -0
package/src/ai/historian/playwright.ts +169 -0
package/src/ai/historian/utils.ts +23 -0
package/src/ai/historian.ts +35 -468
package/src/ai/navigator.ts +140 -85
package/src/ai/pilot.ts +259 -14
package/src/ai/planner/subpages.ts +1 -24
package/src/ai/planner.ts +34 -14
package/src/ai/provider.ts +52 -18
package/src/ai/rerunner.ts +3 -3
package/src/ai/researcher/deep-analysis.ts +1 -1
package/src/ai/researcher/fingerprint-worker.ts +1 -1
package/src/ai/researcher/locators.ts +2 -2
package/src/ai/researcher/sections.ts +7 -1
package/src/ai/researcher.ts +47 -42
package/src/ai/rules.ts +27 -14
package/src/ai/task-agent.ts +1 -1
package/src/ai/tester.ts +94 -26
package/src/ai/tools.ts +53 -29
package/src/api/request-store.ts +22 -0
package/src/api/xhr-capture.ts +21 -3
package/src/browser-server.ts +17 -3
package/src/command-handler.ts +1 -1
package/src/commands/add-rule-command.ts +13 -9
package/src/commands/base-command.ts +26 -1
package/src/commands/clean-command.ts +4 -3
package/src/commands/compact-command.ts +156 -0
package/src/commands/context-command.ts +8 -2
package/src/commands/drill-command.ts +5 -2
package/src/commands/experience-command.ts +125 -0
package/src/commands/explore-command.ts +58 -21
package/src/commands/freesail-command.ts +2 -0
package/src/commands/index.ts +7 -3
package/src/commands/init-command.ts +11 -10
package/src/commands/learn-command.ts +2 -2
package/src/commands/navigate-command.ts +5 -2
package/src/commands/plan-clear-command.ts +5 -2
package/src/commands/plan-command.ts +47 -5
package/src/commands/plan-edit-command.ts +2 -2
package/src/commands/plan-load-command.ts +5 -2
package/src/commands/plan-reload-command.ts +5 -2
package/src/commands/plan-save-command.ts +20 -9
package/src/commands/rerun-command.ts +5 -0
package/src/commands/research-command.ts +6 -3
package/src/commands/start-command.ts +6 -2
package/src/commands/test-command.ts +8 -2
package/src/components/App.tsx +16 -5
package/src/config.ts +6 -1
package/src/execution-controller.ts +14 -3
package/src/experience-tracker.ts +198 -100
package/src/explorbot.ts +33 -23
package/src/explorer.ts +14 -5
package/src/observability.ts +50 -109
package/src/playwright-recorder.ts +305 -0
package/src/reporter.ts +17 -3
package/src/stats.ts +4 -0
package/src/suite.ts +1 -1
package/src/test-plan.ts +12 -0
package/src/utils/aria.ts +38 -1
package/src/utils/error-page.ts +32 -7
package/src/utils/logger.ts +1 -1
package/src/utils/next-steps.ts +51 -0
package/src/utils/rules-loader.ts +1 -1
package/src/utils/test-files.ts +1 -1
package/src/utils/url-matcher.ts +43 -0

package/src/ai/pilot.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import { type ExperienceTracker, renderExperienceToc } from '../experience-track
 import type Explorer from '../explorer.ts';
 import { type Test, TestResult } from '../test-plan.ts';
 import { collectInteractiveNodes, detectFocusArea, extractFocusedElement } from '../utils/aria.ts';
+import { ErrorPageError } from '../utils/error-page.ts';
 import { createDebug, tag } from '../utils/logger.ts';
 const debugLog = createDebug('explorbot:pilot');
@@ -14,6 +15,7 @@ import { truncateJson } from '../utils/strings.ts';
 import type { Agent } from './agent.ts';
 import type { Conversation } from './conversation.ts';
 import type { Fisherman } from './fisherman.ts';
+import type { Navigator } from './navigator.ts';
 import type { Provider } from './provider.ts';
 import type { Researcher } from './researcher.ts';
 import { isInteractive } from './task-agent.ts';
@@ -56,25 +58,30 @@ export class Pilot implements Agent {
     return this.conversation.getLastMessage() || null;
   }
-  async reviewStop(task: Test, currentState: ActionResult, testerConversation: Conversation): Promise<boolean> {
-    return this.reviewDecision('stop', task, currentState, testerConversation);
+  async reviewStop(task: Test, currentState: ActionResult, testerConversation: Conversation, navigator?: Navigator): Promise<boolean> {
+    return this.reviewDecision('stop', task, currentState, testerConversation, navigator);
   }
-  async reviewFinish(task: Test, currentState: ActionResult, testerConversation: Conversation): Promise<boolean> {
-    return this.reviewDecision('finish', task, currentState, testerConversation);
+  async reviewFinish(task: Test, currentState: ActionResult, testerConversation: Conversation, navigator?: Navigator): Promise<boolean> {
+    return this.reviewDecision('finish', task, currentState, testerConversation, navigator);
   }
-  async reviewCompletion(task: Test, currentState: ActionResult, testerConversation: Conversation): Promise<boolean> {
+  async reviewCompletion(task: Test, currentState: ActionResult, testerConversation: Conversation, navigator?: Navigator): Promise<boolean> {
     const verdictType = task.hasAchievedAny() ? 'finish' : 'stop';
-    return this.reviewDecision(verdictType, task, currentState, testerConversation);
+    return this.reviewDecision(verdictType, task, currentState, testerConversation, navigator);
   }
-  async finalReview(task: Test, currentState: ActionResult, testerConversation: Conversation): Promise<boolean> {
+  async finalReview(task: Test, currentState: ActionResult, testerConversation: Conversation, navigator?: Navigator): Promise<boolean> {
     if (task.hasFinished) return false;
-    return this.reviewCompletion(task, currentState, testerConversation);
+    return this.reviewCompletion(task, currentState, testerConversation, navigator);
   }
-  private async reviewDecision(type: 'finish' | 'stop', task: Test, currentState: ActionResult, testerConversation: Conversation): Promise<boolean> {
+  async reviewReset(task: Test, currentState: ActionResult, reason: string, testerConversation: Conversation): Promise<boolean> {
+    return this.reviewResetDecision(task, currentState, reason, testerConversation);
+  }
+  private async reviewDecision(type: 'finish' | 'stop', task: Test, currentState: ActionResult, testerConversation: Conversation, navigator?: Navigator): Promise<boolean> {
+    if (task.hasFinished) return false;
     tag('substep').log(`Pilot reviewing ${type} verdict...`);
     const sessionLog = this.formatSessionLog(testerConversation);
@@ -98,6 +105,12 @@ export class Pilot implements Agent {
       decision: z.enum(['pass', 'fail', 'continue', 'skipped']).describe('pass = test succeeded, fail = test failed, continue = tester should keep going, skipped = scenario is irrelevant OR systematic execution failures prevented testing'),
       reason: z.string().describe('What happened and why (1-2 sentences). Do NOT repeat the decision status (e.g. "scenario goal achieved/not achieved") — just explain the evidence. For continue: explain why rejected and suggest alternatives.'),
       guidance: z.string().nullable().describe('Required for "continue": specific actionable instruction for the tester — what exactly to verify, retry differently, or complete next. Be concrete.'),
+      requestVerification: z
+        .string()
+        .nullable()
+        .describe(
+          'REQUIRED whenever decision is "pass" — provide a specific assertion that proves the scenario goal on the current page (e.g., "New test suite \\"Foo\\" is visible in the suites list"). The system runs it and bakes the resulting assertion into the generated test file; without it the test file has no verifiable expect(). Also use when evidence is insufficient before deciding pass/fail. Leave null for "continue", "fail", or "skipped".'
+        ),
     });
     const userContent = dedent`
@@ -126,6 +139,12 @@ export class Pilot implements Agent {
       - "continue" if tester hasn't completed the scenario goal yet — even if milestones were checked
       - If evidence is mixed, but final state indicates goal completion, choose "pass"
       - If evidence is mixed and final state is unclear, prefer "continue" over "fail"
+      When deciding "pass", you MUST also set requestVerification to a CodeceptJS assertion that
+      proves the scenario goal on the current page. Choose the strongest single evidence (a unique
+      element/text that exists ONLY because the scenario succeeded). The assertion is executed and
+      then converted into the spec file's expect() — without it the generated test has nothing to
+      assert and is worthless.
     `;
     const messages = [
@@ -148,6 +167,29 @@ export class Pilot implements Agent {
         return false;
       }
+      if (result.requestVerification && navigator) {
+        tag('substep').log(`Pilot requesting verification: ${result.requestVerification}`);
+        try {
+          const verifyResult = await navigator.verifyState(result.requestVerification, currentState);
+          if (verifyResult.verified) {
+            if (verifyResult.assertionSteps?.length) {
+              this.explorer.getPlaywrightRecorder().recordVerification(verifyResult.assertionSteps);
+            }
+            tag('substep').log(`Pilot verified: ${result.requestVerification}`);
+          } else {
+            tag('substep').log(`Pilot verification failed: ${result.requestVerification}`);
+            if (result.decision === 'pass') {
+              const flipMessage = `Verification "${result.requestVerification}" did not match the page. Adjust approach and re-verify before finishing.`;
+              result.decision = 'continue';
+              result.reason = flipMessage;
+              result.guidance = result.guidance ?? flipMessage;
+            }
+          }
+        } catch (verifyErr: any) {
+          tag('warning').log(`Pilot verification errored: ${verifyErr.message}`);
+        }
+      }
       tag('info').log(`Pilot: ${result.decision} — ${result.reason}`);
       task.summary = result.reason;
@@ -180,6 +222,142 @@ export class Pilot implements Agent {
     }
   }
+  private async reviewResetDecision(task: Test, currentState: ActionResult, reason: string, testerConversation: Conversation): Promise<boolean> {
+    if (task.hasFinished) return false;
+    tag('substep').log(`Pilot reviewing reset (count=${task.resetCount})...`);
+    const sessionLog = this.formatSessionLog(testerConversation);
+    const stateContext = this.buildStateContext(currentState);
+    const notes = task.notesToString() || 'No notes recorded.';
+    const schema = z.object({
+      decision: z.enum(['allow', 'fail', 'continue', 'skipped']).describe('allow = reset proceeds, fail = test failed (stop looping), continue = veto reset, tester should act on current page instead, skipped = scenario is irrelevant or cannot be executed'),
+      reason: z.string().describe('What evidence justifies this decision (1-2 sentences). Do not restate the decision.'),
+      guidance: z.string().nullable().describe('Required for "continue": concrete instruction for what the tester should do instead of resetting (e.g. which tool to call, what to verify).'),
+    });
+    const userContent = dedent`
+      Tester requested reset. Previous reset count: ${task.resetCount - 1}.
+      Reason given by tester: ${reason || '(none)'}
+      <state>
+      ${stateContext}
+      </state>
+      ${this.formatExpectations(task)}
+      <notes>
+      ${notes}
+      </notes>
+      <session_log>
+      ${sessionLog || 'No actions recorded'}
+      </session_log>
+      Decide:
+      - "allow" — the reset is legitimate (navigation dead-end, wrong page, irrecoverable error on current page).
+      - "continue" — veto the reset; something on the current page can still be used to progress or verify. Provide guidance.
+      - "fail" — reset-looping: tester has already reset and the underlying obstacle will not change. Stop the test as failed.
+      - "skipped" — the scenario is inapplicable to this application or cannot be executed here.
+    `;
+    const messages = [
+      {
+        role: 'system' as const,
+        content: this.buildResetSystemPrompt(task),
+      },
+      { role: 'user' as const, content: userContent },
+    ];
+    try {
+      const response = await this.provider.generateObject(messages, schema, this.provider.getAgenticModel('pilot'), {
+        agentName: 'pilot',
+        experimental_telemetry: { functionId: 'pilot.reviewReset' },
+      });
+      const result = response?.object;
+      if (!result) {
+        return true;
+      }
+      tag('info').log(`Pilot reset verdict: ${result.decision} — ${result.reason}`);
+      if (result.decision === 'allow') {
+        tag('substep').log(`Pilot allowed reset: ${result.reason}`);
+        return true;
+      }
+      if (result.decision === 'fail') {
+        task.addNote(`Pilot: reset refused — ${result.reason}`, TestResult.FAILED);
+        task.finish(TestResult.FAILED);
+        return false;
+      }
+      if (result.decision === 'skipped') {
+        task.addNote(`Pilot: skipped — ${result.reason}`, TestResult.SKIPPED);
+        task.finish(TestResult.SKIPPED);
+        return false;
+      }
+      tag('substep').log(`Pilot vetoed reset: ${result.reason}`);
+      const guidanceText = result.guidance ? `\n\nWhat to do instead: ${result.guidance}` : '';
+      testerConversation.addUserText(`Pilot vetoed reset: ${result.reason}${guidanceText}`);
+      return false;
+    } catch (error: any) {
+      tag('warning').log(`Pilot reset review failed: ${error.message}`);
+      return true;
+    }
+  }
+  private buildResetSystemPrompt(task: Test): string {
+    return dedent`
+      You are Pilot — the supervisor that decides whether a reset is legitimate.
+      Tester wants to reset (navigate back to the start URL and discard progress).
+      SCENARIO: ${task.scenario}
+      Reset is DESTRUCTIVE. It abandons all work done in this iteration. In stateful apps, any
+      side effects (records created, forms submitted) persist on the server — resetting does not
+      undo them. Unnecessary resets create duplicate data and loop forever.
+      LEGITIMATE RESET (decide "allow"):
+      - The current page is unrelated to the scenario and no path leads back.
+      - Navigation is stuck in an error state with no recoverable action.
+      - The tester arrived on a page that cannot host the scenario at all.
+      ILLEGITIMATE RESET (decide "continue"):
+      - The previous action already succeeded (URL changed to a success/detail page, record visible,
+        confirmation shown) and tester wants to redo it because an assertion did not match.
+        The work is done — verify, record, or finish instead of restarting.
+      - A single expectation / milestone does not match app reality but the scenario goal may still
+        have been achieved. Do not redo — instruct the tester to verify the actual outcome.
+      - Tester wants to "try again with different input" after a form was submitted. Submitting
+        again creates a duplicate; guide toward editing the existing record or accepting the state.
+      RESET-LOOP (decide "fail"):
+      - resetCount >= 2 and the previous resets did not change the underlying situation.
+      - The same flow has been attempted twice with the same failure mode.
+      - Repeating the reset cannot produce new information.
+      SCENARIO INAPPLICABLE (decide "skipped"):
+      - The feature the scenario targets does not exist on this app, or prerequisites cannot be met.
+      PRIORITY:
+      1) Evidence of successful side effects in session_log (URL transition, new record visible).
+         If present, almost never allow the reset — the work is done.
+      2) resetCount. Each prior reset raises the bar for allowing another.
+      3) Tester's stated reason. Weigh it against the observed evidence, do not trust it blindly.
+      GUIDANCE FIELD (required when decision is "continue"):
+      Give a specific next action on the current page: which tool to call, what to verify, or how to
+      record the outcome. Do not suggest repeating actions that already succeeded.
+      EXPECTED RESULTS (milestones, not the goal):
+      ${task.expected.map((e) => `- ${e}`).join('\n')}
+    `;
+  }
   private buildVerdictSystemPrompt(type: string, task: Test): string {
     return dedent`
       You are Pilot — the final decision maker for test pass/fail.
@@ -281,10 +459,14 @@ export class Pilot implements Agent {
         the elements needed for the scenario. The page summary does not list every element.
         Prefer interacting with the current page over navigating away.
+        If you load a recipe via learn_experience, do NOT rewrite its code in your plan — the
+        raw recipe is forwarded to Tester automatically. Reference it by step ("apply recipe
+        steps 1–3, then…") and call out anywhere your scenario diverges from it.
         Be concise and specific. Tester will follow your plan.
       `,
       'pilot.planTest',
-      { tools: true, maxToolRoundtrips: 3, task }
+      { tools: true, planningOnly: true, maxToolRoundtrips: 3, task }
     );
   }
@@ -377,7 +559,7 @@ export class Pilot implements Agent {
     return `CHECKED: ${checked.length > 0 ? checked.join(', ') : 'none'}\nREMAINING: ${remaining.length > 0 ? remaining.join(', ') : 'none'}`;
   }
-  private async sendToPilot(userText: string, functionId: string, opts: { tools?: boolean; maxToolRoundtrips?: number; task?: Test } = {}): Promise<string> {
+  private async sendToPilot(userText: string, functionId: string, opts: { tools?: boolean; planningOnly?: boolean; maxToolRoundtrips?: number; task?: Test } = {}): Promise<string> {
     debugLog(`sendToPilot: ${functionId}, tools: ${!!opts.tools}, roundtrips: ${opts.maxToolRoundtrips ?? 0}`);
     let finalUserText = userText;
@@ -388,7 +570,10 @@ export class Pilot implements Agent {
       }
     }
     this.conversation!.addUserText(finalUserText);
-    let tools = opts.tools ? this.agentTools : undefined;
+    let tools: any;
+    if (opts.tools) {
+      tools = opts.planningOnly ? this.pickPlanningTools() : this.agentTools;
+    }
     if (opts.tools && opts.task) {
       tools = { ...tools, ...this.buildPreconditionTool(opts.task) };
@@ -399,7 +584,19 @@ export class Pilot implements Agent {
       agentName: 'pilot',
       experimental_telemetry: { functionId },
     });
-    return result?.response?.text || '';
+    const text = result?.response?.text || '';
+    const learned = (result?.toolExecutions || []).filter((e: any) => e.toolName === 'learn_experience' && e.output?.content).map((e: any) => e.output.content);
+    if (learned.length === 0) return text;
+    return dedent`
+      ${text}
+      <applied_experience>
+      Recipes from prior successful runs that Pilot judged relevant. Locators worked then; the page may have changed since.
+      Treat code blocks below as a starting hypothesis. If a locator misses, fall back to ARIA/UI-map.
+      ${learned.join('\n\n')}
+      </applied_experience>
+    `;
   }
   private getExperienceToc(): string {
@@ -411,6 +608,19 @@ export class Pilot implements Agent {
     return renderExperienceToc(toc);
   }
+  private pickPlanningTools() {
+    const { see, context, verify, research, getVisitedStates, xpathCheck, learn_experience } = this.agentTools ?? {};
+    const planning: Record<string, unknown> = {};
+    if (see) planning.see = see;
+    if (context) planning.context = context;
+    if (verify) planning.verify = verify;
+    if (research) planning.research = research;
+    if (getVisitedStates) planning.getVisitedStates = getVisitedStates;
+    if (xpathCheck) planning.xpathCheck = xpathCheck;
+    if (learn_experience) planning.learn_experience = learn_experience;
+    return planning;
+  }
   private buildPreconditionTool(task: Test) {
     return {
       precondition: tool({
@@ -487,6 +697,28 @@ export class Pilot implements Agent {
       lines.push(`verifications: ${verifyLines.join(', ')}`);
     }
+    const consoleErrors = (state.browserLogs ?? []).filter((l: any) => (l.type || l.level) === 'error');
+    if (consoleErrors.length > 0) {
+      const sample = consoleErrors
+        .slice(0, 3)
+        .map((e: any) => e.text || e.message || String(e))
+        .join(' | ');
+      lines.push(`console errors: ${consoleErrors.length} (${sample})`);
+    } else {
+      lines.push('console errors: none');
+    }
+    const failedRequests = this.explorer.getRequestStore()?.getFailedRequests() ?? [];
+    if (failedRequests.length > 0) {
+      const sample = failedRequests
+        .slice(-5)
+        .map((r) => `${r.method} ${r.path} → ${r.status}`)
+        .join(', ');
+      lines.push(`network errors: ${sample}`);
+    } else {
+      lines.push('network errors: none');
+    }
     const interactiveNodes = collectInteractiveNodes(state.ariaSnapshot);
     const disabledButtons = interactiveNodes.filter((n) => n.role === 'button' && n.disabled === true && n.name).map((n) => n.name);
     lines.push(`disabled buttons: ${disabledButtons.length > 0 ? disabledButtons.join(', ') : 'none'}`);
@@ -536,7 +768,13 @@ export class Pilot implements Agent {
     }
     if (text.includes('ATTACH_UI_MAP')) {
-      const uiMap = await this.researcher.research(currentState);
+      let uiMap = '';
+      try {
+        uiMap = await this.researcher.research(currentState);
+      } catch (err) {
+        if (!(err instanceof ErrorPageError)) throw err;
+        tag('warning').log(`Pilot UI map skipped: ${err.message}`);
+      }
       if (uiMap) {
         parts.push(dedent`
           <page_ui_map>
@@ -705,6 +943,13 @@ export class Pilot implements Agent {
       - If the goal was achieved by a previous action (SUCCESS in recent_actions with confirming ariaDiff): instruct Tester to verify() the result and finish(). Do NOT repeat the same action.
       - If Tester keeps re-opening the same panel and re-submitting the same data — STOP. The action was already completed.
+      Action-goal alignment — classify every recent successful action:
+      - GOAL-ADVANCING: creates, edits, removes, submits, or verifies the scenario's subject data (the object the scenario actually changes).
+      - VIEW-ONLY: toggles layout, filters, tabs, segment controls, sort orders, collapse/expand — changes which data is shown without modifying it.
+      - A single VIEW-ONLY action is legitimate when needed to reveal a target element for the next GOAL-ADVANCING action.
+      - A run of two or more consecutive successful VIEW-ONLY actions with no interleaved GOAL-ADVANCING action is thrashing — Tester is exploring UI instead of executing the scenario. Redirect Tester to the specific mutation or verification the scenario requires.
+      - VIEW-ONLY actions also tend to produce large page diffs with many htmlParts; if you see that pattern repeatedly in recent_actions, treat it as evidence of thrashing.
       Navigation awareness — always compare current page url to START URL:
       - subpage navigation (deeper path from START URL) — OK, scenario may need sub-pages
       - outer-page navigation (parent/sibling path from START URL) — SUSPICIOUS. The scenario target is on the START page. Do NOT rationalize leaving it. Instruct Tester to back() or reset().

package/src/ai/planner/subpages.ts CHANGED Viewed

@@ -1,10 +1,10 @@
 import dedent from 'dedent';
 import { z } from 'zod';
-import { ConfigParser } from '../../config.ts';
 import { normalizeUrl } from '../../state-manager.ts';
 import type { StateManager } from '../../state-manager.ts';
 import type { Plan } from '../../test-plan.ts';
 import { tag } from '../../utils/logger.ts';
+import { isDynamicSegment } from '../../utils/url-matcher.ts';
 import type { Provider } from '../provider.ts';
 import type { Constructor } from '../researcher/mixin.ts';
@@ -38,29 +38,6 @@ function buildKey(url: string, feature?: string): string {
   return normalized;
 }
-export function isDynamicSegment(segment: string): boolean {
-  try {
-    const configRegex = ConfigParser.getInstance().getConfig().dynamicPageRegex;
-    if (configRegex) return new RegExp(configRegex, 'i').test(segment);
-  } catch {
-    /* config not loaded yet */
-  }
-  // numeric: /users/123
-  if (/^\d+$/.test(segment)) return true;
-  // UUID: /items/550e8400-e29b-41d4-a716-446655440000
-  if (/^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$/i.test(segment)) return true;
-  // ULID: /items/01ARZ3NDEKTSV4RRFFQ69G5FAV
-  if (/^[0-9A-HJKMNP-TV-Z]{26}$/.test(segment)) return true;
-  // hex ID (4+ chars): /suite/70dae98a
-  if (/^[a-f0-9]{4,}$/i.test(segment)) return true;
-  // hex-prefixed slug (8+ hex before dash): /suite/95ef0c94-mobile
-  if (/^[a-f0-9]{8,}-/i.test(segment)) return true;
-  // short mixed alphanumeric (digits + letters, ≤8 chars, no dash): /item/x7f2
-  if (segment.length <= 8 && !segment.includes('-') && /\d/.test(segment) && /[a-z]/i.test(segment)) return true;
-  return false;
-}
 export function isTemplateMatch(urlA: string, urlB: string): boolean {
   const partsA = normalizeUrl(urlA).split('/');
   const partsB = normalizeUrl(urlB).split('/');

package/src/ai/planner.ts CHANGED Viewed

@@ -8,22 +8,22 @@ import type Explorer from '../explorer.ts';
 import { Observability } from '../observability.ts';
 import type { StateManager } from '../state-manager.js';
 import { Stats } from '../stats.ts';
+import { Suite } from '../suite.ts';
 import { Plan, Test } from '../test-plan.ts';
-import { planToCompactAiContext } from '../utils/test-plan-markdown.ts';
 import { createDebug, tag } from '../utils/logger.js';
 import { jsonToTable } from '../utils/markdown-parser.ts';
 import { mdq } from '../utils/markdown-query.js';
+import { planToCompactAiContext } from '../utils/test-plan-markdown.ts';
 import type { Agent } from './agent.js';
 import { Conversation } from './conversation.ts';
 import type { Fisherman } from './fisherman.ts';
-import { getActiveStyle, getStyles } from './planner/styles.ts';
 import { WithSessionDedup } from './planner/session-dedup.ts';
+import { getActiveStyle, getStyles } from './planner/styles.ts';
 import { WithSubPages, getPlannedByStateHash, getRegisteredPlan, registerPlan } from './planner/subpages.ts';
-import { findSimilarStateHash } from './researcher/cache.ts';
 import type { Provider } from './provider.js';
-import { hasFocusedSection } from './researcher/focus.ts';
 import { POSSIBLE_SECTIONS, Researcher } from './researcher.ts';
-import { Suite } from '../suite.ts';
+import { findSimilarStateHash } from './researcher/cache.ts';
+import { hasFocusedSection } from './researcher/focus.ts';
 import { fileUploadRule, protectionRule } from './rules.ts';
 const debugLog = createDebug('explorbot:planner');
@@ -447,6 +447,34 @@ export class Planner extends PlannerBase implements Agent {
       const titleListing = allTests.map((t) => `- "${t.scenario}" [${t.result || 'pending'}]`).join('\n');
       const compactContext = planToCompactAiContext(this.currentPlan);
+      let planningStrategy: string;
+      if (feature) {
+        planningStrategy = dedent`
+          <planning_strategy>
+          Stay strictly inside the "${feature}" feature area. Do NOT switch to a different, unrelated feature even if it has no coverage.
+          Propose ${this.MIN_TASKS}-${this.MAX_TASKS} additional scenarios for "${feature}" that are not already in the tested list.
+          Use the <approach> above to decide which new angles to explore — different controls, inputs, states, outcome categories, or combinations — all within "${feature}".
+          Return an empty scenarios array only when no genuinely new scenario for "${feature}" remains.
+          </planning_strategy>
+        `;
+      } else {
+        let extendedResearchHint = '';
+        if (mdq(plannerResearch).query('section("Extended Research")').count() > 0) {
+          extendedResearchHint = 'IMPORTANT: The research contains "Extended Research" sections with dropdowns, modals, and panels. Prioritize testing features from Extended Research that have no coverage yet.';
+        }
+        planningStrategy = dedent`
+          <planning_strategy>
+          Find a feature area in the research that has NO or minimal test coverage.
+          Pick that ONE feature and propose ${this.MIN_TASKS}-${this.MAX_TASKS} tests for it.
+          ${extendedResearchHint}
+          Follow the <approach> described above when proposing tests for this feature.
+          If ALL features across ALL research sections are covered, return empty scenarios array.
+          </planning_strategy>
+        `;
+      }
       conversation.addUserText(dedent`
         CRITICAL: This plan already has tests.
@@ -466,15 +494,7 @@ export class Planner extends PlannerBase implements Agent {
         ${compactContext}
         </tested_scenarios>
-        <planning_strategy>
-        Find a feature area in the research that has NO or minimal test coverage.
-        Pick that ONE feature and propose ${this.MIN_TASKS}-${this.MAX_TASKS} tests for it.
-        ${mdq(plannerResearch).query('section("Extended Research")').count() > 0 ? 'IMPORTANT: The research contains "Extended Research" sections with dropdowns, modals, and panels. Prioritize testing features from Extended Research that have no coverage yet.' : ''}
-        Follow the <approach> described above when proposing tests for this feature.
-        If ALL features across ALL research sections are covered, return empty scenarios array.
-        </planning_strategy>
+        ${planningStrategy}
         <context_from_previous_tests>
         During testing, the following pages were visited:

package/src/ai/provider.ts CHANGED Viewed

@@ -1,15 +1,15 @@
 import { LangfuseSpanProcessor } from '@langfuse/otel';
 import { NodeSDK } from '@opentelemetry/sdk-node';
-import { generateObject, generateText } from 'ai';
+import { generateObject, generateText, stepCountIs } from 'ai';
 import type { ModelMessage } from 'ai';
 import { clearActivity, setActivity } from '../activity.ts';
 import type { AIConfig } from '../config.js';
-import { RulesLoader } from '../utils/rules-loader.ts';
 import { executionController } from '../execution-controller.ts';
 import { Observability } from '../observability.ts';
 import { Stats } from '../stats.ts';
 import { createDebug, tag } from '../utils/logger.js';
 import { type RetryOptions, withRetry } from '../utils/retry.js';
+import { RulesLoader } from '../utils/rules-loader.ts';
 import { Conversation } from './conversation.js';
 const debugLog = createDebug('explorbot:provider');
@@ -19,6 +19,20 @@ const responseLog = createDebug('explorbot:provider:in');
 class AiError extends Error {} // Non-exported Error subclass; where it is thrown is not visible in this hunk.
 export class ContextLengthError extends Error {} // Exported Error subclass; thrown further down when a reply has no text, no tool calls and finishReason is 'length'.
+function rejectAfterIdle(ms: number, signal: { cancelled: boolean }): Promise<never> { // Timeout arm for Promise.race: the returned promise never resolves, it can only reject with 'AI request timeout'.
+  return new Promise((_, reject) => {
+    const tick = () => { // Runs each time the ms-long timer fires.
+      if (signal.cancelled) return; // Caller set the flag: stop without settling. No clearTimeout is used, so this final tick still fires before exiting here.
+      if (executionController.isAwaitingInput()) { // isAwaitingInput() is defined elsewhere; presumably true while the user is being prompted — TODO confirm.
+        setTimeout(tick, ms); // Do not time out now; re-check after another full ms interval.
+        return;
+      }
+      reject(new Error('AI request timeout'));
+    };
+    setTimeout(tick, ms); // First check happens ms milliseconds after the call.
+  });
+}
 export class Provider {
   private config: AIConfig;
   private telemetryEnabled = false;
@@ -286,14 +300,19 @@ export class Provider {
     promptLog(messages[messages.length - 1].content);
     const telemetry = this.getTelemetry(options);
+    const maxRoundtrips = options.maxToolRoundtrips ?? 5;
+    const extraStop = options.stopWhen;
+    const stopConditions: any[] = [stepCountIs(maxRoundtrips)];
+    if (extraStop) stopConditions.push(extraStop);
+    const { stopWhen: _ignoredStopWhen, ...optionsWithoutStop } = options;
     const config = this.mergeProviderOptions(
       {
         tools,
         maxTokens: 16384,
-        maxToolRoundtrips: options.maxToolRoundtrips ?? 5,
         toolChoice: 'auto',
         ...(this.config.config || {}),
-        ...options,
+        ...optionsWithoutStop,
+        stopWhen: stopConditions,
         ...(telemetry ? { experimental_telemetry: telemetry } : {}),
         model,
         abortSignal: executionController.getAbortSignal(),
@@ -303,13 +322,23 @@ export class Provider {
     try {
       const response = await withRetry(async () => {
         const timeout = config.timeout || 30000;
-        return (await Promise.race([
-          generateText({
-            messages,
-            ...config,
-          }),
-          new Promise((_, reject) => setTimeout(() => reject(new Error('AI request timeout')), timeout)),
-        ])) as any;
+        const cancel = { cancelled: false };
+        try {
+          const result = (await Promise.race([
+            generateText({
+              messages,
+              ...config,
+            }),
+            rejectAfterIdle(timeout, cancel),
+          ])) as any;
+          const hasToolCall = (result.toolCalls?.length || 0) > 0;
+          if (!result.text && !hasToolCall && result.finishReason === 'length') {
+            throw new ContextLengthError('AI response empty: output truncated at maxTokens. Increase maxTokens in config or use a model with higher output capacity.');
+          }
+          return result;
+        } finally {
+          cancel.cancelled = true;
+        }
       }, this.getRetryOptions(options));
       clearActivity();
@@ -380,13 +409,18 @@ export class Provider {
       promptLog(messages[messages.length - 1].content);
       const response = await withRetry(async () => {
         const timeout = config.timeout || 30000;
-        return (await Promise.race([
-          generateObject({
-            messages,
-            ...config,
-          }),
-          new Promise((_, reject) => setTimeout(() => reject(new Error('AI request timeout')), timeout)),
-        ])) as any;
+        const cancel = { cancelled: false };
+        try {
+          return (await Promise.race([
+            generateObject({
+              messages,
+              ...config,
+            }),
+            rejectAfterIdle(timeout, cancel),
+          ])) as any;
+        } finally {
+          cancel.cancelled = true;
+        }
       }, this.getRetryOptions(options));
       clearActivity();

package/src/ai/rerunner.ts CHANGED Viewed

@@ -7,8 +7,8 @@ import { highlight } from 'cli-highlight';
 import * as codeceptjs from 'codeceptjs';
 import heal from 'codeceptjs/lib/heal';
 import aiTracePlugin from 'codeceptjs/lib/plugin/aiTrace';
-import figureSet from 'figures';
 import dedent from 'dedent';
+import figureSet from 'figures';
 import { z } from 'zod';
 import { ActionResult } from '../action-result.ts';
 import { setActivity } from '../activity.ts';
@@ -19,14 +19,14 @@ import { Stats } from '../stats.ts';
 import { Task, Test, TestResult } from '../test-plan.ts';
 import { createDebug, tag } from '../utils/logger.ts';
 import { loop } from '../utils/loop.ts';
+import { RulesLoader } from '../utils/rules-loader.ts';
 import { loadTestSuites, printTestList } from '../utils/test-files.ts';
 import type { Agent } from './agent.ts';
 import { toolExecutionLabel } from './conversation.ts';
 import type { Navigator } from './navigator.ts';
 import { Provider } from './provider.ts';
-import { locatorRule, actionRule, sectionContextRule } from './rules.ts';
+import { actionRule, locatorRule, sectionContextRule } from './rules.ts';
 import { TaskAgent } from './task-agent.ts';
-import { RulesLoader } from '../utils/rules-loader.ts';
 import { createCodeceptJSTools } from './tools.ts';
 const debugLog = createDebug('explorbot:rerunner');

package/src/ai/researcher/deep-analysis.ts CHANGED Viewed

@@ -1,10 +1,10 @@
 import dedent from 'dedent';
 import { ActionResult, type Diff } from '../../action-result.js';
+import { executionController } from '../../execution-controller.ts';
 import type Explorer from '../../explorer.ts';
 import type { StateManager } from '../../state-manager.js';
 import { WebPageState } from '../../state-manager.js';
 import { detectFocusArea, diffAriaSnapshots } from '../../utils/aria.ts';
-import { executionController } from '../../execution-controller.ts';
 import { tag } from '../../utils/logger.js';
 import { mdq } from '../../utils/markdown-query.ts';
 import type { Provider } from '../provider.js';