npm - explorbot - Versions diffs - 0.1.0 → 0.1.1 - Mend

explorbot 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69)

package/bin/explorbot-cli.ts +93 -36
package/dist/bin/explorbot-cli.js +71 -16
package/dist/rules/rerunner/healing-approach.md +19 -0
package/dist/src/action.js +8 -10
package/dist/src/ai/historian.js +34 -3
package/dist/src/ai/navigator.js +35 -28
package/dist/src/ai/pilot.js +33 -9
package/dist/src/ai/planner.js +29 -10
package/dist/src/ai/rerunner.js +472 -0
package/dist/src/ai/researcher.js +3 -4
package/dist/src/ai/rules.js +2 -2
package/dist/src/ai/tools.js +2 -2
package/dist/src/commands/add-rule-command.js +1 -2
package/dist/src/commands/base-command.js +12 -0
package/dist/src/commands/context-command.js +12 -5
package/dist/src/commands/drill-command.js +0 -1
package/dist/src/commands/explore-command.js +20 -5
package/dist/src/commands/freesail-command.js +8 -22
package/dist/src/commands/index.js +4 -0
package/dist/src/commands/init-command.js +3 -3
package/dist/src/commands/path-command.js +2 -1
package/dist/src/commands/plan-command.js +37 -15
package/dist/src/commands/rerun-command.js +42 -0
package/dist/src/commands/research-command.js +10 -4
package/dist/src/commands/runs-command.js +22 -0
package/dist/src/commands/start-command.js +0 -1
package/dist/src/commands/test-command.js +3 -3
package/dist/src/components/App.js +8 -0
package/dist/src/config.js +3 -0
package/dist/src/explorbot.js +19 -0
package/dist/src/explorer.js +2 -1
package/dist/src/suite.js +115 -0
package/dist/src/utils/html.js +2 -5
package/dist/src/utils/rules-loader.js +33 -17
package/dist/src/utils/test-files.js +103 -0
package/package.json +2 -1
package/rules/rerunner/healing-approach.md +19 -0
package/src/action.ts +7 -9
package/src/ai/historian.ts +37 -3
package/src/ai/navigator.ts +35 -28
package/src/ai/pilot.ts +33 -9
package/src/ai/planner.ts +28 -9
package/src/ai/rerunner.ts +532 -0
package/src/ai/researcher.ts +3 -4
package/src/ai/rules.ts +2 -2
package/src/ai/tools.ts +2 -2
package/src/commands/add-rule-command.ts +1 -2
package/src/commands/base-command.ts +13 -0
package/src/commands/context-command.ts +12 -5
package/src/commands/drill-command.ts +0 -1
package/src/commands/explore-command.ts +21 -5
package/src/commands/freesail-command.ts +6 -23
package/src/commands/index.ts +4 -0
package/src/commands/init-command.ts +3 -3
package/src/commands/path-command.ts +2 -1
package/src/commands/plan-command.ts +45 -16
package/src/commands/rerun-command.ts +46 -0
package/src/commands/research-command.ts +10 -4
package/src/commands/runs-command.ts +27 -0
package/src/commands/start-command.ts +0 -1
package/src/commands/test-command.ts +3 -3
package/src/components/App.tsx +8 -0
package/src/config.ts +23 -0
package/src/explorbot.ts +21 -0
package/src/explorer.ts +3 -2
package/src/suite.ts +135 -0
package/src/utils/html.ts +1 -5
package/src/utils/rules-loader.ts +35 -17
package/src/utils/test-files.ts +122 -0

package/dist/src/utils/rules-loader.js CHANGED

@@ -60,28 +60,44 @@ export class RulesLoader {
         const name = names[idx];
         return { name, approach: styles[name] };
     }
-    static extractStyles(agentName, targetDir) {
-        const sourceDir = join(BUILT_IN_DIR, agentName, 'styles');
+    static extractRules(agentName, targetDir) {
+        const sourceDir = join(BUILT_IN_DIR, agentName);
         if (!existsSync(sourceDir))
-            throw new Error(`No built-in styles found for agent: ${agentName}`);
-        mkdirSync(targetDir, { recursive: true });
-        const files = readdirSync(sourceDir)
-            .filter((f) => f.endsWith('.md'))
-            .sort();
+            throw new Error(`No built-in rules found for agent: ${agentName}`);
         const extracted = [];
-        for (const file of files) {
-            const target = join(targetDir, file);
-            if (existsSync(target)) {
-                tag('info').log(`Skipping ${file} (already exists)`);
-                continue;
-            }
-            writeFileSync(target, readFileSync(join(sourceDir, file), 'utf8'));
-            extracted.push(file);
-            tag('success').log(`Extracted ${file}`);
-        }
+        copyMarkdownTree(sourceDir, targetDir, '', extracted);
         return extracted;
     }
 }
+function copyMarkdownTree(sourceDir, targetDir, relative, extracted) {
+    const entries = readdirSync(sourceDir, { withFileTypes: true }).sort((a, b) => a.name.localeCompare(b.name));
+    let dirCreated = false;
+    const ensureTargetDir = () => {
+        if (dirCreated)
+            return;
+        mkdirSync(targetDir, { recursive: true });
+        dirCreated = true;
+    };
+    for (const entry of entries) {
+        const sourcePath = join(sourceDir, entry.name);
+        const targetPath = join(targetDir, entry.name);
+        const relPath = relative ? `${relative}/${entry.name}` : entry.name;
+        if (entry.isDirectory()) {
+            copyMarkdownTree(sourcePath, targetPath, relPath, extracted);
+            continue;
+        }
+        if (!entry.name.endsWith('.md'))
+            continue;
+        if (existsSync(targetPath)) {
+            tag('info').log(`Skipping ${relPath} (already exists)`);
+            continue;
+        }
+        ensureTargetDir();
+        writeFileSync(targetPath, readFileSync(sourcePath, 'utf8'));
+        extracted.push(relPath);
+        tag('success').log(`Extracted ${relPath}`);
+    }
+}
 function loadFile(agentName, name, subdir) {
     const file = `${name}.md`;
     const segments = subdir ? [agentName, subdir, file] : [agentName, file];

package/dist/src/utils/test-files.js ADDED

@@ -0,0 +1,103 @@
+import { existsSync, readdirSync } from 'node:fs';
+import path from 'node:path';
+import chalk from 'chalk';
+import { highlight } from 'cli-highlight';
+import * as codeceptjs from 'codeceptjs';
+import store from 'codeceptjs/lib/store';
+import stepsListener from 'codeceptjs/lib/listener/steps';
+import storeListener from 'codeceptjs/lib/listener/store';
+import figureSet from 'figures';
+import { ConfigParser } from "../config.js";
+export function loadTestSuites(testsDir) {
+    if (!existsSync(testsDir))
+        return [];
+    const jsFiles = readdirSync(testsDir)
+        .filter((f) => f.endsWith('.js'))
+        .map((f) => path.resolve(testsDir, f));
+    if (jsFiles.length === 0)
+        return [];
+    codeceptjs.container.createMocha();
+    const mocha = codeceptjs.container.mocha();
+    mocha.files = jsFiles;
+    mocha.loadFiles();
+    return mocha.suite.suites || [];
+}
+export function printTestList(suites) {
+    if (suites.length === 0) {
+        console.log(chalk.yellow('No test files found. Run /explore first.'));
+        return;
+    }
+    let totalActive = 0;
+    let totalSkipped = 0;
+    let index = 0;
+    for (const suite of suites) {
+        const file = path.relative(process.cwd(), suite.file || '');
+        const active = suite.tests.filter((t) => !t.pending).length;
+        const skipped = suite.tests.filter((t) => t.pending).length;
+        totalActive += active;
+        totalSkipped += skipped;
+        console.log(`\n${chalk.bold.cyan(suite.title)}`);
+        console.log(chalk.gray(file));
+        for (const test of suite.tests) {
+            const idx = chalk.dim(`${++index}.`);
+            if (test.pending) {
+                console.log(chalk.gray(`  ${idx} ${figureSet.line} ${test.title} (skipped)`));
+            }
+            else {
+                console.log(`  ${idx} ${chalk.green(figureSet.pointer)} ${test.title}`);
+            }
+        }
+    }
+    console.log(`\n${chalk.bold(`${totalActive + totalSkipped}`)} scenarios (${chalk.green(`${totalActive} active`)}, ${chalk.gray(`${totalSkipped} skipped`)})`);
+}
+export async function dryRunTestFile(filePath) {
+    const absPath = path.resolve(filePath);
+    if (!existsSync(absPath)) {
+        console.log(chalk.yellow(`File not found: ${absPath}`));
+        return;
+    }
+    const config = ConfigParser.getInstance().getConfig();
+    const configPath = ConfigParser.getInstance().getConfigPath();
+    const projectRoot = configPath ? path.dirname(configPath) : process.cwd();
+    const codeceptConfig = {
+        helpers: {
+            Playwright: { browser: config.playwright.browser, url: config.playwright.url },
+        },
+    };
+    global.output_dir = path.join(projectRoot, 'output', 'states');
+    global.codecept_dir = projectRoot;
+    codeceptjs.container.create(codeceptConfig, {});
+    await codeceptjs.recorder.start();
+    await codeceptjs.container.started(null);
+    store.dryRun = true;
+    global.container = codeceptjs.container;
+    storeListener();
+    stepsListener();
+    codeceptjs.container.createMocha();
+    const mocha = codeceptjs.container.mocha();
+    mocha.reporter(class {
+    });
+    mocha.files = [absPath];
+    mocha.loadFiles();
+    let currentSuite = '';
+    codeceptjs.event.dispatcher.on('suite.before', (suite) => {
+        if (suite.title && suite.title !== currentSuite) {
+            currentSuite = suite.title;
+            console.log(`\n${chalk.bold.cyan(suite.title)}`);
+            console.log(chalk.gray(path.relative(process.cwd(), suite.file || absPath)));
+        }
+    });
+    codeceptjs.event.dispatcher.on('test.before', (t) => {
+        console.log(`\n  ${chalk.green(figureSet.pointer)} ${chalk.bold(t.title)}`);
+    });
+    codeceptjs.event.dispatcher.on('step.start', (step) => {
+        const code = highlight(step.toCode(), { language: 'javascript' });
+        console.log(chalk.dim(`    ${code}`));
+    });
+    await new Promise((resolve) => {
+        const runner = mocha.run(() => resolve());
+        runner.on('pending', (t) => {
+            console.log(chalk.gray(`  ${figureSet.line} ${t.title} (skipped)`));
+        });
+    });
+}

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "explorbot",
-  "version": "0.1.0",
+  "version": "0.1.1",
   "description": "CLI app built with React Ink, CodeceptJS, and Playwright",
   "license": "Elastic-2.0",
   "type": "module",
@@ -66,6 +66,7 @@
     "@ai-sdk/groq": "^3.0",
     "@ai-sdk/openai": "^3.0",
     "@axe-core/playwright": "^4.11.0",
+    "@codeceptjs/reflection": "^0.5.2",
     "@inkjs/ui": "^2.0.0",
     "@langfuse/otel": "^4.5.1",
     "@openrouter/ai-sdk-provider": "^2.3.3",

package/rules/rerunner/healing-approach.md ADDED

@@ -0,0 +1,19 @@
+<healing_approach>
+The failed step was NOT performed. You MUST execute a replacement action.
+Just waiting or diagnosing is NOT enough — you must perform the click/fill/press that was intended.
+1. FIRST: Check the page URL and ARIA — are you on the right page?
+   - If URL or ARIA shows login/error/404 page → call giveUp immediately
+2. If ARIA is empty/minimal → page may still be loading:
+   - Use xpathCheck() to detect spinners, loaders, or loading indicators on the page
+   - Use wait() to let the page load — it returns fresh ARIA automatically
+   - Then execute the replacement action with a working locator
+3. If the target element is visible in ARIA:
+   - Use click() with multiple fallback locators (ARIA, CSS, XPath)
+4. If element is NOT in ARIA but page is correct:
+   - Use xpathCheck() to search the full HTML
+   - Use research() to get a semantic UI map of the page if needed
+   - If found → click it
+   - If not → bash to check console logs → giveUp
+5. Call done() with the command that replaced the failed step
+</healing_approach>

package/src/action.ts CHANGED

@@ -64,7 +64,7 @@ class Action {
     }
   }
-  async capturePageState({ includeScreenshot = false, ariaSnapshot: preCapuredAria }: { includeScreenshot?: boolean; ariaSnapshot?: string } = {}): Promise<ActionResult> {
+  async capturePageState({ includeScreenshot = false }: { includeScreenshot?: boolean } = {}): Promise<ActionResult> {
     try {
       const currentState = this.stateManager.getCurrentState();
       const stateHash = currentState?.hash || 'screenshot';
@@ -111,16 +111,14 @@ class Action {
       // Capture iframe HTML snapshots
       const iframeSnapshots = await this.captureIframeSnapshots(html);
-      let ariaSnapshot: string | null = preCapuredAria || null;
+      let ariaSnapshot: string | null = null;
       let ariaSnapshotFile: string | undefined = undefined;
-      if (!ariaSnapshot) {
-        try {
-          const page = this.playwrightHelper.page;
-          ariaSnapshot = await page.locator('body').ariaSnapshot();
-        } catch (err) {
-          debugLog('ARIA snapshot failed:', err instanceof Error ? `${err.message}\n${err.stack}` : err);
-        }
+      try {
+        const page = this.playwrightHelper.page;
+        ariaSnapshot = await page.locator('body').ariaSnapshot();
+      } catch (err) {
+        debugLog('ARIA snapshot failed:', err instanceof Error ? `${err.message}\n${err.stack}` : err);
       }
       if (ariaSnapshot) {

package/src/ai/historian.ts CHANGED

@@ -1,9 +1,10 @@
-import { mkdirSync, writeFileSync } from 'node:fs';
+import { mkdirSync, readFileSync, writeFileSync } from 'node:fs';
 import { join } from 'node:path';
 import dedent from 'dedent';
 import { z } from 'zod';
 import { ActionResult } from '../action-result.ts';
 import { ConfigParser } from '../config.ts';
+import { KnowledgeTracker } from '../knowledge-tracker.ts';
 import { ExperienceTracker, type SessionExperienceEntry, type SessionStep } from '../experience-tracker.ts';
 import { type Reporter, type ReporterStep } from '../reporter.ts';
 import type { StateManager } from '../state-manager.ts';
@@ -397,6 +398,7 @@ export class Historian {
     if (startUrl) {
       lines.push('Before(({ I }) => {');
       lines.push(`  I.amOnPage('${this.escapeString(startUrl)}');`);
+      lines.push(...this.getKnowledgeLines(startUrl));
       lines.push('});');
       lines.push('');
     }
@@ -425,8 +427,7 @@ export class Historian {
       lines.push('');
     }
-    const outputDir = ConfigParser.getInstance().getOutputDir();
-    const testsDir = join(outputDir, 'tests');
+    const testsDir = ConfigParser.getInstance().getTestsDir();
     mkdirSync(testsDir, { recursive: true });
     const filename = plan.title.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
@@ -437,6 +438,18 @@ export class Historian {
     return filePath;
   }
+  rewriteScenarioInFile(filePath: string, healedSteps: Array<{ test: string; original: string; healed: string }>): void {
+    let content = readFileSync(filePath, 'utf-8');
+    for (const step of healedSteps) {
+      if (!content.includes(step.original)) continue;
+      content = content.replace(step.original, step.healed);
+    }
+    writeFileSync(filePath, content);
+    tag('substep').log(`Updated test file with healed steps: ${filePath}`);
+  }
   private getExecutionLabel(exec: ToolExecution, fallback?: string): string {
     return exec.input?.explanation || exec.input?.assertion || exec.input?.note || fallback || '';
   }
@@ -445,6 +458,27 @@ export class Historian {
     return str.replace(/'/g, "\\'").replace(/\n/g, ' ');
   }
+  private getKnowledgeLines(url: string, indent = '  '): string[] {
+    const knowledgeTracker = new KnowledgeTracker();
+    const state = new ActionResult({ url });
+    const { wait, waitForElement, code } = knowledgeTracker.getStateParameters(state, ['wait', 'waitForElement', 'code']);
+    const lines: string[] = [];
+    if (wait !== undefined) {
+      lines.push(`${indent}I.wait(${wait});`);
+    }
+    if (waitForElement) {
+      lines.push(`${indent}I.waitForElement(${JSON.stringify(waitForElement)});`);
+    }
+    if (code) {
+      for (const codeLine of code.split('\n')) {
+        const trimmed = codeLine.trim();
+        if (trimmed) lines.push(`${indent}${trimmed}`);
+      }
+    }
+    return lines;
+  }
   private stripComments(code: string): string {
     return code
       .split('\n')

package/src/ai/navigator.ts CHANGED

@@ -40,6 +40,18 @@ class Navigator implements Agent {
     You are given the web page and a message from user.
     You need to resolve the state of the page based on the message.
   </task>
+  ${locatorRule}
+  <constraints>
+    NEVER navigate away from the base URL domain. Stay on the same origin at all times.
+    NEVER attempt to rewrite, replace, mock, or spoof the URL via JavaScript, history API, location assignment, or any client-side trick.
+    NEVER use executeScript, executeAsyncScript, or any JS evaluation to change the URL, bypass redirects, or fake the page state.
+    If the target URL redirects to an authentication/login page, DO NOT try to force the original URL. Instead:
+      1. Look for credentials in the provided knowledge/hint context and perform a real login through the form.
+      2. If no credentials are available, ask the user for credentials or ask the user to log in manually.
+    A redirect to /login, /sign_in, /auth, or similar is a signal that authentication is required — treat it as such, never as an obstacle to bypass.
+  </constraints>
   `;
   private freeSailSystemPrompt = dedent`
   <role>
@@ -168,6 +180,14 @@ class Navigator implements Agent {
         ${message}
       </message>
+      <page>
+        ${actionResult.toAiContext()}
+        <page_html>
+        ${await actionResult.combinedHtml()}
+        </page_html>
+      </page>
       <task>
         Identify the actual request of the user.
         Identify what is expected by user.
@@ -178,25 +198,13 @@ class Navigator implements Agent {
         Try various ways to achieve the result
       </task>
-      <page>
-        ${actionResult.toAiContext()}
-        <page_html>
-        ${await actionResult.simplifiedHtml()}
-        </page_html>
-      </page>
-      ${knowledge}
       ${actionRule}
-      ${experience}
+      ${RulesLoader.loadRules('navigator', ['multiple-locator', 'output'], actionResult.url || '').replace('{{maxAttempts}}', String(this.MAX_ATTEMPTS))}
-      ${locatorRule}
+      ${experience}
-      ${RulesLoader.loadRules('navigator', ['multiple-locator', 'output'], actionResult.url || '').replace('{{maxAttempts}}', String(this.MAX_ATTEMPTS))}
+      ${knowledge}
     `;
     const conversation = this.provider.startConversation(this.systemPrompt, 'navigator');
@@ -234,7 +242,7 @@ class Navigator implements Agent {
               Previous solutions did not work. Here is the full HTML context:
               <page_html>
-              ${await actionResult.simplifiedHtml()}
+              ${await actionResult.combinedHtml()}
               </page_html>
               Please suggest new solutions based on this additional context.
@@ -265,6 +273,7 @@ class Navigator implements Agent {
         if (resolved) {
           tag('success').log('Navigation resolved successfully');
+          await this.experienceTracker.saveSuccessfulResolution(actionResult, message, codeBlock);
           stop();
           return;
         }
@@ -479,6 +488,14 @@ class Navigator implements Agent {
         ${message}
       </message>
+      <page>
+        ${actionResult.toAiContext()}
+        <page_html>
+        ${await actionResult.combinedHtml()}
+        </page_html>
+      </page>
       <task>
         Identify what assertion the user wants to verify on the page.
         Propose different CodeceptJS assertion code blocks to verify the expected state.
@@ -492,21 +509,11 @@ class Navigator implements Agent {
         Do not generate assertions that would pass even if the specific claim is false.
       </task>
-      <page>
-        ${actionResult.toAiContext()}
-        <page_html>
-        ${await actionResult.simplifiedHtml()}
-        </page_html>
-      </page>
-      ${knowledge}
       ${RulesLoader.loadRules('navigator', ['verification-actions'], actionResult.url || '')}
-      ${locatorRule}
       ${experience}
+      ${knowledge}
     `;
     debugLog('Sending verification prompt to AI provider');

package/src/ai/pilot.ts CHANGED

@@ -256,9 +256,18 @@ export class Pilot implements Agent {
         Plan the test execution for this scenario.
-        FIRST: Call precondition() to create fresh data that this test will act on.
-        Ask: "What will this test edit/delete/use?" — create THAT item via precondition.
-        Do not describe what's already on the page — create new disposable items for the test.
+        FIRST: Decide if precondition() is needed.
+        Call precondition() WHEN:
+        - The scenario edits/deletes/modifies an item, and you want a DISPOSABLE item to act on safely
+        - The scenario needs specific data clearly NOT on the current page (e.g., items with specific statuses for filtering)
+        SKIP precondition() WHEN:
+        - The scenario is "Create X" — the test itself creates the item
+        - The current page already shows the item the test will act on (check <state> and <page_summary>)
+        - The scenario tests navigation, UI behavior, or viewing — no data mutation needed
+        If needed, call precondition() now. If not, proceed directly to planning.
         THEN: Based on the page elements and current state, outline:
         1. Which elements to interact with and in what order
@@ -701,6 +710,8 @@ export class Pilot implements Agent {
       - Click succeeded but ariaDiff shows elements unrelated to tester's intention (e.g., clicked "Edit" but dropdown appeared) → wrong button or unexpected behavior. Instruct Tester to Escape and try a different approach.
       - form(I.type()) succeeded → I.type() sends keys to whatever is focused, no guarantee it's the right field. Instruct Tester to verify with see() that text appeared in the correct field. If targetedHtml shows a button/link, text went to wrong element — click the correct field first and retry.
       - ariaDiff shows 5+ elements removed/added after clicking content → page entered a different mode (editor, panel, modal). Instruct Tester to call context() to see current state before guessing selectors.
+      - Dropdown/select opened but contains NO options, or a list/table is empty when items were expected → data doesn't exist yet. Call precondition() to create the missing items (labels, categories, etc.), then instruct Tester to retry.
+      - Tester tries to select/filter/assign something but the option list is empty or expected value is not present → missing auxiliary data. Call precondition() to create it.
       Detecting logically wrong successes — review "executed", "element", and "skipped" fields:
       - Click SUCCESS but "executed" command differs from "explanation" intent → wrong element was clicked. The intended element wasn't found and a different one was clicked instead.
@@ -750,23 +761,36 @@ export class Pilot implements Agent {
       YOUR tools (Pilot-only):
       - precondition(description) — create FRESH test data via API that the test will act on. Do NOT request users.
-      PRECONDITIONS — what to create:
+      PRECONDITIONS — when and what to create:
       Preconditions create NEW disposable items that the test will modify, delete, or interact with.
-      Do NOT describe what already exists on the page — describe what NEW data the test needs to act on.
       Ask yourself: "What object will this test change/delete/use? Create THAT."
-      Examples:
+      When to call precondition():
+      - Scenario edits/deletes/modifies an item → create a disposable target
+      - Scenario needs auxiliary data (labels, categories, statuses to filter by)
+      - Tester failed because required data is missing (empty dropdown, no items to select)
+      When to SKIP precondition():
+      - Scenario is "Create X" — the test itself creates the item, no precondition needed
+      - Current page already shows the exact data needed (check <state> h1/title and <page_summary>)
+      - Scenario tests navigation, search UI, or viewing — no data mutation involved
+      Examples — when to create:
       - "Edit test description" → precondition("1 test") — the test will edit this item
       - "Delete a comment" → precondition("1 comment") — the test will delete this item
       - "Assign a label to item" → precondition("1 item and 1 label named Bug") — test assigns the label
       - "Filter by status" → precondition("3 items: 2 with status Open, 1 with status Closed")
-      - "Move item between lists" → precondition("1 item in list A")
-      WRONG: precondition("1 test suite named Updated Suite with existing tests") — this describes the page, not what to create
+      Examples — when to skip:
+      - "Create a new blog post" → SKIP, the test creates it
+      - "Edit blog post" while on a blog post page → SKIP, data already exists
+      - "View dashboard" → SKIP, no data mutation
+      WRONG: precondition("1 test suite named Updated Suite with existing tests") — describes the page, not what to create
       RIGHT: precondition("1 test") — create a fresh test that the scenario will edit
-      Call precondition() for EVERY item the scenario will act on. Keep descriptions short and specific.
+      Keep descriptions short and specific.
       Response format:
       PROGRESS: <1 sentence assessment>

package/src/ai/planner.ts CHANGED

@@ -23,6 +23,7 @@ import { findSimilarStateHash } from './researcher/cache.ts';
 import type { Provider } from './provider.js';
 import { hasFocusedSection } from './researcher/focus.ts';
 import { POSSIBLE_SECTIONS, Researcher } from './researcher.ts';
+import { Suite } from '../suite.ts';
 import { fileUploadRule, protectionRule } from './rules.ts';
 const debugLog = createDebug('explorbot:planner');
@@ -58,6 +59,7 @@ export class Planner extends PlannerBase implements Agent {
   currentPlan: Plan | null = null;
   freshStart = false;
   private lastStyleName = '';
+  private lastSuite: Suite | null = null;
   researcher: Researcher;
   private fisherman: Fisherman | null = null;
@@ -201,14 +203,14 @@ export class Planner extends PlannerBase implements Agent {
       this.currentPlan.url = state.url;
       if (parentPlan) this.currentPlan.parentPlan = parentPlan;
       const allPreviousScenarios = this.getPreviousSessionScenarios();
+      const existingTestScenarios = this.getExistingTestFileScenarios(state.url);
+      for (const s of existingTestScenarios) allPreviousScenarios.add(s);
       for (const t of tests) {
         if (allPreviousScenarios.has(t.scenario.toLowerCase())) continue;
         t.style = this.lastStyleName;
         t.startUrl = state.url;
         this.currentPlan.addTest(t);
       }
-      const summary = `Scenarios:\n${this.currentPlan.tests.map((t) => `- [${t.priority}] ${t.scenario}`).join('\n')}`;
-      tag('multiline').log(summary);
     } else {
       tag('step').log(`Expanding plan: "${this.currentPlan.title}"`);
       this.currentPlan.nextIteration();
@@ -219,7 +221,6 @@ export class Planner extends PlannerBase implements Agent {
       }
     }
-    this.moveExecutedTestsToEnd();
     const availableStyles = Object.keys(getStyles()).join(', ');
     tag('success').log(`Planning complete! ${this.currentPlan.tests.length} tests in plan: ${this.currentPlan.title}`);
     tag('info').log(`Planning style: ${this.lastStyleName} (available: ${availableStyles})`);
@@ -231,12 +232,8 @@ export class Planner extends PlannerBase implements Agent {
     return this.currentPlan;
   }
-  private moveExecutedTestsToEnd(): void {
-    if (!this.currentPlan) return;
-    const pending = this.currentPlan.tests.filter((t) => t.result === null);
-    const executed = this.currentPlan.tests.filter((t) => t.result !== null);
-    this.currentPlan.tests = [...pending, ...executed];
-    this.currentPlan.notifyChange();
+  getSuite(): Suite | null {
+    return this.lastSuite;
   }
   private addNewTests(tests: Test[], defaultStartUrl: string): Test[] {
@@ -262,6 +259,17 @@ export class Planner extends PlannerBase implements Agent {
     return added;
   }
+  private getExistingTestFileScenarios(currentUrl?: string): Set<string> {
+    if (!currentUrl) return new Set<string>();
+    try {
+      this.lastSuite = new Suite(currentUrl);
+      return this.lastSuite.getActiveScenarioTitles();
+    } catch (err: any) {
+      debugLog('Failed to load existing test files: %s', err.message);
+      return new Set<string>();
+    }
+  }
   private cleanExperienceFlows(text: string): string | null {
     const seenTitles = new Set<string>();
     let result = text;
@@ -421,6 +429,17 @@ export class Planner extends PlannerBase implements Agent {
       }
     }
+    if (this.lastSuite && this.lastSuite.automatedTestCount > 0) {
+      const automatedNames = this.lastSuite.getAutomatedTestNames();
+      conversation.addUserText(dedent`
+        <existing_automated_tests>
+        The following ${automatedNames.length} tests are already implemented and automated for this URL.
+        Do not propose tests that duplicate these:
+        ${automatedNames.map((n) => `- ${n}`).join('\n')}
+        </existing_automated_tests>
+      `);
+    }
     if (this.currentPlan) {
       tag('step').log('Analyzing current plan to expand testing');