explorbot 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/bin/explorbot-cli.ts +93 -36
  2. package/dist/bin/explorbot-cli.js +71 -16
  3. package/dist/rules/rerunner/healing-approach.md +19 -0
  4. package/dist/src/action.js +8 -10
  5. package/dist/src/ai/historian.js +34 -3
  6. package/dist/src/ai/navigator.js +35 -28
  7. package/dist/src/ai/pilot.js +33 -9
  8. package/dist/src/ai/planner.js +29 -10
  9. package/dist/src/ai/rerunner.js +472 -0
  10. package/dist/src/ai/researcher.js +3 -4
  11. package/dist/src/ai/rules.js +2 -2
  12. package/dist/src/ai/tools.js +2 -2
  13. package/dist/src/commands/add-rule-command.js +1 -2
  14. package/dist/src/commands/base-command.js +12 -0
  15. package/dist/src/commands/context-command.js +12 -5
  16. package/dist/src/commands/drill-command.js +0 -1
  17. package/dist/src/commands/explore-command.js +20 -5
  18. package/dist/src/commands/freesail-command.js +8 -22
  19. package/dist/src/commands/index.js +4 -0
  20. package/dist/src/commands/init-command.js +3 -3
  21. package/dist/src/commands/path-command.js +2 -1
  22. package/dist/src/commands/plan-command.js +37 -15
  23. package/dist/src/commands/rerun-command.js +42 -0
  24. package/dist/src/commands/research-command.js +10 -4
  25. package/dist/src/commands/runs-command.js +22 -0
  26. package/dist/src/commands/start-command.js +0 -1
  27. package/dist/src/commands/test-command.js +3 -3
  28. package/dist/src/components/App.js +8 -0
  29. package/dist/src/config.js +3 -0
  30. package/dist/src/explorbot.js +19 -0
  31. package/dist/src/explorer.js +2 -1
  32. package/dist/src/suite.js +115 -0
  33. package/dist/src/utils/html.js +2 -5
  34. package/dist/src/utils/rules-loader.js +33 -17
  35. package/dist/src/utils/test-files.js +103 -0
  36. package/package.json +2 -1
  37. package/rules/rerunner/healing-approach.md +19 -0
  38. package/src/action.ts +7 -9
  39. package/src/ai/historian.ts +37 -3
  40. package/src/ai/navigator.ts +35 -28
  41. package/src/ai/pilot.ts +33 -9
  42. package/src/ai/planner.ts +28 -9
  43. package/src/ai/rerunner.ts +532 -0
  44. package/src/ai/researcher.ts +3 -4
  45. package/src/ai/rules.ts +2 -2
  46. package/src/ai/tools.ts +2 -2
  47. package/src/commands/add-rule-command.ts +1 -2
  48. package/src/commands/base-command.ts +13 -0
  49. package/src/commands/context-command.ts +12 -5
  50. package/src/commands/drill-command.ts +0 -1
  51. package/src/commands/explore-command.ts +21 -5
  52. package/src/commands/freesail-command.ts +6 -23
  53. package/src/commands/index.ts +4 -0
  54. package/src/commands/init-command.ts +3 -3
  55. package/src/commands/path-command.ts +2 -1
  56. package/src/commands/plan-command.ts +45 -16
  57. package/src/commands/rerun-command.ts +46 -0
  58. package/src/commands/research-command.ts +10 -4
  59. package/src/commands/runs-command.ts +27 -0
  60. package/src/commands/start-command.ts +0 -1
  61. package/src/commands/test-command.ts +3 -3
  62. package/src/components/App.tsx +8 -0
  63. package/src/config.ts +23 -0
  64. package/src/explorbot.ts +21 -0
  65. package/src/explorer.ts +3 -2
  66. package/src/suite.ts +135 -0
  67. package/src/utils/html.ts +1 -5
  68. package/src/utils/rules-loader.ts +35 -17
  69. package/src/utils/test-files.ts +122 -0
@@ -60,28 +60,44 @@ export class RulesLoader {
60
60
  const name = names[idx];
61
61
  return { name, approach: styles[name] };
62
62
  }
63
- static extractStyles(agentName, targetDir) {
64
- const sourceDir = join(BUILT_IN_DIR, agentName, 'styles');
63
+ static extractRules(agentName, targetDir) {
64
+ const sourceDir = join(BUILT_IN_DIR, agentName);
65
65
  if (!existsSync(sourceDir))
66
- throw new Error(`No built-in styles found for agent: ${agentName}`);
67
- mkdirSync(targetDir, { recursive: true });
68
- const files = readdirSync(sourceDir)
69
- .filter((f) => f.endsWith('.md'))
70
- .sort();
66
+ throw new Error(`No built-in rules found for agent: ${agentName}`);
71
67
  const extracted = [];
72
- for (const file of files) {
73
- const target = join(targetDir, file);
74
- if (existsSync(target)) {
75
- tag('info').log(`Skipping ${file} (already exists)`);
76
- continue;
77
- }
78
- writeFileSync(target, readFileSync(join(sourceDir, file), 'utf8'));
79
- extracted.push(file);
80
- tag('success').log(`Extracted ${file}`);
81
- }
68
+ copyMarkdownTree(sourceDir, targetDir, '', extracted);
82
69
  return extracted;
83
70
  }
84
71
  }
72
+ function copyMarkdownTree(sourceDir, targetDir, relative, extracted) {
73
+ const entries = readdirSync(sourceDir, { withFileTypes: true }).sort((a, b) => a.name.localeCompare(b.name));
74
+ let dirCreated = false;
75
+ const ensureTargetDir = () => {
76
+ if (dirCreated)
77
+ return;
78
+ mkdirSync(targetDir, { recursive: true });
79
+ dirCreated = true;
80
+ };
81
+ for (const entry of entries) {
82
+ const sourcePath = join(sourceDir, entry.name);
83
+ const targetPath = join(targetDir, entry.name);
84
+ const relPath = relative ? `${relative}/${entry.name}` : entry.name;
85
+ if (entry.isDirectory()) {
86
+ copyMarkdownTree(sourcePath, targetPath, relPath, extracted);
87
+ continue;
88
+ }
89
+ if (!entry.name.endsWith('.md'))
90
+ continue;
91
+ if (existsSync(targetPath)) {
92
+ tag('info').log(`Skipping ${relPath} (already exists)`);
93
+ continue;
94
+ }
95
+ ensureTargetDir();
96
+ writeFileSync(targetPath, readFileSync(sourcePath, 'utf8'));
97
+ extracted.push(relPath);
98
+ tag('success').log(`Extracted ${relPath}`);
99
+ }
100
+ }
85
101
  function loadFile(agentName, name, subdir) {
86
102
  const file = `${name}.md`;
87
103
  const segments = subdir ? [agentName, subdir, file] : [agentName, file];
@@ -0,0 +1,103 @@
1
+ import { existsSync, readdirSync } from 'node:fs';
2
+ import path from 'node:path';
3
+ import chalk from 'chalk';
4
+ import { highlight } from 'cli-highlight';
5
+ import * as codeceptjs from 'codeceptjs';
6
+ import store from 'codeceptjs/lib/store';
7
+ import stepsListener from 'codeceptjs/lib/listener/steps';
8
+ import storeListener from 'codeceptjs/lib/listener/store';
9
+ import figureSet from 'figures';
10
+ import { ConfigParser } from "../config.js";
11
+ export function loadTestSuites(testsDir) {
12
+ if (!existsSync(testsDir))
13
+ return [];
14
+ const jsFiles = readdirSync(testsDir)
15
+ .filter((f) => f.endsWith('.js'))
16
+ .map((f) => path.resolve(testsDir, f));
17
+ if (jsFiles.length === 0)
18
+ return [];
19
+ codeceptjs.container.createMocha();
20
+ const mocha = codeceptjs.container.mocha();
21
+ mocha.files = jsFiles;
22
+ mocha.loadFiles();
23
+ return mocha.suite.suites || [];
24
+ }
25
+ export function printTestList(suites) {
26
+ if (suites.length === 0) {
27
+ console.log(chalk.yellow('No test files found. Run /explore first.'));
28
+ return;
29
+ }
30
+ let totalActive = 0;
31
+ let totalSkipped = 0;
32
+ let index = 0;
33
+ for (const suite of suites) {
34
+ const file = path.relative(process.cwd(), suite.file || '');
35
+ const active = suite.tests.filter((t) => !t.pending).length;
36
+ const skipped = suite.tests.filter((t) => t.pending).length;
37
+ totalActive += active;
38
+ totalSkipped += skipped;
39
+ console.log(`\n${chalk.bold.cyan(suite.title)}`);
40
+ console.log(chalk.gray(file));
41
+ for (const test of suite.tests) {
42
+ const idx = chalk.dim(`${++index}.`);
43
+ if (test.pending) {
44
+ console.log(chalk.gray(` ${idx} ${figureSet.line} ${test.title} (skipped)`));
45
+ }
46
+ else {
47
+ console.log(` ${idx} ${chalk.green(figureSet.pointer)} ${test.title}`);
48
+ }
49
+ }
50
+ }
51
+ console.log(`\n${chalk.bold(`${totalActive + totalSkipped}`)} scenarios (${chalk.green(`${totalActive} active`)}, ${chalk.gray(`${totalSkipped} skipped`)})`);
52
+ }
53
+ export async function dryRunTestFile(filePath) {
54
+ const absPath = path.resolve(filePath);
55
+ if (!existsSync(absPath)) {
56
+ console.log(chalk.yellow(`File not found: ${absPath}`));
57
+ return;
58
+ }
59
+ const config = ConfigParser.getInstance().getConfig();
60
+ const configPath = ConfigParser.getInstance().getConfigPath();
61
+ const projectRoot = configPath ? path.dirname(configPath) : process.cwd();
62
+ const codeceptConfig = {
63
+ helpers: {
64
+ Playwright: { browser: config.playwright.browser, url: config.playwright.url },
65
+ },
66
+ };
67
+ global.output_dir = path.join(projectRoot, 'output', 'states');
68
+ global.codecept_dir = projectRoot;
69
+ codeceptjs.container.create(codeceptConfig, {});
70
+ await codeceptjs.recorder.start();
71
+ await codeceptjs.container.started(null);
72
+ store.dryRun = true;
73
+ global.container = codeceptjs.container;
74
+ storeListener();
75
+ stepsListener();
76
+ codeceptjs.container.createMocha();
77
+ const mocha = codeceptjs.container.mocha();
78
+ mocha.reporter(class {
79
+ });
80
+ mocha.files = [absPath];
81
+ mocha.loadFiles();
82
+ let currentSuite = '';
83
+ codeceptjs.event.dispatcher.on('suite.before', (suite) => {
84
+ if (suite.title && suite.title !== currentSuite) {
85
+ currentSuite = suite.title;
86
+ console.log(`\n${chalk.bold.cyan(suite.title)}`);
87
+ console.log(chalk.gray(path.relative(process.cwd(), suite.file || absPath)));
88
+ }
89
+ });
90
+ codeceptjs.event.dispatcher.on('test.before', (t) => {
91
+ console.log(`\n ${chalk.green(figureSet.pointer)} ${chalk.bold(t.title)}`);
92
+ });
93
+ codeceptjs.event.dispatcher.on('step.start', (step) => {
94
+ const code = highlight(step.toCode(), { language: 'javascript' });
95
+ console.log(chalk.dim(` ${code}`));
96
+ });
97
+ await new Promise((resolve) => {
98
+ const runner = mocha.run(() => resolve());
99
+ runner.on('pending', (t) => {
100
+ console.log(chalk.gray(` ${figureSet.line} ${t.title} (skipped)`));
101
+ });
102
+ });
103
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "explorbot",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "description": "CLI app built with React Ink, CodeceptJS, and Playwright",
5
5
  "license": "Elastic-2.0",
6
6
  "type": "module",
@@ -66,6 +66,7 @@
66
66
  "@ai-sdk/groq": "^3.0",
67
67
  "@ai-sdk/openai": "^3.0",
68
68
  "@axe-core/playwright": "^4.11.0",
69
+ "@codeceptjs/reflection": "^0.5.2",
69
70
  "@inkjs/ui": "^2.0.0",
70
71
  "@langfuse/otel": "^4.5.1",
71
72
  "@openrouter/ai-sdk-provider": "^2.3.3",
@@ -0,0 +1,19 @@
1
+ <healing_approach>
2
+ The failed step was NOT performed. You MUST execute a replacement action.
3
+ Just waiting or diagnosing is NOT enough — you must perform the click/fill/press that was intended.
4
+
5
+ 1. FIRST: Check the page URL and ARIA — are you on the right page?
6
+ - If URL or ARIA shows login/error/404 page → call giveUp immediately
7
+ 2. If ARIA is empty/minimal → page may still be loading:
8
+ - Use xpathCheck() to detect spinners, loaders, or loading indicators on the page
9
+ - Use wait() to let the page load — it returns fresh ARIA automatically
10
+ - Then execute the replacement action with a working locator
11
+ 3. If the target element is visible in ARIA:
12
+ - Use click() with multiple fallback locators (ARIA, CSS, XPath)
13
+ 4. If element is NOT in ARIA but page is correct:
14
+ - Use xpathCheck() to search the full HTML
15
+ - Use research() to get a semantic UI map of the page if needed
16
+ - If found → click it
17
+ - If not → bash to check console logs → giveUp
18
+ 5. Call done() with the command that replaced the failed step
19
+ </healing_approach>
package/src/action.ts CHANGED
@@ -64,7 +64,7 @@ class Action {
64
64
  }
65
65
  }
66
66
 
67
- async capturePageState({ includeScreenshot = false, ariaSnapshot: preCapuredAria }: { includeScreenshot?: boolean; ariaSnapshot?: string } = {}): Promise<ActionResult> {
67
+ async capturePageState({ includeScreenshot = false }: { includeScreenshot?: boolean } = {}): Promise<ActionResult> {
68
68
  try {
69
69
  const currentState = this.stateManager.getCurrentState();
70
70
  const stateHash = currentState?.hash || 'screenshot';
@@ -111,16 +111,14 @@ class Action {
111
111
  // Capture iframe HTML snapshots
112
112
  const iframeSnapshots = await this.captureIframeSnapshots(html);
113
113
 
114
- let ariaSnapshot: string | null = preCapuredAria || null;
114
+ let ariaSnapshot: string | null = null;
115
115
  let ariaSnapshotFile: string | undefined = undefined;
116
116
 
117
- if (!ariaSnapshot) {
118
- try {
119
- const page = this.playwrightHelper.page;
120
- ariaSnapshot = await page.locator('body').ariaSnapshot();
121
- } catch (err) {
122
- debugLog('ARIA snapshot failed:', err instanceof Error ? `${err.message}\n${err.stack}` : err);
123
- }
117
+ try {
118
+ const page = this.playwrightHelper.page;
119
+ ariaSnapshot = await page.locator('body').ariaSnapshot();
120
+ } catch (err) {
121
+ debugLog('ARIA snapshot failed:', err instanceof Error ? `${err.message}\n${err.stack}` : err);
124
122
  }
125
123
 
126
124
  if (ariaSnapshot) {
@@ -1,9 +1,10 @@
1
- import { mkdirSync, writeFileSync } from 'node:fs';
1
+ import { mkdirSync, readFileSync, writeFileSync } from 'node:fs';
2
2
  import { join } from 'node:path';
3
3
  import dedent from 'dedent';
4
4
  import { z } from 'zod';
5
5
  import { ActionResult } from '../action-result.ts';
6
6
  import { ConfigParser } from '../config.ts';
7
+ import { KnowledgeTracker } from '../knowledge-tracker.ts';
7
8
  import { ExperienceTracker, type SessionExperienceEntry, type SessionStep } from '../experience-tracker.ts';
8
9
  import { type Reporter, type ReporterStep } from '../reporter.ts';
9
10
  import type { StateManager } from '../state-manager.ts';
@@ -397,6 +398,7 @@ export class Historian {
397
398
  if (startUrl) {
398
399
  lines.push('Before(({ I }) => {');
399
400
  lines.push(` I.amOnPage('${this.escapeString(startUrl)}');`);
401
+ lines.push(...this.getKnowledgeLines(startUrl));
400
402
  lines.push('});');
401
403
  lines.push('');
402
404
  }
@@ -425,8 +427,7 @@ export class Historian {
425
427
  lines.push('');
426
428
  }
427
429
 
428
- const outputDir = ConfigParser.getInstance().getOutputDir();
429
- const testsDir = join(outputDir, 'tests');
430
+ const testsDir = ConfigParser.getInstance().getTestsDir();
430
431
  mkdirSync(testsDir, { recursive: true });
431
432
 
432
433
  const filename = plan.title.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
@@ -437,6 +438,18 @@ export class Historian {
437
438
  return filePath;
438
439
  }
439
440
 
441
+ rewriteScenarioInFile(filePath: string, healedSteps: Array<{ test: string; original: string; healed: string }>): void {
442
+ let content = readFileSync(filePath, 'utf-8');
443
+
444
+ for (const step of healedSteps) {
445
+ if (!content.includes(step.original)) continue;
446
+ content = content.replace(step.original, step.healed);
447
+ }
448
+
449
+ writeFileSync(filePath, content);
450
+ tag('substep').log(`Updated test file with healed steps: ${filePath}`);
451
+ }
452
+
440
453
  private getExecutionLabel(exec: ToolExecution, fallback?: string): string {
441
454
  return exec.input?.explanation || exec.input?.assertion || exec.input?.note || fallback || '';
442
455
  }
@@ -445,6 +458,27 @@ export class Historian {
445
458
  return str.replace(/'/g, "\\'").replace(/\n/g, ' ');
446
459
  }
447
460
 
461
+ private getKnowledgeLines(url: string, indent = ' '): string[] {
462
+ const knowledgeTracker = new KnowledgeTracker();
463
+ const state = new ActionResult({ url });
464
+ const { wait, waitForElement, code } = knowledgeTracker.getStateParameters(state, ['wait', 'waitForElement', 'code']);
465
+
466
+ const lines: string[] = [];
467
+ if (wait !== undefined) {
468
+ lines.push(`${indent}I.wait(${wait});`);
469
+ }
470
+ if (waitForElement) {
471
+ lines.push(`${indent}I.waitForElement(${JSON.stringify(waitForElement)});`);
472
+ }
473
+ if (code) {
474
+ for (const codeLine of code.split('\n')) {
475
+ const trimmed = codeLine.trim();
476
+ if (trimmed) lines.push(`${indent}${trimmed}`);
477
+ }
478
+ }
479
+ return lines;
480
+ }
481
+
448
482
  private stripComments(code: string): string {
449
483
  return code
450
484
  .split('\n')
@@ -40,6 +40,18 @@ class Navigator implements Agent {
40
40
  You are given the web page and a message from user.
41
41
  You need to resolve the state of the page based on the message.
42
42
  </task>
43
+
44
+ ${locatorRule}
45
+
46
+ <constraints>
47
+ NEVER navigate away from the base URL domain. Stay on the same origin at all times.
48
+ NEVER attempt to rewrite, replace, mock, or spoof the URL via JavaScript, history API, location assignment, or any client-side trick.
49
+ NEVER use executeScript, executeAsyncScript, or any JS evaluation to change the URL, bypass redirects, or fake the page state.
50
+ If the target URL redirects to an authentication/login page, DO NOT try to force the original URL. Instead:
51
+ 1. Look for credentials in the provided knowledge/hint context and perform a real login through the form.
52
+ 2. If no credentials are available, ask the user for credentials or ask the user to log in manually.
53
+ A redirect to /login, /sign_in, /auth, or similar is a signal that authentication is required — treat it as such, never as an obstacle to bypass.
54
+ </constraints>
43
55
  `;
44
56
  private freeSailSystemPrompt = dedent`
45
57
  <role>
@@ -168,6 +180,14 @@ class Navigator implements Agent {
168
180
  ${message}
169
181
  </message>
170
182
 
183
+ <page>
184
+ ${actionResult.toAiContext()}
185
+
186
+ <page_html>
187
+ ${await actionResult.combinedHtml()}
188
+ </page_html>
189
+ </page>
190
+
171
191
  <task>
172
192
  Identify the actual request of the user.
173
193
  Identify what is expected by user.
@@ -178,25 +198,13 @@ class Navigator implements Agent {
178
198
  Try various ways to achieve the result
179
199
  </task>
180
200
 
181
-
182
- <page>
183
- ${actionResult.toAiContext()}
184
-
185
- <page_html>
186
- ${await actionResult.simplifiedHtml()}
187
- </page_html>
188
- </page>
189
-
190
-
191
- ${knowledge}
192
-
193
201
  ${actionRule}
194
202
 
195
- ${experience}
203
+ ${RulesLoader.loadRules('navigator', ['multiple-locator', 'output'], actionResult.url || '').replace('{{maxAttempts}}', String(this.MAX_ATTEMPTS))}
196
204
 
197
- ${locatorRule}
205
+ ${experience}
198
206
 
199
- ${RulesLoader.loadRules('navigator', ['multiple-locator', 'output'], actionResult.url || '').replace('{{maxAttempts}}', String(this.MAX_ATTEMPTS))}
207
+ ${knowledge}
200
208
  `;
201
209
 
202
210
  const conversation = this.provider.startConversation(this.systemPrompt, 'navigator');
@@ -234,7 +242,7 @@ class Navigator implements Agent {
234
242
  Previous solutions did not work. Here is the full HTML context:
235
243
 
236
244
  <page_html>
237
- ${await actionResult.simplifiedHtml()}
245
+ ${await actionResult.combinedHtml()}
238
246
  </page_html>
239
247
 
240
248
  Please suggest new solutions based on this additional context.
@@ -265,6 +273,7 @@ class Navigator implements Agent {
265
273
 
266
274
  if (resolved) {
267
275
  tag('success').log('Navigation resolved successfully');
276
+ await this.experienceTracker.saveSuccessfulResolution(actionResult, message, codeBlock);
268
277
  stop();
269
278
  return;
270
279
  }
@@ -479,6 +488,14 @@ class Navigator implements Agent {
479
488
  ${message}
480
489
  </message>
481
490
 
491
+ <page>
492
+ ${actionResult.toAiContext()}
493
+
494
+ <page_html>
495
+ ${await actionResult.combinedHtml()}
496
+ </page_html>
497
+ </page>
498
+
482
499
  <task>
483
500
  Identify what assertion the user wants to verify on the page.
484
501
  Propose different CodeceptJS assertion code blocks to verify the expected state.
@@ -492,21 +509,11 @@ class Navigator implements Agent {
492
509
  Do not generate assertions that would pass even if the specific claim is false.
493
510
  </task>
494
511
 
495
- <page>
496
- ${actionResult.toAiContext()}
497
-
498
- <page_html>
499
- ${await actionResult.simplifiedHtml()}
500
- </page_html>
501
- </page>
502
-
503
- ${knowledge}
504
-
505
512
  ${RulesLoader.loadRules('navigator', ['verification-actions'], actionResult.url || '')}
506
513
 
507
- ${locatorRule}
508
-
509
514
  ${experience}
515
+
516
+ ${knowledge}
510
517
  `;
511
518
 
512
519
  debugLog('Sending verification prompt to AI provider');
package/src/ai/pilot.ts CHANGED
@@ -256,9 +256,18 @@ export class Pilot implements Agent {
256
256
 
257
257
  Plan the test execution for this scenario.
258
258
 
259
- FIRST: Call precondition() to create fresh data that this test will act on.
260
- Ask: "What will this test edit/delete/use?" — create THAT item via precondition.
261
- Do not describe what's already on the page — create new disposable items for the test.
259
+ FIRST: Decide if precondition() is needed.
260
+
261
+ Call precondition() WHEN:
262
+ - The scenario edits/deletes/modifies an item, and you want a DISPOSABLE item to act on safely
263
+ - The scenario needs specific data clearly NOT on the current page (e.g., items with specific statuses for filtering)
264
+
265
+ SKIP precondition() WHEN:
266
+ - The scenario is "Create X" — the test itself creates the item
267
+ - The current page already shows the item the test will act on (check <state> and <page_summary>)
268
+ - The scenario tests navigation, UI behavior, or viewing — no data mutation needed
269
+
270
+ If needed, call precondition() now. If not, proceed directly to planning.
262
271
 
263
272
  THEN: Based on the page elements and current state, outline:
264
273
  1. Which elements to interact with and in what order
@@ -701,6 +710,8 @@ export class Pilot implements Agent {
701
710
  - Click succeeded but ariaDiff shows elements unrelated to tester's intention (e.g., clicked "Edit" but dropdown appeared) → wrong button or unexpected behavior. Instruct Tester to Escape and try a different approach.
702
711
  - form(I.type()) succeeded → I.type() sends keys to whatever is focused, no guarantee it's the right field. Instruct Tester to verify with see() that text appeared in the correct field. If targetedHtml shows a button/link, text went to wrong element — click the correct field first and retry.
703
712
  - ariaDiff shows 5+ elements removed/added after clicking content → page entered a different mode (editor, panel, modal). Instruct Tester to call context() to see current state before guessing selectors.
713
+ - Dropdown/select opened but contains NO options, or a list/table is empty when items were expected → data doesn't exist yet. Call precondition() to create the missing items (labels, categories, etc.), then instruct Tester to retry.
714
+ - Tester tries to select/filter/assign something but the option list is empty or expected value is not present → missing auxiliary data. Call precondition() to create it.
704
715
 
705
716
  Detecting logically wrong successes — review "executed", "element", and "skipped" fields:
706
717
  - Click SUCCESS but "executed" command differs from "explanation" intent → wrong element was clicked. The intended element wasn't found and a different one was clicked instead.
@@ -750,23 +761,36 @@ export class Pilot implements Agent {
750
761
  YOUR tools (Pilot-only):
751
762
  - precondition(description) — create FRESH test data via API that the test will act on. Do NOT request users.
752
763
 
753
- PRECONDITIONS — what to create:
764
+ PRECONDITIONS — when and what to create:
754
765
  Preconditions create NEW disposable items that the test will modify, delete, or interact with.
755
- Do NOT describe what already exists on the page — describe what NEW data the test needs to act on.
756
766
 
757
767
  Ask yourself: "What object will this test change/delete/use? Create THAT."
758
768
 
759
- Examples:
769
+ When to call precondition():
770
+ - Scenario edits/deletes/modifies an item → create a disposable target
771
+ - Scenario needs auxiliary data (labels, categories, statuses to filter by)
772
+ - Tester failed because required data is missing (empty dropdown, no items to select)
773
+
774
+ When to SKIP precondition():
775
+ - Scenario is "Create X" — the test itself creates the item, no precondition needed
776
+ - Current page already shows the exact data needed (check <state> h1/title and <page_summary>)
777
+ - Scenario tests navigation, search UI, or viewing — no data mutation involved
778
+
779
+ Examples — when to create:
760
780
  - "Edit test description" → precondition("1 test") — the test will edit this item
761
781
  - "Delete a comment" → precondition("1 comment") — the test will delete this item
762
782
  - "Assign a label to item" → precondition("1 item and 1 label named Bug") — test assigns the label
763
783
  - "Filter by status" → precondition("3 items: 2 with status Open, 1 with status Closed")
764
- - "Move item between lists" → precondition("1 item in list A")
765
784
 
766
- WRONG: precondition("1 test suite named Updated Suite with existing tests") — this describes the page, not what to create
785
+ Examples — when to skip:
786
+ - "Create a new blog post" → SKIP, the test creates it
787
+ - "Edit blog post" while on a blog post page → SKIP, data already exists
788
+ - "View dashboard" → SKIP, no data mutation
789
+
790
+ WRONG: precondition("1 test suite named Updated Suite with existing tests") — describes the page, not what to create
767
791
  RIGHT: precondition("1 test") — create a fresh test that the scenario will edit
768
792
 
769
- Call precondition() for EVERY item the scenario will act on. Keep descriptions short and specific.
793
+ Keep descriptions short and specific.
770
794
 
771
795
  Response format:
772
796
  PROGRESS: <1 sentence assessment>
package/src/ai/planner.ts CHANGED
@@ -23,6 +23,7 @@ import { findSimilarStateHash } from './researcher/cache.ts';
23
23
  import type { Provider } from './provider.js';
24
24
  import { hasFocusedSection } from './researcher/focus.ts';
25
25
  import { POSSIBLE_SECTIONS, Researcher } from './researcher.ts';
26
+ import { Suite } from '../suite.ts';
26
27
  import { fileUploadRule, protectionRule } from './rules.ts';
27
28
 
28
29
  const debugLog = createDebug('explorbot:planner');
@@ -58,6 +59,7 @@ export class Planner extends PlannerBase implements Agent {
58
59
  currentPlan: Plan | null = null;
59
60
  freshStart = false;
60
61
  private lastStyleName = '';
62
+ private lastSuite: Suite | null = null;
61
63
  researcher: Researcher;
62
64
  private fisherman: Fisherman | null = null;
63
65
 
@@ -201,14 +203,14 @@ export class Planner extends PlannerBase implements Agent {
201
203
  this.currentPlan.url = state.url;
202
204
  if (parentPlan) this.currentPlan.parentPlan = parentPlan;
203
205
  const allPreviousScenarios = this.getPreviousSessionScenarios();
206
+ const existingTestScenarios = this.getExistingTestFileScenarios(state.url);
207
+ for (const s of existingTestScenarios) allPreviousScenarios.add(s);
204
208
  for (const t of tests) {
205
209
  if (allPreviousScenarios.has(t.scenario.toLowerCase())) continue;
206
210
  t.style = this.lastStyleName;
207
211
  t.startUrl = state.url;
208
212
  this.currentPlan.addTest(t);
209
213
  }
210
- const summary = `Scenarios:\n${this.currentPlan.tests.map((t) => `- [${t.priority}] ${t.scenario}`).join('\n')}`;
211
- tag('multiline').log(summary);
212
214
  } else {
213
215
  tag('step').log(`Expanding plan: "${this.currentPlan.title}"`);
214
216
  this.currentPlan.nextIteration();
@@ -219,7 +221,6 @@ export class Planner extends PlannerBase implements Agent {
219
221
  }
220
222
  }
221
223
 
222
- this.moveExecutedTestsToEnd();
223
224
  const availableStyles = Object.keys(getStyles()).join(', ');
224
225
  tag('success').log(`Planning complete! ${this.currentPlan.tests.length} tests in plan: ${this.currentPlan.title}`);
225
226
  tag('info').log(`Planning style: ${this.lastStyleName} (available: ${availableStyles})`);
@@ -231,12 +232,8 @@ export class Planner extends PlannerBase implements Agent {
231
232
  return this.currentPlan;
232
233
  }
233
234
 
234
- private moveExecutedTestsToEnd(): void {
235
- if (!this.currentPlan) return;
236
- const pending = this.currentPlan.tests.filter((t) => t.result === null);
237
- const executed = this.currentPlan.tests.filter((t) => t.result !== null);
238
- this.currentPlan.tests = [...pending, ...executed];
239
- this.currentPlan.notifyChange();
235
+ getSuite(): Suite | null {
236
+ return this.lastSuite;
240
237
  }
241
238
 
242
239
  private addNewTests(tests: Test[], defaultStartUrl: string): Test[] {
@@ -262,6 +259,17 @@ export class Planner extends PlannerBase implements Agent {
262
259
  return added;
263
260
  }
264
261
 
262
+ private getExistingTestFileScenarios(currentUrl?: string): Set<string> {
263
+ if (!currentUrl) return new Set<string>();
264
+ try {
265
+ this.lastSuite = new Suite(currentUrl);
266
+ return this.lastSuite.getActiveScenarioTitles();
267
+ } catch (err: any) {
268
+ debugLog('Failed to load existing test files: %s', err.message);
269
+ return new Set<string>();
270
+ }
271
+ }
272
+
265
273
  private cleanExperienceFlows(text: string): string | null {
266
274
  const seenTitles = new Set<string>();
267
275
  let result = text;
@@ -421,6 +429,17 @@ export class Planner extends PlannerBase implements Agent {
421
429
  }
422
430
  }
423
431
 
432
+ if (this.lastSuite && this.lastSuite.automatedTestCount > 0) {
433
+ const automatedNames = this.lastSuite.getAutomatedTestNames();
434
+ conversation.addUserText(dedent`
435
+ <existing_automated_tests>
436
+ The following ${automatedNames.length} tests are already implemented and automated for this URL.
437
+ Do not propose tests that duplicate these:
438
+ ${automatedNames.map((n) => `- ${n}`).join('\n')}
439
+ </existing_automated_tests>
440
+ `);
441
+ }
442
+
424
443
  if (this.currentPlan) {
425
444
  tag('step').log('Analyzing current plan to expand testing');
426
445