explorbot 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/bin/explorbot-cli.ts +93 -36
  2. package/dist/bin/explorbot-cli.js +71 -16
  3. package/dist/rules/rerunner/healing-approach.md +19 -0
  4. package/dist/src/action.js +8 -10
  5. package/dist/src/ai/historian.js +34 -3
  6. package/dist/src/ai/navigator.js +35 -28
  7. package/dist/src/ai/pilot.js +33 -9
  8. package/dist/src/ai/planner/session-dedup.js +3 -0
  9. package/dist/src/ai/planner/styles.js +3 -0
  10. package/dist/src/ai/planner.js +29 -10
  11. package/dist/src/ai/rerunner.js +472 -0
  12. package/dist/src/ai/researcher/cache.js +4 -3
  13. package/dist/src/ai/researcher/fingerprint-worker.js +7 -6
  14. package/dist/src/ai/researcher.js +3 -4
  15. package/dist/src/ai/rules.js +2 -2
  16. package/dist/src/ai/tools.js +2 -2
  17. package/dist/src/commands/add-rule-command.js +1 -2
  18. package/dist/src/commands/base-command.js +12 -0
  19. package/dist/src/commands/context-command.js +12 -5
  20. package/dist/src/commands/drill-command.js +0 -1
  21. package/dist/src/commands/explore-command.js +20 -5
  22. package/dist/src/commands/freesail-command.js +8 -22
  23. package/dist/src/commands/index.js +4 -0
  24. package/dist/src/commands/init-command.js +3 -3
  25. package/dist/src/commands/path-command.js +2 -1
  26. package/dist/src/commands/plan-command.js +37 -15
  27. package/dist/src/commands/rerun-command.js +42 -0
  28. package/dist/src/commands/research-command.js +10 -4
  29. package/dist/src/commands/runs-command.js +22 -0
  30. package/dist/src/commands/start-command.js +0 -1
  31. package/dist/src/commands/test-command.js +3 -3
  32. package/dist/src/components/App.js +8 -0
  33. package/dist/src/config.js +3 -0
  34. package/dist/src/explorbot.js +19 -0
  35. package/dist/src/explorer.js +2 -1
  36. package/dist/src/suite.js +115 -0
  37. package/dist/src/utils/html.js +2 -5
  38. package/dist/src/utils/rules-loader.js +33 -17
  39. package/dist/src/utils/test-files.js +103 -0
  40. package/package.json +3 -1
  41. package/rules/rerunner/healing-approach.md +19 -0
  42. package/src/action.ts +7 -9
  43. package/src/ai/historian.ts +37 -3
  44. package/src/ai/navigator.ts +35 -28
  45. package/src/ai/pilot.ts +33 -9
  46. package/src/ai/planner/session-dedup.ts +4 -0
  47. package/src/ai/planner/styles.ts +4 -0
  48. package/src/ai/planner.ts +28 -9
  49. package/src/ai/rerunner.ts +532 -0
  50. package/src/ai/researcher/cache.ts +4 -3
  51. package/src/ai/researcher/fingerprint-worker.ts +7 -13
  52. package/src/ai/researcher.ts +3 -4
  53. package/src/ai/rules.ts +2 -2
  54. package/src/ai/tools.ts +2 -2
  55. package/src/commands/add-rule-command.ts +1 -2
  56. package/src/commands/base-command.ts +13 -0
  57. package/src/commands/context-command.ts +12 -5
  58. package/src/commands/drill-command.ts +0 -1
  59. package/src/commands/explore-command.ts +21 -5
  60. package/src/commands/freesail-command.ts +6 -23
  61. package/src/commands/index.ts +4 -0
  62. package/src/commands/init-command.ts +3 -3
  63. package/src/commands/path-command.ts +2 -1
  64. package/src/commands/plan-command.ts +45 -16
  65. package/src/commands/rerun-command.ts +46 -0
  66. package/src/commands/research-command.ts +10 -4
  67. package/src/commands/runs-command.ts +27 -0
  68. package/src/commands/start-command.ts +0 -1
  69. package/src/commands/test-command.ts +3 -3
  70. package/src/components/App.tsx +8 -0
  71. package/src/config.ts +23 -0
  72. package/src/explorbot.ts +21 -0
  73. package/src/explorer.ts +3 -2
  74. package/src/suite.ts +135 -0
  75. package/src/utils/html.ts +1 -5
  76. package/src/utils/rules-loader.ts +35 -17
  77. package/src/utils/test-files.ts +122 -0
@@ -0,0 +1,115 @@
1
+ import { existsSync, readFileSync, readdirSync } from 'node:fs';
2
+ import path from 'node:path';
3
+ import { Reflection } from '@codeceptjs/reflection';
4
+ import { ConfigParser } from "./config.js";
5
+ import { normalizeUrl } from "./state-manager.js";
6
+ import { parsePlanFromMarkdown } from "./utils/test-plan-markdown.js";
7
+ import { createDebug } from "./utils/logger.js";
8
+ const debugLog = createDebug('explorbot:suite');
9
+ export class Suite {
10
+ url;
11
+ _automatedTests = null;
12
+ _plannedScenarios = null;
13
+ constructor(url) {
14
+ this.url = url;
15
+ }
16
+ getAutomatedTests() {
17
+ if (this._automatedTests !== null)
18
+ return this._automatedTests;
19
+ this._automatedTests = this.loadAutomatedTests();
20
+ return this._automatedTests;
21
+ }
22
+ getPlannedScenarios() {
23
+ if (this._plannedScenarios !== null)
24
+ return this._plannedScenarios;
25
+ this._plannedScenarios = this.loadPlannedScenarios();
26
+ return this._plannedScenarios;
27
+ }
28
+ getActiveScenarioTitles() {
29
+ return new Set(this.getAutomatedTests()
30
+ .filter((t) => !t.pending)
31
+ .map((t) => t.title.toLowerCase()));
32
+ }
33
+ get automatedTestCount() {
34
+ return this.getAutomatedTests().filter((t) => !t.pending).length;
35
+ }
36
+ getAutomatedTestNames() {
37
+ return this.getAutomatedTests()
38
+ .filter((t) => !t.pending)
39
+ .map((t) => t.title);
40
+ }
41
+ getAutomatedTestFiles() {
42
+ return [...new Set(this.getAutomatedTests().map((t) => t.file))];
43
+ }
44
+ loadAutomatedTests() {
45
+ const testsDir = ConfigParser.getInstance().getTestsDir();
46
+ if (!existsSync(testsDir))
47
+ return [];
48
+ const jsFiles = readdirSync(testsDir)
49
+ .filter((f) => f.endsWith('.js'))
50
+ .map((f) => path.resolve(testsDir, f));
51
+ const results = [];
52
+ for (const filePath of jsFiles) {
53
+ const parsed = this.parseTestFile(filePath);
54
+ if (!parsed)
55
+ continue;
56
+ if (normalizeUrl(parsed.url) !== normalizeUrl(this.url))
57
+ continue;
58
+ results.push(...parsed.tests);
59
+ }
60
+ return results;
61
+ }
62
+ parseTestFile(filePath) {
63
+ try {
64
+ const scanned = Reflection.scanFile(filePath);
65
+ if (!scanned.suites?.length)
66
+ return null;
67
+ const content = readFileSync(filePath, 'utf-8');
68
+ const suiteRef = Reflection.forSuite(scanned.suites[0]);
69
+ const beforeHooks = suiteRef.findHook('Before');
70
+ if (!beforeHooks?.length)
71
+ return null;
72
+ const hookBody = content.slice(beforeHooks[0].range.start, beforeHooks[0].range.end);
73
+ const match = hookBody.match(/I\.amOnPage\(['"]([^'"]+)['"]\)/);
74
+ if (!match)
75
+ return null;
76
+ const lines = content.split('\n');
77
+ const tests = (scanned.tests || []).map((t) => {
78
+ const line = lines[t.line - 1] || '';
79
+ const pending = line.includes('Scenario.skip') || line.includes('Scenario.todo');
80
+ return { title: t.title, pending, file: filePath };
81
+ });
82
+ return { url: match[1], tests };
83
+ }
84
+ catch (err) {
85
+ debugLog('Failed to parse test file %s: %s', filePath, err.message);
86
+ return null;
87
+ }
88
+ }
89
+ loadPlannedScenarios() {
90
+ try {
91
+ const plansDir = ConfigParser.getInstance().getPlansDir();
92
+ if (!existsSync(plansDir))
93
+ return [];
94
+ const mdFiles = readdirSync(plansDir)
95
+ .filter((f) => f.endsWith('.md'))
96
+ .map((f) => path.resolve(plansDir, f));
97
+ const scenarios = [];
98
+ for (const filePath of mdFiles) {
99
+ const plan = parsePlanFromMarkdown(filePath);
100
+ if (!plan.url)
101
+ continue;
102
+ if (normalizeUrl(plan.url) !== normalizeUrl(this.url))
103
+ continue;
104
+ for (const test of plan.tests) {
105
+ scenarios.push(test.scenario);
106
+ }
107
+ }
108
+ return scenarios;
109
+ }
110
+ catch (err) {
111
+ debugLog('Failed to load planned scenarios: %s', err.message);
112
+ return [];
113
+ }
114
+ }
115
+ }
@@ -424,18 +424,15 @@ export function htmlMinimalUISnapshot(html, htmlConfig) {
424
424
  node.attrs = node.attrs.filter((attr) => {
425
425
  const { name, value } = attr;
426
426
  if (name === 'class') {
427
- // Remove classes containing digits
428
427
  attr.value = value
429
428
  .split(' ')
430
- // remove classes containing digits/
431
429
  .filter((className) => !/\d/.test(className))
432
- // remove popular trash classes
433
430
  .filter((className) => !className.match(trashHtmlClasses))
434
- // remove classes with : and __ in them
435
431
  .filter((className) => !className.match(/(:|__)/))
436
- // remove tailwind utility classes
437
432
  .filter((className) => !TAILWIND_CLASS_PATTERNS.some((pattern) => pattern.test(className)))
438
433
  .join(' ');
434
+ if (attr.value === '')
435
+ return false;
439
436
  }
440
437
  return allowedAttrs.includes(name) || name.startsWith('data-explorbot-');
441
438
  });
@@ -60,28 +60,44 @@ export class RulesLoader {
60
60
  const name = names[idx];
61
61
  return { name, approach: styles[name] };
62
62
  }
63
- static extractStyles(agentName, targetDir) {
64
- const sourceDir = join(BUILT_IN_DIR, agentName, 'styles');
63
+ static extractRules(agentName, targetDir) {
64
+ const sourceDir = join(BUILT_IN_DIR, agentName);
65
65
  if (!existsSync(sourceDir))
66
- throw new Error(`No built-in styles found for agent: ${agentName}`);
67
- mkdirSync(targetDir, { recursive: true });
68
- const files = readdirSync(sourceDir)
69
- .filter((f) => f.endsWith('.md'))
70
- .sort();
66
+ throw new Error(`No built-in rules found for agent: ${agentName}`);
71
67
  const extracted = [];
72
- for (const file of files) {
73
- const target = join(targetDir, file);
74
- if (existsSync(target)) {
75
- tag('info').log(`Skipping ${file} (already exists)`);
76
- continue;
77
- }
78
- writeFileSync(target, readFileSync(join(sourceDir, file), 'utf8'));
79
- extracted.push(file);
80
- tag('success').log(`Extracted ${file}`);
81
- }
68
+ copyMarkdownTree(sourceDir, targetDir, '', extracted);
82
69
  return extracted;
83
70
  }
84
71
  }
72
+ function copyMarkdownTree(sourceDir, targetDir, relative, extracted) {
73
+ const entries = readdirSync(sourceDir, { withFileTypes: true }).sort((a, b) => a.name.localeCompare(b.name));
74
+ let dirCreated = false;
75
+ const ensureTargetDir = () => {
76
+ if (dirCreated)
77
+ return;
78
+ mkdirSync(targetDir, { recursive: true });
79
+ dirCreated = true;
80
+ };
81
+ for (const entry of entries) {
82
+ const sourcePath = join(sourceDir, entry.name);
83
+ const targetPath = join(targetDir, entry.name);
84
+ const relPath = relative ? `${relative}/${entry.name}` : entry.name;
85
+ if (entry.isDirectory()) {
86
+ copyMarkdownTree(sourcePath, targetPath, relPath, extracted);
87
+ continue;
88
+ }
89
+ if (!entry.name.endsWith('.md'))
90
+ continue;
91
+ if (existsSync(targetPath)) {
92
+ tag('info').log(`Skipping ${relPath} (already exists)`);
93
+ continue;
94
+ }
95
+ ensureTargetDir();
96
+ writeFileSync(targetPath, readFileSync(sourcePath, 'utf8'));
97
+ extracted.push(relPath);
98
+ tag('success').log(`Extracted ${relPath}`);
99
+ }
100
+ }
85
101
  function loadFile(agentName, name, subdir) {
86
102
  const file = `${name}.md`;
87
103
  const segments = subdir ? [agentName, subdir, file] : [agentName, file];
@@ -0,0 +1,103 @@
1
+ import { existsSync, readdirSync } from 'node:fs';
2
+ import path from 'node:path';
3
+ import chalk from 'chalk';
4
+ import { highlight } from 'cli-highlight';
5
+ import * as codeceptjs from 'codeceptjs';
6
+ import store from 'codeceptjs/lib/store';
7
+ import stepsListener from 'codeceptjs/lib/listener/steps';
8
+ import storeListener from 'codeceptjs/lib/listener/store';
9
+ import figureSet from 'figures';
10
+ import { ConfigParser } from "../config.js";
11
+ export function loadTestSuites(testsDir) {
12
+ if (!existsSync(testsDir))
13
+ return [];
14
+ const jsFiles = readdirSync(testsDir)
15
+ .filter((f) => f.endsWith('.js'))
16
+ .map((f) => path.resolve(testsDir, f));
17
+ if (jsFiles.length === 0)
18
+ return [];
19
+ codeceptjs.container.createMocha();
20
+ const mocha = codeceptjs.container.mocha();
21
+ mocha.files = jsFiles;
22
+ mocha.loadFiles();
23
+ return mocha.suite.suites || [];
24
+ }
25
+ export function printTestList(suites) {
26
+ if (suites.length === 0) {
27
+ console.log(chalk.yellow('No test files found. Run /explore first.'));
28
+ return;
29
+ }
30
+ let totalActive = 0;
31
+ let totalSkipped = 0;
32
+ let index = 0;
33
+ for (const suite of suites) {
34
+ const file = path.relative(process.cwd(), suite.file || '');
35
+ const active = suite.tests.filter((t) => !t.pending).length;
36
+ const skipped = suite.tests.filter((t) => t.pending).length;
37
+ totalActive += active;
38
+ totalSkipped += skipped;
39
+ console.log(`\n${chalk.bold.cyan(suite.title)}`);
40
+ console.log(chalk.gray(file));
41
+ for (const test of suite.tests) {
42
+ const idx = chalk.dim(`${++index}.`);
43
+ if (test.pending) {
44
+ console.log(chalk.gray(` ${idx} ${figureSet.line} ${test.title} (skipped)`));
45
+ }
46
+ else {
47
+ console.log(` ${idx} ${chalk.green(figureSet.pointer)} ${test.title}`);
48
+ }
49
+ }
50
+ }
51
+ console.log(`\n${chalk.bold(`${totalActive + totalSkipped}`)} scenarios (${chalk.green(`${totalActive} active`)}, ${chalk.gray(`${totalSkipped} skipped`)})`);
52
+ }
53
+ export async function dryRunTestFile(filePath) {
54
+ const absPath = path.resolve(filePath);
55
+ if (!existsSync(absPath)) {
56
+ console.log(chalk.yellow(`File not found: ${absPath}`));
57
+ return;
58
+ }
59
+ const config = ConfigParser.getInstance().getConfig();
60
+ const configPath = ConfigParser.getInstance().getConfigPath();
61
+ const projectRoot = configPath ? path.dirname(configPath) : process.cwd();
62
+ const codeceptConfig = {
63
+ helpers: {
64
+ Playwright: { browser: config.playwright.browser, url: config.playwright.url },
65
+ },
66
+ };
67
+ global.output_dir = path.join(projectRoot, 'output', 'states');
68
+ global.codecept_dir = projectRoot;
69
+ codeceptjs.container.create(codeceptConfig, {});
70
+ await codeceptjs.recorder.start();
71
+ await codeceptjs.container.started(null);
72
+ store.dryRun = true;
73
+ global.container = codeceptjs.container;
74
+ storeListener();
75
+ stepsListener();
76
+ codeceptjs.container.createMocha();
77
+ const mocha = codeceptjs.container.mocha();
78
+ mocha.reporter(class {
79
+ });
80
+ mocha.files = [absPath];
81
+ mocha.loadFiles();
82
+ let currentSuite = '';
83
+ codeceptjs.event.dispatcher.on('suite.before', (suite) => {
84
+ if (suite.title && suite.title !== currentSuite) {
85
+ currentSuite = suite.title;
86
+ console.log(`\n${chalk.bold.cyan(suite.title)}`);
87
+ console.log(chalk.gray(path.relative(process.cwd(), suite.file || absPath)));
88
+ }
89
+ });
90
+ codeceptjs.event.dispatcher.on('test.before', (t) => {
91
+ console.log(`\n ${chalk.green(figureSet.pointer)} ${chalk.bold(t.title)}`);
92
+ });
93
+ codeceptjs.event.dispatcher.on('step.start', (step) => {
94
+ const code = highlight(step.toCode(), { language: 'javascript' });
95
+ console.log(chalk.dim(` ${code}`));
96
+ });
97
+ await new Promise((resolve) => {
98
+ const runner = mocha.run(() => resolve());
99
+ runner.on('pending', (t) => {
100
+ console.log(chalk.gray(` ${figureSet.line} ${t.title} (skipped)`));
101
+ });
102
+ });
103
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "explorbot",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "CLI app built with React Ink, CodeceptJS, and Playwright",
5
5
  "license": "Elastic-2.0",
6
6
  "type": "module",
@@ -66,6 +66,7 @@
66
66
  "@ai-sdk/groq": "^3.0",
67
67
  "@ai-sdk/openai": "^3.0",
68
68
  "@axe-core/playwright": "^4.11.0",
69
+ "@codeceptjs/reflection": "^0.5.2",
69
70
  "@inkjs/ui": "^2.0.0",
70
71
  "@langfuse/otel": "^4.5.1",
71
72
  "@openrouter/ai-sdk-provider": "^2.3.3",
@@ -113,6 +114,7 @@
113
114
  },
114
115
  "devDependencies": {
115
116
  "@biomejs/biome": "^1.5.3",
117
+ "@copilotkit/aimock": "^1.14.0",
116
118
  "@testing-library/react": "^16.3.0",
117
119
  "@types/debug": "^4.1.12",
118
120
  "@types/jsdom": "^27.0.0",
@@ -0,0 +1,19 @@
1
+ <healing_approach>
2
+ The failed step was NOT performed. You MUST execute a replacement action.
3
+ Just waiting or diagnosing is NOT enough — you must perform the click/fill/press that was intended.
4
+
5
+ 1. FIRST: Check the page URL and ARIA — are you on the right page?
6
+ - If URL or ARIA shows login/error/404 page → call giveUp immediately
7
+ 2. If ARIA is empty/minimal → page may still be loading:
8
+ - Use xpathCheck() to detect spinners, loaders, or loading indicators on the page
9
+ - Use wait() to let the page load — it returns fresh ARIA automatically
10
+ - Then execute the replacement action with a working locator
11
+ 3. If the target element is visible in ARIA:
12
+ - Use click() with multiple fallback locators (ARIA, CSS, XPath)
13
+ 4. If element is NOT in ARIA but page is correct:
14
+ - Use xpathCheck() to search the full HTML
15
+ - Use research() to get a semantic UI map of the page if needed
16
+ - If found → click it
17
+ - If not → use bash to check console logs, then call giveUp
18
+ 5. Call done() with the command that replaced the failed step
19
+ </healing_approach>
package/src/action.ts CHANGED
@@ -64,7 +64,7 @@ class Action {
64
64
  }
65
65
  }
66
66
 
67
- async capturePageState({ includeScreenshot = false, ariaSnapshot: preCapuredAria }: { includeScreenshot?: boolean; ariaSnapshot?: string } = {}): Promise<ActionResult> {
67
+ async capturePageState({ includeScreenshot = false }: { includeScreenshot?: boolean } = {}): Promise<ActionResult> {
68
68
  try {
69
69
  const currentState = this.stateManager.getCurrentState();
70
70
  const stateHash = currentState?.hash || 'screenshot';
@@ -111,16 +111,14 @@ class Action {
111
111
  // Capture iframe HTML snapshots
112
112
  const iframeSnapshots = await this.captureIframeSnapshots(html);
113
113
 
114
- let ariaSnapshot: string | null = preCapuredAria || null;
114
+ let ariaSnapshot: string | null = null;
115
115
  let ariaSnapshotFile: string | undefined = undefined;
116
116
 
117
- if (!ariaSnapshot) {
118
- try {
119
- const page = this.playwrightHelper.page;
120
- ariaSnapshot = await page.locator('body').ariaSnapshot();
121
- } catch (err) {
122
- debugLog('ARIA snapshot failed:', err instanceof Error ? `${err.message}\n${err.stack}` : err);
123
- }
117
+ try {
118
+ const page = this.playwrightHelper.page;
119
+ ariaSnapshot = await page.locator('body').ariaSnapshot();
120
+ } catch (err) {
121
+ debugLog('ARIA snapshot failed:', err instanceof Error ? `${err.message}\n${err.stack}` : err);
124
122
  }
125
123
 
126
124
  if (ariaSnapshot) {
@@ -1,9 +1,10 @@
1
- import { mkdirSync, writeFileSync } from 'node:fs';
1
+ import { mkdirSync, readFileSync, writeFileSync } from 'node:fs';
2
2
  import { join } from 'node:path';
3
3
  import dedent from 'dedent';
4
4
  import { z } from 'zod';
5
5
  import { ActionResult } from '../action-result.ts';
6
6
  import { ConfigParser } from '../config.ts';
7
+ import { KnowledgeTracker } from '../knowledge-tracker.ts';
7
8
  import { ExperienceTracker, type SessionExperienceEntry, type SessionStep } from '../experience-tracker.ts';
8
9
  import { type Reporter, type ReporterStep } from '../reporter.ts';
9
10
  import type { StateManager } from '../state-manager.ts';
@@ -397,6 +398,7 @@ export class Historian {
397
398
  if (startUrl) {
398
399
  lines.push('Before(({ I }) => {');
399
400
  lines.push(` I.amOnPage('${this.escapeString(startUrl)}');`);
401
+ lines.push(...this.getKnowledgeLines(startUrl));
400
402
  lines.push('});');
401
403
  lines.push('');
402
404
  }
@@ -425,8 +427,7 @@ export class Historian {
425
427
  lines.push('');
426
428
  }
427
429
 
428
- const outputDir = ConfigParser.getInstance().getOutputDir();
429
- const testsDir = join(outputDir, 'tests');
430
+ const testsDir = ConfigParser.getInstance().getTestsDir();
430
431
  mkdirSync(testsDir, { recursive: true });
431
432
 
432
433
  const filename = plan.title.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
@@ -437,6 +438,18 @@ export class Historian {
437
438
  return filePath;
438
439
  }
439
440
 
441
+ rewriteScenarioInFile(filePath: string, healedSteps: Array<{ test: string; original: string; healed: string }>): void {
442
+ let content = readFileSync(filePath, 'utf-8');
443
+
444
+ for (const step of healedSteps) {
445
+ if (!content.includes(step.original)) continue;
446
+ content = content.replace(step.original, step.healed);
447
+ }
448
+
449
+ writeFileSync(filePath, content);
450
+ tag('substep').log(`Updated test file with healed steps: ${filePath}`);
451
+ }
452
+
440
453
  private getExecutionLabel(exec: ToolExecution, fallback?: string): string {
441
454
  return exec.input?.explanation || exec.input?.assertion || exec.input?.note || fallback || '';
442
455
  }
@@ -445,6 +458,27 @@ export class Historian {
445
458
  return str.replace(/'/g, "\\'").replace(/\n/g, ' ');
446
459
  }
447
460
 
461
+ private getKnowledgeLines(url: string, indent = ' '): string[] {
462
+ const knowledgeTracker = new KnowledgeTracker();
463
+ const state = new ActionResult({ url });
464
+ const { wait, waitForElement, code } = knowledgeTracker.getStateParameters(state, ['wait', 'waitForElement', 'code']);
465
+
466
+ const lines: string[] = [];
467
+ if (wait !== undefined) {
468
+ lines.push(`${indent}I.wait(${wait});`);
469
+ }
470
+ if (waitForElement) {
471
+ lines.push(`${indent}I.waitForElement(${JSON.stringify(waitForElement)});`);
472
+ }
473
+ if (code) {
474
+ for (const codeLine of code.split('\n')) {
475
+ const trimmed = codeLine.trim();
476
+ if (trimmed) lines.push(`${indent}${trimmed}`);
477
+ }
478
+ }
479
+ return lines;
480
+ }
481
+
448
482
  private stripComments(code: string): string {
449
483
  return code
450
484
  .split('\n')
@@ -40,6 +40,18 @@ class Navigator implements Agent {
40
40
  You are given the web page and a message from user.
41
41
  You need to resolve the state of the page based on the message.
42
42
  </task>
43
+
44
+ ${locatorRule}
45
+
46
+ <constraints>
47
+ NEVER navigate away from the base URL domain. Stay on the same origin at all times.
48
+ NEVER attempt to rewrite, replace, mock, or spoof the URL via JavaScript, history API, location assignment, or any client-side trick.
49
+ NEVER use executeScript, executeAsyncScript, or any JS evaluation to change the URL, bypass redirects, or fake the page state.
50
+ If the target URL redirects to an authentication/login page, DO NOT try to force the original URL. Instead:
51
+ 1. Look for credentials in the provided knowledge/hint context and perform a real login through the form.
52
+ 2. If no credentials are available, ask the user for credentials or ask the user to log in manually.
53
+ A redirect to /login, /sign_in, /auth, or similar is a signal that authentication is required — treat it as such, never as an obstacle to bypass.
54
+ </constraints>
43
55
  `;
44
56
  private freeSailSystemPrompt = dedent`
45
57
  <role>
@@ -168,6 +180,14 @@ class Navigator implements Agent {
168
180
  ${message}
169
181
  </message>
170
182
 
183
+ <page>
184
+ ${actionResult.toAiContext()}
185
+
186
+ <page_html>
187
+ ${await actionResult.combinedHtml()}
188
+ </page_html>
189
+ </page>
190
+
171
191
  <task>
172
192
  Identify the actual request of the user.
173
193
  Identify what is expected by user.
@@ -178,25 +198,13 @@ class Navigator implements Agent {
178
198
  Try various ways to achieve the result
179
199
  </task>
180
200
 
181
-
182
- <page>
183
- ${actionResult.toAiContext()}
184
-
185
- <page_html>
186
- ${await actionResult.simplifiedHtml()}
187
- </page_html>
188
- </page>
189
-
190
-
191
- ${knowledge}
192
-
193
201
  ${actionRule}
194
202
 
195
- ${experience}
203
+ ${RulesLoader.loadRules('navigator', ['multiple-locator', 'output'], actionResult.url || '').replace('{{maxAttempts}}', String(this.MAX_ATTEMPTS))}
196
204
 
197
- ${locatorRule}
205
+ ${experience}
198
206
 
199
- ${RulesLoader.loadRules('navigator', ['multiple-locator', 'output'], actionResult.url || '').replace('{{maxAttempts}}', String(this.MAX_ATTEMPTS))}
207
+ ${knowledge}
200
208
  `;
201
209
 
202
210
  const conversation = this.provider.startConversation(this.systemPrompt, 'navigator');
@@ -234,7 +242,7 @@ class Navigator implements Agent {
234
242
  Previous solutions did not work. Here is the full HTML context:
235
243
 
236
244
  <page_html>
237
- ${await actionResult.simplifiedHtml()}
245
+ ${await actionResult.combinedHtml()}
238
246
  </page_html>
239
247
 
240
248
  Please suggest new solutions based on this additional context.
@@ -265,6 +273,7 @@ class Navigator implements Agent {
265
273
 
266
274
  if (resolved) {
267
275
  tag('success').log('Navigation resolved successfully');
276
+ await this.experienceTracker.saveSuccessfulResolution(actionResult, message, codeBlock);
268
277
  stop();
269
278
  return;
270
279
  }
@@ -479,6 +488,14 @@ class Navigator implements Agent {
479
488
  ${message}
480
489
  </message>
481
490
 
491
+ <page>
492
+ ${actionResult.toAiContext()}
493
+
494
+ <page_html>
495
+ ${await actionResult.combinedHtml()}
496
+ </page_html>
497
+ </page>
498
+
482
499
  <task>
483
500
  Identify what assertion the user wants to verify on the page.
484
501
  Propose different CodeceptJS assertion code blocks to verify the expected state.
@@ -492,21 +509,11 @@ class Navigator implements Agent {
492
509
  Do not generate assertions that would pass even if the specific claim is false.
493
510
  </task>
494
511
 
495
- <page>
496
- ${actionResult.toAiContext()}
497
-
498
- <page_html>
499
- ${await actionResult.simplifiedHtml()}
500
- </page_html>
501
- </page>
502
-
503
- ${knowledge}
504
-
505
512
  ${RulesLoader.loadRules('navigator', ['verification-actions'], actionResult.url || '')}
506
513
 
507
- ${locatorRule}
508
-
509
514
  ${experience}
515
+
516
+ ${knowledge}
510
517
  `;
511
518
 
512
519
  debugLog('Sending verification prompt to AI provider');
package/src/ai/pilot.ts CHANGED
@@ -256,9 +256,18 @@ export class Pilot implements Agent {
256
256
 
257
257
  Plan the test execution for this scenario.
258
258
 
259
- FIRST: Call precondition() to create fresh data that this test will act on.
260
- Ask: "What will this test edit/delete/use?" — create THAT item via precondition.
261
- Do not describe what's already on the page — create new disposable items for the test.
259
+ FIRST: Decide if precondition() is needed.
260
+
261
+ Call precondition() WHEN:
262
+ - The scenario edits/deletes/modifies an item, and you want a DISPOSABLE item to act on safely
263
+ - The scenario needs specific data clearly NOT on the current page (e.g., items with specific statuses for filtering)
264
+
265
+ SKIP precondition() WHEN:
266
+ - The scenario is "Create X" — the test itself creates the item
267
+ - The current page already shows the item the test will act on (check <state> and <page_summary>)
268
+ - The scenario tests navigation, UI behavior, or viewing — no data mutation needed
269
+
270
+ If needed, call precondition() now. If not, proceed directly to planning.
262
271
 
263
272
  THEN: Based on the page elements and current state, outline:
264
273
  1. Which elements to interact with and in what order
@@ -701,6 +710,8 @@ export class Pilot implements Agent {
701
710
  - Click succeeded but ariaDiff shows elements unrelated to tester's intention (e.g., clicked "Edit" but dropdown appeared) → wrong button or unexpected behavior. Instruct Tester to Escape and try a different approach.
702
711
  - form(I.type()) succeeded → I.type() sends keys to whatever is focused, no guarantee it's the right field. Instruct Tester to verify with see() that text appeared in the correct field. If targetedHtml shows a button/link, text went to wrong element — click the correct field first and retry.
703
712
  - ariaDiff shows 5+ elements removed/added after clicking content → page entered a different mode (editor, panel, modal). Instruct Tester to call context() to see current state before guessing selectors.
713
+ - Dropdown/select opened but contains NO options, or a list/table is empty when items were expected → data doesn't exist yet. Call precondition() to create the missing items (labels, categories, etc.), then instruct Tester to retry.
714
+ - Tester tries to select/filter/assign something but the option list is empty or expected value is not present → missing auxiliary data. Call precondition() to create it.
704
715
 
705
716
  Detecting logically wrong successes — review "executed", "element", and "skipped" fields:
706
717
  - Click SUCCESS but "executed" command differs from "explanation" intent → wrong element was clicked. The intended element wasn't found and a different one was clicked instead.
@@ -750,23 +761,36 @@ export class Pilot implements Agent {
750
761
  YOUR tools (Pilot-only):
751
762
  - precondition(description) — create FRESH test data via API that the test will act on. Do NOT request users.
752
763
 
753
- PRECONDITIONS — what to create:
764
+ PRECONDITIONS — when and what to create:
754
765
  Preconditions create NEW disposable items that the test will modify, delete, or interact with.
755
- Do NOT describe what already exists on the page — describe what NEW data the test needs to act on.
756
766
 
757
767
  Ask yourself: "What object will this test change/delete/use? Create THAT."
758
768
 
759
- Examples:
769
+ When to call precondition():
770
+ - Scenario edits/deletes/modifies an item → create a disposable target
771
+ - Scenario needs auxiliary data (labels, categories, statuses to filter by)
772
+ - Tester failed because required data is missing (empty dropdown, no items to select)
773
+
774
+ When to SKIP precondition():
775
+ - Scenario is "Create X" — the test itself creates the item, no precondition needed
776
+ - Current page already shows the exact data needed (check <state> h1/title and <page_summary>)
777
+ - Scenario tests navigation, search UI, or viewing — no data mutation involved
778
+
779
+ Examples — when to create:
760
780
  - "Edit test description" → precondition("1 test") — the test will edit this item
761
781
  - "Delete a comment" → precondition("1 comment") — the test will delete this item
762
782
  - "Assign a label to item" → precondition("1 item and 1 label named Bug") — test assigns the label
763
783
  - "Filter by status" → precondition("3 items: 2 with status Open, 1 with status Closed")
764
- - "Move item between lists" → precondition("1 item in list A")
765
784
 
766
- WRONG: precondition("1 test suite named Updated Suite with existing tests") this describes the page, not what to create
785
+ Examples — when to skip:
786
+ - "Create a new blog post" → SKIP, the test creates it
787
+ - "Edit blog post" while on a blog post page → SKIP, data already exists
788
+ - "View dashboard" → SKIP, no data mutation
789
+
790
+ WRONG: precondition("1 test suite named Updated Suite with existing tests") — describes the page, not what to create
767
791
  RIGHT: precondition("1 test") — create a fresh test that the scenario will edit
768
792
 
769
- Call precondition() for EVERY item the scenario will act on. Keep descriptions short and specific.
793
+ Keep descriptions short and specific.
770
794
 
771
795
  Response format:
772
796
  PROGRESS: <1 sentence assessment>
@@ -33,3 +33,7 @@ export function WithSessionDedup<T extends Constructor>(Base: T) {
33
33
  }
34
34
  };
35
35
  }
36
+
37
+ export function clearSessionDedup(): void {
38
+ previousPlans.length = 0;
39
+ }
@@ -15,3 +15,7 @@ export function getStyles(): Record<string, string> {
15
15
  export function getActiveStyle(iteration: number, override?: string): { name: string; approach: string } {
16
16
  return RulesLoader.getActiveStyle(getStyles(), iteration, override);
17
17
  }
18
+
19
+ export function clearStyleCache(): void {
20
+ cache = null;
21
+ }