explorbot 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. package/README.md +27 -1
  2. package/bin/explorbot-cli.ts +86 -15
  3. package/boat/api-tester/src/ai/curler-tools.ts +3 -3
  4. package/boat/api-tester/src/ai/curler.ts +1 -1
  5. package/boat/api-tester/src/apibot.ts +2 -2
  6. package/boat/api-tester/src/config.ts +1 -1
  7. package/dist/bin/explorbot-cli.js +85 -14
  8. package/dist/boat/api-tester/src/ai/curler-tools.js +2 -2
  9. package/dist/boat/api-tester/src/apibot.js +2 -2
  10. package/dist/package.json +2 -2
  11. package/dist/rules/navigator/output.md +9 -0
  12. package/dist/rules/navigator/verification-actions.md +2 -0
  13. package/dist/src/action-result.js +23 -1
  14. package/dist/src/action.js +46 -38
  15. package/dist/src/ai/bosun.js +16 -2
  16. package/dist/src/ai/conversation.js +39 -0
  17. package/dist/src/ai/experience-compactor.js +235 -50
  18. package/dist/src/ai/historian/codeceptjs.js +109 -0
  19. package/dist/src/ai/historian/experience.js +320 -0
  20. package/dist/src/ai/historian/mixin.js +2 -0
  21. package/dist/src/ai/historian/playwright.js +145 -0
  22. package/dist/src/ai/historian/utils.js +18 -0
  23. package/dist/src/ai/historian.js +19 -398
  24. package/dist/src/ai/navigator.js +133 -80
  25. package/dist/src/ai/pilot.js +254 -13
  26. package/dist/src/ai/planner/subpages.js +1 -30
  27. package/dist/src/ai/planner.js +33 -13
  28. package/dist/src/ai/provider.js +55 -18
  29. package/dist/src/ai/rerunner.js +3 -3
  30. package/dist/src/ai/researcher/deep-analysis.js +1 -1
  31. package/dist/src/ai/researcher/fingerprint-worker.js +1 -1
  32. package/dist/src/ai/researcher/locators.js +1 -1
  33. package/dist/src/ai/researcher/sections.js +8 -1
  34. package/dist/src/ai/researcher.js +43 -41
  35. package/dist/src/ai/rules.js +26 -14
  36. package/dist/src/ai/tester.js +90 -26
  37. package/dist/src/ai/tools.js +18 -10
  38. package/dist/src/api/request-store.js +20 -0
  39. package/dist/src/api/xhr-capture.js +19 -3
  40. package/dist/src/browser-server.js +16 -3
  41. package/dist/src/command-handler.js +1 -1
  42. package/dist/src/commands/add-rule-command.js +12 -9
  43. package/dist/src/commands/base-command.js +20 -0
  44. package/dist/src/commands/clean-command.js +3 -2
  45. package/dist/src/commands/compact-command.js +138 -0
  46. package/dist/src/commands/context-command.js +7 -1
  47. package/dist/src/commands/drill-command.js +4 -1
  48. package/dist/src/commands/experience-command.js +104 -0
  49. package/dist/src/commands/explore-command.js +54 -19
  50. package/dist/src/commands/freesail-command.js +2 -0
  51. package/dist/src/commands/index.js +7 -3
  52. package/dist/src/commands/init-command.js +11 -10
  53. package/dist/src/commands/learn-command.js +1 -1
  54. package/dist/src/commands/navigate-command.js +4 -1
  55. package/dist/src/commands/plan-clear-command.js +4 -1
  56. package/dist/src/commands/plan-command.js +43 -4
  57. package/dist/src/commands/plan-edit-command.js +1 -1
  58. package/dist/src/commands/plan-load-command.js +4 -1
  59. package/dist/src/commands/plan-reload-command.js +4 -1
  60. package/dist/src/commands/plan-save-command.js +20 -8
  61. package/dist/src/commands/rerun-command.js +4 -0
  62. package/dist/src/commands/research-command.js +5 -2
  63. package/dist/src/commands/start-command.js +5 -1
  64. package/dist/src/commands/test-command.js +7 -1
  65. package/dist/src/components/App.js +15 -5
  66. package/dist/src/execution-controller.js +13 -2
  67. package/dist/src/experience-tracker.js +174 -83
  68. package/dist/src/explorbot.js +31 -22
  69. package/dist/src/explorer.js +12 -5
  70. package/dist/src/observability.js +50 -99
  71. package/dist/src/playwright-recorder.js +309 -0
  72. package/dist/src/reporter.js +17 -2
  73. package/dist/src/stats.js +2 -0
  74. package/dist/src/suite.js +1 -1
  75. package/dist/src/test-plan.js +12 -0
  76. package/dist/src/utils/aria.js +37 -1
  77. package/dist/src/utils/error-page.js +30 -7
  78. package/dist/src/utils/logger.js +1 -1
  79. package/dist/src/utils/next-steps.js +37 -0
  80. package/dist/src/utils/rules-loader.js +1 -1
  81. package/dist/src/utils/test-files.js +1 -1
  82. package/dist/src/utils/url-matcher.js +50 -0
  83. package/package.json +2 -2
  84. package/rules/navigator/output.md +9 -0
  85. package/rules/navigator/verification-actions.md +2 -0
  86. package/src/action-result.ts +26 -1
  87. package/src/action.ts +44 -37
  88. package/src/ai/bosun.ts +16 -2
  89. package/src/ai/conversation.ts +37 -0
  90. package/src/ai/experience-compactor.ts +270 -63
  91. package/src/ai/historian/codeceptjs.ts +130 -0
  92. package/src/ai/historian/experience.ts +383 -0
  93. package/src/ai/historian/mixin.ts +4 -0
  94. package/src/ai/historian/playwright.ts +169 -0
  95. package/src/ai/historian/utils.ts +23 -0
  96. package/src/ai/historian.ts +35 -468
  97. package/src/ai/navigator.ts +140 -85
  98. package/src/ai/pilot.ts +259 -14
  99. package/src/ai/planner/subpages.ts +1 -24
  100. package/src/ai/planner.ts +34 -14
  101. package/src/ai/provider.ts +52 -18
  102. package/src/ai/rerunner.ts +3 -3
  103. package/src/ai/researcher/deep-analysis.ts +1 -1
  104. package/src/ai/researcher/fingerprint-worker.ts +1 -1
  105. package/src/ai/researcher/locators.ts +2 -2
  106. package/src/ai/researcher/sections.ts +7 -1
  107. package/src/ai/researcher.ts +47 -42
  108. package/src/ai/rules.ts +27 -14
  109. package/src/ai/task-agent.ts +1 -1
  110. package/src/ai/tester.ts +94 -26
  111. package/src/ai/tools.ts +53 -29
  112. package/src/api/request-store.ts +22 -0
  113. package/src/api/xhr-capture.ts +21 -3
  114. package/src/browser-server.ts +17 -3
  115. package/src/command-handler.ts +1 -1
  116. package/src/commands/add-rule-command.ts +13 -9
  117. package/src/commands/base-command.ts +26 -1
  118. package/src/commands/clean-command.ts +4 -3
  119. package/src/commands/compact-command.ts +156 -0
  120. package/src/commands/context-command.ts +8 -2
  121. package/src/commands/drill-command.ts +5 -2
  122. package/src/commands/experience-command.ts +125 -0
  123. package/src/commands/explore-command.ts +58 -21
  124. package/src/commands/freesail-command.ts +2 -0
  125. package/src/commands/index.ts +7 -3
  126. package/src/commands/init-command.ts +11 -10
  127. package/src/commands/learn-command.ts +2 -2
  128. package/src/commands/navigate-command.ts +5 -2
  129. package/src/commands/plan-clear-command.ts +5 -2
  130. package/src/commands/plan-command.ts +47 -5
  131. package/src/commands/plan-edit-command.ts +2 -2
  132. package/src/commands/plan-load-command.ts +5 -2
  133. package/src/commands/plan-reload-command.ts +5 -2
  134. package/src/commands/plan-save-command.ts +20 -9
  135. package/src/commands/rerun-command.ts +5 -0
  136. package/src/commands/research-command.ts +6 -3
  137. package/src/commands/start-command.ts +6 -2
  138. package/src/commands/test-command.ts +8 -2
  139. package/src/components/App.tsx +16 -5
  140. package/src/config.ts +6 -1
  141. package/src/execution-controller.ts +14 -3
  142. package/src/experience-tracker.ts +198 -100
  143. package/src/explorbot.ts +33 -23
  144. package/src/explorer.ts +14 -5
  145. package/src/observability.ts +50 -109
  146. package/src/playwright-recorder.ts +305 -0
  147. package/src/reporter.ts +17 -3
  148. package/src/stats.ts +4 -0
  149. package/src/suite.ts +1 -1
  150. package/src/test-plan.ts +12 -0
  151. package/src/utils/aria.ts +38 -1
  152. package/src/utils/error-page.ts +32 -7
  153. package/src/utils/logger.ts +1 -1
  154. package/src/utils/next-steps.ts +51 -0
  155. package/src/utils/rules-loader.ts +1 -1
  156. package/src/utils/test-files.ts +1 -1
  157. package/src/utils/url-matcher.ts +43 -0
@@ -22,7 +22,14 @@ export function WithSections(Base) {
22
22
  for (const [name, description] of targets) {
23
23
  if (executionController.isInterrupted())
24
24
  break;
25
- const text = await this._researchSingleSection(name, description, ariaSnapshot, focusCss);
25
+ let text = '';
26
+ try {
27
+ text = await this._researchSingleSection(name, description, ariaSnapshot, focusCss);
28
+ }
29
+ catch (err) {
30
+ tag('warning').log(`Section "${name}" research failed, skipping: ${err instanceof Error ? err.message : err}`);
31
+ continue;
32
+ }
26
33
  if (!text)
27
34
  continue;
28
35
  const trimmed = text.trim();
@@ -2,16 +2,16 @@ import dedent from 'dedent';
2
2
  import { ActionResult } from '../action-result.js';
3
3
  import { setActivity } from "../activity.js";
4
4
  import { outputPath } from "../config.js";
5
+ import { executionController } from "../execution-controller.js";
5
6
  import { Observability } from "../observability.js";
6
7
  import { Stats } from "../stats.js";
7
8
  import { diffAriaSnapshots } from "../utils/aria.js";
8
- import { isErrorPage } from "../utils/error-page.js";
9
+ import { ErrorPageError, detectPageCondition } from "../utils/error-page.js";
9
10
  import { HooksRunner } from "../utils/hooks-runner.js";
10
11
  import { isBodyEmpty } from "../utils/html.js";
11
12
  import { createDebug, pluralize, tag } from '../utils/logger.js';
12
13
  import { mdq } from "../utils/markdown-query.js";
13
- import { withRetry } from "../utils/retry.js";
14
- import { executionController } from "../execution-controller.js";
14
+ import { RulesLoader } from "../utils/rules-loader.js";
15
15
  import { ContextLengthError } from './provider.js';
16
16
  import { findSimilarResearch, getCachedResearch, saveResearch } from "./researcher/cache.js";
17
17
  import { WithCoordinates } from "./researcher/coordinates.js";
@@ -22,7 +22,6 @@ import { extractValidContainers, formatResearchSummary, parseResearchSections }
22
22
  import { ResearchResult } from "./researcher/research-result.js";
23
23
  import { WithSections } from "./researcher/sections.js";
24
24
  import { locatorRule as generalLocatorRuleText } from './rules.js';
25
- import { RulesLoader } from "../utils/rules-loader.js";
26
25
  import { TaskAgent } from "./task-agent.js";
27
26
  const debugLog = createDebug('explorbot:researcher');
28
27
  export const POSSIBLE_SECTIONS = {
@@ -98,18 +97,15 @@ export class Researcher extends ResearcherBase {
98
97
  const annotatedElements = await this.explorer.annotateElements();
99
98
  debugLog(`Annotated ${annotatedElements.length} interactive elements with eidx`);
100
99
  this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot && this.provider.hasVision() });
101
- if (isErrorPage(this.actionResult)) {
102
- const recovered = await this.waitForPageLoad(screenshot);
103
- if (!recovered) {
104
- tag('warning').log(`Detected error page at ${state.url}`);
105
- return dedent `
106
- ## Error Page Detected
107
-
108
- URL: ${state.url}
109
- Title: ${this.actionResult.title || 'N/A'}
110
-
111
- Research skipped. Navigate to a valid page to continue.
112
- `;
100
+ const condition = detectPageCondition(this.actionResult);
101
+ if (condition === 'error') {
102
+ tag('warning').log(`Detected error page at ${state.url}`);
103
+ throw new ErrorPageError(state.url, this.actionResult.title);
104
+ }
105
+ if (condition === 'loading') {
106
+ const settled = await this.waitUntilSettled(screenshot);
107
+ if (!settled) {
108
+ tag('warning').log(`Page at ${state.url} did not finish loading within timeout, continuing with best-effort research`);
113
109
  }
114
110
  }
115
111
  debugLog('Researching web page:', this.actionResult.url);
@@ -292,41 +288,47 @@ export class Researcher extends ResearcherBase {
292
288
  }
293
289
  return;
294
290
  }
295
- if (isEmpty) {
296
- debugLog('HTML body is empty, refreshing page');
297
- tag('step').log('Page body is empty, refreshing...');
298
- }
299
- else {
300
- debugLog('Not on current state, navigating to URL');
301
- tag('step').log('Navigating to URL...');
291
+ if (isEmpty && isOnCurrentState) {
292
+ debugLog('HTML body empty on current URL, waiting for content');
293
+ tag('step').log('Page body is empty, waiting for content...');
294
+ await this.waitUntilSettled(screenshot ?? false);
295
+ return;
302
296
  }
297
+ debugLog('Not on current state, navigating to URL');
298
+ tag('step').log('Navigating to URL...');
303
299
  await this.explorer.visit(url);
304
300
  this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot ?? false });
305
301
  }
306
- async waitForPageLoad(screenshot) {
302
+ async waitUntilSettled(screenshot) {
307
303
  const errorPageTimeout = this.explorer.getConfig().ai?.agents?.researcher?.errorPageTimeout ?? 10;
308
304
  if (errorPageTimeout <= 0)
309
305
  return false;
306
+ const page = this.explorer.playwrightHelper.page;
307
+ const includeScreenshot = screenshot && this.provider.hasVision();
310
308
  try {
311
- await withRetry(async () => {
312
- await this.explorer.annotateElements();
313
- this.actionResult = await this.explorer.createAction().capturePageState({
314
- includeScreenshot: screenshot && this.provider.hasVision(),
315
- });
316
- if (isErrorPage(this.actionResult))
317
- throw new Error('Error page detected');
318
- }, {
319
- maxAttempts: Math.ceil(errorPageTimeout / 3) + 1,
320
- baseDelay: 1000,
321
- maxDelay: 5000,
322
- backoffMultiplier: 2,
323
- retryCondition: (e) => e.message === 'Error page detected',
324
- });
325
- return true;
309
+ await page?.waitForLoadState('networkidle', { timeout: errorPageTimeout * 1000 });
326
310
  }
327
- catch {
328
- return false;
311
+ catch { }
312
+ await this.explorer.annotateElements();
313
+ this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
314
+ let condition = detectPageCondition(this.actionResult);
315
+ if (condition === 'error') {
316
+ throw new ErrorPageError(this.actionResult.url, this.actionResult.title);
317
+ }
318
+ if (condition === 'ok')
319
+ return true;
320
+ for (let i = 0; i < 3; i++) {
321
+ await new Promise((r) => setTimeout(r, 1000));
322
+ await this.explorer.annotateElements();
323
+ this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
324
+ condition = detectPageCondition(this.actionResult);
325
+ if (condition === 'error') {
326
+ throw new ErrorPageError(this.actionResult.url, this.actionResult.title);
327
+ }
328
+ if (condition === 'ok')
329
+ return true;
329
330
  }
331
+ return false;
330
332
  }
331
333
  getConfiguredSections() {
332
334
  const configSections = this.explorer.getConfig().ai?.agents?.researcher?.sections;
@@ -161,6 +161,20 @@ export const focusedElementRule = dedent `
161
161
  If focus is on wrong element, click the correct field first.
162
162
  </focused_element_actions>
163
163
  `;
164
+ export const unexpectedPopupRule = dedent `
165
+ <unexpected_popup_rule>
166
+ If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
167
+ If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
168
+ If a click error mentions "intercepts pointer events", another element is covering the target — dismiss it first.
169
+ If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
170
+
171
+ Dismiss strategy (try in order):
172
+ 1. I.clickXY(0, 0) — click outside the popup to close it
173
+ 2. I.pressKey('Escape') — press Escape to dismiss
174
+ 3. I.click('Cancel') — click Cancel button if present
175
+ 4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
176
+ </unexpected_popup_rule>
177
+ `;
164
178
  export const sectionContextRule = dedent `
165
179
  <section_context_rule>
166
180
  Context parameter is DEFAULT for all interactions. ALWAYS use container from UI map sections unless locator is XPath or unique ID.
@@ -186,17 +200,7 @@ export const sectionContextRule = dedent `
186
200
  - Locator is a unique ID (#specific-element)
187
201
  </section_context_rule>
188
202
 
189
- <unexpected_popup_rule>
190
- If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
191
- If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
192
- If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
193
-
194
- Dismiss strategy (try in order):
195
- 1. I.clickXY(0, 0) — click outside the popup to close it
196
- 2. I.pressKey('Escape') — press Escape to dismiss
197
- 3. I.click('Cancel') — click Cancel button if present
198
- 4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
199
- </unexpected_popup_rule>
203
+ ${unexpectedPopupRule}
200
204
  `;
201
205
  export function multipleTabsRule(tabs) {
202
206
  const tabsList = tabs.map((tab, i) => ` ${i + 1}. ${tab.title} - ${tab.url}`).join('\n');
@@ -265,12 +269,19 @@ export const actionRule = dedent `
265
269
  I.fillField('Username', 'John', '.login-form'); // fills Username inside .login-form
266
270
  I.fillField('Username', 'John'); // fills the field located by name or placeholder or label "Username" with the text "John"
267
271
  I.fillField('//user/input', 'John'); // fills the field located by XPath "//user/input" with the text "John"
268
- </example>
272
+ I.fillField('Description', 'Hello world', '.editor'); // works for rich text / code editors too
273
+ </example>
274
+
275
+ I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors
276
+ (Monaco, ProseMirror, CodeMirror, TipTap, Quill, Draft.js, Slate, etc.) transparently.
277
+ ALWAYS use I.fillField for rich editors — target the editor container or its nearest label/heading with a normal locator.
278
+ Do NOT open the editor with raw JS (executeScript, page.evaluate), do NOT dispatch synthetic events,
279
+ do NOT call the editor's own API (monaco.editor.setValue, view.dispatch, etc.) to write text.
269
280
 
270
281
  ### I.type
271
282
 
272
- Types text into the currently focused element. Use when fillField doesn't work,
273
- for instance, for highly customized input fields like Monaco editors or rich text editors.
283
+ Types text into the currently focused element. Use only when there is no locator you can pass to I.fillField
284
+ e.g. the target is implicit (a just-opened command palette, an autocomplete that steals focus, a canvas-based surface).
274
285
 
275
286
  I.type(<text>)
276
287
 
@@ -282,6 +293,7 @@ export const actionRule = dedent `
282
293
  DOES NOT receive any locator, just text to type.
283
294
  NEVER write: I.type('text', locator) or I.type('text', {locator: '...'}) — this is INVALID.
284
295
  To type into a specific field: use I.fillField(locator, text) or I.click(locator) then I.type(text).
296
+ Do NOT reach for I.type just because the target looks like a rich editor — I.fillField handles those.
285
297
 
286
298
  ### I.pressKey
287
299
 
@@ -6,9 +6,11 @@ import { z } from 'zod';
6
6
  import { ActionResult } from "../action-result.js";
7
7
  import { setActivity } from "../activity.js";
8
8
  import { ConfigParser } from "../config.js";
9
+ import { Observability } from "../observability.js";
9
10
  import { Stats } from "../stats.js";
10
11
  import { TestResult } from "../test-plan.js";
11
12
  import { detectFocusArea, extractFocusedElement } from "../utils/aria.js";
13
+ import { ErrorPageError } from "../utils/error-page.js";
12
14
  import { HooksRunner } from "../utils/hooks-runner.js";
13
15
  import { createDebug, tag } from "../utils/logger.js";
14
16
  import { loop } from "../utils/loop.js";
@@ -121,10 +123,34 @@ export class Tester extends TaskAgent {
121
123
  // Note: Markdown saving functionality removed from Conversation class
122
124
  const initialPrompt = await this.buildTestPrompt(task, initialState);
123
125
  conversation.addUserText(initialPrompt);
126
+ return await Observability.run(`test: ${task.scenario}`, {
127
+ sessionId: task.sessionName,
128
+ tags: ['tester'],
129
+ input: {
130
+ scenario: task.scenario,
131
+ startUrl: task.startUrl,
132
+ expected: task.expected,
133
+ },
134
+ }, async () => this.runTestSession(task, initialState, conversation, { offFailedRequest, page, onPageError, onConsoleMessage }));
135
+ }
136
+ async runTestSession(task, initialState, conversation, handlers) {
137
+ const { offFailedRequest, page, onPageError, onConsoleMessage } = handlers;
124
138
  if (this.pilot) {
125
- const plan = await this.pilot.planTest(task, initialState);
126
- if (plan) {
127
- conversation.addUserText(`Pilot's test plan:\n${plan}\n\nFollow this plan while executing the test.`);
139
+ try {
140
+ const plan = await this.pilot.planTest(task, initialState);
141
+ if (plan) {
142
+ conversation.addUserText(`Pilot's test plan:\n${plan}\n\nFollow this plan while executing the test.`);
143
+ }
144
+ }
145
+ catch (err) {
146
+ const message = err instanceof Error ? err.message : String(err);
147
+ tag('error').log(`Pilot planning failed: ${message}`);
148
+ task.addNote(`Planning failed: ${message}`, TestResult.FAILED);
149
+ task.finish(TestResult.FAILED);
150
+ offFailedRequest?.();
151
+ page?.off('pageerror', onPageError);
152
+ page?.off('console', onConsoleMessage);
153
+ return { success: false };
128
154
  }
129
155
  }
130
156
  debugLog('Starting test execution with tools');
@@ -135,6 +161,8 @@ export class Tester extends TaskAgent {
135
161
  const currentUrl = this.explorer.getStateManager().getCurrentState()?.url || task.startUrl || '';
136
162
  await this.hooksRunner.runBeforeHook('tester', currentUrl);
137
163
  const offStateChange = this.explorer.getStateManager().onStateChange((event) => {
164
+ if (task.hasFinished)
165
+ return;
138
166
  if (event.toState?.url === event.fromState?.url)
139
167
  return;
140
168
  task.addNote(`Navigated to ${event.toState?.url}`, TestResult.PASSED);
@@ -178,6 +206,10 @@ export class Tester extends TaskAgent {
178
206
  conversation.cleanupTag('page_aria', '...cleaned aria snapshot...', 2);
179
207
  conversation.cleanupTag('page_html', '...cleaned HTML snapshot...', 1);
180
208
  conversation.cleanupTag('experience', '...cleaned experience...', 1);
209
+ conversation.cleanupTag('applied_experience', '...cleaned past experience...', 1);
210
+ conversation.cleanupTag('page_ui_map', '...cleaned UI map...', 1);
211
+ conversation.cleanupTag('page_ui_map_overlay', '...cleaned UI overlay...', 1);
212
+ conversation.compactToolResults(3);
181
213
  if (iteration > 1) {
182
214
  const isNewPage = this.previousUrl !== null && this.previousUrl !== currentState.url;
183
215
  let nextStep = '';
@@ -199,6 +231,7 @@ export class Tester extends TaskAgent {
199
231
  const result = await this.provider.invokeConversation(conversation, tools, {
200
232
  maxToolRoundtrips: 5,
201
233
  toolChoice: 'required',
234
+ stopWhen: () => task.hasFinished,
202
235
  });
203
236
  if (!result)
204
237
  throw new Error('Failed to get response from provider');
@@ -273,34 +306,30 @@ export class Tester extends TaskAgent {
273
306
  context.setUserInput(result.message);
274
307
  }
275
308
  : undefined,
276
- observability: {
277
- name: `test: ${task.scenario}`,
278
- agent: 'tester',
279
- sessionId: task.sessionName,
280
- metadata: {
281
- input: {
282
- scenario: task.scenario,
283
- startUrl: task.startUrl,
284
- expected: task.expected,
285
- },
286
- },
287
- },
288
309
  catch: async ({ error, stop }) => {
289
310
  tag('error').log(`Test execution error: ${error}`);
290
- task.addNote(`Execution error: ${error instanceof Error ? error.message : String(error)}`);
311
+ if (!task.hasFinished) {
312
+ task.addNote(`Execution error: ${error instanceof Error ? error.message : String(error)}`);
313
+ }
291
314
  stop();
292
315
  },
293
316
  });
294
317
  if (task.hasFinished)
295
318
  break;
296
319
  const finalState = this.getCurrentState();
297
- const wantsContinue = await this.pilot.finalReview(task, finalState, conversation);
320
+ const wantsContinue = await this.pilot.finalReview(task, finalState, conversation, this.navigator);
298
321
  if (!wantsContinue || task.hasFinished)
299
322
  break;
300
323
  if (extensions >= this.MAX_EXTENSIONS)
301
324
  break;
302
325
  extensions++;
303
326
  tag('info').log(`Pilot extending test (${extensions}/${this.MAX_EXTENSIONS})`);
327
+ conversation.cleanupTag('page_aria', '...trimmed...', 1);
328
+ conversation.cleanupTag('page_html', '...trimmed...', 0);
329
+ conversation.cleanupTag('experience', '...trimmed...', 0);
330
+ conversation.cleanupTag('page_ui_map', '...trimmed...', 0);
331
+ conversation.cleanupTag('page_ui_map_overlay', '...trimmed...', 0);
332
+ conversation.compactToolResults(1);
304
333
  shouldContinue = true;
305
334
  }
306
335
  const finalUrl = this.explorer.getStateManager().getCurrentState()?.url || currentUrl;
@@ -391,7 +420,15 @@ export class Tester extends TaskAgent {
391
420
  this.explorer.clearOtherTabsInfo();
392
421
  }
393
422
  if (isNewUrl) {
394
- const research = await this.researcher.research(currentState);
423
+ let research = '';
424
+ try {
425
+ research = await this.researcher.research(currentState);
426
+ }
427
+ catch (err) {
428
+ if (!(err instanceof ErrorPageError))
429
+ throw err;
430
+ tag('warning').log(`Research skipped: ${err.message}`);
431
+ }
395
432
  this.pageStateHash = currentStateHash;
396
433
  this.pageActionResult = currentState;
397
434
  let uiMapSection = '';
@@ -562,7 +599,7 @@ export class Tester extends TaskAgent {
562
599
  - Use finish() to complete the test, not record(). record() is for intermediate notes.
563
600
  - Call finish(verify) when all goals are achieved — provide an assertion to verify
564
601
  - ONLY call stop() if the scenario itself is completely irrelevant to this page and no expectations can be achieved
565
- - Use reset() to navigate back to the initial page if needed. Do not call it if you are already on the initial page
602
+ - Use reset() ONLY as a last resort when the current page cannot host the scenario. Never reset after a successful flow just because an assertion or milestone did not match verify differently or record() the finding instead. Reset is destructive and does not undo server-side side effects.
566
603
  - Be precise with locators (CSS or XPath)
567
604
  - Each click/type call returns the new page state automatically
568
605
  - Check for success messages from tool calls to verify if expected outcomes are achieved
@@ -678,13 +715,25 @@ export class Tester extends TaskAgent {
678
715
  return {
679
716
  reset: tool({
680
717
  description: dedent `
681
- Reset the testing flow by navigating back to the original page.
682
- Use this when navigated too far from the desired state and
683
- there's no clear path to achieve the expected result. This restarts the
684
- testing flow from a known good state.
718
+ Navigate back to the start URL and discard progress in this iteration.
719
+ Reset is a LAST RESORT. It is destructive any side effects already produced on the
720
+ server (records created, forms submitted) persist and cannot be undone by resetting.
721
+
722
+ Use reset ONLY for:
723
+ - navigation dead-ends where the current page cannot host the scenario
724
+ - irrecoverable errors that leave no actionable path forward
725
+
726
+ Do NOT use reset when:
727
+ - the previous action already succeeded (URL changed, record visible, confirmation shown)
728
+ and an assertion did not match — verify differently, record(), or finish() instead
729
+ - an expectation/milestone does not match app behavior but the flow worked — the work is
730
+ done; resetting just creates duplicates
731
+ - you want to "try again" after submitting a form — submitting again creates a duplicate
732
+
733
+ Pilot will review every reset and may veto it.
685
734
  `,
686
735
  inputSchema: z.object({
687
- reason: z.string().optional().describe('Explanation why you need to navigate'),
736
+ reason: z.string().optional().describe('Explanation why reset is the only option'),
688
737
  }),
689
738
  execute: async ({ reason }) => {
690
739
  if (this.getCurrentState().isInsideIframe) {
@@ -698,6 +747,18 @@ export class Tester extends TaskAgent {
698
747
  action: 'reset',
699
748
  };
700
749
  }
750
+ task.resetCount += 1;
751
+ if (this.pilot) {
752
+ const currentStateForReview = this.getCurrentState();
753
+ const allowed = await this.pilot.reviewReset(task, currentStateForReview, reason ?? '', conversation);
754
+ if (!allowed) {
755
+ return {
756
+ success: false,
757
+ action: 'reset',
758
+ message: 'Reset rejected by Pilot; Continue execution',
759
+ };
760
+ }
761
+ }
701
762
  const explanation = reason ? `${reason} (RESET)` : 'Resetting to initial page';
702
763
  const targetUrl = resetUrl;
703
764
  task.addNote(explanation);
@@ -776,10 +837,13 @@ export class Tester extends TaskAgent {
776
837
  verify: z.string().describe('Specific assertion to verify on the page before finishing (e.g., "New item appears in the list")'),
777
838
  }),
778
839
  execute: async ({ verify }) => {
840
+ if (task.hasFinished) {
841
+ return { success: true, action: 'finish', message: 'already finished' };
842
+ }
779
843
  task.addNote(`Finish requested: ${verify}`);
780
844
  if (this.pilot) {
781
845
  const currentState = this.getCurrentState();
782
- await this.pilot.reviewFinish(task, currentState, conversation);
846
+ await this.pilot.reviewFinish(task, currentState, conversation, this.navigator);
783
847
  if (!task.hasFinished) {
784
848
  return {
785
849
  success: false,
@@ -852,7 +916,7 @@ export class Tester extends TaskAgent {
852
916
  if (input.status !== null && task.isComplete()) {
853
917
  if (this.pilot) {
854
918
  const currentState = this.getCurrentState();
855
- await this.pilot.reviewCompletion(task, currentState, conversation);
919
+ await this.pilot.reviewCompletion(task, currentState, conversation, this.navigator);
856
920
  }
857
921
  else {
858
922
  const hasPassed = task.hasAchievedAny();
@@ -84,7 +84,7 @@ export function createCodeceptJSTools(explorer, task) {
84
84
  activeNote.screenshot = await action.saveScreenshot();
85
85
  }
86
86
  activeNote.commit(TestResult.PASSED);
87
- return successToolResult('click', { ...toolResult, attempts, code: command });
87
+ return successToolResult('click', { ...toolResult, attempts, code: command }, action);
88
88
  }
89
89
  }
90
90
  let disambiguated = null;
@@ -109,7 +109,7 @@ export function createCodeceptJSTools(explorer, task) {
109
109
  activeNote.screenshot = await action.saveScreenshot();
110
110
  }
111
111
  activeNote.commit(TestResult.PASSED);
112
- return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true });
112
+ return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true }, action);
113
113
  }
114
114
  }
115
115
  const toolResult = await ActionResult.fromState(stateManager.getCurrentState()).toToolResult(previousState, commands[0]);
@@ -183,7 +183,7 @@ export function createCodeceptJSTools(explorer, task) {
183
183
  message: `Automatically used type() for "${key}" (not a standard key press)`,
184
184
  code: typeCommand,
185
185
  fallback: true,
186
- });
186
+ }, action);
187
187
  }
188
188
  const errorMsg = `pressKey fallback to type() failed: ${action.lastError?.toString()}`;
189
189
  if (toolResult?.pageDiff?.ariaChanges || toolResult?.pageDiff?.urlChanged) {
@@ -229,7 +229,7 @@ export function createCodeceptJSTools(explorer, task) {
229
229
  ...toolResult,
230
230
  message: `Pressed key: ${key}${modifier ? ` with modifier(s): ${Array.isArray(modifier) ? modifier.join('+') : modifier}` : ''}`,
231
231
  code: pressKeyCommand,
232
- });
232
+ }, action);
233
233
  }
234
234
  const errorMsg = `pressKey() failed: ${action.lastError?.toString()}`;
235
235
  if (toolResult?.pageDiff?.ariaChanges || toolResult?.pageDiff?.urlChanged) {
@@ -252,14 +252,16 @@ export function createCodeceptJSTools(explorer, task) {
252
252
  form: tool({
253
253
  description: dedent `
254
254
  Execute raw CodeceptJS code block with multiple commands.
255
- USE THIS TOOL for all keyboard interactions: I.fillField, I.type, I.pressKey
255
+ USE THIS TOOL for typing text into fields: I.fillField, I.type
256
256
 
257
257
  Follow <actions> from system prompt for available commands.
258
258
  Follow <locator_priority> from system prompt for locator selection.
259
259
 
260
+ I.type(text) types the literal characters of its argument into the focused element.
261
+ To press key combination or special keys (Ctrl, Meta, Esc) use I.pressKey instead.
262
+
260
263
  Use cases:
261
264
  - Typing into input fields (I.fillField, I.type)
262
- - Pressing keyboard keys (I.pressKey)
263
265
  - Working with iframes (switch context with I.switchTo)
264
266
  - Performing multiple form actions in a single batch
265
267
  - Complex interactions requiring sequential commands
@@ -336,7 +338,7 @@ export function createCodeceptJSTools(explorer, task) {
336
338
  commandsExecuted: lines.length,
337
339
  code: codeBlock,
338
340
  suggestion: 'Verify the form was filled in correctly using see() tool. If needed to submit: try click() tool or form() with I.pressKey("Enter").',
339
- });
341
+ }, action);
340
342
  }
341
343
  catch (error) {
342
344
  activeNote.commit(TestResult.FAILED);
@@ -509,7 +511,7 @@ export function createAgentTools({ explorer, researcher, navigator, experienceTr
509
511
  return successToolResult('verify', {
510
512
  message: `Verification passed: ${assertion}`,
511
513
  code: result.successfulCodes.join('\n'),
512
- });
514
+ }, { assertionSteps: result.assertionSteps });
513
515
  }
514
516
  return failedToolResult('verify', `Verification failed: ${assertion}`, {
515
517
  suggestion: 'The assertion could not be verified. Check if the condition is actually present on the page or try a different assertion.',
@@ -834,7 +836,7 @@ export function createAgentTools({ explorer, researcher, navigator, experienceTr
834
836
  tools.learn_experience = tool({
835
837
  description: dedent `
836
838
  Read the full body of a specific experience section listed in <experience>.
837
- The TOC shows entries like "A.1 ## Successful Flow: ...". Pass the fileTag and sectionIndex.
839
+ The TOC shows entries like "A.1 ## FLOW: ..." or "A.2 ## ACTION: ...". Pass the fileTag and sectionIndex.
838
840
  Only call when a TOC entry looks directly relevant to the current step.
839
841
  `,
840
842
  inputSchema: z.object({
@@ -884,8 +886,14 @@ function countAriaChanges(ariaChanges) {
884
886
  const removedCount = removedMatch ? Number.parseInt(removedMatch[1]) : 0;
885
887
  return addedCount + removedCount;
886
888
  }
887
- function successToolResult(action, data) {
889
+ function successToolResult(action, data, source) {
888
890
  const result = { success: true, action, ...data };
891
+ if (source?.playwrightGroupId) {
892
+ result.playwrightGroupId = source.playwrightGroupId;
893
+ }
894
+ if (source?.assertionSteps?.length) {
895
+ result.assertionSteps = source.assertionSteps;
896
+ }
889
897
  if (data?.pageDiff) {
890
898
  let suggestion = PAGE_DIFF_SUGGESTION;
891
899
  const ariaChanges = data.pageDiff.ariaChanges || '';
@@ -5,6 +5,8 @@ const AUTH_HEADERS = ['authorization', 'cookie', 'x-api-key', 'x-csrf-token'];
5
5
  export class RequestStore {
6
6
  capturedRequests = [];
7
7
  madeRequests = [];
8
+ failedRequests = [];
9
+ onFailedListeners = [];
8
10
  outputDir;
9
11
  constructor(outputDir) {
10
12
  this.outputDir = outputDir;
@@ -13,6 +15,23 @@ export class RequestStore {
13
15
  this.capturedRequests.push(result);
14
16
  result.save(this.outputDir);
15
17
  }
18
+ addFailedRequest(result) {
19
+ this.failedRequests.push(result);
20
+ for (const cb of this.onFailedListeners) {
21
+ cb(result);
22
+ }
23
+ }
24
+ getFailedRequests() {
25
+ return this.failedRequests;
26
+ }
27
+ onFailedRequest(cb) {
28
+ this.onFailedListeners.push(cb);
29
+ return () => {
30
+ const idx = this.onFailedListeners.indexOf(cb);
31
+ if (idx !== -1)
32
+ this.onFailedListeners.splice(idx, 1);
33
+ };
34
+ }
16
35
  addMadeRequest(result) {
17
36
  this.madeRequests.push(result);
18
37
  result.save(this.outputDir);
@@ -101,6 +120,7 @@ export class RequestStore {
101
120
  clear() {
102
121
  this.capturedRequests = [];
103
122
  this.madeRequests = [];
123
+ this.failedRequests = [];
104
124
  }
105
125
  }
106
126
  function normalizePathPattern(urlPath) {
@@ -32,15 +32,31 @@ export class XhrCapture {
32
32
  if (resourceType !== 'xhr' && resourceType !== 'fetch')
33
33
  return;
34
34
  const method = request.method();
35
- if (!WRITE_METHODS.has(method))
36
- return;
37
35
  const url = request.url();
38
36
  if (!url.startsWith(this.baseOrigin))
39
37
  return;
38
+ const status = response.status();
39
+ if (status >= 400) {
40
+ const failedUrl = new URL(url);
41
+ const failure = new RequestResult({
42
+ id: generateRequestId(method, failedUrl.pathname, 'fail_'),
43
+ method,
44
+ path: failedUrl.pathname,
45
+ fullUrl: failedUrl.pathname + failedUrl.search,
46
+ requestHeaders: {},
47
+ status,
48
+ statusText: response.statusText(),
49
+ responseHeaders: {},
50
+ timing: 0,
51
+ timestamp: new Date(),
52
+ });
53
+ this.store.addFailedRequest(failure);
54
+ }
55
+ if (!WRITE_METHODS.has(method))
56
+ return;
40
57
  const contentType = response.headers()['content-type'] || '';
41
58
  if (!JSON_CONTENT_TYPES.test(contentType))
42
59
  return;
43
- const status = response.status();
44
60
  if (status === 304)
45
61
  return;
46
62
  const parsedUrl = new URL(url);
@@ -2,7 +2,9 @@ import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from '
2
2
  import path from 'node:path';
3
3
  import { chromium, firefox, webkit } from 'playwright-core';
4
4
  import { ConfigParser } from './config.js';
5
- import { log, tag } from './utils/logger.js';
5
+ import { getCliName } from "./utils/cli-name.js";
6
+ import { log } from './utils/logger.js';
7
+ import { printNextSteps } from "./utils/next-steps.js";
6
8
  const ENDPOINT_FILENAME = '.browser-endpoint';
7
9
  function getEndpointFilePath() {
8
10
  const configParser = ConfigParser.getInstance();
@@ -50,8 +52,19 @@ async function launchServer(opts) {
50
52
  const wsEndpoint = server.wsEndpoint();
51
53
  writeEndpoint(wsEndpoint);
52
54
  log(`Browser server started: ${browserName} (${opts.show ? 'headed' : 'headless'})`);
53
- tag('info').log(`WebSocket endpoint: ${wsEndpoint}`);
54
- tag('info').log(`Endpoint saved to: ${getEndpointFilePath()}`);
55
+ const cli = getCliName();
56
+ const sections = [
57
+ {
58
+ label: 'Browser server',
59
+ path: getEndpointFilePath(),
60
+ commands: [
61
+ { label: 'Endpoint', command: wsEndpoint },
62
+ { label: 'Status', command: `${cli} browser status` },
63
+ { label: 'Stop', command: `${cli} browser stop` },
64
+ ],
65
+ },
66
+ ];
67
+ printNextSteps(sections);
55
68
  return server;
56
69
  }
57
70
  async function getAliveEndpoint() {
@@ -105,7 +105,7 @@ export class CommandHandler {
105
105
  this.runningCommands.add(command.name);
106
106
  try {
107
107
  await command.execute(argsString);
108
- command.suggestions.forEach((s) => tag('step').log(s));
108
+ command.printSuggestions();
109
109
  }
110
110
  catch (error) {
111
111
  if (error?.name === 'AbortError')