explorbot 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/README.md +37 -1
  2. package/bin/explorbot-cli.ts +27 -18
  3. package/dist/bin/explorbot-cli.js +26 -18
  4. package/dist/package.json +3 -3
  5. package/dist/rules/navigator/output.md +9 -0
  6. package/dist/rules/navigator/verification-actions.md +2 -0
  7. package/dist/src/action-result.js +23 -1
  8. package/dist/src/action.js +51 -42
  9. package/dist/src/ai/bosun.js +11 -1
  10. package/dist/src/ai/conversation.js +39 -0
  11. package/dist/src/ai/historian/codeceptjs.js +109 -0
  12. package/dist/src/ai/historian/experience.js +321 -0
  13. package/dist/src/ai/historian/mixin.js +2 -0
  14. package/dist/src/ai/historian/playwright.js +145 -0
  15. package/dist/src/ai/historian/screencast.js +121 -0
  16. package/dist/src/ai/historian/utils.js +18 -0
  17. package/dist/src/ai/historian.js +21 -405
  18. package/dist/src/ai/navigator.js +82 -29
  19. package/dist/src/ai/pilot.js +232 -13
  20. package/dist/src/ai/planner.js +29 -9
  21. package/dist/src/ai/provider.js +54 -17
  22. package/dist/src/ai/researcher.js +41 -32
  23. package/dist/src/ai/rules.js +26 -14
  24. package/dist/src/ai/tester.js +90 -26
  25. package/dist/src/ai/tools.js +13 -7
  26. package/dist/src/browser-server.js +16 -3
  27. package/dist/src/commands/add-rule-command.js +11 -8
  28. package/dist/src/commands/clean-command.js +2 -1
  29. package/dist/src/commands/explore-command.js +43 -15
  30. package/dist/src/commands/init-command.js +9 -8
  31. package/dist/src/commands/plan-command.js +32 -0
  32. package/dist/src/commands/plan-save-command.js +19 -7
  33. package/dist/src/commands/rerun-command.js +4 -0
  34. package/dist/src/components/App.js +15 -5
  35. package/dist/src/execution-controller.js +13 -2
  36. package/dist/src/experience-tracker.js +20 -64
  37. package/dist/src/explorbot.js +8 -8
  38. package/dist/src/explorer.js +11 -3
  39. package/dist/src/observability.js +50 -99
  40. package/dist/src/playwright-recorder.js +309 -0
  41. package/dist/src/reporter.js +4 -1
  42. package/dist/src/test-plan.js +12 -0
  43. package/dist/src/utils/aria.js +37 -1
  44. package/dist/src/utils/error-page.js +20 -7
  45. package/dist/src/utils/next-steps.js +37 -0
  46. package/dist/src/utils/strings.js +15 -0
  47. package/package.json +3 -3
  48. package/rules/navigator/output.md +9 -0
  49. package/rules/navigator/verification-actions.md +2 -0
  50. package/src/action-result.ts +26 -1
  51. package/src/action.ts +49 -41
  52. package/src/ai/bosun.ts +11 -1
  53. package/src/ai/conversation.ts +37 -0
  54. package/src/ai/historian/codeceptjs.ts +130 -0
  55. package/src/ai/historian/experience.ts +384 -0
  56. package/src/ai/historian/mixin.ts +4 -0
  57. package/src/ai/historian/playwright.ts +169 -0
  58. package/src/ai/historian/screencast.ts +133 -0
  59. package/src/ai/historian/utils.ts +23 -0
  60. package/src/ai/historian.ts +37 -473
  61. package/src/ai/navigator.ts +82 -29
  62. package/src/ai/pilot.ts +237 -14
  63. package/src/ai/planner.ts +29 -9
  64. package/src/ai/provider.ts +51 -17
  65. package/src/ai/researcher.ts +45 -33
  66. package/src/ai/rules.ts +27 -14
  67. package/src/ai/tester.ts +94 -26
  68. package/src/ai/tools.ts +47 -25
  69. package/src/browser-server.ts +17 -3
  70. package/src/commands/add-rule-command.ts +11 -7
  71. package/src/commands/clean-command.ts +2 -1
  72. package/src/commands/explore-command.ts +46 -14
  73. package/src/commands/init-command.ts +9 -8
  74. package/src/commands/plan-command.ts +35 -0
  75. package/src/commands/plan-save-command.ts +18 -7
  76. package/src/commands/rerun-command.ts +5 -0
  77. package/src/components/App.tsx +16 -5
  78. package/src/config.ts +12 -1
  79. package/src/execution-controller.ts +14 -3
  80. package/src/experience-tracker.ts +21 -72
  81. package/src/explorbot.ts +8 -8
  82. package/src/explorer.ts +13 -3
  83. package/src/observability.ts +50 -109
  84. package/src/playwright-recorder.ts +305 -0
  85. package/src/reporter.ts +4 -1
  86. package/src/test-plan.ts +12 -0
  87. package/src/utils/aria.ts +38 -1
  88. package/src/utils/error-page.ts +22 -7
  89. package/src/utils/next-steps.ts +51 -0
  90. package/src/utils/strings.ts +17 -0
@@ -1,6 +1,6 @@
1
1
  import { LangfuseSpanProcessor } from '@langfuse/otel';
2
2
  import { NodeSDK } from '@opentelemetry/sdk-node';
3
- import { generateObject, generateText } from 'ai';
3
+ import { generateObject, generateText, stepCountIs } from 'ai';
4
4
  import { clearActivity, setActivity } from "../activity.js";
5
5
  import { executionController } from "../execution-controller.js";
6
6
  import { Observability } from "../observability.js";
@@ -16,6 +16,20 @@ class AiError extends Error {
16
16
  }
17
17
  export class ContextLengthError extends Error {
18
18
  }
19
+ function rejectAfterIdle(ms, signal) {
20
+ return new Promise((_, reject) => {
21
+ const tick = () => {
22
+ if (signal.cancelled)
23
+ return;
24
+ if (executionController.isAwaitingInput()) {
25
+ setTimeout(tick, ms);
26
+ return;
27
+ }
28
+ reject(new Error('AI request timeout'));
29
+ };
30
+ setTimeout(tick, ms);
31
+ });
32
+ }
19
33
  export class Provider {
20
34
  config;
21
35
  telemetryEnabled = false;
@@ -247,13 +261,19 @@ export class Provider {
247
261
  promptLog('Available tools:', toolNames);
248
262
  promptLog(messages[messages.length - 1].content);
249
263
  const telemetry = this.getTelemetry(options);
264
+ const maxRoundtrips = options.maxToolRoundtrips ?? 5;
265
+ const extraStop = options.stopWhen;
266
+ const stopConditions = [stepCountIs(maxRoundtrips)];
267
+ if (extraStop)
268
+ stopConditions.push(extraStop);
269
+ const { stopWhen: _ignoredStopWhen, ...optionsWithoutStop } = options;
250
270
  const config = this.mergeProviderOptions({
251
271
  tools,
252
272
  maxTokens: 16384,
253
- maxToolRoundtrips: options.maxToolRoundtrips ?? 5,
254
273
  toolChoice: 'auto',
255
274
  ...(this.config.config || {}),
256
- ...options,
275
+ ...optionsWithoutStop,
276
+ stopWhen: stopConditions,
257
277
  ...(telemetry ? { experimental_telemetry: telemetry } : {}),
258
278
  model,
259
279
  abortSignal: executionController.getAbortSignal(),
@@ -261,13 +281,24 @@ export class Provider {
261
281
  try {
262
282
  const response = await withRetry(async () => {
263
283
  const timeout = config.timeout || 30000;
264
- return (await Promise.race([
265
- generateText({
266
- messages,
267
- ...config,
268
- }),
269
- new Promise((_, reject) => setTimeout(() => reject(new Error('AI request timeout')), timeout)),
270
- ]));
284
+ const cancel = { cancelled: false };
285
+ try {
286
+ const result = (await Promise.race([
287
+ generateText({
288
+ messages,
289
+ ...config,
290
+ }),
291
+ rejectAfterIdle(timeout, cancel),
292
+ ]));
293
+ const hasToolCall = (result.toolCalls?.length || 0) > 0;
294
+ if (!result.text && !hasToolCall && result.finishReason === 'length') {
295
+ throw new ContextLengthError('AI response empty: output truncated at maxTokens. Increase maxTokens in config or use a model with higher output capacity.');
296
+ }
297
+ return result;
298
+ }
299
+ finally {
300
+ cancel.cancelled = true;
301
+ }
271
302
  }, this.getRetryOptions(options));
272
303
  clearActivity();
273
304
  // Log tool usage summary
@@ -330,13 +361,19 @@ export class Provider {
330
361
  promptLog(messages[messages.length - 1].content);
331
362
  const response = await withRetry(async () => {
332
363
  const timeout = config.timeout || 30000;
333
- return (await Promise.race([
334
- generateObject({
335
- messages,
336
- ...config,
337
- }),
338
- new Promise((_, reject) => setTimeout(() => reject(new Error('AI request timeout')), timeout)),
339
- ]));
364
+ const cancel = { cancelled: false };
365
+ try {
366
+ return (await Promise.race([
367
+ generateObject({
368
+ messages,
369
+ ...config,
370
+ }),
371
+ rejectAfterIdle(timeout, cancel),
372
+ ]));
373
+ }
374
+ finally {
375
+ cancel.cancelled = true;
376
+ }
340
377
  }, this.getRetryOptions(options));
341
378
  clearActivity();
342
379
  responseLog(response.object);
@@ -6,12 +6,11 @@ import { executionController } from "../execution-controller.js";
6
6
  import { Observability } from "../observability.js";
7
7
  import { Stats } from "../stats.js";
8
8
  import { diffAriaSnapshots } from "../utils/aria.js";
9
- import { ErrorPageError, isErrorPage } from "../utils/error-page.js";
9
+ import { ErrorPageError, detectPageCondition } from "../utils/error-page.js";
10
10
  import { HooksRunner } from "../utils/hooks-runner.js";
11
11
  import { isBodyEmpty } from "../utils/html.js";
12
12
  import { createDebug, pluralize, tag } from '../utils/logger.js';
13
13
  import { mdq } from "../utils/markdown-query.js";
14
- import { withRetry } from "../utils/retry.js";
15
14
  import { RulesLoader } from "../utils/rules-loader.js";
16
15
  import { ContextLengthError } from './provider.js';
17
16
  import { findSimilarResearch, getCachedResearch, saveResearch } from "./researcher/cache.js";
@@ -98,11 +97,15 @@ export class Researcher extends ResearcherBase {
98
97
  const annotatedElements = await this.explorer.annotateElements();
99
98
  debugLog(`Annotated ${annotatedElements.length} interactive elements with eidx`);
100
99
  this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot && this.provider.hasVision() });
101
- if (isErrorPage(this.actionResult)) {
102
- const recovered = await this.waitForPageLoad(screenshot);
103
- if (!recovered) {
104
- tag('warning').log(`Detected error page at ${state.url}`);
105
- throw new ErrorPageError(state.url, this.actionResult.title);
100
+ const condition = detectPageCondition(this.actionResult);
101
+ if (condition === 'error') {
102
+ tag('warning').log(`Detected error page at ${state.url}`);
103
+ throw new ErrorPageError(state.url, this.actionResult.title);
104
+ }
105
+ if (condition === 'loading') {
106
+ const settled = await this.waitUntilSettled(screenshot);
107
+ if (!settled) {
108
+ tag('warning').log(`Page at ${state.url} did not finish loading within timeout, continuing with best-effort research`);
106
109
  }
107
110
  }
108
111
  debugLog('Researching web page:', this.actionResult.url);
@@ -285,41 +288,47 @@ export class Researcher extends ResearcherBase {
285
288
  }
286
289
  return;
287
290
  }
288
- if (isEmpty) {
289
- debugLog('HTML body is empty, refreshing page');
290
- tag('step').log('Page body is empty, refreshing...');
291
- }
292
- else {
293
- debugLog('Not on current state, navigating to URL');
294
- tag('step').log('Navigating to URL...');
291
+ if (isEmpty && isOnCurrentState) {
292
+ debugLog('HTML body empty on current URL, waiting for content');
293
+ tag('step').log('Page body is empty, waiting for content...');
294
+ await this.waitUntilSettled(screenshot ?? false);
295
+ return;
295
296
  }
297
+ debugLog('Not on current state, navigating to URL');
298
+ tag('step').log('Navigating to URL...');
296
299
  await this.explorer.visit(url);
297
300
  this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot ?? false });
298
301
  }
299
- async waitForPageLoad(screenshot) {
302
+ async waitUntilSettled(screenshot) {
300
303
  const errorPageTimeout = this.explorer.getConfig().ai?.agents?.researcher?.errorPageTimeout ?? 10;
301
304
  if (errorPageTimeout <= 0)
302
305
  return false;
306
+ const page = this.explorer.playwrightHelper.page;
307
+ const includeScreenshot = screenshot && this.provider.hasVision();
303
308
  try {
304
- await withRetry(async () => {
305
- await this.explorer.annotateElements();
306
- this.actionResult = await this.explorer.createAction().capturePageState({
307
- includeScreenshot: screenshot && this.provider.hasVision(),
308
- });
309
- if (isErrorPage(this.actionResult))
310
- throw new Error('Error page detected');
311
- }, {
312
- maxAttempts: Math.ceil(errorPageTimeout / 3) + 1,
313
- baseDelay: 1000,
314
- maxDelay: 5000,
315
- backoffMultiplier: 2,
316
- retryCondition: (e) => e.message === 'Error page detected',
317
- });
318
- return true;
309
+ await page?.waitForLoadState('networkidle', { timeout: errorPageTimeout * 1000 });
319
310
  }
320
- catch {
321
- return false;
311
+ catch { }
312
+ await this.explorer.annotateElements();
313
+ this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
314
+ let condition = detectPageCondition(this.actionResult);
315
+ if (condition === 'error') {
316
+ throw new ErrorPageError(this.actionResult.url, this.actionResult.title);
317
+ }
318
+ if (condition === 'ok')
319
+ return true;
320
+ for (let i = 0; i < 3; i++) {
321
+ await new Promise((r) => setTimeout(r, 1000));
322
+ await this.explorer.annotateElements();
323
+ this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
324
+ condition = detectPageCondition(this.actionResult);
325
+ if (condition === 'error') {
326
+ throw new ErrorPageError(this.actionResult.url, this.actionResult.title);
327
+ }
328
+ if (condition === 'ok')
329
+ return true;
322
330
  }
331
+ return false;
323
332
  }
324
333
  getConfiguredSections() {
325
334
  const configSections = this.explorer.getConfig().ai?.agents?.researcher?.sections;
@@ -161,6 +161,20 @@ export const focusedElementRule = dedent `
161
161
  If focus is on wrong element, click the correct field first.
162
162
  </focused_element_actions>
163
163
  `;
164
+ export const unexpectedPopupRule = dedent `
165
+ <unexpected_popup_rule>
166
+ If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
167
+ If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
168
+ If a click error mentions "intercepts pointer events", another element is covering the target — dismiss it first.
169
+ If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
170
+
171
+ Dismiss strategy (try in order):
172
+ 1. I.clickXY(0, 0) — click outside the popup to close it
173
+ 2. I.pressKey('Escape') — press Escape to dismiss
174
+ 3. I.click('Cancel') — click Cancel button if present
175
+ 4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
176
+ </unexpected_popup_rule>
177
+ `;
164
178
  export const sectionContextRule = dedent `
165
179
  <section_context_rule>
166
180
  Context parameter is DEFAULT for all interactions. ALWAYS use container from UI map sections unless locator is XPath or unique ID.
@@ -186,17 +200,7 @@ export const sectionContextRule = dedent `
186
200
  - Locator is a unique ID (#specific-element)
187
201
  </section_context_rule>
188
202
 
189
- <unexpected_popup_rule>
190
- If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
191
- If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
192
- If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
193
-
194
- Dismiss strategy (try in order):
195
- 1. I.clickXY(0, 0) — click outside the popup to close it
196
- 2. I.pressKey('Escape') — press Escape to dismiss
197
- 3. I.click('Cancel') — click Cancel button if present
198
- 4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
199
- </unexpected_popup_rule>
203
+ ${unexpectedPopupRule}
200
204
  `;
201
205
  export function multipleTabsRule(tabs) {
202
206
  const tabsList = tabs.map((tab, i) => ` ${i + 1}. ${tab.title} - ${tab.url}`).join('\n');
@@ -265,12 +269,19 @@ export const actionRule = dedent `
265
269
  I.fillField('Username', 'John', '.login-form'); // fills Username inside .login-form
266
270
  I.fillField('Username', 'John'); // fills the field located by name or placeholder or label "Username" with the text "John"
267
271
  I.fillField('//user/input', 'John'); // fills the field located by XPath "//user/input" with the text "John"
268
- </example>
272
+ I.fillField('Description', 'Hello world', '.editor'); // works for rich text / code editors too
273
+ </example>
274
+
275
+ I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors
276
+ (Monaco, ProseMirror, CodeMirror, TipTap, Quill, Draft.js, Slate, etc.) transparently.
277
+ ALWAYS use I.fillField for rich editors — target the editor container or its nearest label/heading with a normal locator.
278
+ Do NOT open the editor with raw JS (executeScript, page.evaluate), do NOT dispatch synthetic events,
279
+ do NOT call the editor's own API (monaco.editor.setValue, view.dispatch, etc.) to write text.
269
280
 
270
281
  ### I.type
271
282
 
272
- Types text into the currently focused element. Use when fillField doesn't work,
273
- for instance, for highly customized input fields like Monaco editors or rich text editors.
283
+ Types text into the currently focused element. Use only when there is no locator you can pass to I.fillField
284
+ e.g. the target is implicit (a just-opened command palette, an autocomplete that steals focus, a canvas-based surface).
274
285
 
275
286
  I.type(<text>)
276
287
 
@@ -282,6 +293,7 @@ export const actionRule = dedent `
282
293
  DOES NOT receive any locator, just text to type.
283
294
  NEVER write: I.type('text', locator) or I.type('text', {locator: '...'}) — this is INVALID.
284
295
  To type into a specific field: use I.fillField(locator, text) or I.click(locator) then I.type(text).
296
+ Do NOT reach for I.type just because the target looks like a rich editor — I.fillField handles those.
285
297
 
286
298
  ### I.pressKey
287
299
 
@@ -6,9 +6,11 @@ import { z } from 'zod';
6
6
  import { ActionResult } from "../action-result.js";
7
7
  import { setActivity } from "../activity.js";
8
8
  import { ConfigParser } from "../config.js";
9
+ import { Observability } from "../observability.js";
9
10
  import { Stats } from "../stats.js";
10
11
  import { TestResult } from "../test-plan.js";
11
12
  import { detectFocusArea, extractFocusedElement } from "../utils/aria.js";
13
+ import { ErrorPageError } from "../utils/error-page.js";
12
14
  import { HooksRunner } from "../utils/hooks-runner.js";
13
15
  import { createDebug, tag } from "../utils/logger.js";
14
16
  import { loop } from "../utils/loop.js";
@@ -121,10 +123,34 @@ export class Tester extends TaskAgent {
121
123
  // Note: Markdown saving functionality removed from Conversation class
122
124
  const initialPrompt = await this.buildTestPrompt(task, initialState);
123
125
  conversation.addUserText(initialPrompt);
126
+ return await Observability.run(`test: ${task.scenario}`, {
127
+ sessionId: task.sessionName,
128
+ tags: ['tester'],
129
+ input: {
130
+ scenario: task.scenario,
131
+ startUrl: task.startUrl,
132
+ expected: task.expected,
133
+ },
134
+ }, async () => this.runTestSession(task, initialState, conversation, { offFailedRequest, page, onPageError, onConsoleMessage }));
135
+ }
136
+ async runTestSession(task, initialState, conversation, handlers) {
137
+ const { offFailedRequest, page, onPageError, onConsoleMessage } = handlers;
124
138
  if (this.pilot) {
125
- const plan = await this.pilot.planTest(task, initialState);
126
- if (plan) {
127
- conversation.addUserText(`Pilot's test plan:\n${plan}\n\nFollow this plan while executing the test.`);
139
+ try {
140
+ const plan = await this.pilot.planTest(task, initialState);
141
+ if (plan) {
142
+ conversation.addUserText(`Pilot's test plan:\n${plan}\n\nFollow this plan while executing the test.`);
143
+ }
144
+ }
145
+ catch (err) {
146
+ const message = err instanceof Error ? err.message : String(err);
147
+ tag('error').log(`Pilot planning failed: ${message}`);
148
+ task.addNote(`Planning failed: ${message}`, TestResult.FAILED);
149
+ task.finish(TestResult.FAILED);
150
+ offFailedRequest?.();
151
+ page?.off('pageerror', onPageError);
152
+ page?.off('console', onConsoleMessage);
153
+ return { success: false };
128
154
  }
129
155
  }
130
156
  debugLog('Starting test execution with tools');
@@ -135,6 +161,8 @@ export class Tester extends TaskAgent {
135
161
  const currentUrl = this.explorer.getStateManager().getCurrentState()?.url || task.startUrl || '';
136
162
  await this.hooksRunner.runBeforeHook('tester', currentUrl);
137
163
  const offStateChange = this.explorer.getStateManager().onStateChange((event) => {
164
+ if (task.hasFinished)
165
+ return;
138
166
  if (event.toState?.url === event.fromState?.url)
139
167
  return;
140
168
  task.addNote(`Navigated to ${event.toState?.url}`, TestResult.PASSED);
@@ -178,6 +206,10 @@ export class Tester extends TaskAgent {
178
206
  conversation.cleanupTag('page_aria', '...cleaned aria snapshot...', 2);
179
207
  conversation.cleanupTag('page_html', '...cleaned HTML snapshot...', 1);
180
208
  conversation.cleanupTag('experience', '...cleaned experience...', 1);
209
+ conversation.cleanupTag('applied_experience', '...cleaned past experience...', 1);
210
+ conversation.cleanupTag('page_ui_map', '...cleaned UI map...', 1);
211
+ conversation.cleanupTag('page_ui_map_overlay', '...cleaned UI overlay...', 1);
212
+ conversation.compactToolResults(3);
181
213
  if (iteration > 1) {
182
214
  const isNewPage = this.previousUrl !== null && this.previousUrl !== currentState.url;
183
215
  let nextStep = '';
@@ -199,6 +231,7 @@ export class Tester extends TaskAgent {
199
231
  const result = await this.provider.invokeConversation(conversation, tools, {
200
232
  maxToolRoundtrips: 5,
201
233
  toolChoice: 'required',
234
+ stopWhen: () => task.hasFinished,
202
235
  });
203
236
  if (!result)
204
237
  throw new Error('Failed to get response from provider');
@@ -273,34 +306,30 @@ export class Tester extends TaskAgent {
273
306
  context.setUserInput(result.message);
274
307
  }
275
308
  : undefined,
276
- observability: {
277
- name: `test: ${task.scenario}`,
278
- agent: 'tester',
279
- sessionId: task.sessionName,
280
- metadata: {
281
- input: {
282
- scenario: task.scenario,
283
- startUrl: task.startUrl,
284
- expected: task.expected,
285
- },
286
- },
287
- },
288
309
  catch: async ({ error, stop }) => {
289
310
  tag('error').log(`Test execution error: ${error}`);
290
- task.addNote(`Execution error: ${error instanceof Error ? error.message : String(error)}`);
311
+ if (!task.hasFinished) {
312
+ task.addNote(`Execution error: ${error instanceof Error ? error.message : String(error)}`);
313
+ }
291
314
  stop();
292
315
  },
293
316
  });
294
317
  if (task.hasFinished)
295
318
  break;
296
319
  const finalState = this.getCurrentState();
297
- const wantsContinue = await this.pilot.finalReview(task, finalState, conversation);
320
+ const wantsContinue = await this.pilot.finalReview(task, finalState, conversation, this.navigator);
298
321
  if (!wantsContinue || task.hasFinished)
299
322
  break;
300
323
  if (extensions >= this.MAX_EXTENSIONS)
301
324
  break;
302
325
  extensions++;
303
326
  tag('info').log(`Pilot extending test (${extensions}/${this.MAX_EXTENSIONS})`);
327
+ conversation.cleanupTag('page_aria', '...trimmed...', 1);
328
+ conversation.cleanupTag('page_html', '...trimmed...', 0);
329
+ conversation.cleanupTag('experience', '...trimmed...', 0);
330
+ conversation.cleanupTag('page_ui_map', '...trimmed...', 0);
331
+ conversation.cleanupTag('page_ui_map_overlay', '...trimmed...', 0);
332
+ conversation.compactToolResults(1);
304
333
  shouldContinue = true;
305
334
  }
306
335
  const finalUrl = this.explorer.getStateManager().getCurrentState()?.url || currentUrl;
@@ -391,7 +420,15 @@ export class Tester extends TaskAgent {
391
420
  this.explorer.clearOtherTabsInfo();
392
421
  }
393
422
  if (isNewUrl) {
394
- const research = await this.researcher.research(currentState);
423
+ let research = '';
424
+ try {
425
+ research = await this.researcher.research(currentState);
426
+ }
427
+ catch (err) {
428
+ if (!(err instanceof ErrorPageError))
429
+ throw err;
430
+ tag('warning').log(`Research skipped: ${err.message}`);
431
+ }
395
432
  this.pageStateHash = currentStateHash;
396
433
  this.pageActionResult = currentState;
397
434
  let uiMapSection = '';
@@ -562,7 +599,7 @@ export class Tester extends TaskAgent {
562
599
  - Use finish() to complete the test, not record(). record() is for intermediate notes.
563
600
  - Call finish(verify) when all goals are achieved — provide an assertion to verify
564
601
  - ONLY call stop() if the scenario itself is completely irrelevant to this page and no expectations can be achieved
565
- - Use reset() to navigate back to the initial page if needed. Do not call it if you are already on the initial page
602
+ - Use reset() ONLY as a last resort when the current page cannot host the scenario. Never reset after a successful flow just because an assertion or milestone did not match verify differently or record() the finding instead. Reset is destructive and does not undo server-side side effects.
566
603
  - Be precise with locators (CSS or XPath)
567
604
  - Each click/type call returns the new page state automatically
568
605
  - Check for success messages from tool calls to verify if expected outcomes are achieved
@@ -678,13 +715,25 @@ export class Tester extends TaskAgent {
678
715
  return {
679
716
  reset: tool({
680
717
  description: dedent `
681
- Reset the testing flow by navigating back to the original page.
682
- Use this when navigated too far from the desired state and
683
- there's no clear path to achieve the expected result. This restarts the
684
- testing flow from a known good state.
718
+ Navigate back to the start URL and discard progress in this iteration.
719
+ Reset is a LAST RESORT. It is destructive any side effects already produced on the
720
+ server (records created, forms submitted) persist and cannot be undone by resetting.
721
+
722
+ Use reset ONLY for:
723
+ - navigation dead-ends where the current page cannot host the scenario
724
+ - irrecoverable errors that leave no actionable path forward
725
+
726
+ Do NOT use reset when:
727
+ - the previous action already succeeded (URL changed, record visible, confirmation shown)
728
+ and an assertion did not match — verify differently, record(), or finish() instead
729
+ - an expectation/milestone does not match app behavior but the flow worked — the work is
730
+ done; resetting just creates duplicates
731
+ - you want to "try again" after submitting a form — submitting again creates a duplicate
732
+
733
+ Pilot will review every reset and may veto it.
685
734
  `,
686
735
  inputSchema: z.object({
687
- reason: z.string().optional().describe('Explanation why you need to navigate'),
736
+ reason: z.string().optional().describe('Explanation why reset is the only option'),
688
737
  }),
689
738
  execute: async ({ reason }) => {
690
739
  if (this.getCurrentState().isInsideIframe) {
@@ -698,6 +747,18 @@ export class Tester extends TaskAgent {
698
747
  action: 'reset',
699
748
  };
700
749
  }
750
+ task.resetCount += 1;
751
+ if (this.pilot) {
752
+ const currentStateForReview = this.getCurrentState();
753
+ const allowed = await this.pilot.reviewReset(task, currentStateForReview, reason ?? '', conversation);
754
+ if (!allowed) {
755
+ return {
756
+ success: false,
757
+ action: 'reset',
758
+ message: 'Reset rejected by Pilot; Continue execution',
759
+ };
760
+ }
761
+ }
701
762
  const explanation = reason ? `${reason} (RESET)` : 'Resetting to initial page';
702
763
  const targetUrl = resetUrl;
703
764
  task.addNote(explanation);
@@ -776,10 +837,13 @@ export class Tester extends TaskAgent {
776
837
  verify: z.string().describe('Specific assertion to verify on the page before finishing (e.g., "New item appears in the list")'),
777
838
  }),
778
839
  execute: async ({ verify }) => {
840
+ if (task.hasFinished) {
841
+ return { success: true, action: 'finish', message: 'already finished' };
842
+ }
779
843
  task.addNote(`Finish requested: ${verify}`);
780
844
  if (this.pilot) {
781
845
  const currentState = this.getCurrentState();
782
- await this.pilot.reviewFinish(task, currentState, conversation);
846
+ await this.pilot.reviewFinish(task, currentState, conversation, this.navigator);
783
847
  if (!task.hasFinished) {
784
848
  return {
785
849
  success: false,
@@ -852,7 +916,7 @@ export class Tester extends TaskAgent {
852
916
  if (input.status !== null && task.isComplete()) {
853
917
  if (this.pilot) {
854
918
  const currentState = this.getCurrentState();
855
- await this.pilot.reviewCompletion(task, currentState, conversation);
919
+ await this.pilot.reviewCompletion(task, currentState, conversation, this.navigator);
856
920
  }
857
921
  else {
858
922
  const hasPassed = task.hasAchievedAny();
@@ -84,7 +84,7 @@ export function createCodeceptJSTools(explorer, task) {
84
84
  activeNote.screenshot = await action.saveScreenshot();
85
85
  }
86
86
  activeNote.commit(TestResult.PASSED);
87
- return successToolResult('click', { ...toolResult, attempts, code: command });
87
+ return successToolResult('click', { ...toolResult, attempts, code: command }, action);
88
88
  }
89
89
  }
90
90
  let disambiguated = null;
@@ -109,7 +109,7 @@ export function createCodeceptJSTools(explorer, task) {
109
109
  activeNote.screenshot = await action.saveScreenshot();
110
110
  }
111
111
  activeNote.commit(TestResult.PASSED);
112
- return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true });
112
+ return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true }, action);
113
113
  }
114
114
  }
115
115
  const toolResult = await ActionResult.fromState(stateManager.getCurrentState()).toToolResult(previousState, commands[0]);
@@ -183,7 +183,7 @@ export function createCodeceptJSTools(explorer, task) {
183
183
  message: `Automatically used type() for "${key}" (not a standard key press)`,
184
184
  code: typeCommand,
185
185
  fallback: true,
186
- });
186
+ }, action);
187
187
  }
188
188
  const errorMsg = `pressKey fallback to type() failed: ${action.lastError?.toString()}`;
189
189
  if (toolResult?.pageDiff?.ariaChanges || toolResult?.pageDiff?.urlChanged) {
@@ -229,7 +229,7 @@ export function createCodeceptJSTools(explorer, task) {
229
229
  ...toolResult,
230
230
  message: `Pressed key: ${key}${modifier ? ` with modifier(s): ${Array.isArray(modifier) ? modifier.join('+') : modifier}` : ''}`,
231
231
  code: pressKeyCommand,
232
- });
232
+ }, action);
233
233
  }
234
234
  const errorMsg = `pressKey() failed: ${action.lastError?.toString()}`;
235
235
  if (toolResult?.pageDiff?.ariaChanges || toolResult?.pageDiff?.urlChanged) {
@@ -338,7 +338,7 @@ export function createCodeceptJSTools(explorer, task) {
338
338
  commandsExecuted: lines.length,
339
339
  code: codeBlock,
340
340
  suggestion: 'Verify the form was filled in correctly using see() tool. If needed to submit: try click() tool or form() with I.pressKey("Enter").',
341
- });
341
+ }, action);
342
342
  }
343
343
  catch (error) {
344
344
  activeNote.commit(TestResult.FAILED);
@@ -511,7 +511,7 @@ export function createAgentTools({ explorer, researcher, navigator, experienceTr
511
511
  return successToolResult('verify', {
512
512
  message: `Verification passed: ${assertion}`,
513
513
  code: result.successfulCodes.join('\n'),
514
- });
514
+ }, { assertionSteps: result.assertionSteps });
515
515
  }
516
516
  return failedToolResult('verify', `Verification failed: ${assertion}`, {
517
517
  suggestion: 'The assertion could not be verified. Check if the condition is actually present on the page or try a different assertion.',
@@ -886,8 +886,14 @@ function countAriaChanges(ariaChanges) {
886
886
  const removedCount = removedMatch ? Number.parseInt(removedMatch[1]) : 0;
887
887
  return addedCount + removedCount;
888
888
  }
889
- function successToolResult(action, data) {
889
+ function successToolResult(action, data, source) {
890
890
  const result = { success: true, action, ...data };
891
+ if (source?.playwrightGroupId) {
892
+ result.playwrightGroupId = source.playwrightGroupId;
893
+ }
894
+ if (source?.assertionSteps?.length) {
895
+ result.assertionSteps = source.assertionSteps;
896
+ }
891
897
  if (data?.pageDiff) {
892
898
  let suggestion = PAGE_DIFF_SUGGESTION;
893
899
  const ariaChanges = data.pageDiff.ariaChanges || '';
@@ -2,7 +2,9 @@ import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from '
2
2
  import path from 'node:path';
3
3
  import { chromium, firefox, webkit } from 'playwright-core';
4
4
  import { ConfigParser } from './config.js';
5
- import { log, tag } from './utils/logger.js';
5
+ import { getCliName } from "./utils/cli-name.js";
6
+ import { log } from './utils/logger.js';
7
+ import { printNextSteps } from "./utils/next-steps.js";
6
8
  const ENDPOINT_FILENAME = '.browser-endpoint';
7
9
  function getEndpointFilePath() {
8
10
  const configParser = ConfigParser.getInstance();
@@ -50,8 +52,19 @@ async function launchServer(opts) {
50
52
  const wsEndpoint = server.wsEndpoint();
51
53
  writeEndpoint(wsEndpoint);
52
54
  log(`Browser server started: ${browserName} (${opts.show ? 'headed' : 'headless'})`);
53
- tag('info').log(`WebSocket endpoint: ${wsEndpoint}`);
54
- tag('info').log(`Endpoint saved to: ${getEndpointFilePath()}`);
55
+ const cli = getCliName();
56
+ const sections = [
57
+ {
58
+ label: 'Browser server',
59
+ path: getEndpointFilePath(),
60
+ commands: [
61
+ { label: 'Endpoint', command: wsEndpoint },
62
+ { label: 'Status', command: `${cli} browser status` },
63
+ { label: 'Stop', command: `${cli} browser stop` },
64
+ ],
65
+ },
66
+ ];
67
+ printNextSteps(sections);
55
68
  return server;
56
69
  }
57
70
  async function getAliveEndpoint() {
@@ -3,6 +3,7 @@ import { join } from 'node:path';
3
3
  import { render } from 'ink';
4
4
  import React from 'react';
5
5
  import { tag } from '../utils/logger.js';
6
+ import { printNextSteps, relativeToCwd } from "../utils/next-steps.js";
6
7
  import { BaseCommand } from './base-command.js';
7
8
  export class AddRuleCommand extends BaseCommand {
8
9
  name = 'add-rule';
@@ -33,18 +34,20 @@ export class AddRuleCommand extends BaseCommand {
33
34
  mkdirSync(rulesDir, { recursive: true });
34
35
  const filePath = join(rulesDir, `${ruleName}.md`);
35
36
  if (existsSync(filePath)) {
36
- tag('warning').log(`Rule file already exists: ${filePath}`);
37
+ tag('warning').log(`Rule file already exists: ${relativeToCwd(filePath)}`);
37
38
  return null;
38
39
  }
39
40
  const content = opts?.content || `Instructions for ${agentName} agent.`;
40
41
  writeFileSync(filePath, `${content.trim()}\n`);
41
- tag('success').log(`Rule created: ${filePath}`);
42
- if (opts?.urlPattern) {
43
- tag('info').log(`Add to config: ai.agents.${agentName}.rules: [{ '${opts.urlPattern}': '${ruleName}' }]`);
44
- }
45
- else {
46
- tag('info').log(`Add to config: ai.agents.${agentName}.rules: ['${ruleName}']`);
47
- }
42
+ const configLine = opts?.urlPattern ? `ai.agents.${agentName}.rules: [{ '${opts.urlPattern}': '${ruleName}' }]` : `ai.agents.${agentName}.rules: ['${ruleName}']`;
43
+ const sections = [
44
+ {
45
+ label: 'Rule',
46
+ path: filePath,
47
+ commands: [{ label: 'Add to config', command: configLine }],
48
+ },
49
+ ];
50
+ printNextSteps(sections);
48
51
  return filePath;
49
52
  }
50
53
  }