explorbot 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. package/README.md +27 -1
  2. package/bin/explorbot-cli.ts +86 -15
  3. package/boat/api-tester/src/ai/curler-tools.ts +3 -3
  4. package/boat/api-tester/src/ai/curler.ts +1 -1
  5. package/boat/api-tester/src/apibot.ts +2 -2
  6. package/boat/api-tester/src/config.ts +1 -1
  7. package/dist/bin/explorbot-cli.js +85 -14
  8. package/dist/boat/api-tester/src/ai/curler-tools.js +2 -2
  9. package/dist/boat/api-tester/src/apibot.js +2 -2
  10. package/dist/package.json +2 -2
  11. package/dist/rules/navigator/output.md +9 -0
  12. package/dist/rules/navigator/verification-actions.md +2 -0
  13. package/dist/src/action-result.js +23 -1
  14. package/dist/src/action.js +46 -38
  15. package/dist/src/ai/bosun.js +16 -2
  16. package/dist/src/ai/conversation.js +39 -0
  17. package/dist/src/ai/experience-compactor.js +235 -50
  18. package/dist/src/ai/historian/codeceptjs.js +109 -0
  19. package/dist/src/ai/historian/experience.js +320 -0
  20. package/dist/src/ai/historian/mixin.js +2 -0
  21. package/dist/src/ai/historian/playwright.js +145 -0
  22. package/dist/src/ai/historian/utils.js +18 -0
  23. package/dist/src/ai/historian.js +19 -398
  24. package/dist/src/ai/navigator.js +133 -80
  25. package/dist/src/ai/pilot.js +254 -13
  26. package/dist/src/ai/planner/subpages.js +1 -30
  27. package/dist/src/ai/planner.js +33 -13
  28. package/dist/src/ai/provider.js +55 -18
  29. package/dist/src/ai/rerunner.js +3 -3
  30. package/dist/src/ai/researcher/deep-analysis.js +1 -1
  31. package/dist/src/ai/researcher/fingerprint-worker.js +1 -1
  32. package/dist/src/ai/researcher/locators.js +1 -1
  33. package/dist/src/ai/researcher/sections.js +8 -1
  34. package/dist/src/ai/researcher.js +43 -41
  35. package/dist/src/ai/rules.js +26 -14
  36. package/dist/src/ai/tester.js +90 -26
  37. package/dist/src/ai/tools.js +18 -10
  38. package/dist/src/api/request-store.js +20 -0
  39. package/dist/src/api/xhr-capture.js +19 -3
  40. package/dist/src/browser-server.js +16 -3
  41. package/dist/src/command-handler.js +1 -1
  42. package/dist/src/commands/add-rule-command.js +12 -9
  43. package/dist/src/commands/base-command.js +20 -0
  44. package/dist/src/commands/clean-command.js +3 -2
  45. package/dist/src/commands/compact-command.js +138 -0
  46. package/dist/src/commands/context-command.js +7 -1
  47. package/dist/src/commands/drill-command.js +4 -1
  48. package/dist/src/commands/experience-command.js +104 -0
  49. package/dist/src/commands/explore-command.js +54 -19
  50. package/dist/src/commands/freesail-command.js +2 -0
  51. package/dist/src/commands/index.js +7 -3
  52. package/dist/src/commands/init-command.js +11 -10
  53. package/dist/src/commands/learn-command.js +1 -1
  54. package/dist/src/commands/navigate-command.js +4 -1
  55. package/dist/src/commands/plan-clear-command.js +4 -1
  56. package/dist/src/commands/plan-command.js +43 -4
  57. package/dist/src/commands/plan-edit-command.js +1 -1
  58. package/dist/src/commands/plan-load-command.js +4 -1
  59. package/dist/src/commands/plan-reload-command.js +4 -1
  60. package/dist/src/commands/plan-save-command.js +20 -8
  61. package/dist/src/commands/rerun-command.js +4 -0
  62. package/dist/src/commands/research-command.js +5 -2
  63. package/dist/src/commands/start-command.js +5 -1
  64. package/dist/src/commands/test-command.js +7 -1
  65. package/dist/src/components/App.js +15 -5
  66. package/dist/src/execution-controller.js +13 -2
  67. package/dist/src/experience-tracker.js +174 -83
  68. package/dist/src/explorbot.js +31 -22
  69. package/dist/src/explorer.js +12 -5
  70. package/dist/src/observability.js +50 -99
  71. package/dist/src/playwright-recorder.js +309 -0
  72. package/dist/src/reporter.js +17 -2
  73. package/dist/src/stats.js +2 -0
  74. package/dist/src/suite.js +1 -1
  75. package/dist/src/test-plan.js +12 -0
  76. package/dist/src/utils/aria.js +37 -1
  77. package/dist/src/utils/error-page.js +30 -7
  78. package/dist/src/utils/logger.js +1 -1
  79. package/dist/src/utils/next-steps.js +37 -0
  80. package/dist/src/utils/rules-loader.js +1 -1
  81. package/dist/src/utils/test-files.js +1 -1
  82. package/dist/src/utils/url-matcher.js +50 -0
  83. package/package.json +2 -2
  84. package/rules/navigator/output.md +9 -0
  85. package/rules/navigator/verification-actions.md +2 -0
  86. package/src/action-result.ts +26 -1
  87. package/src/action.ts +44 -37
  88. package/src/ai/bosun.ts +16 -2
  89. package/src/ai/conversation.ts +37 -0
  90. package/src/ai/experience-compactor.ts +270 -63
  91. package/src/ai/historian/codeceptjs.ts +130 -0
  92. package/src/ai/historian/experience.ts +383 -0
  93. package/src/ai/historian/mixin.ts +4 -0
  94. package/src/ai/historian/playwright.ts +169 -0
  95. package/src/ai/historian/utils.ts +23 -0
  96. package/src/ai/historian.ts +35 -468
  97. package/src/ai/navigator.ts +140 -85
  98. package/src/ai/pilot.ts +259 -14
  99. package/src/ai/planner/subpages.ts +1 -24
  100. package/src/ai/planner.ts +34 -14
  101. package/src/ai/provider.ts +52 -18
  102. package/src/ai/rerunner.ts +3 -3
  103. package/src/ai/researcher/deep-analysis.ts +1 -1
  104. package/src/ai/researcher/fingerprint-worker.ts +1 -1
  105. package/src/ai/researcher/locators.ts +2 -2
  106. package/src/ai/researcher/sections.ts +7 -1
  107. package/src/ai/researcher.ts +47 -42
  108. package/src/ai/rules.ts +27 -14
  109. package/src/ai/task-agent.ts +1 -1
  110. package/src/ai/tester.ts +94 -26
  111. package/src/ai/tools.ts +53 -29
  112. package/src/api/request-store.ts +22 -0
  113. package/src/api/xhr-capture.ts +21 -3
  114. package/src/browser-server.ts +17 -3
  115. package/src/command-handler.ts +1 -1
  116. package/src/commands/add-rule-command.ts +13 -9
  117. package/src/commands/base-command.ts +26 -1
  118. package/src/commands/clean-command.ts +4 -3
  119. package/src/commands/compact-command.ts +156 -0
  120. package/src/commands/context-command.ts +8 -2
  121. package/src/commands/drill-command.ts +5 -2
  122. package/src/commands/experience-command.ts +125 -0
  123. package/src/commands/explore-command.ts +58 -21
  124. package/src/commands/freesail-command.ts +2 -0
  125. package/src/commands/index.ts +7 -3
  126. package/src/commands/init-command.ts +11 -10
  127. package/src/commands/learn-command.ts +2 -2
  128. package/src/commands/navigate-command.ts +5 -2
  129. package/src/commands/plan-clear-command.ts +5 -2
  130. package/src/commands/plan-command.ts +47 -5
  131. package/src/commands/plan-edit-command.ts +2 -2
  132. package/src/commands/plan-load-command.ts +5 -2
  133. package/src/commands/plan-reload-command.ts +5 -2
  134. package/src/commands/plan-save-command.ts +20 -9
  135. package/src/commands/rerun-command.ts +5 -0
  136. package/src/commands/research-command.ts +6 -3
  137. package/src/commands/start-command.ts +6 -2
  138. package/src/commands/test-command.ts +8 -2
  139. package/src/components/App.tsx +16 -5
  140. package/src/config.ts +6 -1
  141. package/src/execution-controller.ts +14 -3
  142. package/src/experience-tracker.ts +198 -100
  143. package/src/explorbot.ts +33 -23
  144. package/src/explorer.ts +14 -5
  145. package/src/observability.ts +50 -109
  146. package/src/playwright-recorder.ts +305 -0
  147. package/src/reporter.ts +17 -3
  148. package/src/stats.ts +4 -0
  149. package/src/suite.ts +1 -1
  150. package/src/test-plan.ts +12 -0
  151. package/src/utils/aria.ts +38 -1
  152. package/src/utils/error-page.ts +32 -7
  153. package/src/utils/logger.ts +1 -1
  154. package/src/utils/next-steps.ts +51 -0
  155. package/src/utils/rules-loader.ts +1 -1
  156. package/src/utils/test-files.ts +1 -1
  157. package/src/utils/url-matcher.ts +43 -0
@@ -1,4 +1,4 @@
1
- import { existsSync, readdirSync, readFileSync, statSync } from 'node:fs';
1
+ import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs';
2
2
  import { join } from 'node:path';
3
3
  import { parentPort } from 'node:worker_threads';
4
4
  import { computeHtmlFingerprint } from '../../utils/html-diff.ts';
@@ -1,15 +1,15 @@
1
1
  import dedent from 'dedent';
2
2
  import type { ActionResult } from '../../action-result.js';
3
- import type Explorer from '../../explorer.ts';
4
3
  import { executionController } from '../../execution-controller.ts';
4
+ import type Explorer from '../../explorer.ts';
5
5
  import { parseAriaLocator } from '../../utils/aria.ts';
6
6
  import { tag } from '../../utils/logger.js';
7
7
  import { mdq } from '../../utils/markdown-query.ts';
8
8
  import { WebElement } from '../../utils/web-element.ts';
9
- import { FOCUSED_MARKER } from './focus.ts';
10
9
  import type { Conversation } from '../conversation.ts';
11
10
  import type { Provider } from '../provider.js';
12
11
  import { locatorRule as generalLocatorRuleText } from '../rules.js';
12
+ import { FOCUSED_MARKER } from './focus.ts';
13
13
  import { type Constructor, debugLog } from './mixin.ts';
14
14
  import { parseResearchSections } from './parser.ts';
15
15
  import type { ResearchResult } from './research-result.ts';
@@ -37,7 +37,13 @@ export function WithSections<T extends Constructor>(Base: T) {
37
37
  const parts: string[] = [];
38
38
  for (const [name, description] of targets) {
39
39
  if (executionController.isInterrupted()) break;
40
- const text = await this._researchSingleSection(name, description, ariaSnapshot, focusCss);
40
+ let text = '';
41
+ try {
42
+ text = await this._researchSingleSection(name, description, ariaSnapshot, focusCss);
43
+ } catch (err) {
44
+ tag('warning').log(`Section "${name}" research failed, skipping: ${err instanceof Error ? err.message : err}`);
45
+ continue;
46
+ }
41
47
  if (!text) continue;
42
48
  const trimmed = text.trim();
43
49
  if (trimmed === 'NOT_PRESENT' || trimmed.startsWith('NOT_PRESENT')) continue;
@@ -3,6 +3,7 @@ import dedent from 'dedent';
3
3
  import { ActionResult } from '../action-result.js';
4
4
  import { setActivity } from '../activity.ts';
5
5
  import { ConfigParser, outputPath } from '../config.ts';
6
+ import { executionController } from '../execution-controller.ts';
6
7
  import type { ExperienceTracker } from '../experience-tracker.ts';
7
8
  import type Explorer from '../explorer.ts';
8
9
  import type { KnowledgeTracker } from '../knowledge-tracker.ts';
@@ -11,13 +12,12 @@ import type { StateManager } from '../state-manager.js';
11
12
  import { WebPageState } from '../state-manager.js';
12
13
  import { Stats } from '../stats.ts';
13
14
  import { diffAriaSnapshots } from '../utils/aria.ts';
14
- import { isErrorPage } from '../utils/error-page.ts';
15
+ import { ErrorPageError, detectPageCondition } from '../utils/error-page.ts';
15
16
  import { HooksRunner } from '../utils/hooks-runner.ts';
16
17
  import { isBodyEmpty } from '../utils/html.ts';
17
18
  import { createDebug, pluralize, tag } from '../utils/logger.js';
18
19
  import { mdq } from '../utils/markdown-query.ts';
19
- import { withRetry } from '../utils/retry.ts';
20
- import { executionController } from '../execution-controller.ts';
20
+ import { RulesLoader } from '../utils/rules-loader.ts';
21
21
  import type { Agent } from './agent.js';
22
22
  import type { Navigator } from './navigator.ts';
23
23
  import { ContextLengthError, type Provider } from './provider.js';
@@ -30,7 +30,6 @@ import { extractValidContainers, formatResearchSummary, parseResearchSections }
30
30
  import { ResearchResult } from './researcher/research-result.ts';
31
31
  import { type SectionMethods, WithSections } from './researcher/sections.ts';
32
32
  import { locatorRule as generalLocatorRuleText } from './rules.js';
33
- import { RulesLoader } from '../utils/rules-loader.ts';
34
33
  import { TaskAgent } from './task-agent.ts';
35
34
 
36
35
  export type { Locator } from './researcher/locators.ts';
@@ -132,18 +131,15 @@ export class Researcher extends ResearcherBase implements Agent {
132
131
  debugLog(`Annotated ${annotatedElements.length} interactive elements with eidx`);
133
132
  this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot && this.provider.hasVision() });
134
133
 
135
- if (isErrorPage(this.actionResult!)) {
136
- const recovered = await this.waitForPageLoad(screenshot);
137
- if (!recovered) {
138
- tag('warning').log(`Detected error page at ${state.url}`);
139
- return dedent`
140
- ## Error Page Detected
141
-
142
- URL: ${state.url}
143
- Title: ${this.actionResult!.title || 'N/A'}
144
-
145
- Research skipped. Navigate to a valid page to continue.
146
- `;
134
+ const condition = detectPageCondition(this.actionResult!);
135
+ if (condition === 'error') {
136
+ tag('warning').log(`Detected error page at ${state.url}`);
137
+ throw new ErrorPageError(state.url, this.actionResult!.title);
138
+ }
139
+ if (condition === 'loading') {
140
+ const settled = await this.waitUntilSettled(screenshot);
141
+ if (!settled) {
142
+ tag('warning').log(`Page at ${state.url} did not finish loading within timeout, continuing with best-effort research`);
147
143
  }
148
144
  }
149
145
 
@@ -350,43 +346,52 @@ export class Researcher extends ResearcherBase implements Agent {
350
346
  return;
351
347
  }
352
348
 
353
- if (isEmpty) {
354
- debugLog('HTML body is empty, refreshing page');
355
- tag('step').log('Page body is empty, refreshing...');
356
- } else {
357
- debugLog('Not on current state, navigating to URL');
358
- tag('step').log('Navigating to URL...');
349
+ if (isEmpty && isOnCurrentState) {
350
+ debugLog('HTML body empty on current URL, waiting for content');
351
+ tag('step').log('Page body is empty, waiting for content...');
352
+ await this.waitUntilSettled(screenshot ?? false);
353
+ return;
359
354
  }
360
355
 
356
+ debugLog('Not on current state, navigating to URL');
357
+ tag('step').log('Navigating to URL...');
358
+
361
359
  await this.explorer.visit(url);
362
360
  this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot ?? false });
363
361
  }
364
362
 
365
- private async waitForPageLoad(screenshot: boolean): Promise<boolean> {
363
+ private async waitUntilSettled(screenshot: boolean): Promise<boolean> {
366
364
  const errorPageTimeout = (this.explorer.getConfig().ai?.agents?.researcher as any)?.errorPageTimeout ?? 10;
367
365
  if (errorPageTimeout <= 0) return false;
368
366
 
367
+ const page = this.explorer.playwrightHelper.page;
368
+ const includeScreenshot = screenshot && this.provider.hasVision();
369
+
369
370
  try {
370
- await withRetry(
371
- async () => {
372
- await this.explorer.annotateElements();
373
- this.actionResult = await this.explorer.createAction().capturePageState({
374
- includeScreenshot: screenshot && this.provider.hasVision(),
375
- });
376
- if (isErrorPage(this.actionResult!)) throw new Error('Error page detected');
377
- },
378
- {
379
- maxAttempts: Math.ceil(errorPageTimeout / 3) + 1,
380
- baseDelay: 1000,
381
- maxDelay: 5000,
382
- backoffMultiplier: 2,
383
- retryCondition: (e) => e.message === 'Error page detected',
384
- }
385
- );
386
- return true;
387
- } catch {
388
- return false;
371
+ await page?.waitForLoadState('networkidle', { timeout: errorPageTimeout * 1000 });
372
+ } catch {}
373
+
374
+ await this.explorer.annotateElements();
375
+ this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
376
+
377
+ let condition = detectPageCondition(this.actionResult!);
378
+ if (condition === 'error') {
379
+ throw new ErrorPageError(this.actionResult!.url, this.actionResult!.title);
380
+ }
381
+ if (condition === 'ok') return true;
382
+
383
+ for (let i = 0; i < 3; i++) {
384
+ await new Promise((r) => setTimeout(r, 1000));
385
+ await this.explorer.annotateElements();
386
+ this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
387
+ condition = detectPageCondition(this.actionResult!);
388
+ if (condition === 'error') {
389
+ throw new ErrorPageError(this.actionResult!.url, this.actionResult!.title);
390
+ }
391
+ if (condition === 'ok') return true;
389
392
  }
393
+
394
+ return false;
390
395
  }
391
396
 
392
397
  private getConfiguredSections(): Record<string, string> {
package/src/ai/rules.ts CHANGED
@@ -167,6 +167,21 @@ export const focusedElementRule = dedent`
167
167
  </focused_element_actions>
168
168
  `;
169
169
 
170
+ export const unexpectedPopupRule = dedent`
171
+ <unexpected_popup_rule>
172
+ If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
173
+ If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
174
+ If a click error mentions "intercepts pointer events", another element is covering the target — dismiss it first.
175
+ If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
176
+
177
+ Dismiss strategy (try in order):
178
+ 1. I.clickXY(0, 0) — click outside the popup to close it
179
+ 2. I.pressKey('Escape') — press Escape to dismiss
180
+ 3. I.click('Cancel') — click Cancel button if present
181
+ 4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
182
+ </unexpected_popup_rule>
183
+ `;
184
+
170
185
  export const sectionContextRule = dedent`
171
186
  <section_context_rule>
172
187
  Context parameter is DEFAULT for all interactions. ALWAYS use container from UI map sections unless locator is XPath or unique ID.
@@ -192,17 +207,7 @@ export const sectionContextRule = dedent`
192
207
  - Locator is a unique ID (#specific-element)
193
208
  </section_context_rule>
194
209
 
195
- <unexpected_popup_rule>
196
- If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
197
- If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
198
- If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
199
-
200
- Dismiss strategy (try in order):
201
- 1. I.clickXY(0, 0) — click outside the popup to close it
202
- 2. I.pressKey('Escape') — press Escape to dismiss
203
- 3. I.click('Cancel') — click Cancel button if present
204
- 4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
205
- </unexpected_popup_rule>
210
+ ${unexpectedPopupRule}
206
211
  `;
207
212
 
208
213
  export function multipleTabsRule(tabs: Array<{ url: string; title: string }>): string {
@@ -274,12 +279,19 @@ export const actionRule = dedent`
274
279
  I.fillField('Username', 'John', '.login-form'); // fills Username inside .login-form
275
280
  I.fillField('Username', 'John'); // fills the field located by name or placeholder or label "Username" with the text "John"
276
281
  I.fillField('//user/input', 'John'); // fills the field located by XPath "//user/input" with the text "John"
277
- </example>
282
+ I.fillField('Description', 'Hello world', '.editor'); // works for rich text / code editors too
283
+ </example>
284
+
285
+ I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors
286
+ (Monaco, ProseMirror, CodeMirror, TipTap, Quill, Draft.js, Slate, etc.) transparently.
287
+ ALWAYS use I.fillField for rich editors — target the editor container or its nearest label/heading with a normal locator.
288
+ Do NOT open the editor with raw JS (executeScript, page.evaluate), do NOT dispatch synthetic events,
289
+ do NOT call the editor's own API (monaco.editor.setValue, view.dispatch, etc.) to write text.
278
290
 
279
291
  ### I.type
280
292
 
281
- Types text into the currently focused element. Use when fillField doesn't work,
282
- for instance, for highly customized input fields like Monaco editors or rich text editors.
293
+ Types text into the currently focused element. Use only when there is no locator you can pass to I.fillField —
294
+ e.g. the target is implicit (a just-opened command palette, an autocomplete that steals focus, a canvas-based surface).
283
295
 
284
296
  I.type(<text>)
285
297
 
@@ -291,6 +303,7 @@ export const actionRule = dedent`
291
303
  DOES NOT receive any locator, just text to type.
292
304
  NEVER write: I.type('text', locator) or I.type('text', {locator: '...'}) — this is INVALID.
293
305
  To type into a specific field: use I.fillField(locator, text) or I.click(locator) then I.type(text).
306
+ Do NOT reach for I.type just because the target looks like a rich editor — I.fillField handles those.
294
307
 
295
308
  ### I.pressKey
296
309
 
@@ -1,6 +1,6 @@
1
1
  import dedent from 'dedent';
2
2
  import type { ActionResult } from '../action-result.js';
3
- import { renderExperienceToc, type ExperienceTracker } from '../experience-tracker.js';
3
+ import { type ExperienceTracker, renderExperienceToc } from '../experience-tracker.js';
4
4
  import type { KnowledgeTracker } from '../knowledge-tracker.js';
5
5
  import { createDebug, pluralize, tag } from '../utils/logger.js';
6
6
 
package/src/ai/tester.ts CHANGED
@@ -8,10 +8,12 @@ import { setActivity } from '../activity.ts';
8
8
  import { ConfigParser } from '../config.ts';
9
9
  import type { ExperienceTracker } from '../experience-tracker.ts';
10
10
  import type Explorer from '../explorer.ts';
11
+ import { Observability } from '../observability.ts';
11
12
  import type { StateTransition, WebPageState } from '../state-manager.ts';
12
13
  import { Stats } from '../stats.ts';
13
14
  import { type Note, type Test, TestResult, type TestResultType } from '../test-plan.ts';
14
15
  import { detectFocusArea, extractFocusedElement } from '../utils/aria.ts';
16
+ import { ErrorPageError } from '../utils/error-page.ts';
15
17
  import { HooksRunner } from '../utils/hooks-runner.ts';
16
18
  import { codeToMarkdown } from '../utils/html.ts';
17
19
  import { createDebug, tag } from '../utils/logger.ts';
@@ -154,10 +156,39 @@ export class Tester extends TaskAgent implements Agent {
154
156
  const initialPrompt = await this.buildTestPrompt(task, initialState);
155
157
  conversation.addUserText(initialPrompt);
156
158
 
159
+ return await Observability.run(
160
+ `test: ${task.scenario}`,
161
+ {
162
+ sessionId: task.sessionName,
163
+ tags: ['tester'],
164
+ input: {
165
+ scenario: task.scenario,
166
+ startUrl: task.startUrl,
167
+ expected: task.expected,
168
+ },
169
+ },
170
+ async () => this.runTestSession(task, initialState, conversation, { offFailedRequest, page, onPageError, onConsoleMessage })
171
+ );
172
+ }
173
+
174
+ private async runTestSession(task: Test, initialState: ActionResult, conversation: Conversation, handlers: { offFailedRequest?: () => void; page: any; onPageError: (err: Error) => void; onConsoleMessage: (msg: any) => void }): Promise<{ success: boolean }> {
175
+ const { offFailedRequest, page, onPageError, onConsoleMessage } = handlers;
176
+
157
177
  if (this.pilot) {
158
- const plan = await this.pilot.planTest(task, initialState);
159
- if (plan) {
160
- conversation.addUserText(`Pilot's test plan:\n${plan}\n\nFollow this plan while executing the test.`);
178
+ try {
179
+ const plan = await this.pilot.planTest(task, initialState);
180
+ if (plan) {
181
+ conversation.addUserText(`Pilot's test plan:\n${plan}\n\nFollow this plan while executing the test.`);
182
+ }
183
+ } catch (err) {
184
+ const message = err instanceof Error ? err.message : String(err);
185
+ tag('error').log(`Pilot planning failed: ${message}`);
186
+ task.addNote(`Planning failed: ${message}`, TestResult.FAILED);
187
+ task.finish(TestResult.FAILED);
188
+ offFailedRequest?.();
189
+ page?.off('pageerror', onPageError);
190
+ page?.off('console', onConsoleMessage);
191
+ return { success: false };
161
192
  }
162
193
  }
163
194
 
@@ -173,6 +204,7 @@ export class Tester extends TaskAgent implements Agent {
173
204
  await this.hooksRunner.runBeforeHook('tester', currentUrl);
174
205
 
175
206
  const offStateChange = this.explorer.getStateManager().onStateChange((event: StateTransition) => {
207
+ if (task.hasFinished) return;
176
208
  if (event.toState?.url === event.fromState?.url) return;
177
209
  task.addNote(`Navigated to ${event.toState?.url}`, TestResult.PASSED);
178
210
  task.states.push(event.toState);
@@ -224,6 +256,10 @@ export class Tester extends TaskAgent implements Agent {
224
256
  conversation.cleanupTag('page_aria', '...cleaned aria snapshot...', 2);
225
257
  conversation.cleanupTag('page_html', '...cleaned HTML snapshot...', 1);
226
258
  conversation.cleanupTag('experience', '...cleaned experience...', 1);
259
+ conversation.cleanupTag('applied_experience', '...cleaned past experience...', 1);
260
+ conversation.cleanupTag('page_ui_map', '...cleaned UI map...', 1);
261
+ conversation.cleanupTag('page_ui_map_overlay', '...cleaned UI overlay...', 1);
262
+ conversation.compactToolResults(3);
227
263
 
228
264
  if (iteration > 1) {
229
265
  const isNewPage = this.previousUrl !== null && this.previousUrl !== currentState.url;
@@ -245,6 +281,7 @@ export class Tester extends TaskAgent implements Agent {
245
281
  const result = await this.provider.invokeConversation(conversation, tools, {
246
282
  maxToolRoundtrips: 5,
247
283
  toolChoice: 'required',
284
+ stopWhen: () => task.hasFinished,
248
285
  });
249
286
 
250
287
  if (!result) throw new Error('Failed to get response from provider');
@@ -329,21 +366,11 @@ export class Tester extends TaskAgent implements Agent {
329
366
  context.setUserInput(result.message);
330
367
  }
331
368
  : undefined,
332
- observability: {
333
- name: `test: ${task.scenario}`,
334
- agent: 'tester',
335
- sessionId: task.sessionName,
336
- metadata: {
337
- input: {
338
- scenario: task.scenario,
339
- startUrl: task.startUrl,
340
- expected: task.expected,
341
- },
342
- },
343
- },
344
369
  catch: async ({ error, stop }) => {
345
370
  tag('error').log(`Test execution error: ${error}`);
346
- task.addNote(`Execution error: ${error instanceof Error ? error.message : String(error)}`);
371
+ if (!task.hasFinished) {
372
+ task.addNote(`Execution error: ${error instanceof Error ? error.message : String(error)}`);
373
+ }
347
374
  stop();
348
375
  },
349
376
  }
@@ -352,13 +379,19 @@ export class Tester extends TaskAgent implements Agent {
352
379
  if (task.hasFinished) break;
353
380
 
354
381
  const finalState = this.getCurrentState();
355
- const wantsContinue = await this.pilot!.finalReview(task, finalState, conversation);
382
+ const wantsContinue = await this.pilot!.finalReview(task, finalState, conversation, this.navigator);
356
383
 
357
384
  if (!wantsContinue || task.hasFinished) break;
358
385
  if (extensions >= this.MAX_EXTENSIONS) break;
359
386
 
360
387
  extensions++;
361
388
  tag('info').log(`Pilot extending test (${extensions}/${this.MAX_EXTENSIONS})`);
389
+ conversation.cleanupTag('page_aria', '...trimmed...', 1);
390
+ conversation.cleanupTag('page_html', '...trimmed...', 0);
391
+ conversation.cleanupTag('experience', '...trimmed...', 0);
392
+ conversation.cleanupTag('page_ui_map', '...trimmed...', 0);
393
+ conversation.cleanupTag('page_ui_map_overlay', '...trimmed...', 0);
394
+ conversation.compactToolResults(1);
362
395
  shouldContinue = true;
363
396
  }
364
397
 
@@ -464,7 +497,13 @@ export class Tester extends TaskAgent implements Agent {
464
497
  }
465
498
 
466
499
  if (isNewUrl) {
467
- const research = await this.researcher.research(currentState);
500
+ let research = '';
501
+ try {
502
+ research = await this.researcher.research(currentState);
503
+ } catch (err) {
504
+ if (!(err instanceof ErrorPageError)) throw err;
505
+ tag('warning').log(`Research skipped: ${err.message}`);
506
+ }
468
507
  this.pageStateHash = currentStateHash;
469
508
  this.pageActionResult = currentState;
470
509
  let uiMapSection = '';
@@ -646,7 +685,7 @@ export class Tester extends TaskAgent implements Agent {
646
685
  - Use finish() to complete the test, not record(). record() is for intermediate notes.
647
686
  - Call finish(verify) when all goals are achieved — provide an assertion to verify
648
687
  - ONLY call stop() if the scenario itself is completely irrelevant to this page and no expectations can be achieved
649
- - Use reset() to navigate back to the initial page if needed. Do not call it if you are already on the initial page
688
+ - Use reset() ONLY as a last resort when the current page cannot host the scenario. Never reset after a successful flow just because an assertion or milestone did not match — verify differently or record() the finding instead. Reset is destructive and does not undo server-side side effects.
650
689
  - Be precise with locators (CSS or XPath)
651
690
  - Each click/type call returns the new page state automatically
652
691
  - Check for success messages from tool calls to verify if expected outcomes are achieved
@@ -769,13 +808,25 @@ export class Tester extends TaskAgent implements Agent {
769
808
  return {
770
809
  reset: tool({
771
810
  description: dedent`
772
- Reset the testing flow by navigating back to the original page.
773
- Use this when navigated too far from the desired state and
774
- there's no clear path to achieve the expected result. This restarts the
775
- testing flow from a known good state.
811
+ Navigate back to the start URL and discard progress in this iteration.
812
+ Reset is a LAST RESORT. It is destructive any side effects already produced on the
813
+ server (records created, forms submitted) persist and cannot be undone by resetting.
814
+
815
+ Use reset ONLY for:
816
+ - navigation dead-ends where the current page cannot host the scenario
817
+ - irrecoverable errors that leave no actionable path forward
818
+
819
+ Do NOT use reset when:
820
+ - the previous action already succeeded (URL changed, record visible, confirmation shown)
821
+ and an assertion did not match — verify differently, record(), or finish() instead
822
+ - an expectation/milestone does not match app behavior but the flow worked — the work is
823
+ done; resetting just creates duplicates
824
+ - you want to "try again" after submitting a form — submitting again creates a duplicate
825
+
826
+ Pilot will review every reset and may veto it.
776
827
  `,
777
828
  inputSchema: z.object({
778
- reason: z.string().optional().describe('Explanation why you need to navigate'),
829
+ reason: z.string().optional().describe('Explanation why reset is the only option'),
779
830
  }),
780
831
  execute: async ({ reason }) => {
781
832
  if (this.getCurrentState().isInsideIframe) {
@@ -791,6 +842,20 @@ export class Tester extends TaskAgent implements Agent {
791
842
  };
792
843
  }
793
844
 
845
+ task.resetCount += 1;
846
+
847
+ if (this.pilot) {
848
+ const currentStateForReview = this.getCurrentState();
849
+ const allowed = await this.pilot.reviewReset(task, currentStateForReview, reason ?? '', conversation);
850
+ if (!allowed) {
851
+ return {
852
+ success: false,
853
+ action: 'reset',
854
+ message: 'Reset rejected by Pilot; Continue execution',
855
+ };
856
+ }
857
+ }
858
+
794
859
  const explanation = reason ? `${reason} (RESET)` : 'Resetting to initial page';
795
860
  const targetUrl = resetUrl!;
796
861
  task.addNote(explanation);
@@ -874,11 +939,14 @@ export class Tester extends TaskAgent implements Agent {
874
939
  verify: z.string().describe('Specific assertion to verify on the page before finishing (e.g., "New item appears in the list")'),
875
940
  }),
876
941
  execute: async ({ verify }) => {
942
+ if (task.hasFinished) {
943
+ return { success: true, action: 'finish', message: 'already finished' };
944
+ }
877
945
  task.addNote(`Finish requested: ${verify}`);
878
946
 
879
947
  if (this.pilot) {
880
948
  const currentState = this.getCurrentState();
881
- await this.pilot.reviewFinish(task, currentState, conversation);
949
+ await this.pilot.reviewFinish(task, currentState, conversation, this.navigator);
882
950
  if (!task.hasFinished) {
883
951
  return {
884
952
  success: false,
@@ -953,7 +1021,7 @@ export class Tester extends TaskAgent implements Agent {
953
1021
  if (input.status !== null && task.isComplete()) {
954
1022
  if (this.pilot) {
955
1023
  const currentState = this.getCurrentState();
956
- await this.pilot.reviewCompletion(task, currentState, conversation);
1024
+ await this.pilot.reviewCompletion(task, currentState, conversation, this.navigator);
957
1025
  } else {
958
1026
  const hasPassed = task.hasAchievedAny();
959
1027
  task.finish(hasPassed ? TestResult.PASSED : TestResult.FAILED);
package/src/ai/tools.ts CHANGED
@@ -10,9 +10,9 @@ import { createDebug, tag } from '../utils/logger.js';
10
10
  import { pause } from '../utils/loop.js';
11
11
  import { WebElement } from '../utils/web-element.ts';
12
12
  import { Navigator } from './navigator.ts';
13
+ import type { AIProvider } from './provider.ts';
13
14
  import { Researcher } from './researcher.ts';
14
15
  import { sectionContextRule } from './rules.ts';
15
- import type { AIProvider } from './provider.ts';
16
16
  import { isInteractive } from './task-agent.ts';
17
17
 
18
18
  const debugLog = createDebug('explorbot:tools');
@@ -100,7 +100,7 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
100
100
  activeNote.screenshot = await action.saveScreenshot();
101
101
  }
102
102
  activeNote.commit(TestResult.PASSED);
103
- return successToolResult('click', { ...toolResult, attempts, code: command });
103
+ return successToolResult('click', { ...toolResult, attempts, code: command }, action);
104
104
  }
105
105
  }
106
106
 
@@ -128,7 +128,7 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
128
128
  activeNote.screenshot = await action.saveScreenshot();
129
129
  }
130
130
  activeNote.commit(TestResult.PASSED);
131
- return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true });
131
+ return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true }, action);
132
132
  }
133
133
  }
134
134
 
@@ -208,12 +208,16 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
208
208
  activeNote.screenshot = await action.saveScreenshot();
209
209
  }
210
210
  activeNote.commit(TestResult.PASSED);
211
- return successToolResult('pressKey', {
212
- ...toolResult,
213
- message: `Automatically used type() for "${key}" (not a standard key press)`,
214
- code: typeCommand,
215
- fallback: true,
216
- });
211
+ return successToolResult(
212
+ 'pressKey',
213
+ {
214
+ ...toolResult,
215
+ message: `Automatically used type() for "${key}" (not a standard key press)`,
216
+ code: typeCommand,
217
+ fallback: true,
218
+ },
219
+ action
220
+ );
217
221
  }
218
222
 
219
223
  const errorMsg = `pressKey fallback to type() failed: ${action.lastError?.toString()}`;
@@ -261,11 +265,15 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
261
265
  activeNote.screenshot = await action.saveScreenshot();
262
266
  }
263
267
  activeNote.commit(TestResult.PASSED);
264
- return successToolResult('pressKey', {
265
- ...toolResult,
266
- message: `Pressed key: ${key}${modifier ? ` with modifier(s): ${Array.isArray(modifier) ? modifier.join('+') : modifier}` : ''}`,
267
- code: pressKeyCommand,
268
- });
268
+ return successToolResult(
269
+ 'pressKey',
270
+ {
271
+ ...toolResult,
272
+ message: `Pressed key: ${key}${modifier ? ` with modifier(s): ${Array.isArray(modifier) ? modifier.join('+') : modifier}` : ''}`,
273
+ code: pressKeyCommand,
274
+ },
275
+ action
276
+ );
269
277
  }
270
278
 
271
279
  const errorMsg = `pressKey() failed: ${action.lastError?.toString()}`;
@@ -289,14 +297,16 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
289
297
  form: tool({
290
298
  description: dedent`
291
299
  Execute raw CodeceptJS code block with multiple commands.
292
- USE THIS TOOL for all keyboard interactions: I.fillField, I.type, I.pressKey
300
+ USE THIS TOOL for typing text into fields: I.fillField, I.type
293
301
 
294
302
  Follow <actions> from system prompt for available commands.
295
303
  Follow <locator_priority> from system prompt for locator selection.
296
304
 
305
+ I.type(text) types the literal characters of its argument into the focused element.
306
+ To press a key combination or special keys (Ctrl, Meta, Esc), use I.pressKey instead.
307
+
297
308
  Use cases:
298
309
  - Typing into input fields (I.fillField, I.type)
299
- - Pressing keyboard keys (I.pressKey)
300
310
  - Working with iframes (switch context with I.switchTo)
301
311
  - Performing multiple form actions in a single batch
302
312
  - Complex interactions requiring sequential commands
@@ -381,13 +391,17 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
381
391
  activeNote.screenshot = await action.saveScreenshot();
382
392
  }
383
393
  activeNote.commit(TestResult.PASSED);
384
- return successToolResult('form', {
385
- ...toolResult,
386
- message: `Form completed successfully with ${lines.length} commands.`,
387
- commandsExecuted: lines.length,
388
- code: codeBlock,
389
- suggestion: 'Verify the form was filled in correctly using see() tool. If needed to submit: try click() tool or form() with I.pressKey("Enter").',
390
- });
394
+ return successToolResult(
395
+ 'form',
396
+ {
397
+ ...toolResult,
398
+ message: `Form completed successfully with ${lines.length} commands.`,
399
+ commandsExecuted: lines.length,
400
+ code: codeBlock,
401
+ suggestion: 'Verify the form was filled in correctly using see() tool. If needed to submit: try click() tool or form() with I.pressKey("Enter").',
402
+ },
403
+ action
404
+ );
391
405
  } catch (error) {
392
406
  activeNote.commit(TestResult.FAILED);
393
407
  const errorMessage = error instanceof Error ? error.toString() : 'Unknown error occurred';
@@ -587,10 +601,14 @@ export function createAgentTools({
587
601
  const result = await navigator.verifyState(assertion, actionResult);
588
602
 
589
603
  if (result.verified) {
590
- return successToolResult('verify', {
591
- message: `Verification passed: ${assertion}`,
592
- code: result.successfulCodes.join('\n'),
593
- });
604
+ return successToolResult(
605
+ 'verify',
606
+ {
607
+ message: `Verification passed: ${assertion}`,
608
+ code: result.successfulCodes.join('\n'),
609
+ },
610
+ { assertionSteps: result.assertionSteps }
611
+ );
594
612
  }
595
613
 
596
614
  return failedToolResult('verify', `Verification failed: ${assertion}`, {
@@ -957,7 +975,7 @@ export function createAgentTools({
957
975
  tools.learn_experience = tool({
958
976
  description: dedent`
959
977
  Read the full body of a specific experience section listed in <experience>.
960
- The TOC shows entries like "A.1 ## Successful Flow: ...". Pass the fileTag and sectionIndex.
978
+ The TOC shows entries like "A.1 ## FLOW: ..." or "A.2 ## ACTION: ...". Pass the fileTag and sectionIndex.
961
979
  Only call when a TOC entry looks directly relevant to the current step.
962
980
  `,
963
981
  inputSchema: z.object({
@@ -1015,8 +1033,14 @@ function countAriaChanges(ariaChanges: string): number {
1015
1033
  return addedCount + removedCount;
1016
1034
  }
1017
1035
 
1018
- function successToolResult(action: string, data?: Record<string, any>) {
1036
+ function successToolResult(action: string, data?: Record<string, any>, source?: { playwrightGroupId?: string | null; assertionSteps?: any[] }) {
1019
1037
  const result: Record<string, any> = { success: true, action, ...data };
1038
+ if (source?.playwrightGroupId) {
1039
+ result.playwrightGroupId = source.playwrightGroupId;
1040
+ }
1041
+ if (source?.assertionSteps?.length) {
1042
+ result.assertionSteps = source.assertionSteps;
1043
+ }
1020
1044
  if (data?.pageDiff) {
1021
1045
  let suggestion = PAGE_DIFF_SUGGESTION;
1022
1046
  const ariaChanges = data.pageDiff.ariaChanges || '';