explorbot 0.1.9 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -1
- package/bin/explorbot-cli.ts +86 -15
- package/boat/api-tester/src/ai/curler-tools.ts +3 -3
- package/boat/api-tester/src/ai/curler.ts +1 -1
- package/boat/api-tester/src/apibot.ts +2 -2
- package/boat/api-tester/src/config.ts +1 -1
- package/dist/bin/explorbot-cli.js +85 -14
- package/dist/boat/api-tester/src/ai/curler-tools.js +2 -2
- package/dist/boat/api-tester/src/apibot.js +2 -2
- package/dist/package.json +2 -2
- package/dist/rules/navigator/output.md +9 -0
- package/dist/rules/navigator/verification-actions.md +2 -0
- package/dist/src/action-result.js +23 -1
- package/dist/src/action.js +46 -38
- package/dist/src/ai/bosun.js +16 -2
- package/dist/src/ai/conversation.js +39 -0
- package/dist/src/ai/experience-compactor.js +235 -50
- package/dist/src/ai/historian/codeceptjs.js +109 -0
- package/dist/src/ai/historian/experience.js +320 -0
- package/dist/src/ai/historian/mixin.js +2 -0
- package/dist/src/ai/historian/playwright.js +145 -0
- package/dist/src/ai/historian/utils.js +18 -0
- package/dist/src/ai/historian.js +19 -398
- package/dist/src/ai/navigator.js +133 -80
- package/dist/src/ai/pilot.js +254 -13
- package/dist/src/ai/planner/subpages.js +1 -30
- package/dist/src/ai/planner.js +33 -13
- package/dist/src/ai/provider.js +55 -18
- package/dist/src/ai/rerunner.js +3 -3
- package/dist/src/ai/researcher/deep-analysis.js +1 -1
- package/dist/src/ai/researcher/fingerprint-worker.js +1 -1
- package/dist/src/ai/researcher/locators.js +1 -1
- package/dist/src/ai/researcher/sections.js +8 -1
- package/dist/src/ai/researcher.js +43 -41
- package/dist/src/ai/rules.js +26 -14
- package/dist/src/ai/tester.js +90 -26
- package/dist/src/ai/tools.js +18 -10
- package/dist/src/api/request-store.js +20 -0
- package/dist/src/api/xhr-capture.js +19 -3
- package/dist/src/browser-server.js +16 -3
- package/dist/src/command-handler.js +1 -1
- package/dist/src/commands/add-rule-command.js +12 -9
- package/dist/src/commands/base-command.js +20 -0
- package/dist/src/commands/clean-command.js +3 -2
- package/dist/src/commands/compact-command.js +138 -0
- package/dist/src/commands/context-command.js +7 -1
- package/dist/src/commands/drill-command.js +4 -1
- package/dist/src/commands/experience-command.js +104 -0
- package/dist/src/commands/explore-command.js +54 -19
- package/dist/src/commands/freesail-command.js +2 -0
- package/dist/src/commands/index.js +7 -3
- package/dist/src/commands/init-command.js +11 -10
- package/dist/src/commands/learn-command.js +1 -1
- package/dist/src/commands/navigate-command.js +4 -1
- package/dist/src/commands/plan-clear-command.js +4 -1
- package/dist/src/commands/plan-command.js +43 -4
- package/dist/src/commands/plan-edit-command.js +1 -1
- package/dist/src/commands/plan-load-command.js +4 -1
- package/dist/src/commands/plan-reload-command.js +4 -1
- package/dist/src/commands/plan-save-command.js +20 -8
- package/dist/src/commands/rerun-command.js +4 -0
- package/dist/src/commands/research-command.js +5 -2
- package/dist/src/commands/start-command.js +5 -1
- package/dist/src/commands/test-command.js +7 -1
- package/dist/src/components/App.js +15 -5
- package/dist/src/execution-controller.js +13 -2
- package/dist/src/experience-tracker.js +174 -83
- package/dist/src/explorbot.js +31 -22
- package/dist/src/explorer.js +12 -5
- package/dist/src/observability.js +50 -99
- package/dist/src/playwright-recorder.js +309 -0
- package/dist/src/reporter.js +17 -2
- package/dist/src/stats.js +2 -0
- package/dist/src/suite.js +1 -1
- package/dist/src/test-plan.js +12 -0
- package/dist/src/utils/aria.js +37 -1
- package/dist/src/utils/error-page.js +30 -7
- package/dist/src/utils/logger.js +1 -1
- package/dist/src/utils/next-steps.js +37 -0
- package/dist/src/utils/rules-loader.js +1 -1
- package/dist/src/utils/test-files.js +1 -1
- package/dist/src/utils/url-matcher.js +50 -0
- package/package.json +2 -2
- package/rules/navigator/output.md +9 -0
- package/rules/navigator/verification-actions.md +2 -0
- package/src/action-result.ts +26 -1
- package/src/action.ts +44 -37
- package/src/ai/bosun.ts +16 -2
- package/src/ai/conversation.ts +37 -0
- package/src/ai/experience-compactor.ts +270 -63
- package/src/ai/historian/codeceptjs.ts +130 -0
- package/src/ai/historian/experience.ts +383 -0
- package/src/ai/historian/mixin.ts +4 -0
- package/src/ai/historian/playwright.ts +169 -0
- package/src/ai/historian/utils.ts +23 -0
- package/src/ai/historian.ts +35 -468
- package/src/ai/navigator.ts +140 -85
- package/src/ai/pilot.ts +259 -14
- package/src/ai/planner/subpages.ts +1 -24
- package/src/ai/planner.ts +34 -14
- package/src/ai/provider.ts +52 -18
- package/src/ai/rerunner.ts +3 -3
- package/src/ai/researcher/deep-analysis.ts +1 -1
- package/src/ai/researcher/fingerprint-worker.ts +1 -1
- package/src/ai/researcher/locators.ts +2 -2
- package/src/ai/researcher/sections.ts +7 -1
- package/src/ai/researcher.ts +47 -42
- package/src/ai/rules.ts +27 -14
- package/src/ai/task-agent.ts +1 -1
- package/src/ai/tester.ts +94 -26
- package/src/ai/tools.ts +53 -29
- package/src/api/request-store.ts +22 -0
- package/src/api/xhr-capture.ts +21 -3
- package/src/browser-server.ts +17 -3
- package/src/command-handler.ts +1 -1
- package/src/commands/add-rule-command.ts +13 -9
- package/src/commands/base-command.ts +26 -1
- package/src/commands/clean-command.ts +4 -3
- package/src/commands/compact-command.ts +156 -0
- package/src/commands/context-command.ts +8 -2
- package/src/commands/drill-command.ts +5 -2
- package/src/commands/experience-command.ts +125 -0
- package/src/commands/explore-command.ts +58 -21
- package/src/commands/freesail-command.ts +2 -0
- package/src/commands/index.ts +7 -3
- package/src/commands/init-command.ts +11 -10
- package/src/commands/learn-command.ts +2 -2
- package/src/commands/navigate-command.ts +5 -2
- package/src/commands/plan-clear-command.ts +5 -2
- package/src/commands/plan-command.ts +47 -5
- package/src/commands/plan-edit-command.ts +2 -2
- package/src/commands/plan-load-command.ts +5 -2
- package/src/commands/plan-reload-command.ts +5 -2
- package/src/commands/plan-save-command.ts +20 -9
- package/src/commands/rerun-command.ts +5 -0
- package/src/commands/research-command.ts +6 -3
- package/src/commands/start-command.ts +6 -2
- package/src/commands/test-command.ts +8 -2
- package/src/components/App.tsx +16 -5
- package/src/config.ts +6 -1
- package/src/execution-controller.ts +14 -3
- package/src/experience-tracker.ts +198 -100
- package/src/explorbot.ts +33 -23
- package/src/explorer.ts +14 -5
- package/src/observability.ts +50 -109
- package/src/playwright-recorder.ts +305 -0
- package/src/reporter.ts +17 -3
- package/src/stats.ts +4 -0
- package/src/suite.ts +1 -1
- package/src/test-plan.ts +12 -0
- package/src/utils/aria.ts +38 -1
- package/src/utils/error-page.ts +32 -7
- package/src/utils/logger.ts +1 -1
- package/src/utils/next-steps.ts +51 -0
- package/src/utils/rules-loader.ts +1 -1
- package/src/utils/test-files.ts +1 -1
- package/src/utils/url-matcher.ts +43 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { existsSync,
|
|
1
|
+
import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs';
|
|
2
2
|
import { join } from 'node:path';
|
|
3
3
|
import { parentPort } from 'node:worker_threads';
|
|
4
4
|
import { computeHtmlFingerprint } from '../../utils/html-diff.ts';
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
import dedent from 'dedent';
|
|
2
2
|
import type { ActionResult } from '../../action-result.js';
|
|
3
|
-
import type Explorer from '../../explorer.ts';
|
|
4
3
|
import { executionController } from '../../execution-controller.ts';
|
|
4
|
+
import type Explorer from '../../explorer.ts';
|
|
5
5
|
import { parseAriaLocator } from '../../utils/aria.ts';
|
|
6
6
|
import { tag } from '../../utils/logger.js';
|
|
7
7
|
import { mdq } from '../../utils/markdown-query.ts';
|
|
8
8
|
import { WebElement } from '../../utils/web-element.ts';
|
|
9
|
-
import { FOCUSED_MARKER } from './focus.ts';
|
|
10
9
|
import type { Conversation } from '../conversation.ts';
|
|
11
10
|
import type { Provider } from '../provider.js';
|
|
12
11
|
import { locatorRule as generalLocatorRuleText } from '../rules.js';
|
|
12
|
+
import { FOCUSED_MARKER } from './focus.ts';
|
|
13
13
|
import { type Constructor, debugLog } from './mixin.ts';
|
|
14
14
|
import { parseResearchSections } from './parser.ts';
|
|
15
15
|
import type { ResearchResult } from './research-result.ts';
|
|
@@ -37,7 +37,13 @@ export function WithSections<T extends Constructor>(Base: T) {
|
|
|
37
37
|
const parts: string[] = [];
|
|
38
38
|
for (const [name, description] of targets) {
|
|
39
39
|
if (executionController.isInterrupted()) break;
|
|
40
|
-
|
|
40
|
+
let text = '';
|
|
41
|
+
try {
|
|
42
|
+
text = await this._researchSingleSection(name, description, ariaSnapshot, focusCss);
|
|
43
|
+
} catch (err) {
|
|
44
|
+
tag('warning').log(`Section "${name}" research failed, skipping: ${err instanceof Error ? err.message : err}`);
|
|
45
|
+
continue;
|
|
46
|
+
}
|
|
41
47
|
if (!text) continue;
|
|
42
48
|
const trimmed = text.trim();
|
|
43
49
|
if (trimmed === 'NOT_PRESENT' || trimmed.startsWith('NOT_PRESENT')) continue;
|
package/src/ai/researcher.ts
CHANGED
|
@@ -3,6 +3,7 @@ import dedent from 'dedent';
|
|
|
3
3
|
import { ActionResult } from '../action-result.js';
|
|
4
4
|
import { setActivity } from '../activity.ts';
|
|
5
5
|
import { ConfigParser, outputPath } from '../config.ts';
|
|
6
|
+
import { executionController } from '../execution-controller.ts';
|
|
6
7
|
import type { ExperienceTracker } from '../experience-tracker.ts';
|
|
7
8
|
import type Explorer from '../explorer.ts';
|
|
8
9
|
import type { KnowledgeTracker } from '../knowledge-tracker.ts';
|
|
@@ -11,13 +12,12 @@ import type { StateManager } from '../state-manager.js';
|
|
|
11
12
|
import { WebPageState } from '../state-manager.js';
|
|
12
13
|
import { Stats } from '../stats.ts';
|
|
13
14
|
import { diffAriaSnapshots } from '../utils/aria.ts';
|
|
14
|
-
import {
|
|
15
|
+
import { ErrorPageError, detectPageCondition } from '../utils/error-page.ts';
|
|
15
16
|
import { HooksRunner } from '../utils/hooks-runner.ts';
|
|
16
17
|
import { isBodyEmpty } from '../utils/html.ts';
|
|
17
18
|
import { createDebug, pluralize, tag } from '../utils/logger.js';
|
|
18
19
|
import { mdq } from '../utils/markdown-query.ts';
|
|
19
|
-
import {
|
|
20
|
-
import { executionController } from '../execution-controller.ts';
|
|
20
|
+
import { RulesLoader } from '../utils/rules-loader.ts';
|
|
21
21
|
import type { Agent } from './agent.js';
|
|
22
22
|
import type { Navigator } from './navigator.ts';
|
|
23
23
|
import { ContextLengthError, type Provider } from './provider.js';
|
|
@@ -30,7 +30,6 @@ import { extractValidContainers, formatResearchSummary, parseResearchSections }
|
|
|
30
30
|
import { ResearchResult } from './researcher/research-result.ts';
|
|
31
31
|
import { type SectionMethods, WithSections } from './researcher/sections.ts';
|
|
32
32
|
import { locatorRule as generalLocatorRuleText } from './rules.js';
|
|
33
|
-
import { RulesLoader } from '../utils/rules-loader.ts';
|
|
34
33
|
import { TaskAgent } from './task-agent.ts';
|
|
35
34
|
|
|
36
35
|
export type { Locator } from './researcher/locators.ts';
|
|
@@ -132,18 +131,15 @@ export class Researcher extends ResearcherBase implements Agent {
|
|
|
132
131
|
debugLog(`Annotated ${annotatedElements.length} interactive elements with eidx`);
|
|
133
132
|
this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot && this.provider.hasVision() });
|
|
134
133
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
Research skipped. Navigate to a valid page to continue.
|
|
146
|
-
`;
|
|
134
|
+
const condition = detectPageCondition(this.actionResult!);
|
|
135
|
+
if (condition === 'error') {
|
|
136
|
+
tag('warning').log(`Detected error page at ${state.url}`);
|
|
137
|
+
throw new ErrorPageError(state.url, this.actionResult!.title);
|
|
138
|
+
}
|
|
139
|
+
if (condition === 'loading') {
|
|
140
|
+
const settled = await this.waitUntilSettled(screenshot);
|
|
141
|
+
if (!settled) {
|
|
142
|
+
tag('warning').log(`Page at ${state.url} did not finish loading within timeout, continuing with best-effort research`);
|
|
147
143
|
}
|
|
148
144
|
}
|
|
149
145
|
|
|
@@ -350,43 +346,52 @@ export class Researcher extends ResearcherBase implements Agent {
|
|
|
350
346
|
return;
|
|
351
347
|
}
|
|
352
348
|
|
|
353
|
-
if (isEmpty) {
|
|
354
|
-
debugLog('HTML body
|
|
355
|
-
tag('step').log('Page body is empty,
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
tag('step').log('Navigating to URL...');
|
|
349
|
+
if (isEmpty && isOnCurrentState) {
|
|
350
|
+
debugLog('HTML body empty on current URL, waiting for content');
|
|
351
|
+
tag('step').log('Page body is empty, waiting for content...');
|
|
352
|
+
await this.waitUntilSettled(screenshot ?? false);
|
|
353
|
+
return;
|
|
359
354
|
}
|
|
360
355
|
|
|
356
|
+
debugLog('Not on current state, navigating to URL');
|
|
357
|
+
tag('step').log('Navigating to URL...');
|
|
358
|
+
|
|
361
359
|
await this.explorer.visit(url);
|
|
362
360
|
this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot ?? false });
|
|
363
361
|
}
|
|
364
362
|
|
|
365
|
-
private async
|
|
363
|
+
private async waitUntilSettled(screenshot: boolean): Promise<boolean> {
|
|
366
364
|
const errorPageTimeout = (this.explorer.getConfig().ai?.agents?.researcher as any)?.errorPageTimeout ?? 10;
|
|
367
365
|
if (errorPageTimeout <= 0) return false;
|
|
368
366
|
|
|
367
|
+
const page = this.explorer.playwrightHelper.page;
|
|
368
|
+
const includeScreenshot = screenshot && this.provider.hasVision();
|
|
369
|
+
|
|
369
370
|
try {
|
|
370
|
-
await
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
);
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
371
|
+
await page?.waitForLoadState('networkidle', { timeout: errorPageTimeout * 1000 });
|
|
372
|
+
} catch {}
|
|
373
|
+
|
|
374
|
+
await this.explorer.annotateElements();
|
|
375
|
+
this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
|
|
376
|
+
|
|
377
|
+
let condition = detectPageCondition(this.actionResult!);
|
|
378
|
+
if (condition === 'error') {
|
|
379
|
+
throw new ErrorPageError(this.actionResult!.url, this.actionResult!.title);
|
|
380
|
+
}
|
|
381
|
+
if (condition === 'ok') return true;
|
|
382
|
+
|
|
383
|
+
for (let i = 0; i < 3; i++) {
|
|
384
|
+
await new Promise((r) => setTimeout(r, 1000));
|
|
385
|
+
await this.explorer.annotateElements();
|
|
386
|
+
this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
|
|
387
|
+
condition = detectPageCondition(this.actionResult!);
|
|
388
|
+
if (condition === 'error') {
|
|
389
|
+
throw new ErrorPageError(this.actionResult!.url, this.actionResult!.title);
|
|
390
|
+
}
|
|
391
|
+
if (condition === 'ok') return true;
|
|
389
392
|
}
|
|
393
|
+
|
|
394
|
+
return false;
|
|
390
395
|
}
|
|
391
396
|
|
|
392
397
|
private getConfiguredSections(): Record<string, string> {
|
package/src/ai/rules.ts
CHANGED
|
@@ -167,6 +167,21 @@ export const focusedElementRule = dedent`
|
|
|
167
167
|
</focused_element_actions>
|
|
168
168
|
`;
|
|
169
169
|
|
|
170
|
+
export const unexpectedPopupRule = dedent`
|
|
171
|
+
<unexpected_popup_rule>
|
|
172
|
+
If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
|
|
173
|
+
If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
|
|
174
|
+
If a click error mentions "intercepts pointer events", another element is covering the target — dismiss it first.
|
|
175
|
+
If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
|
|
176
|
+
|
|
177
|
+
Dismiss strategy (try in order):
|
|
178
|
+
1. I.clickXY(0, 0) — click outside the popup to close it
|
|
179
|
+
2. I.pressKey('Escape') — press Escape to dismiss
|
|
180
|
+
3. I.click('Cancel') — click Cancel button if present
|
|
181
|
+
4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
|
|
182
|
+
</unexpected_popup_rule>
|
|
183
|
+
`;
|
|
184
|
+
|
|
170
185
|
export const sectionContextRule = dedent`
|
|
171
186
|
<section_context_rule>
|
|
172
187
|
Context parameter is DEFAULT for all interactions. ALWAYS use container from UI map sections unless locator is XPath or unique ID.
|
|
@@ -192,17 +207,7 @@ export const sectionContextRule = dedent`
|
|
|
192
207
|
- Locator is a unique ID (#specific-element)
|
|
193
208
|
</section_context_rule>
|
|
194
209
|
|
|
195
|
-
|
|
196
|
-
If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
|
|
197
|
-
If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
|
|
198
|
-
If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
|
|
199
|
-
|
|
200
|
-
Dismiss strategy (try in order):
|
|
201
|
-
1. I.clickXY(0, 0) — click outside the popup to close it
|
|
202
|
-
2. I.pressKey('Escape') — press Escape to dismiss
|
|
203
|
-
3. I.click('Cancel') — click Cancel button if present
|
|
204
|
-
4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
|
|
205
|
-
</unexpected_popup_rule>
|
|
210
|
+
${unexpectedPopupRule}
|
|
206
211
|
`;
|
|
207
212
|
|
|
208
213
|
export function multipleTabsRule(tabs: Array<{ url: string; title: string }>): string {
|
|
@@ -274,12 +279,19 @@ export const actionRule = dedent`
|
|
|
274
279
|
I.fillField('Username', 'John', '.login-form'); // fills Username inside .login-form
|
|
275
280
|
I.fillField('Username', 'John'); // fills the field located by name or placeholder or label "Username" with the text "John"
|
|
276
281
|
I.fillField('//user/input', 'John'); // fills the field located by XPath "//user/input" with the text "John"
|
|
277
|
-
|
|
282
|
+
I.fillField('Description', 'Hello world', '.editor'); // works for rich text / code editors too
|
|
283
|
+
</example>
|
|
284
|
+
|
|
285
|
+
I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors
|
|
286
|
+
(Monaco, ProseMirror, CodeMirror, TipTap, Quill, Draft.js, Slate, etc.) transparently.
|
|
287
|
+
ALWAYS use I.fillField for rich editors — target the editor container or its nearest label/heading with a normal locator.
|
|
288
|
+
Do NOT open the editor with raw JS (executeScript, page.evaluate), do NOT dispatch synthetic events,
|
|
289
|
+
do NOT call the editor's own API (monaco.editor.setValue, view.dispatch, etc.) to write text.
|
|
278
290
|
|
|
279
291
|
### I.type
|
|
280
292
|
|
|
281
|
-
Types text into the currently focused element. Use when fillField
|
|
282
|
-
|
|
293
|
+
Types text into the currently focused element. Use only when there is no locator you can pass to I.fillField —
|
|
294
|
+
e.g. the target is implicit (a just-opened command palette, an autocomplete that steals focus, a canvas-based surface).
|
|
283
295
|
|
|
284
296
|
I.type(<text>)
|
|
285
297
|
|
|
@@ -291,6 +303,7 @@ export const actionRule = dedent`
|
|
|
291
303
|
DOES NOT receive any locator, just text to type.
|
|
292
304
|
NEVER write: I.type('text', locator) or I.type('text', {locator: '...'}) — this is INVALID.
|
|
293
305
|
To type into a specific field: use I.fillField(locator, text) or I.click(locator) then I.type(text).
|
|
306
|
+
Do NOT reach for I.type just because the target looks like a rich editor — I.fillField handles those.
|
|
294
307
|
|
|
295
308
|
### I.pressKey
|
|
296
309
|
|
package/src/ai/task-agent.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import dedent from 'dedent';
|
|
2
2
|
import type { ActionResult } from '../action-result.js';
|
|
3
|
-
import {
|
|
3
|
+
import { type ExperienceTracker, renderExperienceToc } from '../experience-tracker.js';
|
|
4
4
|
import type { KnowledgeTracker } from '../knowledge-tracker.js';
|
|
5
5
|
import { createDebug, pluralize, tag } from '../utils/logger.js';
|
|
6
6
|
|
package/src/ai/tester.ts
CHANGED
|
@@ -8,10 +8,12 @@ import { setActivity } from '../activity.ts';
|
|
|
8
8
|
import { ConfigParser } from '../config.ts';
|
|
9
9
|
import type { ExperienceTracker } from '../experience-tracker.ts';
|
|
10
10
|
import type Explorer from '../explorer.ts';
|
|
11
|
+
import { Observability } from '../observability.ts';
|
|
11
12
|
import type { StateTransition, WebPageState } from '../state-manager.ts';
|
|
12
13
|
import { Stats } from '../stats.ts';
|
|
13
14
|
import { type Note, type Test, TestResult, type TestResultType } from '../test-plan.ts';
|
|
14
15
|
import { detectFocusArea, extractFocusedElement } from '../utils/aria.ts';
|
|
16
|
+
import { ErrorPageError } from '../utils/error-page.ts';
|
|
15
17
|
import { HooksRunner } from '../utils/hooks-runner.ts';
|
|
16
18
|
import { codeToMarkdown } from '../utils/html.ts';
|
|
17
19
|
import { createDebug, tag } from '../utils/logger.ts';
|
|
@@ -154,10 +156,39 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
154
156
|
const initialPrompt = await this.buildTestPrompt(task, initialState);
|
|
155
157
|
conversation.addUserText(initialPrompt);
|
|
156
158
|
|
|
159
|
+
return await Observability.run(
|
|
160
|
+
`test: ${task.scenario}`,
|
|
161
|
+
{
|
|
162
|
+
sessionId: task.sessionName,
|
|
163
|
+
tags: ['tester'],
|
|
164
|
+
input: {
|
|
165
|
+
scenario: task.scenario,
|
|
166
|
+
startUrl: task.startUrl,
|
|
167
|
+
expected: task.expected,
|
|
168
|
+
},
|
|
169
|
+
},
|
|
170
|
+
async () => this.runTestSession(task, initialState, conversation, { offFailedRequest, page, onPageError, onConsoleMessage })
|
|
171
|
+
);
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
private async runTestSession(task: Test, initialState: ActionResult, conversation: Conversation, handlers: { offFailedRequest?: () => void; page: any; onPageError: (err: Error) => void; onConsoleMessage: (msg: any) => void }): Promise<{ success: boolean }> {
|
|
175
|
+
const { offFailedRequest, page, onPageError, onConsoleMessage } = handlers;
|
|
176
|
+
|
|
157
177
|
if (this.pilot) {
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
178
|
+
try {
|
|
179
|
+
const plan = await this.pilot.planTest(task, initialState);
|
|
180
|
+
if (plan) {
|
|
181
|
+
conversation.addUserText(`Pilot's test plan:\n${plan}\n\nFollow this plan while executing the test.`);
|
|
182
|
+
}
|
|
183
|
+
} catch (err) {
|
|
184
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
185
|
+
tag('error').log(`Pilot planning failed: ${message}`);
|
|
186
|
+
task.addNote(`Planning failed: ${message}`, TestResult.FAILED);
|
|
187
|
+
task.finish(TestResult.FAILED);
|
|
188
|
+
offFailedRequest?.();
|
|
189
|
+
page?.off('pageerror', onPageError);
|
|
190
|
+
page?.off('console', onConsoleMessage);
|
|
191
|
+
return { success: false };
|
|
161
192
|
}
|
|
162
193
|
}
|
|
163
194
|
|
|
@@ -173,6 +204,7 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
173
204
|
await this.hooksRunner.runBeforeHook('tester', currentUrl);
|
|
174
205
|
|
|
175
206
|
const offStateChange = this.explorer.getStateManager().onStateChange((event: StateTransition) => {
|
|
207
|
+
if (task.hasFinished) return;
|
|
176
208
|
if (event.toState?.url === event.fromState?.url) return;
|
|
177
209
|
task.addNote(`Navigated to ${event.toState?.url}`, TestResult.PASSED);
|
|
178
210
|
task.states.push(event.toState);
|
|
@@ -224,6 +256,10 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
224
256
|
conversation.cleanupTag('page_aria', '...cleaned aria snapshot...', 2);
|
|
225
257
|
conversation.cleanupTag('page_html', '...cleaned HTML snapshot...', 1);
|
|
226
258
|
conversation.cleanupTag('experience', '...cleaned experience...', 1);
|
|
259
|
+
conversation.cleanupTag('applied_experience', '...cleaned past experience...', 1);
|
|
260
|
+
conversation.cleanupTag('page_ui_map', '...cleaned UI map...', 1);
|
|
261
|
+
conversation.cleanupTag('page_ui_map_overlay', '...cleaned UI overlay...', 1);
|
|
262
|
+
conversation.compactToolResults(3);
|
|
227
263
|
|
|
228
264
|
if (iteration > 1) {
|
|
229
265
|
const isNewPage = this.previousUrl !== null && this.previousUrl !== currentState.url;
|
|
@@ -245,6 +281,7 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
245
281
|
const result = await this.provider.invokeConversation(conversation, tools, {
|
|
246
282
|
maxToolRoundtrips: 5,
|
|
247
283
|
toolChoice: 'required',
|
|
284
|
+
stopWhen: () => task.hasFinished,
|
|
248
285
|
});
|
|
249
286
|
|
|
250
287
|
if (!result) throw new Error('Failed to get response from provider');
|
|
@@ -329,21 +366,11 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
329
366
|
context.setUserInput(result.message);
|
|
330
367
|
}
|
|
331
368
|
: undefined,
|
|
332
|
-
observability: {
|
|
333
|
-
name: `test: ${task.scenario}`,
|
|
334
|
-
agent: 'tester',
|
|
335
|
-
sessionId: task.sessionName,
|
|
336
|
-
metadata: {
|
|
337
|
-
input: {
|
|
338
|
-
scenario: task.scenario,
|
|
339
|
-
startUrl: task.startUrl,
|
|
340
|
-
expected: task.expected,
|
|
341
|
-
},
|
|
342
|
-
},
|
|
343
|
-
},
|
|
344
369
|
catch: async ({ error, stop }) => {
|
|
345
370
|
tag('error').log(`Test execution error: ${error}`);
|
|
346
|
-
task.
|
|
371
|
+
if (!task.hasFinished) {
|
|
372
|
+
task.addNote(`Execution error: ${error instanceof Error ? error.message : String(error)}`);
|
|
373
|
+
}
|
|
347
374
|
stop();
|
|
348
375
|
},
|
|
349
376
|
}
|
|
@@ -352,13 +379,19 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
352
379
|
if (task.hasFinished) break;
|
|
353
380
|
|
|
354
381
|
const finalState = this.getCurrentState();
|
|
355
|
-
const wantsContinue = await this.pilot!.finalReview(task, finalState, conversation);
|
|
382
|
+
const wantsContinue = await this.pilot!.finalReview(task, finalState, conversation, this.navigator);
|
|
356
383
|
|
|
357
384
|
if (!wantsContinue || task.hasFinished) break;
|
|
358
385
|
if (extensions >= this.MAX_EXTENSIONS) break;
|
|
359
386
|
|
|
360
387
|
extensions++;
|
|
361
388
|
tag('info').log(`Pilot extending test (${extensions}/${this.MAX_EXTENSIONS})`);
|
|
389
|
+
conversation.cleanupTag('page_aria', '...trimmed...', 1);
|
|
390
|
+
conversation.cleanupTag('page_html', '...trimmed...', 0);
|
|
391
|
+
conversation.cleanupTag('experience', '...trimmed...', 0);
|
|
392
|
+
conversation.cleanupTag('page_ui_map', '...trimmed...', 0);
|
|
393
|
+
conversation.cleanupTag('page_ui_map_overlay', '...trimmed...', 0);
|
|
394
|
+
conversation.compactToolResults(1);
|
|
362
395
|
shouldContinue = true;
|
|
363
396
|
}
|
|
364
397
|
|
|
@@ -464,7 +497,13 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
464
497
|
}
|
|
465
498
|
|
|
466
499
|
if (isNewUrl) {
|
|
467
|
-
|
|
500
|
+
let research = '';
|
|
501
|
+
try {
|
|
502
|
+
research = await this.researcher.research(currentState);
|
|
503
|
+
} catch (err) {
|
|
504
|
+
if (!(err instanceof ErrorPageError)) throw err;
|
|
505
|
+
tag('warning').log(`Research skipped: ${err.message}`);
|
|
506
|
+
}
|
|
468
507
|
this.pageStateHash = currentStateHash;
|
|
469
508
|
this.pageActionResult = currentState;
|
|
470
509
|
let uiMapSection = '';
|
|
@@ -646,7 +685,7 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
646
685
|
- Use finish() to complete the test, not record(). record() is for intermediate notes.
|
|
647
686
|
- Call finish(verify) when all goals are achieved — provide an assertion to verify
|
|
648
687
|
- ONLY call stop() if the scenario itself is completely irrelevant to this page and no expectations can be achieved
|
|
649
|
-
- Use reset()
|
|
688
|
+
- Use reset() ONLY as a last resort when the current page cannot host the scenario. Never reset after a successful flow just because an assertion or milestone did not match — verify differently or record() the finding instead. Reset is destructive and does not undo server-side side effects.
|
|
650
689
|
- Be precise with locators (CSS or XPath)
|
|
651
690
|
- Each click/type call returns the new page state automatically
|
|
652
691
|
- Check for success messages from tool calls to verify if expected outcomes are achieved
|
|
@@ -769,13 +808,25 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
769
808
|
return {
|
|
770
809
|
reset: tool({
|
|
771
810
|
description: dedent`
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
811
|
+
Navigate back to the start URL and discard progress in this iteration.
|
|
812
|
+
Reset is a LAST RESORT. It is destructive — any side effects already produced on the
|
|
813
|
+
server (records created, forms submitted) persist and cannot be undone by resetting.
|
|
814
|
+
|
|
815
|
+
Use reset ONLY for:
|
|
816
|
+
- navigation dead-ends where the current page cannot host the scenario
|
|
817
|
+
- irrecoverable errors that leave no actionable path forward
|
|
818
|
+
|
|
819
|
+
Do NOT use reset when:
|
|
820
|
+
- the previous action already succeeded (URL changed, record visible, confirmation shown)
|
|
821
|
+
and an assertion did not match — verify differently, record(), or finish() instead
|
|
822
|
+
- an expectation/milestone does not match app behavior but the flow worked — the work is
|
|
823
|
+
done; resetting just creates duplicates
|
|
824
|
+
- you want to "try again" after submitting a form — submitting again creates a duplicate
|
|
825
|
+
|
|
826
|
+
Pilot will review every reset and may veto it.
|
|
776
827
|
`,
|
|
777
828
|
inputSchema: z.object({
|
|
778
|
-
reason: z.string().optional().describe('Explanation why
|
|
829
|
+
reason: z.string().optional().describe('Explanation why reset is the only option'),
|
|
779
830
|
}),
|
|
780
831
|
execute: async ({ reason }) => {
|
|
781
832
|
if (this.getCurrentState().isInsideIframe) {
|
|
@@ -791,6 +842,20 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
791
842
|
};
|
|
792
843
|
}
|
|
793
844
|
|
|
845
|
+
task.resetCount += 1;
|
|
846
|
+
|
|
847
|
+
if (this.pilot) {
|
|
848
|
+
const currentStateForReview = this.getCurrentState();
|
|
849
|
+
const allowed = await this.pilot.reviewReset(task, currentStateForReview, reason ?? '', conversation);
|
|
850
|
+
if (!allowed) {
|
|
851
|
+
return {
|
|
852
|
+
success: false,
|
|
853
|
+
action: 'reset',
|
|
854
|
+
message: 'Reset rejected by Pilot; Continue execution',
|
|
855
|
+
};
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
|
|
794
859
|
const explanation = reason ? `${reason} (RESET)` : 'Resetting to initial page';
|
|
795
860
|
const targetUrl = resetUrl!;
|
|
796
861
|
task.addNote(explanation);
|
|
@@ -874,11 +939,14 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
874
939
|
verify: z.string().describe('Specific assertion to verify on the page before finishing (e.g., "New item appears in the list")'),
|
|
875
940
|
}),
|
|
876
941
|
execute: async ({ verify }) => {
|
|
942
|
+
if (task.hasFinished) {
|
|
943
|
+
return { success: true, action: 'finish', message: 'already finished' };
|
|
944
|
+
}
|
|
877
945
|
task.addNote(`Finish requested: ${verify}`);
|
|
878
946
|
|
|
879
947
|
if (this.pilot) {
|
|
880
948
|
const currentState = this.getCurrentState();
|
|
881
|
-
await this.pilot.reviewFinish(task, currentState, conversation);
|
|
949
|
+
await this.pilot.reviewFinish(task, currentState, conversation, this.navigator);
|
|
882
950
|
if (!task.hasFinished) {
|
|
883
951
|
return {
|
|
884
952
|
success: false,
|
|
@@ -953,7 +1021,7 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
953
1021
|
if (input.status !== null && task.isComplete()) {
|
|
954
1022
|
if (this.pilot) {
|
|
955
1023
|
const currentState = this.getCurrentState();
|
|
956
|
-
await this.pilot.reviewCompletion(task, currentState, conversation);
|
|
1024
|
+
await this.pilot.reviewCompletion(task, currentState, conversation, this.navigator);
|
|
957
1025
|
} else {
|
|
958
1026
|
const hasPassed = task.hasAchievedAny();
|
|
959
1027
|
task.finish(hasPassed ? TestResult.PASSED : TestResult.FAILED);
|
package/src/ai/tools.ts
CHANGED
|
@@ -10,9 +10,9 @@ import { createDebug, tag } from '../utils/logger.js';
|
|
|
10
10
|
import { pause } from '../utils/loop.js';
|
|
11
11
|
import { WebElement } from '../utils/web-element.ts';
|
|
12
12
|
import { Navigator } from './navigator.ts';
|
|
13
|
+
import type { AIProvider } from './provider.ts';
|
|
13
14
|
import { Researcher } from './researcher.ts';
|
|
14
15
|
import { sectionContextRule } from './rules.ts';
|
|
15
|
-
import type { AIProvider } from './provider.ts';
|
|
16
16
|
import { isInteractive } from './task-agent.ts';
|
|
17
17
|
|
|
18
18
|
const debugLog = createDebug('explorbot:tools');
|
|
@@ -100,7 +100,7 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
|
|
|
100
100
|
activeNote.screenshot = await action.saveScreenshot();
|
|
101
101
|
}
|
|
102
102
|
activeNote.commit(TestResult.PASSED);
|
|
103
|
-
return successToolResult('click', { ...toolResult, attempts, code: command });
|
|
103
|
+
return successToolResult('click', { ...toolResult, attempts, code: command }, action);
|
|
104
104
|
}
|
|
105
105
|
}
|
|
106
106
|
|
|
@@ -128,7 +128,7 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
|
|
|
128
128
|
activeNote.screenshot = await action.saveScreenshot();
|
|
129
129
|
}
|
|
130
130
|
activeNote.commit(TestResult.PASSED);
|
|
131
|
-
return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true });
|
|
131
|
+
return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true }, action);
|
|
132
132
|
}
|
|
133
133
|
}
|
|
134
134
|
|
|
@@ -208,12 +208,16 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
|
|
|
208
208
|
activeNote.screenshot = await action.saveScreenshot();
|
|
209
209
|
}
|
|
210
210
|
activeNote.commit(TestResult.PASSED);
|
|
211
|
-
return successToolResult(
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
211
|
+
return successToolResult(
|
|
212
|
+
'pressKey',
|
|
213
|
+
{
|
|
214
|
+
...toolResult,
|
|
215
|
+
message: `Automatically used type() for "${key}" (not a standard key press)`,
|
|
216
|
+
code: typeCommand,
|
|
217
|
+
fallback: true,
|
|
218
|
+
},
|
|
219
|
+
action
|
|
220
|
+
);
|
|
217
221
|
}
|
|
218
222
|
|
|
219
223
|
const errorMsg = `pressKey fallback to type() failed: ${action.lastError?.toString()}`;
|
|
@@ -261,11 +265,15 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
|
|
|
261
265
|
activeNote.screenshot = await action.saveScreenshot();
|
|
262
266
|
}
|
|
263
267
|
activeNote.commit(TestResult.PASSED);
|
|
264
|
-
return successToolResult(
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
268
|
+
return successToolResult(
|
|
269
|
+
'pressKey',
|
|
270
|
+
{
|
|
271
|
+
...toolResult,
|
|
272
|
+
message: `Pressed key: ${key}${modifier ? ` with modifier(s): ${Array.isArray(modifier) ? modifier.join('+') : modifier}` : ''}`,
|
|
273
|
+
code: pressKeyCommand,
|
|
274
|
+
},
|
|
275
|
+
action
|
|
276
|
+
);
|
|
269
277
|
}
|
|
270
278
|
|
|
271
279
|
const errorMsg = `pressKey() failed: ${action.lastError?.toString()}`;
|
|
@@ -289,14 +297,16 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
|
|
|
289
297
|
form: tool({
|
|
290
298
|
description: dedent`
|
|
291
299
|
Execute raw CodeceptJS code block with multiple commands.
|
|
292
|
-
USE THIS TOOL for
|
|
300
|
+
USE THIS TOOL for typing text into fields: I.fillField, I.type
|
|
293
301
|
|
|
294
302
|
Follow <actions> from system prompt for available commands.
|
|
295
303
|
Follow <locator_priority> from system prompt for locator selection.
|
|
296
304
|
|
|
305
|
+
I.type(text) types the literal characters of its argument into the focused element.
|
|
306
|
+
To press key combination or special keys (Ctrl, Meta, Esc) use I.pressKey instead.
|
|
307
|
+
|
|
297
308
|
Use cases:
|
|
298
309
|
- Typing into input fields (I.fillField, I.type)
|
|
299
|
-
- Pressing keyboard keys (I.pressKey)
|
|
300
310
|
- Working with iframes (switch context with I.switchTo)
|
|
301
311
|
- Performing multiple form actions in a single batch
|
|
302
312
|
- Complex interactions requiring sequential commands
|
|
@@ -381,13 +391,17 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
|
|
|
381
391
|
activeNote.screenshot = await action.saveScreenshot();
|
|
382
392
|
}
|
|
383
393
|
activeNote.commit(TestResult.PASSED);
|
|
384
|
-
return successToolResult(
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
394
|
+
return successToolResult(
|
|
395
|
+
'form',
|
|
396
|
+
{
|
|
397
|
+
...toolResult,
|
|
398
|
+
message: `Form completed successfully with ${lines.length} commands.`,
|
|
399
|
+
commandsExecuted: lines.length,
|
|
400
|
+
code: codeBlock,
|
|
401
|
+
suggestion: 'Verify the form was filled in correctly using see() tool. If needed to submit: try click() tool or form() with I.pressKey("Enter").',
|
|
402
|
+
},
|
|
403
|
+
action
|
|
404
|
+
);
|
|
391
405
|
} catch (error) {
|
|
392
406
|
activeNote.commit(TestResult.FAILED);
|
|
393
407
|
const errorMessage = error instanceof Error ? error.toString() : 'Unknown error occurred';
|
|
@@ -587,10 +601,14 @@ export function createAgentTools({
|
|
|
587
601
|
const result = await navigator.verifyState(assertion, actionResult);
|
|
588
602
|
|
|
589
603
|
if (result.verified) {
|
|
590
|
-
return successToolResult(
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
604
|
+
return successToolResult(
|
|
605
|
+
'verify',
|
|
606
|
+
{
|
|
607
|
+
message: `Verification passed: ${assertion}`,
|
|
608
|
+
code: result.successfulCodes.join('\n'),
|
|
609
|
+
},
|
|
610
|
+
{ assertionSteps: result.assertionSteps }
|
|
611
|
+
);
|
|
594
612
|
}
|
|
595
613
|
|
|
596
614
|
return failedToolResult('verify', `Verification failed: ${assertion}`, {
|
|
@@ -957,7 +975,7 @@ export function createAgentTools({
|
|
|
957
975
|
tools.learn_experience = tool({
|
|
958
976
|
description: dedent`
|
|
959
977
|
Read the full body of a specific experience section listed in <experience>.
|
|
960
|
-
The TOC shows entries like "A.1 ##
|
|
978
|
+
The TOC shows entries like "A.1 ## FLOW: ..." or "A.2 ## ACTION: ...". Pass the fileTag and sectionIndex.
|
|
961
979
|
Only call when a TOC entry looks directly relevant to the current step.
|
|
962
980
|
`,
|
|
963
981
|
inputSchema: z.object({
|
|
@@ -1015,8 +1033,14 @@ function countAriaChanges(ariaChanges: string): number {
|
|
|
1015
1033
|
return addedCount + removedCount;
|
|
1016
1034
|
}
|
|
1017
1035
|
|
|
1018
|
-
function successToolResult(action: string, data?: Record<string, any
|
|
1036
|
+
function successToolResult(action: string, data?: Record<string, any>, source?: { playwrightGroupId?: string | null; assertionSteps?: any[] }) {
|
|
1019
1037
|
const result: Record<string, any> = { success: true, action, ...data };
|
|
1038
|
+
if (source?.playwrightGroupId) {
|
|
1039
|
+
result.playwrightGroupId = source.playwrightGroupId;
|
|
1040
|
+
}
|
|
1041
|
+
if (source?.assertionSteps?.length) {
|
|
1042
|
+
result.assertionSteps = source.assertionSteps;
|
|
1043
|
+
}
|
|
1020
1044
|
if (data?.pageDiff) {
|
|
1021
1045
|
let suggestion = PAGE_DIFF_SUGGESTION;
|
|
1022
1046
|
const ariaChanges = data.pageDiff.ariaChanges || '';
|