explorbot 0.1.10 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -1
- package/bin/explorbot-cli.ts +27 -18
- package/dist/bin/explorbot-cli.js +26 -18
- package/dist/package.json +3 -3
- package/dist/rules/navigator/output.md +9 -0
- package/dist/rules/navigator/verification-actions.md +2 -0
- package/dist/src/action-result.js +23 -1
- package/dist/src/action.js +51 -42
- package/dist/src/ai/bosun.js +11 -1
- package/dist/src/ai/conversation.js +39 -0
- package/dist/src/ai/historian/codeceptjs.js +109 -0
- package/dist/src/ai/historian/experience.js +321 -0
- package/dist/src/ai/historian/mixin.js +2 -0
- package/dist/src/ai/historian/playwright.js +145 -0
- package/dist/src/ai/historian/screencast.js +121 -0
- package/dist/src/ai/historian/utils.js +18 -0
- package/dist/src/ai/historian.js +21 -405
- package/dist/src/ai/navigator.js +82 -29
- package/dist/src/ai/pilot.js +232 -13
- package/dist/src/ai/planner.js +29 -9
- package/dist/src/ai/provider.js +54 -17
- package/dist/src/ai/researcher.js +41 -32
- package/dist/src/ai/rules.js +26 -14
- package/dist/src/ai/tester.js +90 -26
- package/dist/src/ai/tools.js +13 -7
- package/dist/src/browser-server.js +16 -3
- package/dist/src/commands/add-rule-command.js +11 -8
- package/dist/src/commands/clean-command.js +2 -1
- package/dist/src/commands/explore-command.js +43 -15
- package/dist/src/commands/init-command.js +9 -8
- package/dist/src/commands/plan-command.js +32 -0
- package/dist/src/commands/plan-save-command.js +19 -7
- package/dist/src/commands/rerun-command.js +4 -0
- package/dist/src/components/App.js +15 -5
- package/dist/src/execution-controller.js +13 -2
- package/dist/src/experience-tracker.js +20 -64
- package/dist/src/explorbot.js +8 -8
- package/dist/src/explorer.js +11 -3
- package/dist/src/observability.js +50 -99
- package/dist/src/playwright-recorder.js +309 -0
- package/dist/src/reporter.js +4 -1
- package/dist/src/test-plan.js +12 -0
- package/dist/src/utils/aria.js +37 -1
- package/dist/src/utils/error-page.js +20 -7
- package/dist/src/utils/next-steps.js +37 -0
- package/dist/src/utils/strings.js +15 -0
- package/package.json +3 -3
- package/rules/navigator/output.md +9 -0
- package/rules/navigator/verification-actions.md +2 -0
- package/src/action-result.ts +26 -1
- package/src/action.ts +49 -41
- package/src/ai/bosun.ts +11 -1
- package/src/ai/conversation.ts +37 -0
- package/src/ai/historian/codeceptjs.ts +130 -0
- package/src/ai/historian/experience.ts +384 -0
- package/src/ai/historian/mixin.ts +4 -0
- package/src/ai/historian/playwright.ts +169 -0
- package/src/ai/historian/screencast.ts +133 -0
- package/src/ai/historian/utils.ts +23 -0
- package/src/ai/historian.ts +37 -473
- package/src/ai/navigator.ts +82 -29
- package/src/ai/pilot.ts +237 -14
- package/src/ai/planner.ts +29 -9
- package/src/ai/provider.ts +51 -17
- package/src/ai/researcher.ts +45 -33
- package/src/ai/rules.ts +27 -14
- package/src/ai/tester.ts +94 -26
- package/src/ai/tools.ts +47 -25
- package/src/browser-server.ts +17 -3
- package/src/commands/add-rule-command.ts +11 -7
- package/src/commands/clean-command.ts +2 -1
- package/src/commands/explore-command.ts +46 -14
- package/src/commands/init-command.ts +9 -8
- package/src/commands/plan-command.ts +35 -0
- package/src/commands/plan-save-command.ts +18 -7
- package/src/commands/rerun-command.ts +5 -0
- package/src/components/App.tsx +16 -5
- package/src/config.ts +12 -1
- package/src/execution-controller.ts +14 -3
- package/src/experience-tracker.ts +21 -72
- package/src/explorbot.ts +8 -8
- package/src/explorer.ts +13 -3
- package/src/observability.ts +50 -109
- package/src/playwright-recorder.ts +305 -0
- package/src/reporter.ts +4 -1
- package/src/test-plan.ts +12 -0
- package/src/utils/aria.ts +38 -1
- package/src/utils/error-page.ts +22 -7
- package/src/utils/next-steps.ts +51 -0
- package/src/utils/strings.ts +17 -0
package/src/ai/provider.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { LangfuseSpanProcessor } from '@langfuse/otel';
|
|
2
2
|
import { NodeSDK } from '@opentelemetry/sdk-node';
|
|
3
|
-
import { generateObject, generateText } from 'ai';
|
|
3
|
+
import { generateObject, generateText, stepCountIs } from 'ai';
|
|
4
4
|
import type { ModelMessage } from 'ai';
|
|
5
5
|
import { clearActivity, setActivity } from '../activity.ts';
|
|
6
6
|
import type { AIConfig } from '../config.js';
|
|
@@ -19,6 +19,20 @@ const responseLog = createDebug('explorbot:provider:in');
|
|
|
19
19
|
class AiError extends Error {}
|
|
20
20
|
export class ContextLengthError extends Error {}
|
|
21
21
|
|
|
22
|
+
function rejectAfterIdle(ms: number, signal: { cancelled: boolean }): Promise<never> {
|
|
23
|
+
return new Promise((_, reject) => {
|
|
24
|
+
const tick = () => {
|
|
25
|
+
if (signal.cancelled) return;
|
|
26
|
+
if (executionController.isAwaitingInput()) {
|
|
27
|
+
setTimeout(tick, ms);
|
|
28
|
+
return;
|
|
29
|
+
}
|
|
30
|
+
reject(new Error('AI request timeout'));
|
|
31
|
+
};
|
|
32
|
+
setTimeout(tick, ms);
|
|
33
|
+
});
|
|
34
|
+
}
|
|
35
|
+
|
|
22
36
|
export class Provider {
|
|
23
37
|
private config: AIConfig;
|
|
24
38
|
private telemetryEnabled = false;
|
|
@@ -286,14 +300,19 @@ export class Provider {
|
|
|
286
300
|
promptLog(messages[messages.length - 1].content);
|
|
287
301
|
|
|
288
302
|
const telemetry = this.getTelemetry(options);
|
|
303
|
+
const maxRoundtrips = options.maxToolRoundtrips ?? 5;
|
|
304
|
+
const extraStop = options.stopWhen;
|
|
305
|
+
const stopConditions: any[] = [stepCountIs(maxRoundtrips)];
|
|
306
|
+
if (extraStop) stopConditions.push(extraStop);
|
|
307
|
+
const { stopWhen: _ignoredStopWhen, ...optionsWithoutStop } = options;
|
|
289
308
|
const config = this.mergeProviderOptions(
|
|
290
309
|
{
|
|
291
310
|
tools,
|
|
292
311
|
maxTokens: 16384,
|
|
293
|
-
maxToolRoundtrips: options.maxToolRoundtrips ?? 5,
|
|
294
312
|
toolChoice: 'auto',
|
|
295
313
|
...(this.config.config || {}),
|
|
296
|
-
...
|
|
314
|
+
...optionsWithoutStop,
|
|
315
|
+
stopWhen: stopConditions,
|
|
297
316
|
...(telemetry ? { experimental_telemetry: telemetry } : {}),
|
|
298
317
|
model,
|
|
299
318
|
abortSignal: executionController.getAbortSignal(),
|
|
@@ -303,13 +322,23 @@ export class Provider {
|
|
|
303
322
|
try {
|
|
304
323
|
const response = await withRetry(async () => {
|
|
305
324
|
const timeout = config.timeout || 30000;
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
325
|
+
const cancel = { cancelled: false };
|
|
326
|
+
try {
|
|
327
|
+
const result = (await Promise.race([
|
|
328
|
+
generateText({
|
|
329
|
+
messages,
|
|
330
|
+
...config,
|
|
331
|
+
}),
|
|
332
|
+
rejectAfterIdle(timeout, cancel),
|
|
333
|
+
])) as any;
|
|
334
|
+
const hasToolCall = (result.toolCalls?.length || 0) > 0;
|
|
335
|
+
if (!result.text && !hasToolCall && result.finishReason === 'length') {
|
|
336
|
+
throw new ContextLengthError('AI response empty: output truncated at maxTokens. Increase maxTokens in config or use a model with higher output capacity.');
|
|
337
|
+
}
|
|
338
|
+
return result;
|
|
339
|
+
} finally {
|
|
340
|
+
cancel.cancelled = true;
|
|
341
|
+
}
|
|
313
342
|
}, this.getRetryOptions(options));
|
|
314
343
|
|
|
315
344
|
clearActivity();
|
|
@@ -380,13 +409,18 @@ export class Provider {
|
|
|
380
409
|
promptLog(messages[messages.length - 1].content);
|
|
381
410
|
const response = await withRetry(async () => {
|
|
382
411
|
const timeout = config.timeout || 30000;
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
412
|
+
const cancel = { cancelled: false };
|
|
413
|
+
try {
|
|
414
|
+
return (await Promise.race([
|
|
415
|
+
generateObject({
|
|
416
|
+
messages,
|
|
417
|
+
...config,
|
|
418
|
+
}),
|
|
419
|
+
rejectAfterIdle(timeout, cancel),
|
|
420
|
+
])) as any;
|
|
421
|
+
} finally {
|
|
422
|
+
cancel.cancelled = true;
|
|
423
|
+
}
|
|
390
424
|
}, this.getRetryOptions(options));
|
|
391
425
|
|
|
392
426
|
clearActivity();
|
package/src/ai/researcher.ts
CHANGED
|
@@ -12,12 +12,11 @@ import type { StateManager } from '../state-manager.js';
|
|
|
12
12
|
import { WebPageState } from '../state-manager.js';
|
|
13
13
|
import { Stats } from '../stats.ts';
|
|
14
14
|
import { diffAriaSnapshots } from '../utils/aria.ts';
|
|
15
|
-
import { ErrorPageError,
|
|
15
|
+
import { ErrorPageError, detectPageCondition } from '../utils/error-page.ts';
|
|
16
16
|
import { HooksRunner } from '../utils/hooks-runner.ts';
|
|
17
17
|
import { isBodyEmpty } from '../utils/html.ts';
|
|
18
18
|
import { createDebug, pluralize, tag } from '../utils/logger.js';
|
|
19
19
|
import { mdq } from '../utils/markdown-query.ts';
|
|
20
|
-
import { withRetry } from '../utils/retry.ts';
|
|
21
20
|
import { RulesLoader } from '../utils/rules-loader.ts';
|
|
22
21
|
import type { Agent } from './agent.js';
|
|
23
22
|
import type { Navigator } from './navigator.ts';
|
|
@@ -132,11 +131,15 @@ export class Researcher extends ResearcherBase implements Agent {
|
|
|
132
131
|
debugLog(`Annotated ${annotatedElements.length} interactive elements with eidx`);
|
|
133
132
|
this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot && this.provider.hasVision() });
|
|
134
133
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
134
|
+
const condition = detectPageCondition(this.actionResult!);
|
|
135
|
+
if (condition === 'error') {
|
|
136
|
+
tag('warning').log(`Detected error page at ${state.url}`);
|
|
137
|
+
throw new ErrorPageError(state.url, this.actionResult!.title);
|
|
138
|
+
}
|
|
139
|
+
if (condition === 'loading') {
|
|
140
|
+
const settled = await this.waitUntilSettled(screenshot);
|
|
141
|
+
if (!settled) {
|
|
142
|
+
tag('warning').log(`Page at ${state.url} did not finish loading within timeout, continuing with best-effort research`);
|
|
140
143
|
}
|
|
141
144
|
}
|
|
142
145
|
|
|
@@ -343,43 +346,52 @@ export class Researcher extends ResearcherBase implements Agent {
|
|
|
343
346
|
return;
|
|
344
347
|
}
|
|
345
348
|
|
|
346
|
-
if (isEmpty) {
|
|
347
|
-
debugLog('HTML body
|
|
348
|
-
tag('step').log('Page body is empty,
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
tag('step').log('Navigating to URL...');
|
|
349
|
+
if (isEmpty && isOnCurrentState) {
|
|
350
|
+
debugLog('HTML body empty on current URL, waiting for content');
|
|
351
|
+
tag('step').log('Page body is empty, waiting for content...');
|
|
352
|
+
await this.waitUntilSettled(screenshot ?? false);
|
|
353
|
+
return;
|
|
352
354
|
}
|
|
353
355
|
|
|
356
|
+
debugLog('Not on current state, navigating to URL');
|
|
357
|
+
tag('step').log('Navigating to URL...');
|
|
358
|
+
|
|
354
359
|
await this.explorer.visit(url);
|
|
355
360
|
this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot ?? false });
|
|
356
361
|
}
|
|
357
362
|
|
|
358
|
-
private async
|
|
363
|
+
private async waitUntilSettled(screenshot: boolean): Promise<boolean> {
|
|
359
364
|
const errorPageTimeout = (this.explorer.getConfig().ai?.agents?.researcher as any)?.errorPageTimeout ?? 10;
|
|
360
365
|
if (errorPageTimeout <= 0) return false;
|
|
361
366
|
|
|
367
|
+
const page = this.explorer.playwrightHelper.page;
|
|
368
|
+
const includeScreenshot = screenshot && this.provider.hasVision();
|
|
369
|
+
|
|
362
370
|
try {
|
|
363
|
-
await
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
maxAttempts: Math.ceil(errorPageTimeout / 3) + 1,
|
|
373
|
-
baseDelay: 1000,
|
|
374
|
-
maxDelay: 5000,
|
|
375
|
-
backoffMultiplier: 2,
|
|
376
|
-
retryCondition: (e) => e.message === 'Error page detected',
|
|
377
|
-
}
|
|
378
|
-
);
|
|
379
|
-
return true;
|
|
380
|
-
} catch {
|
|
381
|
-
return false;
|
|
371
|
+
await page?.waitForLoadState('networkidle', { timeout: errorPageTimeout * 1000 });
|
|
372
|
+
} catch {}
|
|
373
|
+
|
|
374
|
+
await this.explorer.annotateElements();
|
|
375
|
+
this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
|
|
376
|
+
|
|
377
|
+
let condition = detectPageCondition(this.actionResult!);
|
|
378
|
+
if (condition === 'error') {
|
|
379
|
+
throw new ErrorPageError(this.actionResult!.url, this.actionResult!.title);
|
|
382
380
|
}
|
|
381
|
+
if (condition === 'ok') return true;
|
|
382
|
+
|
|
383
|
+
for (let i = 0; i < 3; i++) {
|
|
384
|
+
await new Promise((r) => setTimeout(r, 1000));
|
|
385
|
+
await this.explorer.annotateElements();
|
|
386
|
+
this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
|
|
387
|
+
condition = detectPageCondition(this.actionResult!);
|
|
388
|
+
if (condition === 'error') {
|
|
389
|
+
throw new ErrorPageError(this.actionResult!.url, this.actionResult!.title);
|
|
390
|
+
}
|
|
391
|
+
if (condition === 'ok') return true;
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
return false;
|
|
383
395
|
}
|
|
384
396
|
|
|
385
397
|
private getConfiguredSections(): Record<string, string> {
|
package/src/ai/rules.ts
CHANGED
|
@@ -167,6 +167,21 @@ export const focusedElementRule = dedent`
|
|
|
167
167
|
</focused_element_actions>
|
|
168
168
|
`;
|
|
169
169
|
|
|
170
|
+
export const unexpectedPopupRule = dedent`
|
|
171
|
+
<unexpected_popup_rule>
|
|
172
|
+
If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
|
|
173
|
+
If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
|
|
174
|
+
If a click error mentions "intercepts pointer events", another element is covering the target — dismiss it first.
|
|
175
|
+
If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
|
|
176
|
+
|
|
177
|
+
Dismiss strategy (try in order):
|
|
178
|
+
1. I.clickXY(0, 0) — click outside the popup to close it
|
|
179
|
+
2. I.pressKey('Escape') — press Escape to dismiss
|
|
180
|
+
3. I.click('Cancel') — click Cancel button if present
|
|
181
|
+
4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
|
|
182
|
+
</unexpected_popup_rule>
|
|
183
|
+
`;
|
|
184
|
+
|
|
170
185
|
export const sectionContextRule = dedent`
|
|
171
186
|
<section_context_rule>
|
|
172
187
|
Context parameter is DEFAULT for all interactions. ALWAYS use container from UI map sections unless locator is XPath or unique ID.
|
|
@@ -192,17 +207,7 @@ export const sectionContextRule = dedent`
|
|
|
192
207
|
- Locator is a unique ID (#specific-element)
|
|
193
208
|
</section_context_rule>
|
|
194
209
|
|
|
195
|
-
|
|
196
|
-
If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
|
|
197
|
-
If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
|
|
198
|
-
If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
|
|
199
|
-
|
|
200
|
-
Dismiss strategy (try in order):
|
|
201
|
-
1. I.clickXY(0, 0) — click outside the popup to close it
|
|
202
|
-
2. I.pressKey('Escape') — press Escape to dismiss
|
|
203
|
-
3. I.click('Cancel') — click Cancel button if present
|
|
204
|
-
4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
|
|
205
|
-
</unexpected_popup_rule>
|
|
210
|
+
${unexpectedPopupRule}
|
|
206
211
|
`;
|
|
207
212
|
|
|
208
213
|
export function multipleTabsRule(tabs: Array<{ url: string; title: string }>): string {
|
|
@@ -274,12 +279,19 @@ export const actionRule = dedent`
|
|
|
274
279
|
I.fillField('Username', 'John', '.login-form'); // fills Username inside .login-form
|
|
275
280
|
I.fillField('Username', 'John'); // fills the field located by name or placeholder or label "Username" with the text "John"
|
|
276
281
|
I.fillField('//user/input', 'John'); // fills the field located by XPath "//user/input" with the text "John"
|
|
277
|
-
|
|
282
|
+
I.fillField('Description', 'Hello world', '.editor'); // works for rich text / code editors too
|
|
283
|
+
</example>
|
|
284
|
+
|
|
285
|
+
I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors
|
|
286
|
+
(Monaco, ProseMirror, CodeMirror, TipTap, Quill, Draft.js, Slate, etc.) transparently.
|
|
287
|
+
ALWAYS use I.fillField for rich editors — target the editor container or its nearest label/heading with a normal locator.
|
|
288
|
+
Do NOT open the editor with raw JS (executeScript, page.evaluate), do NOT dispatch synthetic events,
|
|
289
|
+
do NOT call the editor's own API (monaco.editor.setValue, view.dispatch, etc.) to write text.
|
|
278
290
|
|
|
279
291
|
### I.type
|
|
280
292
|
|
|
281
|
-
Types text into the currently focused element. Use when fillField
|
|
282
|
-
|
|
293
|
+
Types text into the currently focused element. Use only when there is no locator you can pass to I.fillField —
|
|
294
|
+
e.g. the target is implicit (a just-opened command palette, an autocomplete that steals focus, a canvas-based surface).
|
|
283
295
|
|
|
284
296
|
I.type(<text>)
|
|
285
297
|
|
|
@@ -291,6 +303,7 @@ export const actionRule = dedent`
|
|
|
291
303
|
DOES NOT receive any locator, just text to type.
|
|
292
304
|
NEVER write: I.type('text', locator) or I.type('text', {locator: '...'}) — this is INVALID.
|
|
293
305
|
To type into a specific field: use I.fillField(locator, text) or I.click(locator) then I.type(text).
|
|
306
|
+
Do NOT reach for I.type just because the target looks like a rich editor — I.fillField handles those.
|
|
294
307
|
|
|
295
308
|
### I.pressKey
|
|
296
309
|
|
package/src/ai/tester.ts
CHANGED
|
@@ -8,10 +8,12 @@ import { setActivity } from '../activity.ts';
|
|
|
8
8
|
import { ConfigParser } from '../config.ts';
|
|
9
9
|
import type { ExperienceTracker } from '../experience-tracker.ts';
|
|
10
10
|
import type Explorer from '../explorer.ts';
|
|
11
|
+
import { Observability } from '../observability.ts';
|
|
11
12
|
import type { StateTransition, WebPageState } from '../state-manager.ts';
|
|
12
13
|
import { Stats } from '../stats.ts';
|
|
13
14
|
import { type Note, type Test, TestResult, type TestResultType } from '../test-plan.ts';
|
|
14
15
|
import { detectFocusArea, extractFocusedElement } from '../utils/aria.ts';
|
|
16
|
+
import { ErrorPageError } from '../utils/error-page.ts';
|
|
15
17
|
import { HooksRunner } from '../utils/hooks-runner.ts';
|
|
16
18
|
import { codeToMarkdown } from '../utils/html.ts';
|
|
17
19
|
import { createDebug, tag } from '../utils/logger.ts';
|
|
@@ -154,10 +156,39 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
154
156
|
const initialPrompt = await this.buildTestPrompt(task, initialState);
|
|
155
157
|
conversation.addUserText(initialPrompt);
|
|
156
158
|
|
|
159
|
+
return await Observability.run(
|
|
160
|
+
`test: ${task.scenario}`,
|
|
161
|
+
{
|
|
162
|
+
sessionId: task.sessionName,
|
|
163
|
+
tags: ['tester'],
|
|
164
|
+
input: {
|
|
165
|
+
scenario: task.scenario,
|
|
166
|
+
startUrl: task.startUrl,
|
|
167
|
+
expected: task.expected,
|
|
168
|
+
},
|
|
169
|
+
},
|
|
170
|
+
async () => this.runTestSession(task, initialState, conversation, { offFailedRequest, page, onPageError, onConsoleMessage })
|
|
171
|
+
);
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
private async runTestSession(task: Test, initialState: ActionResult, conversation: Conversation, handlers: { offFailedRequest?: () => void; page: any; onPageError: (err: Error) => void; onConsoleMessage: (msg: any) => void }): Promise<{ success: boolean }> {
|
|
175
|
+
const { offFailedRequest, page, onPageError, onConsoleMessage } = handlers;
|
|
176
|
+
|
|
157
177
|
if (this.pilot) {
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
178
|
+
try {
|
|
179
|
+
const plan = await this.pilot.planTest(task, initialState);
|
|
180
|
+
if (plan) {
|
|
181
|
+
conversation.addUserText(`Pilot's test plan:\n${plan}\n\nFollow this plan while executing the test.`);
|
|
182
|
+
}
|
|
183
|
+
} catch (err) {
|
|
184
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
185
|
+
tag('error').log(`Pilot planning failed: ${message}`);
|
|
186
|
+
task.addNote(`Planning failed: ${message}`, TestResult.FAILED);
|
|
187
|
+
task.finish(TestResult.FAILED);
|
|
188
|
+
offFailedRequest?.();
|
|
189
|
+
page?.off('pageerror', onPageError);
|
|
190
|
+
page?.off('console', onConsoleMessage);
|
|
191
|
+
return { success: false };
|
|
161
192
|
}
|
|
162
193
|
}
|
|
163
194
|
|
|
@@ -173,6 +204,7 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
173
204
|
await this.hooksRunner.runBeforeHook('tester', currentUrl);
|
|
174
205
|
|
|
175
206
|
const offStateChange = this.explorer.getStateManager().onStateChange((event: StateTransition) => {
|
|
207
|
+
if (task.hasFinished) return;
|
|
176
208
|
if (event.toState?.url === event.fromState?.url) return;
|
|
177
209
|
task.addNote(`Navigated to ${event.toState?.url}`, TestResult.PASSED);
|
|
178
210
|
task.states.push(event.toState);
|
|
@@ -224,6 +256,10 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
224
256
|
conversation.cleanupTag('page_aria', '...cleaned aria snapshot...', 2);
|
|
225
257
|
conversation.cleanupTag('page_html', '...cleaned HTML snapshot...', 1);
|
|
226
258
|
conversation.cleanupTag('experience', '...cleaned experience...', 1);
|
|
259
|
+
conversation.cleanupTag('applied_experience', '...cleaned past experience...', 1);
|
|
260
|
+
conversation.cleanupTag('page_ui_map', '...cleaned UI map...', 1);
|
|
261
|
+
conversation.cleanupTag('page_ui_map_overlay', '...cleaned UI overlay...', 1);
|
|
262
|
+
conversation.compactToolResults(3);
|
|
227
263
|
|
|
228
264
|
if (iteration > 1) {
|
|
229
265
|
const isNewPage = this.previousUrl !== null && this.previousUrl !== currentState.url;
|
|
@@ -245,6 +281,7 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
245
281
|
const result = await this.provider.invokeConversation(conversation, tools, {
|
|
246
282
|
maxToolRoundtrips: 5,
|
|
247
283
|
toolChoice: 'required',
|
|
284
|
+
stopWhen: () => task.hasFinished,
|
|
248
285
|
});
|
|
249
286
|
|
|
250
287
|
if (!result) throw new Error('Failed to get response from provider');
|
|
@@ -329,21 +366,11 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
329
366
|
context.setUserInput(result.message);
|
|
330
367
|
}
|
|
331
368
|
: undefined,
|
|
332
|
-
observability: {
|
|
333
|
-
name: `test: ${task.scenario}`,
|
|
334
|
-
agent: 'tester',
|
|
335
|
-
sessionId: task.sessionName,
|
|
336
|
-
metadata: {
|
|
337
|
-
input: {
|
|
338
|
-
scenario: task.scenario,
|
|
339
|
-
startUrl: task.startUrl,
|
|
340
|
-
expected: task.expected,
|
|
341
|
-
},
|
|
342
|
-
},
|
|
343
|
-
},
|
|
344
369
|
catch: async ({ error, stop }) => {
|
|
345
370
|
tag('error').log(`Test execution error: ${error}`);
|
|
346
|
-
task.
|
|
371
|
+
if (!task.hasFinished) {
|
|
372
|
+
task.addNote(`Execution error: ${error instanceof Error ? error.message : String(error)}`);
|
|
373
|
+
}
|
|
347
374
|
stop();
|
|
348
375
|
},
|
|
349
376
|
}
|
|
@@ -352,13 +379,19 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
352
379
|
if (task.hasFinished) break;
|
|
353
380
|
|
|
354
381
|
const finalState = this.getCurrentState();
|
|
355
|
-
const wantsContinue = await this.pilot!.finalReview(task, finalState, conversation);
|
|
382
|
+
const wantsContinue = await this.pilot!.finalReview(task, finalState, conversation, this.navigator);
|
|
356
383
|
|
|
357
384
|
if (!wantsContinue || task.hasFinished) break;
|
|
358
385
|
if (extensions >= this.MAX_EXTENSIONS) break;
|
|
359
386
|
|
|
360
387
|
extensions++;
|
|
361
388
|
tag('info').log(`Pilot extending test (${extensions}/${this.MAX_EXTENSIONS})`);
|
|
389
|
+
conversation.cleanupTag('page_aria', '...trimmed...', 1);
|
|
390
|
+
conversation.cleanupTag('page_html', '...trimmed...', 0);
|
|
391
|
+
conversation.cleanupTag('experience', '...trimmed...', 0);
|
|
392
|
+
conversation.cleanupTag('page_ui_map', '...trimmed...', 0);
|
|
393
|
+
conversation.cleanupTag('page_ui_map_overlay', '...trimmed...', 0);
|
|
394
|
+
conversation.compactToolResults(1);
|
|
362
395
|
shouldContinue = true;
|
|
363
396
|
}
|
|
364
397
|
|
|
@@ -464,7 +497,13 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
464
497
|
}
|
|
465
498
|
|
|
466
499
|
if (isNewUrl) {
|
|
467
|
-
|
|
500
|
+
let research = '';
|
|
501
|
+
try {
|
|
502
|
+
research = await this.researcher.research(currentState);
|
|
503
|
+
} catch (err) {
|
|
504
|
+
if (!(err instanceof ErrorPageError)) throw err;
|
|
505
|
+
tag('warning').log(`Research skipped: ${err.message}`);
|
|
506
|
+
}
|
|
468
507
|
this.pageStateHash = currentStateHash;
|
|
469
508
|
this.pageActionResult = currentState;
|
|
470
509
|
let uiMapSection = '';
|
|
@@ -646,7 +685,7 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
646
685
|
- Use finish() to complete the test, not record(). record() is for intermediate notes.
|
|
647
686
|
- Call finish(verify) when all goals are achieved — provide an assertion to verify
|
|
648
687
|
- ONLY call stop() if the scenario itself is completely irrelevant to this page and no expectations can be achieved
|
|
649
|
-
- Use reset()
|
|
688
|
+
- Use reset() ONLY as a last resort when the current page cannot host the scenario. Never reset after a successful flow just because an assertion or milestone did not match — verify differently or record() the finding instead. Reset is destructive and does not undo server-side side effects.
|
|
650
689
|
- Be precise with locators (CSS or XPath)
|
|
651
690
|
- Each click/type call returns the new page state automatically
|
|
652
691
|
- Check for success messages from tool calls to verify if expected outcomes are achieved
|
|
@@ -769,13 +808,25 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
769
808
|
return {
|
|
770
809
|
reset: tool({
|
|
771
810
|
description: dedent`
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
811
|
+
Navigate back to the start URL and discard progress in this iteration.
|
|
812
|
+
Reset is a LAST RESORT. It is destructive — any side effects already produced on the
|
|
813
|
+
server (records created, forms submitted) persist and cannot be undone by resetting.
|
|
814
|
+
|
|
815
|
+
Use reset ONLY for:
|
|
816
|
+
- navigation dead-ends where the current page cannot host the scenario
|
|
817
|
+
- irrecoverable errors that leave no actionable path forward
|
|
818
|
+
|
|
819
|
+
Do NOT use reset when:
|
|
820
|
+
- the previous action already succeeded (URL changed, record visible, confirmation shown)
|
|
821
|
+
and an assertion did not match — verify differently, record(), or finish() instead
|
|
822
|
+
- an expectation/milestone does not match app behavior but the flow worked — the work is
|
|
823
|
+
done; resetting just creates duplicates
|
|
824
|
+
- you want to "try again" after submitting a form — submitting again creates a duplicate
|
|
825
|
+
|
|
826
|
+
Pilot will review every reset and may veto it.
|
|
776
827
|
`,
|
|
777
828
|
inputSchema: z.object({
|
|
778
|
-
reason: z.string().optional().describe('Explanation why
|
|
829
|
+
reason: z.string().optional().describe('Explanation why reset is the only option'),
|
|
779
830
|
}),
|
|
780
831
|
execute: async ({ reason }) => {
|
|
781
832
|
if (this.getCurrentState().isInsideIframe) {
|
|
@@ -791,6 +842,20 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
791
842
|
};
|
|
792
843
|
}
|
|
793
844
|
|
|
845
|
+
task.resetCount += 1;
|
|
846
|
+
|
|
847
|
+
if (this.pilot) {
|
|
848
|
+
const currentStateForReview = this.getCurrentState();
|
|
849
|
+
const allowed = await this.pilot.reviewReset(task, currentStateForReview, reason ?? '', conversation);
|
|
850
|
+
if (!allowed) {
|
|
851
|
+
return {
|
|
852
|
+
success: false,
|
|
853
|
+
action: 'reset',
|
|
854
|
+
message: 'Reset rejected by Pilot; Continue execution',
|
|
855
|
+
};
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
|
|
794
859
|
const explanation = reason ? `${reason} (RESET)` : 'Resetting to initial page';
|
|
795
860
|
const targetUrl = resetUrl!;
|
|
796
861
|
task.addNote(explanation);
|
|
@@ -874,11 +939,14 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
874
939
|
verify: z.string().describe('Specific assertion to verify on the page before finishing (e.g., "New item appears in the list")'),
|
|
875
940
|
}),
|
|
876
941
|
execute: async ({ verify }) => {
|
|
942
|
+
if (task.hasFinished) {
|
|
943
|
+
return { success: true, action: 'finish', message: 'already finished' };
|
|
944
|
+
}
|
|
877
945
|
task.addNote(`Finish requested: ${verify}`);
|
|
878
946
|
|
|
879
947
|
if (this.pilot) {
|
|
880
948
|
const currentState = this.getCurrentState();
|
|
881
|
-
await this.pilot.reviewFinish(task, currentState, conversation);
|
|
949
|
+
await this.pilot.reviewFinish(task, currentState, conversation, this.navigator);
|
|
882
950
|
if (!task.hasFinished) {
|
|
883
951
|
return {
|
|
884
952
|
success: false,
|
|
@@ -953,7 +1021,7 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
953
1021
|
if (input.status !== null && task.isComplete()) {
|
|
954
1022
|
if (this.pilot) {
|
|
955
1023
|
const currentState = this.getCurrentState();
|
|
956
|
-
await this.pilot.reviewCompletion(task, currentState, conversation);
|
|
1024
|
+
await this.pilot.reviewCompletion(task, currentState, conversation, this.navigator);
|
|
957
1025
|
} else {
|
|
958
1026
|
const hasPassed = task.hasAchievedAny();
|
|
959
1027
|
task.finish(hasPassed ? TestResult.PASSED : TestResult.FAILED);
|
package/src/ai/tools.ts
CHANGED
|
@@ -100,7 +100,7 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
|
|
|
100
100
|
activeNote.screenshot = await action.saveScreenshot();
|
|
101
101
|
}
|
|
102
102
|
activeNote.commit(TestResult.PASSED);
|
|
103
|
-
return successToolResult('click', { ...toolResult, attempts, code: command });
|
|
103
|
+
return successToolResult('click', { ...toolResult, attempts, code: command }, action);
|
|
104
104
|
}
|
|
105
105
|
}
|
|
106
106
|
|
|
@@ -128,7 +128,7 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
|
|
|
128
128
|
activeNote.screenshot = await action.saveScreenshot();
|
|
129
129
|
}
|
|
130
130
|
activeNote.commit(TestResult.PASSED);
|
|
131
|
-
return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true });
|
|
131
|
+
return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true }, action);
|
|
132
132
|
}
|
|
133
133
|
}
|
|
134
134
|
|
|
@@ -208,12 +208,16 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
|
|
|
208
208
|
activeNote.screenshot = await action.saveScreenshot();
|
|
209
209
|
}
|
|
210
210
|
activeNote.commit(TestResult.PASSED);
|
|
211
|
-
return successToolResult(
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
211
|
+
return successToolResult(
|
|
212
|
+
'pressKey',
|
|
213
|
+
{
|
|
214
|
+
...toolResult,
|
|
215
|
+
message: `Automatically used type() for "${key}" (not a standard key press)`,
|
|
216
|
+
code: typeCommand,
|
|
217
|
+
fallback: true,
|
|
218
|
+
},
|
|
219
|
+
action
|
|
220
|
+
);
|
|
217
221
|
}
|
|
218
222
|
|
|
219
223
|
const errorMsg = `pressKey fallback to type() failed: ${action.lastError?.toString()}`;
|
|
@@ -261,11 +265,15 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
|
|
|
261
265
|
activeNote.screenshot = await action.saveScreenshot();
|
|
262
266
|
}
|
|
263
267
|
activeNote.commit(TestResult.PASSED);
|
|
264
|
-
return successToolResult(
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
268
|
+
return successToolResult(
|
|
269
|
+
'pressKey',
|
|
270
|
+
{
|
|
271
|
+
...toolResult,
|
|
272
|
+
message: `Pressed key: ${key}${modifier ? ` with modifier(s): ${Array.isArray(modifier) ? modifier.join('+') : modifier}` : ''}`,
|
|
273
|
+
code: pressKeyCommand,
|
|
274
|
+
},
|
|
275
|
+
action
|
|
276
|
+
);
|
|
269
277
|
}
|
|
270
278
|
|
|
271
279
|
const errorMsg = `pressKey() failed: ${action.lastError?.toString()}`;
|
|
@@ -383,13 +391,17 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) {
|
|
|
383
391
|
activeNote.screenshot = await action.saveScreenshot();
|
|
384
392
|
}
|
|
385
393
|
activeNote.commit(TestResult.PASSED);
|
|
386
|
-
return successToolResult(
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
394
|
+
return successToolResult(
|
|
395
|
+
'form',
|
|
396
|
+
{
|
|
397
|
+
...toolResult,
|
|
398
|
+
message: `Form completed successfully with ${lines.length} commands.`,
|
|
399
|
+
commandsExecuted: lines.length,
|
|
400
|
+
code: codeBlock,
|
|
401
|
+
suggestion: 'Verify the form was filled in correctly using see() tool. If needed to submit: try click() tool or form() with I.pressKey("Enter").',
|
|
402
|
+
},
|
|
403
|
+
action
|
|
404
|
+
);
|
|
393
405
|
} catch (error) {
|
|
394
406
|
activeNote.commit(TestResult.FAILED);
|
|
395
407
|
const errorMessage = error instanceof Error ? error.toString() : 'Unknown error occurred';
|
|
@@ -589,10 +601,14 @@ export function createAgentTools({
|
|
|
589
601
|
const result = await navigator.verifyState(assertion, actionResult);
|
|
590
602
|
|
|
591
603
|
if (result.verified) {
|
|
592
|
-
return successToolResult(
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
604
|
+
return successToolResult(
|
|
605
|
+
'verify',
|
|
606
|
+
{
|
|
607
|
+
message: `Verification passed: ${assertion}`,
|
|
608
|
+
code: result.successfulCodes.join('\n'),
|
|
609
|
+
},
|
|
610
|
+
{ assertionSteps: result.assertionSteps }
|
|
611
|
+
);
|
|
596
612
|
}
|
|
597
613
|
|
|
598
614
|
return failedToolResult('verify', `Verification failed: ${assertion}`, {
|
|
@@ -1017,8 +1033,14 @@ function countAriaChanges(ariaChanges: string): number {
|
|
|
1017
1033
|
return addedCount + removedCount;
|
|
1018
1034
|
}
|
|
1019
1035
|
|
|
1020
|
-
function successToolResult(action: string, data?: Record<string, any
|
|
1036
|
+
function successToolResult(action: string, data?: Record<string, any>, source?: { playwrightGroupId?: string | null; assertionSteps?: any[] }) {
|
|
1021
1037
|
const result: Record<string, any> = { success: true, action, ...data };
|
|
1038
|
+
if (source?.playwrightGroupId) {
|
|
1039
|
+
result.playwrightGroupId = source.playwrightGroupId;
|
|
1040
|
+
}
|
|
1041
|
+
if (source?.assertionSteps?.length) {
|
|
1042
|
+
result.assertionSteps = source.assertionSteps;
|
|
1043
|
+
}
|
|
1022
1044
|
if (data?.pageDiff) {
|
|
1023
1045
|
let suggestion = PAGE_DIFF_SUGGESTION;
|
|
1024
1046
|
const ariaChanges = data.pageDiff.ariaChanges || '';
|