explorbot 0.1.10 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -1
- package/bin/explorbot-cli.ts +27 -18
- package/dist/bin/explorbot-cli.js +26 -18
- package/dist/package.json +3 -3
- package/dist/rules/navigator/output.md +9 -0
- package/dist/rules/navigator/verification-actions.md +2 -0
- package/dist/src/action-result.js +23 -1
- package/dist/src/action.js +51 -42
- package/dist/src/ai/bosun.js +11 -1
- package/dist/src/ai/conversation.js +39 -0
- package/dist/src/ai/historian/codeceptjs.js +109 -0
- package/dist/src/ai/historian/experience.js +321 -0
- package/dist/src/ai/historian/mixin.js +2 -0
- package/dist/src/ai/historian/playwright.js +145 -0
- package/dist/src/ai/historian/screencast.js +121 -0
- package/dist/src/ai/historian/utils.js +18 -0
- package/dist/src/ai/historian.js +21 -405
- package/dist/src/ai/navigator.js +82 -29
- package/dist/src/ai/pilot.js +232 -13
- package/dist/src/ai/planner.js +29 -9
- package/dist/src/ai/provider.js +54 -17
- package/dist/src/ai/researcher.js +41 -32
- package/dist/src/ai/rules.js +26 -14
- package/dist/src/ai/tester.js +90 -26
- package/dist/src/ai/tools.js +13 -7
- package/dist/src/browser-server.js +16 -3
- package/dist/src/commands/add-rule-command.js +11 -8
- package/dist/src/commands/clean-command.js +2 -1
- package/dist/src/commands/explore-command.js +43 -15
- package/dist/src/commands/init-command.js +9 -8
- package/dist/src/commands/plan-command.js +32 -0
- package/dist/src/commands/plan-save-command.js +19 -7
- package/dist/src/commands/rerun-command.js +4 -0
- package/dist/src/components/App.js +15 -5
- package/dist/src/execution-controller.js +13 -2
- package/dist/src/experience-tracker.js +20 -64
- package/dist/src/explorbot.js +8 -8
- package/dist/src/explorer.js +11 -3
- package/dist/src/observability.js +50 -99
- package/dist/src/playwright-recorder.js +309 -0
- package/dist/src/reporter.js +4 -1
- package/dist/src/test-plan.js +12 -0
- package/dist/src/utils/aria.js +37 -1
- package/dist/src/utils/error-page.js +20 -7
- package/dist/src/utils/next-steps.js +37 -0
- package/dist/src/utils/strings.js +15 -0
- package/package.json +3 -3
- package/rules/navigator/output.md +9 -0
- package/rules/navigator/verification-actions.md +2 -0
- package/src/action-result.ts +26 -1
- package/src/action.ts +49 -41
- package/src/ai/bosun.ts +11 -1
- package/src/ai/conversation.ts +37 -0
- package/src/ai/historian/codeceptjs.ts +130 -0
- package/src/ai/historian/experience.ts +384 -0
- package/src/ai/historian/mixin.ts +4 -0
- package/src/ai/historian/playwright.ts +169 -0
- package/src/ai/historian/screencast.ts +133 -0
- package/src/ai/historian/utils.ts +23 -0
- package/src/ai/historian.ts +37 -473
- package/src/ai/navigator.ts +82 -29
- package/src/ai/pilot.ts +237 -14
- package/src/ai/planner.ts +29 -9
- package/src/ai/provider.ts +51 -17
- package/src/ai/researcher.ts +45 -33
- package/src/ai/rules.ts +27 -14
- package/src/ai/tester.ts +94 -26
- package/src/ai/tools.ts +47 -25
- package/src/browser-server.ts +17 -3
- package/src/commands/add-rule-command.ts +11 -7
- package/src/commands/clean-command.ts +2 -1
- package/src/commands/explore-command.ts +46 -14
- package/src/commands/init-command.ts +9 -8
- package/src/commands/plan-command.ts +35 -0
- package/src/commands/plan-save-command.ts +18 -7
- package/src/commands/rerun-command.ts +5 -0
- package/src/components/App.tsx +16 -5
- package/src/config.ts +12 -1
- package/src/execution-controller.ts +14 -3
- package/src/experience-tracker.ts +21 -72
- package/src/explorbot.ts +8 -8
- package/src/explorer.ts +13 -3
- package/src/observability.ts +50 -109
- package/src/playwright-recorder.ts +305 -0
- package/src/reporter.ts +4 -1
- package/src/test-plan.ts +12 -0
- package/src/utils/aria.ts +38 -1
- package/src/utils/error-page.ts +22 -7
- package/src/utils/next-steps.ts +51 -0
- package/src/utils/strings.ts +17 -0
package/dist/src/ai/provider.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { LangfuseSpanProcessor } from '@langfuse/otel';
|
|
2
2
|
import { NodeSDK } from '@opentelemetry/sdk-node';
|
|
3
|
-
import { generateObject, generateText } from 'ai';
|
|
3
|
+
import { generateObject, generateText, stepCountIs } from 'ai';
|
|
4
4
|
import { clearActivity, setActivity } from "../activity.js";
|
|
5
5
|
import { executionController } from "../execution-controller.js";
|
|
6
6
|
import { Observability } from "../observability.js";
|
|
@@ -16,6 +16,20 @@ class AiError extends Error {
|
|
|
16
16
|
}
|
|
17
17
|
export class ContextLengthError extends Error {
|
|
18
18
|
}
|
|
19
|
+
function rejectAfterIdle(ms, signal) {
|
|
20
|
+
return new Promise((_, reject) => {
|
|
21
|
+
const tick = () => {
|
|
22
|
+
if (signal.cancelled)
|
|
23
|
+
return;
|
|
24
|
+
if (executionController.isAwaitingInput()) {
|
|
25
|
+
setTimeout(tick, ms);
|
|
26
|
+
return;
|
|
27
|
+
}
|
|
28
|
+
reject(new Error('AI request timeout'));
|
|
29
|
+
};
|
|
30
|
+
setTimeout(tick, ms);
|
|
31
|
+
});
|
|
32
|
+
}
|
|
19
33
|
export class Provider {
|
|
20
34
|
config;
|
|
21
35
|
telemetryEnabled = false;
|
|
@@ -247,13 +261,19 @@ export class Provider {
|
|
|
247
261
|
promptLog('Available tools:', toolNames);
|
|
248
262
|
promptLog(messages[messages.length - 1].content);
|
|
249
263
|
const telemetry = this.getTelemetry(options);
|
|
264
|
+
const maxRoundtrips = options.maxToolRoundtrips ?? 5;
|
|
265
|
+
const extraStop = options.stopWhen;
|
|
266
|
+
const stopConditions = [stepCountIs(maxRoundtrips)];
|
|
267
|
+
if (extraStop)
|
|
268
|
+
stopConditions.push(extraStop);
|
|
269
|
+
const { stopWhen: _ignoredStopWhen, ...optionsWithoutStop } = options;
|
|
250
270
|
const config = this.mergeProviderOptions({
|
|
251
271
|
tools,
|
|
252
272
|
maxTokens: 16384,
|
|
253
|
-
maxToolRoundtrips: options.maxToolRoundtrips ?? 5,
|
|
254
273
|
toolChoice: 'auto',
|
|
255
274
|
...(this.config.config || {}),
|
|
256
|
-
...
|
|
275
|
+
...optionsWithoutStop,
|
|
276
|
+
stopWhen: stopConditions,
|
|
257
277
|
...(telemetry ? { experimental_telemetry: telemetry } : {}),
|
|
258
278
|
model,
|
|
259
279
|
abortSignal: executionController.getAbortSignal(),
|
|
@@ -261,13 +281,24 @@ export class Provider {
|
|
|
261
281
|
try {
|
|
262
282
|
const response = await withRetry(async () => {
|
|
263
283
|
const timeout = config.timeout || 30000;
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
284
|
+
const cancel = { cancelled: false };
|
|
285
|
+
try {
|
|
286
|
+
const result = (await Promise.race([
|
|
287
|
+
generateText({
|
|
288
|
+
messages,
|
|
289
|
+
...config,
|
|
290
|
+
}),
|
|
291
|
+
rejectAfterIdle(timeout, cancel),
|
|
292
|
+
]));
|
|
293
|
+
const hasToolCall = (result.toolCalls?.length || 0) > 0;
|
|
294
|
+
if (!result.text && !hasToolCall && result.finishReason === 'length') {
|
|
295
|
+
throw new ContextLengthError('AI response empty: output truncated at maxTokens. Increase maxTokens in config or use a model with higher output capacity.');
|
|
296
|
+
}
|
|
297
|
+
return result;
|
|
298
|
+
}
|
|
299
|
+
finally {
|
|
300
|
+
cancel.cancelled = true;
|
|
301
|
+
}
|
|
271
302
|
}, this.getRetryOptions(options));
|
|
272
303
|
clearActivity();
|
|
273
304
|
// Log tool usage summary
|
|
@@ -330,13 +361,19 @@ export class Provider {
|
|
|
330
361
|
promptLog(messages[messages.length - 1].content);
|
|
331
362
|
const response = await withRetry(async () => {
|
|
332
363
|
const timeout = config.timeout || 30000;
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
364
|
+
const cancel = { cancelled: false };
|
|
365
|
+
try {
|
|
366
|
+
return (await Promise.race([
|
|
367
|
+
generateObject({
|
|
368
|
+
messages,
|
|
369
|
+
...config,
|
|
370
|
+
}),
|
|
371
|
+
rejectAfterIdle(timeout, cancel),
|
|
372
|
+
]));
|
|
373
|
+
}
|
|
374
|
+
finally {
|
|
375
|
+
cancel.cancelled = true;
|
|
376
|
+
}
|
|
340
377
|
}, this.getRetryOptions(options));
|
|
341
378
|
clearActivity();
|
|
342
379
|
responseLog(response.object);
|
package/dist/src/ai/researcher.js
CHANGED
|
@@ -6,12 +6,11 @@ import { executionController } from "../execution-controller.js";
|
|
|
6
6
|
import { Observability } from "../observability.js";
|
|
7
7
|
import { Stats } from "../stats.js";
|
|
8
8
|
import { diffAriaSnapshots } from "../utils/aria.js";
|
|
9
|
-
import { ErrorPageError,
|
|
9
|
+
import { ErrorPageError, detectPageCondition } from "../utils/error-page.js";
|
|
10
10
|
import { HooksRunner } from "../utils/hooks-runner.js";
|
|
11
11
|
import { isBodyEmpty } from "../utils/html.js";
|
|
12
12
|
import { createDebug, pluralize, tag } from '../utils/logger.js';
|
|
13
13
|
import { mdq } from "../utils/markdown-query.js";
|
|
14
|
-
import { withRetry } from "../utils/retry.js";
|
|
15
14
|
import { RulesLoader } from "../utils/rules-loader.js";
|
|
16
15
|
import { ContextLengthError } from './provider.js';
|
|
17
16
|
import { findSimilarResearch, getCachedResearch, saveResearch } from "./researcher/cache.js";
|
|
@@ -98,11 +97,15 @@ export class Researcher extends ResearcherBase {
|
|
|
98
97
|
const annotatedElements = await this.explorer.annotateElements();
|
|
99
98
|
debugLog(`Annotated ${annotatedElements.length} interactive elements with eidx`);
|
|
100
99
|
this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot && this.provider.hasVision() });
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
100
|
+
const condition = detectPageCondition(this.actionResult);
|
|
101
|
+
if (condition === 'error') {
|
|
102
|
+
tag('warning').log(`Detected error page at ${state.url}`);
|
|
103
|
+
throw new ErrorPageError(state.url, this.actionResult.title);
|
|
104
|
+
}
|
|
105
|
+
if (condition === 'loading') {
|
|
106
|
+
const settled = await this.waitUntilSettled(screenshot);
|
|
107
|
+
if (!settled) {
|
|
108
|
+
tag('warning').log(`Page at ${state.url} did not finish loading within timeout, continuing with best-effort research`);
|
|
106
109
|
}
|
|
107
110
|
}
|
|
108
111
|
debugLog('Researching web page:', this.actionResult.url);
|
|
@@ -285,41 +288,47 @@ export class Researcher extends ResearcherBase {
|
|
|
285
288
|
}
|
|
286
289
|
return;
|
|
287
290
|
}
|
|
288
|
-
if (isEmpty) {
|
|
289
|
-
debugLog('HTML body
|
|
290
|
-
tag('step').log('Page body is empty,
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
debugLog('Not on current state, navigating to URL');
|
|
294
|
-
tag('step').log('Navigating to URL...');
|
|
291
|
+
if (isEmpty && isOnCurrentState) {
|
|
292
|
+
debugLog('HTML body empty on current URL, waiting for content');
|
|
293
|
+
tag('step').log('Page body is empty, waiting for content...');
|
|
294
|
+
await this.waitUntilSettled(screenshot ?? false);
|
|
295
|
+
return;
|
|
295
296
|
}
|
|
297
|
+
debugLog('Not on current state, navigating to URL');
|
|
298
|
+
tag('step').log('Navigating to URL...');
|
|
296
299
|
await this.explorer.visit(url);
|
|
297
300
|
this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot ?? false });
|
|
298
301
|
}
|
|
299
|
-
async
|
|
302
|
+
async waitUntilSettled(screenshot) {
|
|
300
303
|
const errorPageTimeout = this.explorer.getConfig().ai?.agents?.researcher?.errorPageTimeout ?? 10;
|
|
301
304
|
if (errorPageTimeout <= 0)
|
|
302
305
|
return false;
|
|
306
|
+
const page = this.explorer.playwrightHelper.page;
|
|
307
|
+
const includeScreenshot = screenshot && this.provider.hasVision();
|
|
303
308
|
try {
|
|
304
|
-
await
|
|
305
|
-
await this.explorer.annotateElements();
|
|
306
|
-
this.actionResult = await this.explorer.createAction().capturePageState({
|
|
307
|
-
includeScreenshot: screenshot && this.provider.hasVision(),
|
|
308
|
-
});
|
|
309
|
-
if (isErrorPage(this.actionResult))
|
|
310
|
-
throw new Error('Error page detected');
|
|
311
|
-
}, {
|
|
312
|
-
maxAttempts: Math.ceil(errorPageTimeout / 3) + 1,
|
|
313
|
-
baseDelay: 1000,
|
|
314
|
-
maxDelay: 5000,
|
|
315
|
-
backoffMultiplier: 2,
|
|
316
|
-
retryCondition: (e) => e.message === 'Error page detected',
|
|
317
|
-
});
|
|
318
|
-
return true;
|
|
309
|
+
await page?.waitForLoadState('networkidle', { timeout: errorPageTimeout * 1000 });
|
|
319
310
|
}
|
|
320
|
-
catch {
|
|
321
|
-
|
|
311
|
+
catch { }
|
|
312
|
+
await this.explorer.annotateElements();
|
|
313
|
+
this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
|
|
314
|
+
let condition = detectPageCondition(this.actionResult);
|
|
315
|
+
if (condition === 'error') {
|
|
316
|
+
throw new ErrorPageError(this.actionResult.url, this.actionResult.title);
|
|
317
|
+
}
|
|
318
|
+
if (condition === 'ok')
|
|
319
|
+
return true;
|
|
320
|
+
for (let i = 0; i < 3; i++) {
|
|
321
|
+
await new Promise((r) => setTimeout(r, 1000));
|
|
322
|
+
await this.explorer.annotateElements();
|
|
323
|
+
this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot });
|
|
324
|
+
condition = detectPageCondition(this.actionResult);
|
|
325
|
+
if (condition === 'error') {
|
|
326
|
+
throw new ErrorPageError(this.actionResult.url, this.actionResult.title);
|
|
327
|
+
}
|
|
328
|
+
if (condition === 'ok')
|
|
329
|
+
return true;
|
|
322
330
|
}
|
|
331
|
+
return false;
|
|
323
332
|
}
|
|
324
333
|
getConfiguredSections() {
|
|
325
334
|
const configSections = this.explorer.getConfig().ai?.agents?.researcher?.sections;
|
package/dist/src/ai/rules.js
CHANGED
|
@@ -161,6 +161,20 @@ export const focusedElementRule = dedent `
|
|
|
161
161
|
If focus is on wrong element, click the correct field first.
|
|
162
162
|
</focused_element_actions>
|
|
163
163
|
`;
|
|
164
|
+
export const unexpectedPopupRule = dedent `
|
|
165
|
+
<unexpected_popup_rule>
|
|
166
|
+
If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
|
|
167
|
+
If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
|
|
168
|
+
If a click error mentions "intercepts pointer events", another element is covering the target — dismiss it first.
|
|
169
|
+
If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
|
|
170
|
+
|
|
171
|
+
Dismiss strategy (try in order):
|
|
172
|
+
1. I.clickXY(0, 0) — click outside the popup to close it
|
|
173
|
+
2. I.pressKey('Escape') — press Escape to dismiss
|
|
174
|
+
3. I.click('Cancel') — click Cancel button if present
|
|
175
|
+
4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
|
|
176
|
+
</unexpected_popup_rule>
|
|
177
|
+
`;
|
|
164
178
|
export const sectionContextRule = dedent `
|
|
165
179
|
<section_context_rule>
|
|
166
180
|
Context parameter is DEFAULT for all interactions. ALWAYS use container from UI map sections unless locator is XPath or unique ID.
|
|
@@ -186,17 +200,7 @@ export const sectionContextRule = dedent `
|
|
|
186
200
|
- Locator is a unique ID (#specific-element)
|
|
187
201
|
</section_context_rule>
|
|
188
202
|
|
|
189
|
-
|
|
190
|
-
If a modal/popup appeared that you didn't expect, dismiss it first before continuing with original task.
|
|
191
|
-
If elements become hidden or unclickable (timeout errors on visible elements), a dialog or overlay may have appeared on top.
|
|
192
|
-
If buttons are disabled unexpectedly, check if a popup is blocking interaction or if required form fields are empty.
|
|
193
|
-
|
|
194
|
-
Dismiss strategy (try in order):
|
|
195
|
-
1. I.clickXY(0, 0) — click outside the popup to close it
|
|
196
|
-
2. I.pressKey('Escape') — press Escape to dismiss
|
|
197
|
-
3. I.click('Cancel') — click Cancel button if present
|
|
198
|
-
4. I.click({ role: 'button', text: 'Close' }) — click X/close button if present
|
|
199
|
-
</unexpected_popup_rule>
|
|
203
|
+
${unexpectedPopupRule}
|
|
200
204
|
`;
|
|
201
205
|
export function multipleTabsRule(tabs) {
|
|
202
206
|
const tabsList = tabs.map((tab, i) => ` ${i + 1}. ${tab.title} - ${tab.url}`).join('\n');
|
|
@@ -265,12 +269,19 @@ export const actionRule = dedent `
|
|
|
265
269
|
I.fillField('Username', 'John', '.login-form'); // fills Username inside .login-form
|
|
266
270
|
I.fillField('Username', 'John'); // fills the field located by name or placeholder or label "Username" with the text "John"
|
|
267
271
|
I.fillField('//user/input', 'John'); // fills the field located by XPath "//user/input" with the text "John"
|
|
268
|
-
|
|
272
|
+
I.fillField('Description', 'Hello world', '.editor'); // works for rich text / code editors too
|
|
273
|
+
</example>
|
|
274
|
+
|
|
275
|
+
I.fillField handles plain inputs, textareas, contenteditable regions, and rich text / code editors
|
|
276
|
+
(Monaco, ProseMirror, CodeMirror, TipTap, Quill, Draft.js, Slate, etc.) transparently.
|
|
277
|
+
ALWAYS use I.fillField for rich editors — target the editor container or its nearest label/heading with a normal locator.
|
|
278
|
+
Do NOT open the editor with raw JS (executeScript, page.evaluate), do NOT dispatch synthetic events,
|
|
279
|
+
do NOT call the editor's own API (monaco.editor.setValue, view.dispatch, etc.) to write text.
|
|
269
280
|
|
|
270
281
|
### I.type
|
|
271
282
|
|
|
272
|
-
Types text into the currently focused element. Use when fillField
|
|
273
|
-
|
|
283
|
+
Types text into the currently focused element. Use only when there is no locator you can pass to I.fillField —
|
|
284
|
+
e.g. the target is implicit (a just-opened command palette, an autocomplete that steals focus, a canvas-based surface).
|
|
274
285
|
|
|
275
286
|
I.type(<text>)
|
|
276
287
|
|
|
@@ -282,6 +293,7 @@ export const actionRule = dedent `
|
|
|
282
293
|
DOES NOT receive any locator, just text to type.
|
|
283
294
|
NEVER write: I.type('text', locator) or I.type('text', {locator: '...'}) — this is INVALID.
|
|
284
295
|
To type into a specific field: use I.fillField(locator, text) or I.click(locator) then I.type(text).
|
|
296
|
+
Do NOT reach for I.type just because the target looks like a rich editor — I.fillField handles those.
|
|
285
297
|
|
|
286
298
|
### I.pressKey
|
|
287
299
|
|
package/dist/src/ai/tester.js
CHANGED
|
@@ -6,9 +6,11 @@ import { z } from 'zod';
|
|
|
6
6
|
import { ActionResult } from "../action-result.js";
|
|
7
7
|
import { setActivity } from "../activity.js";
|
|
8
8
|
import { ConfigParser } from "../config.js";
|
|
9
|
+
import { Observability } from "../observability.js";
|
|
9
10
|
import { Stats } from "../stats.js";
|
|
10
11
|
import { TestResult } from "../test-plan.js";
|
|
11
12
|
import { detectFocusArea, extractFocusedElement } from "../utils/aria.js";
|
|
13
|
+
import { ErrorPageError } from "../utils/error-page.js";
|
|
12
14
|
import { HooksRunner } from "../utils/hooks-runner.js";
|
|
13
15
|
import { createDebug, tag } from "../utils/logger.js";
|
|
14
16
|
import { loop } from "../utils/loop.js";
|
|
@@ -121,10 +123,34 @@ export class Tester extends TaskAgent {
|
|
|
121
123
|
// Note: Markdown saving functionality removed from Conversation class
|
|
122
124
|
const initialPrompt = await this.buildTestPrompt(task, initialState);
|
|
123
125
|
conversation.addUserText(initialPrompt);
|
|
126
|
+
return await Observability.run(`test: ${task.scenario}`, {
|
|
127
|
+
sessionId: task.sessionName,
|
|
128
|
+
tags: ['tester'],
|
|
129
|
+
input: {
|
|
130
|
+
scenario: task.scenario,
|
|
131
|
+
startUrl: task.startUrl,
|
|
132
|
+
expected: task.expected,
|
|
133
|
+
},
|
|
134
|
+
}, async () => this.runTestSession(task, initialState, conversation, { offFailedRequest, page, onPageError, onConsoleMessage }));
|
|
135
|
+
}
|
|
136
|
+
async runTestSession(task, initialState, conversation, handlers) {
|
|
137
|
+
const { offFailedRequest, page, onPageError, onConsoleMessage } = handlers;
|
|
124
138
|
if (this.pilot) {
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
139
|
+
try {
|
|
140
|
+
const plan = await this.pilot.planTest(task, initialState);
|
|
141
|
+
if (plan) {
|
|
142
|
+
conversation.addUserText(`Pilot's test plan:\n${plan}\n\nFollow this plan while executing the test.`);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
catch (err) {
|
|
146
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
147
|
+
tag('error').log(`Pilot planning failed: ${message}`);
|
|
148
|
+
task.addNote(`Planning failed: ${message}`, TestResult.FAILED);
|
|
149
|
+
task.finish(TestResult.FAILED);
|
|
150
|
+
offFailedRequest?.();
|
|
151
|
+
page?.off('pageerror', onPageError);
|
|
152
|
+
page?.off('console', onConsoleMessage);
|
|
153
|
+
return { success: false };
|
|
128
154
|
}
|
|
129
155
|
}
|
|
130
156
|
debugLog('Starting test execution with tools');
|
|
@@ -135,6 +161,8 @@ export class Tester extends TaskAgent {
|
|
|
135
161
|
const currentUrl = this.explorer.getStateManager().getCurrentState()?.url || task.startUrl || '';
|
|
136
162
|
await this.hooksRunner.runBeforeHook('tester', currentUrl);
|
|
137
163
|
const offStateChange = this.explorer.getStateManager().onStateChange((event) => {
|
|
164
|
+
if (task.hasFinished)
|
|
165
|
+
return;
|
|
138
166
|
if (event.toState?.url === event.fromState?.url)
|
|
139
167
|
return;
|
|
140
168
|
task.addNote(`Navigated to ${event.toState?.url}`, TestResult.PASSED);
|
|
@@ -178,6 +206,10 @@ export class Tester extends TaskAgent {
|
|
|
178
206
|
conversation.cleanupTag('page_aria', '...cleaned aria snapshot...', 2);
|
|
179
207
|
conversation.cleanupTag('page_html', '...cleaned HTML snapshot...', 1);
|
|
180
208
|
conversation.cleanupTag('experience', '...cleaned experience...', 1);
|
|
209
|
+
conversation.cleanupTag('applied_experience', '...cleaned past experience...', 1);
|
|
210
|
+
conversation.cleanupTag('page_ui_map', '...cleaned UI map...', 1);
|
|
211
|
+
conversation.cleanupTag('page_ui_map_overlay', '...cleaned UI overlay...', 1);
|
|
212
|
+
conversation.compactToolResults(3);
|
|
181
213
|
if (iteration > 1) {
|
|
182
214
|
const isNewPage = this.previousUrl !== null && this.previousUrl !== currentState.url;
|
|
183
215
|
let nextStep = '';
|
|
@@ -199,6 +231,7 @@ export class Tester extends TaskAgent {
|
|
|
199
231
|
const result = await this.provider.invokeConversation(conversation, tools, {
|
|
200
232
|
maxToolRoundtrips: 5,
|
|
201
233
|
toolChoice: 'required',
|
|
234
|
+
stopWhen: () => task.hasFinished,
|
|
202
235
|
});
|
|
203
236
|
if (!result)
|
|
204
237
|
throw new Error('Failed to get response from provider');
|
|
@@ -273,34 +306,30 @@ export class Tester extends TaskAgent {
|
|
|
273
306
|
context.setUserInput(result.message);
|
|
274
307
|
}
|
|
275
308
|
: undefined,
|
|
276
|
-
observability: {
|
|
277
|
-
name: `test: ${task.scenario}`,
|
|
278
|
-
agent: 'tester',
|
|
279
|
-
sessionId: task.sessionName,
|
|
280
|
-
metadata: {
|
|
281
|
-
input: {
|
|
282
|
-
scenario: task.scenario,
|
|
283
|
-
startUrl: task.startUrl,
|
|
284
|
-
expected: task.expected,
|
|
285
|
-
},
|
|
286
|
-
},
|
|
287
|
-
},
|
|
288
309
|
catch: async ({ error, stop }) => {
|
|
289
310
|
tag('error').log(`Test execution error: ${error}`);
|
|
290
|
-
task.
|
|
311
|
+
if (!task.hasFinished) {
|
|
312
|
+
task.addNote(`Execution error: ${error instanceof Error ? error.message : String(error)}`);
|
|
313
|
+
}
|
|
291
314
|
stop();
|
|
292
315
|
},
|
|
293
316
|
});
|
|
294
317
|
if (task.hasFinished)
|
|
295
318
|
break;
|
|
296
319
|
const finalState = this.getCurrentState();
|
|
297
|
-
const wantsContinue = await this.pilot.finalReview(task, finalState, conversation);
|
|
320
|
+
const wantsContinue = await this.pilot.finalReview(task, finalState, conversation, this.navigator);
|
|
298
321
|
if (!wantsContinue || task.hasFinished)
|
|
299
322
|
break;
|
|
300
323
|
if (extensions >= this.MAX_EXTENSIONS)
|
|
301
324
|
break;
|
|
302
325
|
extensions++;
|
|
303
326
|
tag('info').log(`Pilot extending test (${extensions}/${this.MAX_EXTENSIONS})`);
|
|
327
|
+
conversation.cleanupTag('page_aria', '...trimmed...', 1);
|
|
328
|
+
conversation.cleanupTag('page_html', '...trimmed...', 0);
|
|
329
|
+
conversation.cleanupTag('experience', '...trimmed...', 0);
|
|
330
|
+
conversation.cleanupTag('page_ui_map', '...trimmed...', 0);
|
|
331
|
+
conversation.cleanupTag('page_ui_map_overlay', '...trimmed...', 0);
|
|
332
|
+
conversation.compactToolResults(1);
|
|
304
333
|
shouldContinue = true;
|
|
305
334
|
}
|
|
306
335
|
const finalUrl = this.explorer.getStateManager().getCurrentState()?.url || currentUrl;
|
|
@@ -391,7 +420,15 @@ export class Tester extends TaskAgent {
|
|
|
391
420
|
this.explorer.clearOtherTabsInfo();
|
|
392
421
|
}
|
|
393
422
|
if (isNewUrl) {
|
|
394
|
-
|
|
423
|
+
let research = '';
|
|
424
|
+
try {
|
|
425
|
+
research = await this.researcher.research(currentState);
|
|
426
|
+
}
|
|
427
|
+
catch (err) {
|
|
428
|
+
if (!(err instanceof ErrorPageError))
|
|
429
|
+
throw err;
|
|
430
|
+
tag('warning').log(`Research skipped: ${err.message}`);
|
|
431
|
+
}
|
|
395
432
|
this.pageStateHash = currentStateHash;
|
|
396
433
|
this.pageActionResult = currentState;
|
|
397
434
|
let uiMapSection = '';
|
|
@@ -562,7 +599,7 @@ export class Tester extends TaskAgent {
|
|
|
562
599
|
- Use finish() to complete the test, not record(). record() is for intermediate notes.
|
|
563
600
|
- Call finish(verify) when all goals are achieved — provide an assertion to verify
|
|
564
601
|
- ONLY call stop() if the scenario itself is completely irrelevant to this page and no expectations can be achieved
|
|
565
|
-
- Use reset()
|
|
602
|
+
- Use reset() ONLY as a last resort when the current page cannot host the scenario. Never reset after a successful flow just because an assertion or milestone did not match — verify differently or record() the finding instead. Reset is destructive and does not undo server-side side effects.
|
|
566
603
|
- Be precise with locators (CSS or XPath)
|
|
567
604
|
- Each click/type call returns the new page state automatically
|
|
568
605
|
- Check for success messages from tool calls to verify if expected outcomes are achieved
|
|
@@ -678,13 +715,25 @@ export class Tester extends TaskAgent {
|
|
|
678
715
|
return {
|
|
679
716
|
reset: tool({
|
|
680
717
|
description: dedent `
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
718
|
+
Navigate back to the start URL and discard progress in this iteration.
|
|
719
|
+
Reset is a LAST RESORT. It is destructive — any side effects already produced on the
|
|
720
|
+
server (records created, forms submitted) persist and cannot be undone by resetting.
|
|
721
|
+
|
|
722
|
+
Use reset ONLY for:
|
|
723
|
+
- navigation dead-ends where the current page cannot host the scenario
|
|
724
|
+
- irrecoverable errors that leave no actionable path forward
|
|
725
|
+
|
|
726
|
+
Do NOT use reset when:
|
|
727
|
+
- the previous action already succeeded (URL changed, record visible, confirmation shown)
|
|
728
|
+
and an assertion did not match — verify differently, record(), or finish() instead
|
|
729
|
+
- an expectation/milestone does not match app behavior but the flow worked — the work is
|
|
730
|
+
done; resetting just creates duplicates
|
|
731
|
+
- you want to "try again" after submitting a form — submitting again creates a duplicate
|
|
732
|
+
|
|
733
|
+
Pilot will review every reset and may veto it.
|
|
685
734
|
`,
|
|
686
735
|
inputSchema: z.object({
|
|
687
|
-
reason: z.string().optional().describe('Explanation why
|
|
736
|
+
reason: z.string().optional().describe('Explanation why reset is the only option'),
|
|
688
737
|
}),
|
|
689
738
|
execute: async ({ reason }) => {
|
|
690
739
|
if (this.getCurrentState().isInsideIframe) {
|
|
@@ -698,6 +747,18 @@ export class Tester extends TaskAgent {
|
|
|
698
747
|
action: 'reset',
|
|
699
748
|
};
|
|
700
749
|
}
|
|
750
|
+
task.resetCount += 1;
|
|
751
|
+
if (this.pilot) {
|
|
752
|
+
const currentStateForReview = this.getCurrentState();
|
|
753
|
+
const allowed = await this.pilot.reviewReset(task, currentStateForReview, reason ?? '', conversation);
|
|
754
|
+
if (!allowed) {
|
|
755
|
+
return {
|
|
756
|
+
success: false,
|
|
757
|
+
action: 'reset',
|
|
758
|
+
message: 'Reset rejected by Pilot; Continue execution',
|
|
759
|
+
};
|
|
760
|
+
}
|
|
761
|
+
}
|
|
701
762
|
const explanation = reason ? `${reason} (RESET)` : 'Resetting to initial page';
|
|
702
763
|
const targetUrl = resetUrl;
|
|
703
764
|
task.addNote(explanation);
|
|
@@ -776,10 +837,13 @@ export class Tester extends TaskAgent {
|
|
|
776
837
|
verify: z.string().describe('Specific assertion to verify on the page before finishing (e.g., "New item appears in the list")'),
|
|
777
838
|
}),
|
|
778
839
|
execute: async ({ verify }) => {
|
|
840
|
+
if (task.hasFinished) {
|
|
841
|
+
return { success: true, action: 'finish', message: 'already finished' };
|
|
842
|
+
}
|
|
779
843
|
task.addNote(`Finish requested: ${verify}`);
|
|
780
844
|
if (this.pilot) {
|
|
781
845
|
const currentState = this.getCurrentState();
|
|
782
|
-
await this.pilot.reviewFinish(task, currentState, conversation);
|
|
846
|
+
await this.pilot.reviewFinish(task, currentState, conversation, this.navigator);
|
|
783
847
|
if (!task.hasFinished) {
|
|
784
848
|
return {
|
|
785
849
|
success: false,
|
|
@@ -852,7 +916,7 @@ export class Tester extends TaskAgent {
|
|
|
852
916
|
if (input.status !== null && task.isComplete()) {
|
|
853
917
|
if (this.pilot) {
|
|
854
918
|
const currentState = this.getCurrentState();
|
|
855
|
-
await this.pilot.reviewCompletion(task, currentState, conversation);
|
|
919
|
+
await this.pilot.reviewCompletion(task, currentState, conversation, this.navigator);
|
|
856
920
|
}
|
|
857
921
|
else {
|
|
858
922
|
const hasPassed = task.hasAchievedAny();
|
package/dist/src/ai/tools.js
CHANGED
|
@@ -84,7 +84,7 @@ export function createCodeceptJSTools(explorer, task) {
|
|
|
84
84
|
activeNote.screenshot = await action.saveScreenshot();
|
|
85
85
|
}
|
|
86
86
|
activeNote.commit(TestResult.PASSED);
|
|
87
|
-
return successToolResult('click', { ...toolResult, attempts, code: command });
|
|
87
|
+
return successToolResult('click', { ...toolResult, attempts, code: command }, action);
|
|
88
88
|
}
|
|
89
89
|
}
|
|
90
90
|
let disambiguated = null;
|
|
@@ -109,7 +109,7 @@ export function createCodeceptJSTools(explorer, task) {
|
|
|
109
109
|
activeNote.screenshot = await action.saveScreenshot();
|
|
110
110
|
}
|
|
111
111
|
activeNote.commit(TestResult.PASSED);
|
|
112
|
-
return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true });
|
|
112
|
+
return successToolResult('click', { ...toolResult, attempts, code: retryCmd, disambiguated: true }, action);
|
|
113
113
|
}
|
|
114
114
|
}
|
|
115
115
|
const toolResult = await ActionResult.fromState(stateManager.getCurrentState()).toToolResult(previousState, commands[0]);
|
|
@@ -183,7 +183,7 @@ export function createCodeceptJSTools(explorer, task) {
|
|
|
183
183
|
message: `Automatically used type() for "${key}" (not a standard key press)`,
|
|
184
184
|
code: typeCommand,
|
|
185
185
|
fallback: true,
|
|
186
|
-
});
|
|
186
|
+
}, action);
|
|
187
187
|
}
|
|
188
188
|
const errorMsg = `pressKey fallback to type() failed: ${action.lastError?.toString()}`;
|
|
189
189
|
if (toolResult?.pageDiff?.ariaChanges || toolResult?.pageDiff?.urlChanged) {
|
|
@@ -229,7 +229,7 @@ export function createCodeceptJSTools(explorer, task) {
|
|
|
229
229
|
...toolResult,
|
|
230
230
|
message: `Pressed key: ${key}${modifier ? ` with modifier(s): ${Array.isArray(modifier) ? modifier.join('+') : modifier}` : ''}`,
|
|
231
231
|
code: pressKeyCommand,
|
|
232
|
-
});
|
|
232
|
+
}, action);
|
|
233
233
|
}
|
|
234
234
|
const errorMsg = `pressKey() failed: ${action.lastError?.toString()}`;
|
|
235
235
|
if (toolResult?.pageDiff?.ariaChanges || toolResult?.pageDiff?.urlChanged) {
|
|
@@ -338,7 +338,7 @@ export function createCodeceptJSTools(explorer, task) {
|
|
|
338
338
|
commandsExecuted: lines.length,
|
|
339
339
|
code: codeBlock,
|
|
340
340
|
suggestion: 'Verify the form was filled in correctly using see() tool. If needed to submit: try click() tool or form() with I.pressKey("Enter").',
|
|
341
|
-
});
|
|
341
|
+
}, action);
|
|
342
342
|
}
|
|
343
343
|
catch (error) {
|
|
344
344
|
activeNote.commit(TestResult.FAILED);
|
|
@@ -511,7 +511,7 @@ export function createAgentTools({ explorer, researcher, navigator, experienceTr
|
|
|
511
511
|
return successToolResult('verify', {
|
|
512
512
|
message: `Verification passed: ${assertion}`,
|
|
513
513
|
code: result.successfulCodes.join('\n'),
|
|
514
|
-
});
|
|
514
|
+
}, { assertionSteps: result.assertionSteps });
|
|
515
515
|
}
|
|
516
516
|
return failedToolResult('verify', `Verification failed: ${assertion}`, {
|
|
517
517
|
suggestion: 'The assertion could not be verified. Check if the condition is actually present on the page or try a different assertion.',
|
|
@@ -886,8 +886,14 @@ function countAriaChanges(ariaChanges) {
|
|
|
886
886
|
const removedCount = removedMatch ? Number.parseInt(removedMatch[1]) : 0;
|
|
887
887
|
return addedCount + removedCount;
|
|
888
888
|
}
|
|
889
|
-
function successToolResult(action, data) {
|
|
889
|
+
function successToolResult(action, data, source) {
|
|
890
890
|
const result = { success: true, action, ...data };
|
|
891
|
+
if (source?.playwrightGroupId) {
|
|
892
|
+
result.playwrightGroupId = source.playwrightGroupId;
|
|
893
|
+
}
|
|
894
|
+
if (source?.assertionSteps?.length) {
|
|
895
|
+
result.assertionSteps = source.assertionSteps;
|
|
896
|
+
}
|
|
891
897
|
if (data?.pageDiff) {
|
|
892
898
|
let suggestion = PAGE_DIFF_SUGGESTION;
|
|
893
899
|
const ariaChanges = data.pageDiff.ariaChanges || '';
|
|
@@ -2,7 +2,9 @@ import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from '
|
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { chromium, firefox, webkit } from 'playwright-core';
|
|
4
4
|
import { ConfigParser } from './config.js';
|
|
5
|
-
import {
|
|
5
|
+
import { getCliName } from "./utils/cli-name.js";
|
|
6
|
+
import { log } from './utils/logger.js';
|
|
7
|
+
import { printNextSteps } from "./utils/next-steps.js";
|
|
6
8
|
const ENDPOINT_FILENAME = '.browser-endpoint';
|
|
7
9
|
function getEndpointFilePath() {
|
|
8
10
|
const configParser = ConfigParser.getInstance();
|
|
@@ -50,8 +52,19 @@ async function launchServer(opts) {
|
|
|
50
52
|
const wsEndpoint = server.wsEndpoint();
|
|
51
53
|
writeEndpoint(wsEndpoint);
|
|
52
54
|
log(`Browser server started: ${browserName} (${opts.show ? 'headed' : 'headless'})`);
|
|
53
|
-
|
|
54
|
-
|
|
55
|
+
const cli = getCliName();
|
|
56
|
+
const sections = [
|
|
57
|
+
{
|
|
58
|
+
label: 'Browser server',
|
|
59
|
+
path: getEndpointFilePath(),
|
|
60
|
+
commands: [
|
|
61
|
+
{ label: 'Endpoint', command: wsEndpoint },
|
|
62
|
+
{ label: 'Status', command: `${cli} browser status` },
|
|
63
|
+
{ label: 'Stop', command: `${cli} browser stop` },
|
|
64
|
+
],
|
|
65
|
+
},
|
|
66
|
+
];
|
|
67
|
+
printNextSteps(sections);
|
|
55
68
|
return server;
|
|
56
69
|
}
|
|
57
70
|
async function getAliveEndpoint() {
|
|
@@ -3,6 +3,7 @@ import { join } from 'node:path';
|
|
|
3
3
|
import { render } from 'ink';
|
|
4
4
|
import React from 'react';
|
|
5
5
|
import { tag } from '../utils/logger.js';
|
|
6
|
+
import { printNextSteps, relativeToCwd } from "../utils/next-steps.js";
|
|
6
7
|
import { BaseCommand } from './base-command.js';
|
|
7
8
|
export class AddRuleCommand extends BaseCommand {
|
|
8
9
|
name = 'add-rule';
|
|
@@ -33,18 +34,20 @@ export class AddRuleCommand extends BaseCommand {
|
|
|
33
34
|
mkdirSync(rulesDir, { recursive: true });
|
|
34
35
|
const filePath = join(rulesDir, `${ruleName}.md`);
|
|
35
36
|
if (existsSync(filePath)) {
|
|
36
|
-
tag('warning').log(`Rule file already exists: ${filePath}`);
|
|
37
|
+
tag('warning').log(`Rule file already exists: ${relativeToCwd(filePath)}`);
|
|
37
38
|
return null;
|
|
38
39
|
}
|
|
39
40
|
const content = opts?.content || `Instructions for ${agentName} agent.`;
|
|
40
41
|
writeFileSync(filePath, `${content.trim()}\n`);
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
42
|
+
const configLine = opts?.urlPattern ? `ai.agents.${agentName}.rules: [{ '${opts.urlPattern}': '${ruleName}' }]` : `ai.agents.${agentName}.rules: ['${ruleName}']`;
|
|
43
|
+
const sections = [
|
|
44
|
+
{
|
|
45
|
+
label: 'Rule',
|
|
46
|
+
path: filePath,
|
|
47
|
+
commands: [{ label: 'Add to config', command: configLine }],
|
|
48
|
+
},
|
|
49
|
+
];
|
|
50
|
+
printNextSteps(sections);
|
|
48
51
|
return filePath;
|
|
49
52
|
}
|
|
50
53
|
}
|