explorbot 0.1.18 → 0.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/explorbot-cli.ts +3 -1
- package/dist/bin/explorbot-cli.js +2 -1
- package/dist/package.json +1 -1
- package/dist/src/action.js +8 -3
- package/dist/src/ai/driller.js +1 -1
- package/dist/src/ai/navigator.js +43 -2
- package/dist/src/ai/pilot.js +5 -0
- package/dist/src/ai/planner.js +21 -5
- package/dist/src/ai/rerunner.js +1 -1
- package/dist/src/ai/researcher/deep-analysis.js +22 -7
- package/dist/src/ai/researcher.js +10 -5
- package/dist/src/ai/session-analyst.js +24 -0
- package/dist/src/ai/tester.js +3 -3
- package/dist/src/ai/tools.js +3 -2
- package/dist/src/commands/explore-command.js +5 -1
- package/dist/src/components/LogPane.js +34 -4
- package/package.json +1 -1
- package/src/action.ts +8 -3
- package/src/ai/driller.ts +1 -1
- package/src/ai/navigator.ts +43 -2
- package/src/ai/pilot.ts +5 -0
- package/src/ai/planner.ts +22 -5
- package/src/ai/rerunner.ts +1 -1
- package/src/ai/researcher/deep-analysis.ts +20 -7
- package/src/ai/researcher.ts +9 -5
- package/src/ai/session-analyst.ts +24 -0
- package/src/ai/tester.ts +3 -3
- package/src/ai/tools.ts +3 -2
- package/src/commands/explore-command.ts +6 -1
- package/src/components/LogPane.tsx +42 -9
package/bin/explorbot-cli.ts
CHANGED
|
@@ -43,6 +43,8 @@ interface CLIOptions {
|
|
|
43
43
|
}
|
|
44
44
|
|
|
45
45
|
function buildExplorBotOptions(from: string | undefined, options: CLIOptions): ExplorBotOptions {
|
|
46
|
+
const sessionFile = options.session === true ? path.join(path.resolve(options.path || process.cwd()), 'output', 'session.json') : options.session;
|
|
47
|
+
|
|
46
48
|
return {
|
|
47
49
|
from,
|
|
48
50
|
verbose: options.verbose || options.debug,
|
|
@@ -51,7 +53,7 @@ function buildExplorBotOptions(from: string | undefined, options: CLIOptions): E
|
|
|
51
53
|
show: options.show,
|
|
52
54
|
headless: options.headless,
|
|
53
55
|
incognito: options.incognito,
|
|
54
|
-
session:
|
|
56
|
+
session: sessionFile,
|
|
55
57
|
} as ExplorBotOptions;
|
|
56
58
|
}
|
|
57
59
|
|
package/dist/bin/explorbot-cli.js
CHANGED
|
@@ -27,6 +27,7 @@ if (!process.env.EXPLORBOT_NO_BANNER) {
|
|
|
27
27
|
console.log(`⛵ ${chalk.yellow.bold(`Explorbot v${pkgVersion}`)} ${chalk.dim('Autonomous Testing Agent')}`);
|
|
28
28
|
}
|
|
29
29
|
function buildExplorBotOptions(from, options) {
|
|
30
|
+
const sessionFile = options.session === true ? path.join(path.resolve(options.path || process.cwd()), 'output', 'session.json') : options.session;
|
|
30
31
|
return {
|
|
31
32
|
from,
|
|
32
33
|
verbose: options.verbose || options.debug,
|
|
@@ -35,7 +36,7 @@ function buildExplorBotOptions(from, options) {
|
|
|
35
36
|
show: options.show,
|
|
36
37
|
headless: options.headless,
|
|
37
38
|
incognito: options.incognito,
|
|
38
|
-
session:
|
|
39
|
+
session: sessionFile,
|
|
39
40
|
};
|
|
40
41
|
}
|
|
41
42
|
function addCommonOptions(cmd) {
|
package/dist/package.json
CHANGED
package/dist/src/action.js
CHANGED
|
@@ -2,7 +2,6 @@ import fs from 'node:fs';
|
|
|
2
2
|
import { join } from 'node:path';
|
|
3
3
|
import { faker } from '@faker-js/faker';
|
|
4
4
|
import { context, trace } from '@opentelemetry/api';
|
|
5
|
-
import { highlight } from 'cli-highlight';
|
|
6
5
|
import { container, recorder } from 'codeceptjs';
|
|
7
6
|
import * as codeceptjs from 'codeceptjs';
|
|
8
7
|
import { hopeThat, retryTo, tryTo, within } from 'codeceptjs/lib/effects';
|
|
@@ -12,7 +11,7 @@ import { clearActivity, setActivity } from "./activity.js";
|
|
|
12
11
|
import { ConfigParser, outputPath } from './config.js';
|
|
13
12
|
import { Observability } from "./observability.js";
|
|
14
13
|
import { htmlCombinedSnapshot, minifyHtml } from './utils/html.js';
|
|
15
|
-
import { createDebug,
|
|
14
|
+
import { createDebug, setStepSpanParent, tag } from './utils/logger.js';
|
|
16
15
|
import { safeFilename } from "./utils/strings.js";
|
|
17
16
|
const debugLog = createDebug('explorbot:action');
|
|
18
17
|
const FATAL_BROWSER_ERRORS = /Frame was detached|Target closed|Execution context was destroyed|Protocol error|Session closed/i;
|
|
@@ -257,7 +256,13 @@ class Action {
|
|
|
257
256
|
async expect(codeOrFunction) {
|
|
258
257
|
const codeString = typeof codeOrFunction === 'string' ? codeOrFunction : codeOrFunction.toString();
|
|
259
258
|
this.expectation = codeString.toString();
|
|
260
|
-
|
|
259
|
+
const expectationPreview = sanitizeCodeBlock(codeString)
|
|
260
|
+
.split('\n')
|
|
261
|
+
.map((line) => line.trim())
|
|
262
|
+
.filter(Boolean)
|
|
263
|
+
.slice(0, 2)
|
|
264
|
+
.join(' ');
|
|
265
|
+
tag('step').log(`Expecting: ${expectationPreview || 'assertion'}`);
|
|
261
266
|
try {
|
|
262
267
|
debugLog('Executing expectation:', codeString);
|
|
263
268
|
let codeFunction;
|
package/dist/src/ai/driller.js
CHANGED
|
@@ -96,7 +96,7 @@ export class Driller extends TaskAgent {
|
|
|
96
96
|
const sessionName = `driller_${Date.now().toString(36)}`;
|
|
97
97
|
this.allResults = [];
|
|
98
98
|
return Observability.run(`driller: ${currentState.url}`, { tags: ['driller'], sessionId: sessionName }, async () => {
|
|
99
|
-
tag('
|
|
99
|
+
tag('step').log(`Drilling page: ${currentState.url}`);
|
|
100
100
|
await this.hooksRunner.runBeforeHook('driller', currentState.url);
|
|
101
101
|
const originalState = await this.captureAnnotatedState();
|
|
102
102
|
const components = await this.collectComponents(originalState, maxComponents);
|
package/dist/src/ai/navigator.js
CHANGED
|
@@ -68,8 +68,48 @@ class Navigator {
|
|
|
68
68
|
this.experienceTracker = experienceTracker || new ExperienceTracker();
|
|
69
69
|
this.hooksRunner = new HooksRunner(explorer, explorer.getConfig());
|
|
70
70
|
}
|
|
71
|
+
getBaseOrigin() {
|
|
72
|
+
const baseUrl = this.explorer.getConfig().playwright.url;
|
|
73
|
+
try {
|
|
74
|
+
return new URL(baseUrl).origin;
|
|
75
|
+
}
|
|
76
|
+
catch {
|
|
77
|
+
return null;
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
getComparableCurrentUrl(stateManager, expectedUrl) {
|
|
81
|
+
const currentState = stateManager.getCurrentState();
|
|
82
|
+
if (!currentState)
|
|
83
|
+
return '';
|
|
84
|
+
const current = /^https?:\/\//i.test(expectedUrl) ? currentState.fullUrl || currentState.url || '' : currentState.url || '';
|
|
85
|
+
return current;
|
|
86
|
+
}
|
|
87
|
+
isSameExpectedOrigin(expectedUrl, stateManager) {
|
|
88
|
+
const currentState = stateManager.getCurrentState();
|
|
89
|
+
if (!currentState)
|
|
90
|
+
return false;
|
|
91
|
+
const currentFullUrl = currentState.fullUrl || currentState.url || '';
|
|
92
|
+
if (!currentFullUrl)
|
|
93
|
+
return false;
|
|
94
|
+
try {
|
|
95
|
+
const currentOrigin = new URL(currentFullUrl).origin;
|
|
96
|
+
if (/^https?:\/\//i.test(expectedUrl)) {
|
|
97
|
+
return currentOrigin === new URL(expectedUrl).origin;
|
|
98
|
+
}
|
|
99
|
+
const baseOrigin = this.getBaseOrigin();
|
|
100
|
+
if (!baseOrigin)
|
|
101
|
+
return true;
|
|
102
|
+
return currentOrigin === baseOrigin;
|
|
103
|
+
}
|
|
104
|
+
catch {
|
|
105
|
+
return !/^https?:\/\//i.test(expectedUrl);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
71
108
|
isOnExpectedPage(expectedUrl, stateManager) {
|
|
72
|
-
|
|
109
|
+
if (!this.isSameExpectedOrigin(expectedUrl, stateManager)) {
|
|
110
|
+
return false;
|
|
111
|
+
}
|
|
112
|
+
const currentUrl = this.getComparableCurrentUrl(stateManager, expectedUrl);
|
|
73
113
|
return normalizeUrl(currentUrl) === normalizeUrl(expectedUrl);
|
|
74
114
|
}
|
|
75
115
|
async visit(url) {
|
|
@@ -249,7 +289,8 @@ class Navigator {
|
|
|
249
289
|
}
|
|
250
290
|
}
|
|
251
291
|
const freshState = await action.capturePageState();
|
|
252
|
-
const
|
|
292
|
+
const currentUrl = /^https?:\/\//i.test(expectedUrl) ? freshState.fullUrl || freshState.url || '' : freshState.url || '';
|
|
293
|
+
const urlMatches = this.isSameExpectedOrigin(expectedUrl, action.stateManager) && normalizeUrl(currentUrl) === normalizeUrl(expectedUrl);
|
|
253
294
|
const stateChanged = freshState.getStateHash() !== actionResult.getStateHash();
|
|
254
295
|
resolved = urlMatches && stateChanged;
|
|
255
296
|
if (!resolved && attemptOk) {
|
package/dist/src/ai/pilot.js
CHANGED
|
@@ -270,6 +270,9 @@ export class Pilot {
|
|
|
270
270
|
overrides the others — weigh them together. Tester's record() notes are the LEAST reliable; always
|
|
271
271
|
cross-check against actual actions and state. Visual screenshot analysis is strong for UI state
|
|
272
272
|
(active tabs, visible counts, colors).
|
|
273
|
+
If the final page clearly shows an equivalent success state in a different UI form, do not fail only
|
|
274
|
+
because one narrow assertion targeted a specific badge, count, toast, or wording that the product
|
|
275
|
+
represents differently.
|
|
273
276
|
|
|
274
277
|
SCENARIO TITLE defines what must happen. Action verbs require persisted evidence:
|
|
275
278
|
- "Create X" → X must exist (visible, redirected to its page, or success message). Opening a form is NOT enough.
|
|
@@ -311,6 +314,8 @@ export class Pilot {
|
|
|
311
314
|
|
|
312
315
|
GUIDANCE (required for "continue"): a specific next action on the current page — which tool, what
|
|
313
316
|
to verify, how to record. Do not suggest repeating actions that already succeeded.
|
|
317
|
+
If progress is blocked only because the page lacks target data for the scenario, prefer precondition()
|
|
318
|
+
over repeated UI attempts.
|
|
314
319
|
`;
|
|
315
320
|
}
|
|
316
321
|
buildVerdictSystemPrompt(type, task) {
|
package/dist/src/ai/planner.js
CHANGED
|
@@ -64,6 +64,9 @@ export class Planner extends PlannerBase {
|
|
|
64
64
|
get sectionOrder() {
|
|
65
65
|
return ConfigParser.getInstance().getConfig().ai?.agents?.researcher?.sections || Object.keys(POSSIBLE_SECTIONS);
|
|
66
66
|
}
|
|
67
|
+
getDefaultStartUrl(state) {
|
|
68
|
+
return state.fullUrl || state.url;
|
|
69
|
+
}
|
|
67
70
|
getSystemMessage(feature) {
|
|
68
71
|
const currentUrl = this.stateManager.getCurrentState()?.url;
|
|
69
72
|
const customPrompt = this.provider.getSystemPromptForAgent('planner', currentUrl);
|
|
@@ -138,7 +141,6 @@ export class Planner extends PlannerBase {
|
|
|
138
141
|
}
|
|
139
142
|
this.freshStart = false;
|
|
140
143
|
setActivity(`${this.emoji} Planning...`, 'action');
|
|
141
|
-
tag('info').log(`Planning test scenarios for ${state.url}`);
|
|
142
144
|
if (style)
|
|
143
145
|
tag('info').log(`Planning style: ${style}`);
|
|
144
146
|
const tags = ['planner'];
|
|
@@ -162,7 +164,8 @@ export class Planner extends PlannerBase {
|
|
|
162
164
|
if (aiResult.object.scenarios.length === 0 && !this.currentPlan) {
|
|
163
165
|
throw new Error('No tasks were created successfully');
|
|
164
166
|
}
|
|
165
|
-
const
|
|
167
|
+
const defaultStartUrl = this.getDefaultStartUrl(state);
|
|
168
|
+
const fromPlanning = aiResult.object.scenarios.map((s) => new Test(s.scenario, s.priority, s.expectedOutcomes, s.startUrl || defaultStartUrl, s.steps || []));
|
|
166
169
|
return { tests: fromPlanning, planName: aiResult.object.planName };
|
|
167
170
|
});
|
|
168
171
|
const tests = result.tests;
|
|
@@ -171,7 +174,8 @@ export class Planner extends PlannerBase {
|
|
|
171
174
|
const cached = state.url ? getRegisteredPlan(state.url) : null;
|
|
172
175
|
const planName = feature || cached?.plan.title || result.planName || state.url;
|
|
173
176
|
this.currentPlan = new Plan(planName);
|
|
174
|
-
this.currentPlan.url = state
|
|
177
|
+
this.currentPlan.url = this.getDefaultStartUrl(state);
|
|
178
|
+
const defaultStartUrl = this.getDefaultStartUrl(state);
|
|
175
179
|
if (parentPlan)
|
|
176
180
|
this.currentPlan.parentPlan = parentPlan;
|
|
177
181
|
const allPreviousScenarios = this.getPreviousSessionScenarios();
|
|
@@ -182,14 +186,14 @@ export class Planner extends PlannerBase {
|
|
|
182
186
|
if (allPreviousScenarios.has(t.scenario.toLowerCase()))
|
|
183
187
|
continue;
|
|
184
188
|
t.style = this.lastStyleName;
|
|
185
|
-
t.startUrl =
|
|
189
|
+
t.startUrl = defaultStartUrl;
|
|
186
190
|
this.currentPlan.addTest(t);
|
|
187
191
|
}
|
|
188
192
|
}
|
|
189
193
|
else {
|
|
190
194
|
tag('step').log(`Expanding plan: "${this.currentPlan.title}"`);
|
|
191
195
|
this.currentPlan.nextIteration();
|
|
192
|
-
const newTests = this.addNewTests(tests, state
|
|
196
|
+
const newTests = this.addNewTests(tests, this.getDefaultStartUrl(state));
|
|
193
197
|
if (newTests.length > 0) {
|
|
194
198
|
const summary = `New scenarios:\n${newTests.map((t) => `+ [${t.priority}] ${t.scenario}`).join('\n')}`;
|
|
195
199
|
tag('multiline').log(summary);
|
|
@@ -292,6 +296,13 @@ export class Planner extends PlannerBase {
|
|
|
292
296
|
Focus on URL page change or data persistency after page reload.
|
|
293
297
|
If there are subpages (pages with same URL path) plan testing of those subpages as well
|
|
294
298
|
If you plan to test CRUD operations, plan them in correct order: create, read, update.
|
|
299
|
+
Do not invent specific route names, success messages, validation texts, badge counts, or welcome messages unless they are visible in research, visited pages, or prior observed flows.
|
|
300
|
+
If exact wording is unknown, describe the expected result generically, for example "an authentication error is shown" or "the user stays on the login page" instead of guessing the literal text.
|
|
301
|
+
If exact redirect destination is unknown, describe the destination by visible page identity, for example "the dashboard page opens" or "the current workspace home page opens" instead of inventing a URL slug.
|
|
302
|
+
Only propose scenarios whose prerequisites are evident from page research, visited pages, or API data preparation context.
|
|
303
|
+
If a scenario needs existing records, recipients, results, notifications, or other target data, propose it only when that data is visible or API preconditions can create it.
|
|
304
|
+
If the page appears read-only, degraded, demo-limited, maintenance-like, or lacks write controls, prefer read-only scenarios such as opening panels, inspecting visible lists, filtering, searching, or verifying current state.
|
|
305
|
+
Do not assume hidden data exists just because a control is present.
|
|
295
306
|
DO NOT propose "verification-only" tests that merely open a UI element (modal, dropdown, panel) and check it exists.
|
|
296
307
|
Every test must complete a meaningful action that changes application state or produces a business outcome.
|
|
297
308
|
Opening a modal is NOT a test — performing an action INSIDE the modal IS a test.
|
|
@@ -516,10 +527,15 @@ export class Planner extends PlannerBase {
|
|
|
516
527
|
- Good: "New suite 'My New Suite' appears in the suite list"
|
|
517
528
|
- Good: "Suite appears under Starred filter tab"
|
|
518
529
|
- Good: "Success message 'Suite created' is displayed"
|
|
530
|
+
- Good when wording is unknown: "An authentication error is displayed"
|
|
531
|
+
- Good when route is unknown: "The workspace home page is displayed"
|
|
519
532
|
- Bad: "Modal is displayed" (just verifying existence, no business value)
|
|
520
533
|
- Bad: "Dropdown menu is visible" (just verifying existence)
|
|
534
|
+
- Bad: "Welcome message is displayed" if no welcome message is visible in research
|
|
535
|
+
- Bad: "Redirected to /dashboard" if no such route was observed
|
|
521
536
|
- Each outcome should be independently verifiable
|
|
522
537
|
- Avoid combining multiple checks into one outcome
|
|
538
|
+
- Prefer durable user-facing results over fragile micro-signals
|
|
523
539
|
- Expected outcomes describe WHAT TO VERIFY
|
|
524
540
|
|
|
525
541
|
FORMATTING RULES:
|
package/dist/src/ai/rerunner.js
CHANGED
|
@@ -67,7 +67,7 @@ export class Rerunner extends TaskAgent {
|
|
|
67
67
|
tag('error').log(`Test file not found: ${absPath}`);
|
|
68
68
|
return { total: 0, passed: 0, failed: 0, healed: 0 };
|
|
69
69
|
}
|
|
70
|
-
tag('
|
|
70
|
+
tag('step').log(`Re-running tests from: ${relative(process.cwd(), absPath)}`);
|
|
71
71
|
setActivity('🔄 Re-running tests...', 'action');
|
|
72
72
|
this.healedSteps = [];
|
|
73
73
|
this.setupPlugins();
|
package/dist/src/ai/researcher/deep-analysis.js
CHANGED
|
@@ -12,7 +12,7 @@ export function WithDeepAnalysis(Base) {
|
|
|
12
12
|
return class extends Base {
|
|
13
13
|
async performDeepAnalysis(state, result) {
|
|
14
14
|
tag('info').log('Starting deep analysis of expandable elements');
|
|
15
|
-
await this.navigateTo(state.url);
|
|
15
|
+
await this.navigateTo(state.fullUrl || state.url);
|
|
16
16
|
let expandables = await this._discoverExpandables(result.text);
|
|
17
17
|
if (expandables.length === 0) {
|
|
18
18
|
tag('info').log('No expandable elements identified by AI');
|
|
@@ -21,7 +21,7 @@ export function WithDeepAnalysis(Base) {
|
|
|
21
21
|
tag('substep').log(`Identified ${expandables.length} expandable elements`);
|
|
22
22
|
const maxClicks = this.explorer.getConfig().ai?.agents?.researcher?.maxExpandableClicks ?? DEFAULT_MAX_EXPANDABLE_CLICKS;
|
|
23
23
|
if (expandables.length > maxClicks) {
|
|
24
|
-
expandables = await this._selectExpandables(expandables, state.url, maxClicks);
|
|
24
|
+
expandables = await this._selectExpandables(expandables, state.fullUrl || state.url, maxClicks);
|
|
25
25
|
tag('substep').log(`Selected ${expandables.length} expandables to click (max: ${maxClicks})`);
|
|
26
26
|
}
|
|
27
27
|
const elements = expandables
|
|
@@ -144,7 +144,15 @@ export function WithDeepAnalysis(Base) {
|
|
|
144
144
|
`;
|
|
145
145
|
visionCall = this.provider.processImage(visionPrompt, screenshot.toString('base64'));
|
|
146
146
|
}
|
|
147
|
-
|
|
147
|
+
let textRes = null;
|
|
148
|
+
let visionRes = null;
|
|
149
|
+
try {
|
|
150
|
+
[textRes, visionRes] = await Promise.all([textCall, visionCall]);
|
|
151
|
+
}
|
|
152
|
+
catch (err) {
|
|
153
|
+
tag('warning').log(`Expandable discovery failed, skipping deep analysis: ${err instanceof Error ? err.message : err}`);
|
|
154
|
+
return [];
|
|
155
|
+
}
|
|
148
156
|
const eidxSet = new Set();
|
|
149
157
|
const parseRefs = (text) => {
|
|
150
158
|
if (!text)
|
|
@@ -204,10 +212,17 @@ export function WithDeepAnalysis(Base) {
|
|
|
204
212
|
- Respond with comma-separated numbers to keep, e.g.: 1, 3, 5
|
|
205
213
|
`;
|
|
206
214
|
const model = this.provider.getModelForAgent('researcher');
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
215
|
+
let r;
|
|
216
|
+
try {
|
|
217
|
+
r = await this.provider.chat([{ role: 'user', content: prompt }], model, {
|
|
218
|
+
agentName: 'researcher',
|
|
219
|
+
telemetryFunctionId: 'researcher.selectExpandables',
|
|
220
|
+
});
|
|
221
|
+
}
|
|
222
|
+
catch (err) {
|
|
223
|
+
tag('warning').log(`Expandable selection failed, using first ${maxClicks}: ${err instanceof Error ? err.message : err}`);
|
|
224
|
+
return expandables.slice(0, maxClicks);
|
|
225
|
+
}
|
|
211
226
|
const nums = (r.text || '').match(/\d+/g)?.map(Number) || [];
|
|
212
227
|
const selected = expandables.filter((_, i) => nums.includes(i + 1));
|
|
213
228
|
return selected.length > 0 ? selected.slice(0, maxClicks) : expandables.slice(0, maxClicks);
|
package/dist/src/ai/researcher.js
CHANGED
|
@@ -93,7 +93,7 @@ export class Researcher extends ResearcherBase {
|
|
|
93
93
|
const displayUrl = state.fullUrl || state.url;
|
|
94
94
|
tag('info').log(`Researching ${displayUrl} to understand the context...`);
|
|
95
95
|
setActivity(`${this.emoji} Researching...`, 'action');
|
|
96
|
-
await this.ensureNavigated(
|
|
96
|
+
await this.ensureNavigated(displayUrl, screenshot && this.provider.hasVision());
|
|
97
97
|
await this.hooksRunner.runBeforeHook('researcher', state.url);
|
|
98
98
|
const annotatedElements = await this.explorer.annotateElements();
|
|
99
99
|
debugLog(`Annotated ${annotatedElements.length} interactive elements with eidx`);
|
|
@@ -114,11 +114,11 @@ export class Researcher extends ResearcherBase {
|
|
|
114
114
|
if (!deep && !force) {
|
|
115
115
|
const similar = await findSimilarResearch(combinedHtml);
|
|
116
116
|
if (similar) {
|
|
117
|
-
tag('
|
|
117
|
+
tag('substep').log('Similar research found, reusing cached result');
|
|
118
118
|
if (stateHash)
|
|
119
119
|
saveResearch(stateHash, similar, combinedHtml);
|
|
120
120
|
tag('multiline').log(formatResearchSummary(similar));
|
|
121
|
-
tag('success').log(
|
|
121
|
+
tag('success').log('Research complete (reused)');
|
|
122
122
|
await this.hooksRunner.runAfterHook('researcher', state.url);
|
|
123
123
|
return similar;
|
|
124
124
|
}
|
|
@@ -236,7 +236,12 @@ export class Researcher extends ResearcherBase {
|
|
|
236
236
|
markSectionAsFocused(result, fallback);
|
|
237
237
|
}
|
|
238
238
|
if (!interrupted() && deep) {
|
|
239
|
-
|
|
239
|
+
try {
|
|
240
|
+
await this.performDeepAnalysis(state, result);
|
|
241
|
+
}
|
|
242
|
+
catch (err) {
|
|
243
|
+
tag('warning').log(`Deep analysis failed, continuing with best-effort research: ${err instanceof Error ? err.message : err}`);
|
|
244
|
+
}
|
|
240
245
|
}
|
|
241
246
|
if (!interrupted() && data) {
|
|
242
247
|
const extractedData = await this.extractData(state);
|
|
@@ -257,7 +262,7 @@ export class Researcher extends ResearcherBase {
|
|
|
257
262
|
this.experienceTracker.updateSummary(this.actionResult, summaryLine);
|
|
258
263
|
}
|
|
259
264
|
tag('multiline').log(formatResearchSummary(result.text, { visionUsed: this.hasScreenshotToAnalyze }));
|
|
260
|
-
tag('success').log(
|
|
265
|
+
tag('success').log('Research complete');
|
|
261
266
|
if (researchFile)
|
|
262
267
|
tag('substep').log(`Research file saved to: ${researchFile}`);
|
|
263
268
|
if (this.actionResult?.screenshotFile) {
|
package/dist/src/ai/session-analyst.js
CHANGED
|
@@ -105,12 +105,36 @@ export class SessionAnalyst {
|
|
|
105
105
|
.slice(-30)
|
|
106
106
|
.map((entry) => ` - [${entry.type}] ${entry.content}`)
|
|
107
107
|
.join('\n');
|
|
108
|
+
const checked = test.getCheckedExpectations().join(' | ') || '(none)';
|
|
109
|
+
const remaining = test.getRemainingExpectations().join(' | ') || '(none)';
|
|
110
|
+
const notes = test
|
|
111
|
+
.getPrintableNotes()
|
|
112
|
+
.slice(-12)
|
|
113
|
+
.map((note) => ` - ${note}`)
|
|
114
|
+
.join('\n');
|
|
115
|
+
const visitedUrls = test.getVisitedUrls({ localOnly: true }).join(' | ') || '(none)';
|
|
116
|
+
const verification = test.verification
|
|
117
|
+
? dedent `
|
|
118
|
+
verification_status: ${test.verification.status || 'unknown'}
|
|
119
|
+
verification_message: ${test.verification.message || '(none)'}
|
|
120
|
+
verification_url: ${test.verification.url || '(none)'}
|
|
121
|
+
verification_page: ${test.verification.pageLabel || '(none)'}
|
|
122
|
+
verification_details:
|
|
123
|
+
${(test.verification.details.length > 0 ? test.verification.details : ['(none)']).map((detail) => ` - ${detail}`).join('\n')}
|
|
124
|
+
`
|
|
125
|
+
: 'verification_status: none';
|
|
108
126
|
return dedent `
|
|
109
127
|
<test ref="#${ref}">
|
|
110
128
|
url: ${test.startUrl || '/'}
|
|
111
129
|
scenario: ${test.scenario}
|
|
112
130
|
result: ${test.result || 'unknown'}
|
|
113
131
|
expected: ${test.expected.join(' | ') || '(none)'}
|
|
132
|
+
checked_expectations: ${checked}
|
|
133
|
+
remaining_expectations: ${remaining}
|
|
134
|
+
visited_urls: ${visitedUrls}
|
|
135
|
+
${verification}
|
|
136
|
+
notes:
|
|
137
|
+
${notes || ' - (none)'}
|
|
114
138
|
log:
|
|
115
139
|
${log}
|
|
116
140
|
</test>
|
package/dist/src/ai/tester.js
CHANGED
|
@@ -92,7 +92,6 @@ export class Tester extends TaskAgent {
|
|
|
92
92
|
const state = this.explorer.getStateManager().getCurrentState();
|
|
93
93
|
if (!state)
|
|
94
94
|
throw new Error('No state found');
|
|
95
|
-
tag('info').log(`Testing scenario: ${task.scenario}`);
|
|
96
95
|
setActivity(`🧪 Testing: ${task.scenario}`, 'action');
|
|
97
96
|
this.previousUrl = null;
|
|
98
97
|
this.previousStateHash = null;
|
|
@@ -595,7 +594,6 @@ export class Tester extends TaskAgent {
|
|
|
595
594
|
if (!task.hasFinished) {
|
|
596
595
|
task.finish(TestResult.FAILED);
|
|
597
596
|
}
|
|
598
|
-
tag('info').log(`Finished: ${task.scenario}`);
|
|
599
597
|
if (task.isSuccessful) {
|
|
600
598
|
tag('success').log(`Successful test: ${task.scenario}`);
|
|
601
599
|
}
|
|
@@ -792,7 +790,9 @@ export class Tester extends TaskAgent {
|
|
|
792
790
|
if (this.getCurrentState().isInsideIframe) {
|
|
793
791
|
await this.explorer.switchToMainFrame();
|
|
794
792
|
}
|
|
795
|
-
|
|
793
|
+
const currentState = this.explorer.getStateManager().getCurrentState();
|
|
794
|
+
const currentUrl = currentState?.fullUrl || currentState?.url;
|
|
795
|
+
if (currentUrl === resetUrl) {
|
|
796
796
|
return {
|
|
797
797
|
success: false,
|
|
798
798
|
message: 'Reset failed - already on initial page!',
|
package/dist/src/ai/tools.js
CHANGED
|
@@ -731,11 +731,12 @@ export function createAgentTools({ explorer, researcher, navigator, experienceTr
|
|
|
731
731
|
}),
|
|
732
732
|
execute: async ({ reason }) => {
|
|
733
733
|
const stateManager = explorer.getStateManager();
|
|
734
|
-
const
|
|
734
|
+
const currentState = stateManager.getCurrentState();
|
|
735
|
+
const currentUrl = currentState?.fullUrl || currentState?.url;
|
|
735
736
|
const history = stateManager.getStateHistory();
|
|
736
737
|
let targetUrl = null;
|
|
737
738
|
for (let i = history.length - 1; i >= 0; i--) {
|
|
738
|
-
const url = history[i].toState.url;
|
|
739
|
+
const url = history[i].toState.fullUrl || history[i].toState.url;
|
|
739
740
|
if (url !== currentUrl) {
|
|
740
741
|
targetUrl = url;
|
|
741
742
|
break;
|
package/dist/src/commands/explore-command.js
CHANGED
|
@@ -34,6 +34,10 @@ export class ExploreCommand extends BaseCommand {
|
|
|
34
34
|
failedSubPages = new Set();
|
|
35
35
|
oldTestRefs = new Set();
|
|
36
36
|
priorityFilter;
|
|
37
|
+
getCurrentPageUrl() {
|
|
38
|
+
const state = this.explorBot.getExplorer().getStateManager().getCurrentState();
|
|
39
|
+
return state?.fullUrl || state?.url;
|
|
40
|
+
}
|
|
37
41
|
async execute(args) {
|
|
38
42
|
const { opts, args: remaining } = this.parseArgs(args);
|
|
39
43
|
if (opts.maxTests) {
|
|
@@ -49,7 +53,7 @@ export class ExploreCommand extends BaseCommand {
|
|
|
49
53
|
tag('info').log('Dry-run mode: planner runs to discover new tests; test execution is skipped');
|
|
50
54
|
Stats.mode ??= 'explore';
|
|
51
55
|
Stats.focus ??= feature;
|
|
52
|
-
const mainUrl = this.
|
|
56
|
+
const mainUrl = this.getCurrentPageUrl();
|
|
53
57
|
if (cfg.enabled) {
|
|
54
58
|
await this.runReuseMode(mainUrl, feature, cfg);
|
|
55
59
|
}
|
package/dist/src/components/LogPane.js
CHANGED
|
@@ -10,6 +10,15 @@ const LogPane = React.memo(({ verboseMode }) => {
|
|
|
10
10
|
const [logs, setLogs] = useState([]);
|
|
11
11
|
const pendingLogsRef = React.useRef([]);
|
|
12
12
|
const flushTimeoutRef = React.useRef(null);
|
|
13
|
+
const MAX_MULTILINE_LINES = 16;
|
|
14
|
+
const MAX_STEP_LINES = 8;
|
|
15
|
+
const MAX_SUBSTEP_LINES = 6;
|
|
16
|
+
const formatCollapsedContent = useCallback((lines, collapsedCount, label) => {
|
|
17
|
+
if (collapsedCount <= 0) {
|
|
18
|
+
return lines.join('\n');
|
|
19
|
+
}
|
|
20
|
+
return [`... ${collapsedCount} earlier ${label}`, ...lines].join('\n');
|
|
21
|
+
}, []);
|
|
13
22
|
const flushLogs = useCallback(() => {
|
|
14
23
|
if (pendingLogsRef.current.length === 0)
|
|
15
24
|
return;
|
|
@@ -27,11 +36,33 @@ const LogPane = React.memo(({ verboseMode }) => {
|
|
|
27
36
|
if (lastLog.type === logEntry.type && lastLog.content === logEntry.content && Math.abs((lastLog.timestamp?.getTime() || 0) - (logEntry.timestamp?.getTime() || 0)) < 1000) {
|
|
28
37
|
continue;
|
|
29
38
|
}
|
|
39
|
+
if ((logEntry.type === 'step' || logEntry.type === 'substep') && lastLog.type === logEntry.type && Math.abs((lastLog.timestamp?.getTime() || 0) - (logEntry.timestamp?.getTime() || 0)) < 1500) {
|
|
40
|
+
const currentLines = String(logEntry.content)
|
|
41
|
+
.split('\n')
|
|
42
|
+
.filter((line) => line.length > 0);
|
|
43
|
+
const previousLines = String(lastLog.content)
|
|
44
|
+
.split('\n')
|
|
45
|
+
.filter((line) => line.length > 0);
|
|
46
|
+
const visiblePreviousLines = lastLog.collapsedCount ? previousLines.slice(1) : previousLines;
|
|
47
|
+
const maxLines = logEntry.type === 'step' ? MAX_STEP_LINES : MAX_SUBSTEP_LINES;
|
|
48
|
+
const mergedLines = [...visiblePreviousLines, ...currentLines];
|
|
49
|
+
const overflow = Math.max(0, mergedLines.length - maxLines);
|
|
50
|
+
const collapsedCount = (lastLog.collapsedCount || 0) + overflow;
|
|
51
|
+
const visibleLines = mergedLines.slice(-maxLines);
|
|
52
|
+
const label = logEntry.type === 'step' ? 'steps' : 'details';
|
|
53
|
+
result[result.length - 1] = {
|
|
54
|
+
...lastLog,
|
|
55
|
+
content: formatCollapsedContent(visibleLines, collapsedCount, label),
|
|
56
|
+
timestamp: logEntry.timestamp,
|
|
57
|
+
collapsedCount,
|
|
58
|
+
};
|
|
59
|
+
continue;
|
|
60
|
+
}
|
|
30
61
|
result.push(logEntry);
|
|
31
62
|
}
|
|
32
63
|
return result;
|
|
33
64
|
});
|
|
34
|
-
}, []);
|
|
65
|
+
}, [formatCollapsedContent]);
|
|
35
66
|
const addLog = useCallback((logEntry) => {
|
|
36
67
|
pendingLogsRef.current.push(logEntry);
|
|
37
68
|
if (!flushTimeoutRef.current) {
|
|
@@ -85,10 +116,9 @@ const LogPane = React.memo(({ verboseMode }) => {
|
|
|
85
116
|
const cleaned = stripAnsi(dedent(log.content));
|
|
86
117
|
const parsed = parseMarkdownToTerminal(cleaned);
|
|
87
118
|
const lines = parsed.split('\n');
|
|
88
|
-
const
|
|
89
|
-
const truncated = lines.length > maxLines ? `${lines.slice(0, maxLines).join('\n')}\n... (${lines.length - maxLines} more lines)` : cleaned;
|
|
119
|
+
const truncated = lines.length > MAX_MULTILINE_LINES ? `${lines.slice(0, MAX_MULTILINE_LINES).join('\n')}\n... (${lines.length - MAX_MULTILINE_LINES} more lines)` : parsed;
|
|
90
120
|
return (React.createElement(Box, { key: index, borderStyle: "classic", borderLeft: false, borderRight: false, marginY: 1, padding: 1, borderColor: "dim", overflow: "hidden" },
|
|
91
|
-
React.createElement(Text, { color: "gray", dimColor: true },
|
|
121
|
+
React.createElement(Text, { color: "gray", dimColor: true }, truncated)));
|
|
92
122
|
}
|
|
93
123
|
if (log.type === 'html') {
|
|
94
124
|
// Convert HTML to markdown, then render as multiline
|
package/package.json
CHANGED
package/src/action.ts
CHANGED
|
@@ -2,7 +2,6 @@ import fs from 'node:fs';
|
|
|
2
2
|
import { join } from 'node:path';
|
|
3
3
|
import { faker } from '@faker-js/faker';
|
|
4
4
|
import { context, trace } from '@opentelemetry/api';
|
|
5
|
-
import { highlight } from 'cli-highlight';
|
|
6
5
|
import { container, recorder } from 'codeceptjs';
|
|
7
6
|
import * as codeceptjs from 'codeceptjs';
|
|
8
7
|
import { hopeThat, retryTo, tryTo, within } from 'codeceptjs/lib/effects';
|
|
@@ -21,7 +20,7 @@ import type { PlaywrightRecorder } from './playwright-recorder.ts';
|
|
|
21
20
|
import type { StateManager } from './state-manager.js';
|
|
22
21
|
import { extractCodeBlocks } from './utils/code-extractor.js';
|
|
23
22
|
import { htmlCombinedSnapshot, minifyHtml } from './utils/html.js';
|
|
24
|
-
import { createDebug,
|
|
23
|
+
import { createDebug, setStepSpanParent, tag } from './utils/logger.js';
|
|
25
24
|
import { safeFilename } from './utils/strings.ts';
|
|
26
25
|
import { throttle } from './utils/throttle.ts';
|
|
27
26
|
|
|
@@ -296,7 +295,13 @@ class Action {
|
|
|
296
295
|
async expect(codeOrFunction: string | ((I: CodeceptJS.I) => void)): Promise<Action> {
|
|
297
296
|
const codeString = typeof codeOrFunction === 'string' ? codeOrFunction : codeOrFunction.toString();
|
|
298
297
|
this.expectation = codeString.toString();
|
|
299
|
-
|
|
298
|
+
const expectationPreview = sanitizeCodeBlock(codeString)
|
|
299
|
+
.split('\n')
|
|
300
|
+
.map((line) => line.trim())
|
|
301
|
+
.filter(Boolean)
|
|
302
|
+
.slice(0, 2)
|
|
303
|
+
.join(' ');
|
|
304
|
+
tag('step').log(`Expecting: ${expectationPreview || 'assertion'}`);
|
|
300
305
|
try {
|
|
301
306
|
debugLog('Executing expectation:', codeString);
|
|
302
307
|
|
package/src/ai/driller.ts
CHANGED
|
@@ -168,7 +168,7 @@ export class Driller extends TaskAgent implements Agent {
|
|
|
168
168
|
this.allResults = [];
|
|
169
169
|
|
|
170
170
|
return Observability.run(`driller: ${currentState.url}`, { tags: ['driller'], sessionId: sessionName }, async () => {
|
|
171
|
-
tag('
|
|
171
|
+
tag('step').log(`Drilling page: ${currentState.url}`);
|
|
172
172
|
await this.hooksRunner.runBeforeHook('driller', currentState.url);
|
|
173
173
|
|
|
174
174
|
const originalState = await this.captureAnnotatedState();
|
package/src/ai/navigator.ts
CHANGED
|
@@ -80,8 +80,48 @@ class Navigator implements Agent {
|
|
|
80
80
|
this.hooksRunner = new HooksRunner(explorer, explorer.getConfig());
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
+
private getBaseOrigin(): string | null {
|
|
84
|
+
const baseUrl = this.explorer.getConfig().playwright.url;
|
|
85
|
+
try {
|
|
86
|
+
return new URL(baseUrl).origin;
|
|
87
|
+
} catch {
|
|
88
|
+
return null;
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
private getComparableCurrentUrl(stateManager: any, expectedUrl: string): string {
|
|
93
|
+
const currentState = stateManager.getCurrentState();
|
|
94
|
+
if (!currentState) return '';
|
|
95
|
+
const current = /^https?:\/\//i.test(expectedUrl) ? currentState.fullUrl || currentState.url || '' : currentState.url || '';
|
|
96
|
+
return current;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
private isSameExpectedOrigin(expectedUrl: string, stateManager: any): boolean {
|
|
100
|
+
const currentState = stateManager.getCurrentState();
|
|
101
|
+
if (!currentState) return false;
|
|
102
|
+
|
|
103
|
+
const currentFullUrl = currentState.fullUrl || currentState.url || '';
|
|
104
|
+
if (!currentFullUrl) return false;
|
|
105
|
+
|
|
106
|
+
try {
|
|
107
|
+
const currentOrigin = new URL(currentFullUrl).origin;
|
|
108
|
+
if (/^https?:\/\//i.test(expectedUrl)) {
|
|
109
|
+
return currentOrigin === new URL(expectedUrl).origin;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
const baseOrigin = this.getBaseOrigin();
|
|
113
|
+
if (!baseOrigin) return true;
|
|
114
|
+
return currentOrigin === baseOrigin;
|
|
115
|
+
} catch {
|
|
116
|
+
return !/^https?:\/\//i.test(expectedUrl);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
|
|
83
120
|
private isOnExpectedPage(expectedUrl: string, stateManager: any): boolean {
|
|
84
|
-
|
|
121
|
+
if (!this.isSameExpectedOrigin(expectedUrl, stateManager)) {
|
|
122
|
+
return false;
|
|
123
|
+
}
|
|
124
|
+
const currentUrl = this.getComparableCurrentUrl(stateManager, expectedUrl);
|
|
85
125
|
return normalizeUrl(currentUrl) === normalizeUrl(expectedUrl);
|
|
86
126
|
}
|
|
87
127
|
|
|
@@ -282,7 +322,8 @@ class Navigator implements Agent {
|
|
|
282
322
|
}
|
|
283
323
|
}
|
|
284
324
|
const freshState = await action.capturePageState();
|
|
285
|
-
const
|
|
325
|
+
const currentUrl = /^https?:\/\//i.test(expectedUrl) ? freshState.fullUrl || freshState.url || '' : freshState.url || '';
|
|
326
|
+
const urlMatches = this.isSameExpectedOrigin(expectedUrl, action.stateManager) && normalizeUrl(currentUrl) === normalizeUrl(expectedUrl);
|
|
286
327
|
const stateChanged = freshState.getStateHash() !== actionResult.getStateHash();
|
|
287
328
|
resolved = urlMatches && stateChanged;
|
|
288
329
|
|
package/src/ai/pilot.ts
CHANGED
|
@@ -313,6 +313,9 @@ export class Pilot implements Agent {
|
|
|
313
313
|
overrides the others — weigh them together. Tester's record() notes are the LEAST reliable; always
|
|
314
314
|
cross-check against actual actions and state. Visual screenshot analysis is strong for UI state
|
|
315
315
|
(active tabs, visible counts, colors).
|
|
316
|
+
If the final page clearly shows an equivalent success state in a different UI form, do not fail only
|
|
317
|
+
because one narrow assertion targeted a specific badge, count, toast, or wording that the product
|
|
318
|
+
represents differently.
|
|
316
319
|
|
|
317
320
|
SCENARIO TITLE defines what must happen. Action verbs require persisted evidence:
|
|
318
321
|
- "Create X" → X must exist (visible, redirected to its page, or success message). Opening a form is NOT enough.
|
|
@@ -355,6 +358,8 @@ export class Pilot implements Agent {
|
|
|
355
358
|
|
|
356
359
|
GUIDANCE (required for "continue"): a specific next action on the current page — which tool, what
|
|
357
360
|
to verify, how to record. Do not suggest repeating actions that already succeeded.
|
|
361
|
+
If progress is blocked only because the page lacks target data for the scenario, prefer precondition()
|
|
362
|
+
over repeated UI attempts.
|
|
358
363
|
`;
|
|
359
364
|
}
|
|
360
365
|
|
package/src/ai/planner.ts
CHANGED
|
@@ -80,6 +80,10 @@ export class Planner extends PlannerBase implements Agent {
|
|
|
80
80
|
return ConfigParser.getInstance().getConfig().ai?.agents?.researcher?.sections || Object.keys(POSSIBLE_SECTIONS);
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
+
private getDefaultStartUrl(state: { url: string; fullUrl?: string }): string {
|
|
84
|
+
return state.fullUrl || state.url;
|
|
85
|
+
}
|
|
86
|
+
|
|
83
87
|
getSystemMessage(feature?: string): string {
|
|
84
88
|
const currentUrl = this.stateManager.getCurrentState()?.url;
|
|
85
89
|
const customPrompt = this.provider.getSystemPromptForAgent('planner', currentUrl);
|
|
@@ -160,7 +164,6 @@ export class Planner extends PlannerBase implements Agent {
|
|
|
160
164
|
this.freshStart = false;
|
|
161
165
|
|
|
162
166
|
setActivity(`${this.emoji} Planning...`, 'action');
|
|
163
|
-
tag('info').log(`Planning test scenarios for ${state.url}`);
|
|
164
167
|
if (style) tag('info').log(`Planning style: ${style}`);
|
|
165
168
|
|
|
166
169
|
const tags = ['planner'];
|
|
@@ -188,7 +191,8 @@ export class Planner extends PlannerBase implements Agent {
|
|
|
188
191
|
throw new Error('No tasks were created successfully');
|
|
189
192
|
}
|
|
190
193
|
|
|
191
|
-
const
|
|
194
|
+
const defaultStartUrl = this.getDefaultStartUrl(state);
|
|
195
|
+
const fromPlanning = aiResult.object.scenarios.map((s: any) => new Test(s.scenario, s.priority, s.expectedOutcomes, s.startUrl || defaultStartUrl, s.steps || []));
|
|
192
196
|
|
|
193
197
|
return { tests: fromPlanning, planName: aiResult.object.planName };
|
|
194
198
|
});
|
|
@@ -200,7 +204,8 @@ export class Planner extends PlannerBase implements Agent {
|
|
|
200
204
|
const cached = state.url ? getRegisteredPlan(state.url) : null;
|
|
201
205
|
const planName = feature || cached?.plan.title || result.planName || state.url;
|
|
202
206
|
this.currentPlan = new Plan(planName);
|
|
203
|
-
this.currentPlan.url = state
|
|
207
|
+
this.currentPlan.url = this.getDefaultStartUrl(state);
|
|
208
|
+
const defaultStartUrl = this.getDefaultStartUrl(state);
|
|
204
209
|
if (parentPlan) this.currentPlan.parentPlan = parentPlan;
|
|
205
210
|
const allPreviousScenarios = this.getPreviousSessionScenarios();
|
|
206
211
|
const existingTestScenarios = this.getExistingTestFileScenarios(state.url);
|
|
@@ -208,13 +213,13 @@ export class Planner extends PlannerBase implements Agent {
|
|
|
208
213
|
for (const t of tests) {
|
|
209
214
|
if (allPreviousScenarios.has(t.scenario.toLowerCase())) continue;
|
|
210
215
|
t.style = this.lastStyleName;
|
|
211
|
-
t.startUrl =
|
|
216
|
+
t.startUrl = defaultStartUrl;
|
|
212
217
|
this.currentPlan.addTest(t);
|
|
213
218
|
}
|
|
214
219
|
} else {
|
|
215
220
|
tag('step').log(`Expanding plan: "${this.currentPlan.title}"`);
|
|
216
221
|
this.currentPlan.nextIteration();
|
|
217
|
-
const newTests = this.addNewTests(tests, state
|
|
222
|
+
const newTests = this.addNewTests(tests, this.getDefaultStartUrl(state));
|
|
218
223
|
if (newTests.length > 0) {
|
|
219
224
|
const summary = `New scenarios:\n${newTests.map((t) => `+ [${t.priority}] ${t.scenario}`).join('\n')}`;
|
|
220
225
|
tag('multiline').log(summary);
|
|
@@ -331,6 +336,13 @@ export class Planner extends PlannerBase implements Agent {
|
|
|
331
336
|
Focus on URL page change or data persistency after page reload.
|
|
332
337
|
If there are subpages (pages with same URL path) plan testing of those subpages as well
|
|
333
338
|
If you plan to test CRUD operations, plan them in correct order: create, read, update.
|
|
339
|
+
Do not invent specific route names, success messages, validation texts, badge counts, or welcome messages unless they are visible in research, visited pages, or prior observed flows.
|
|
340
|
+
If exact wording is unknown, describe the expected result generically, for example "an authentication error is shown" or "the user stays on the login page" instead of guessing the literal text.
|
|
341
|
+
If exact redirect destination is unknown, describe the destination by visible page identity, for example "the dashboard page opens" or "the current workspace home page opens" instead of inventing a URL slug.
|
|
342
|
+
Only propose scenarios whose prerequisites are evident from page research, visited pages, or API data preparation context.
|
|
343
|
+
If a scenario needs existing records, recipients, results, notifications, or other target data, propose it only when that data is visible or API preconditions can create it.
|
|
344
|
+
If the page appears read-only, degraded, demo-limited, maintenance-like, or lacks write controls, prefer read-only scenarios such as opening panels, inspecting visible lists, filtering, searching, or verifying current state.
|
|
345
|
+
Do not assume hidden data exists just because a control is present.
|
|
334
346
|
DO NOT propose "verification-only" tests that merely open a UI element (modal, dropdown, panel) and check it exists.
|
|
335
347
|
Every test must complete a meaningful action that changes application state or produces a business outcome.
|
|
336
348
|
Opening a modal is NOT a test — performing an action INSIDE the modal IS a test.
|
|
@@ -566,10 +578,15 @@ export class Planner extends PlannerBase implements Agent {
|
|
|
566
578
|
- Good: "New suite 'My New Suite' appears in the suite list"
|
|
567
579
|
- Good: "Suite appears under Starred filter tab"
|
|
568
580
|
- Good: "Success message 'Suite created' is displayed"
|
|
581
|
+
- Good when wording is unknown: "An authentication error is displayed"
|
|
582
|
+
- Good when route is unknown: "The workspace home page is displayed"
|
|
569
583
|
- Bad: "Modal is displayed" (just verifying existence, no business value)
|
|
570
584
|
- Bad: "Dropdown menu is visible" (just verifying existence)
|
|
585
|
+
- Bad: "Welcome message is displayed" if no welcome message is visible in research
|
|
586
|
+
- Bad: "Redirected to /dashboard" if no such route was observed
|
|
571
587
|
- Each outcome should be independently verifiable
|
|
572
588
|
- Avoid combining multiple checks into one outcome
|
|
589
|
+
- Prefer durable user-facing results over fragile micro-signals
|
|
573
590
|
- Expected outcomes describe WHAT TO VERIFY
|
|
574
591
|
|
|
575
592
|
FORMATTING RULES:
|
package/src/ai/rerunner.ts
CHANGED
|
@@ -87,7 +87,7 @@ export class Rerunner extends TaskAgent implements Agent {
|
|
|
87
87
|
return { total: 0, passed: 0, failed: 0, healed: 0 };
|
|
88
88
|
}
|
|
89
89
|
|
|
90
|
-
tag('
|
|
90
|
+
tag('step').log(`Re-running tests from: ${relative(process.cwd(), absPath)}`);
|
|
91
91
|
setActivity('🔄 Re-running tests...', 'action');
|
|
92
92
|
|
|
93
93
|
this.healedSteps = [];
|
package/src/ai/researcher/deep-analysis.ts
CHANGED
|
@@ -24,7 +24,7 @@ export function WithDeepAnalysis<T extends Constructor>(Base: T) {
|
|
|
24
24
|
|
|
25
25
|
async performDeepAnalysis(state: WebPageState, result: ResearchResult): Promise<void> {
|
|
26
26
|
tag('info').log('Starting deep analysis of expandable elements');
|
|
27
|
-
await (this as any).navigateTo(state.url);
|
|
27
|
+
await (this as any).navigateTo(state.fullUrl || state.url);
|
|
28
28
|
|
|
29
29
|
let expandables = await this._discoverExpandables(result.text);
|
|
30
30
|
if (expandables.length === 0) {
|
|
@@ -35,7 +35,7 @@ export function WithDeepAnalysis<T extends Constructor>(Base: T) {
|
|
|
35
35
|
|
|
36
36
|
const maxClicks = (this.explorer.getConfig().ai?.agents?.researcher as any)?.maxExpandableClicks ?? DEFAULT_MAX_EXPANDABLE_CLICKS;
|
|
37
37
|
if (expandables.length > maxClicks) {
|
|
38
|
-
expandables = await this._selectExpandables(expandables, state.url, maxClicks);
|
|
38
|
+
expandables = await this._selectExpandables(expandables, state.fullUrl || state.url, maxClicks);
|
|
39
39
|
tag('substep').log(`Selected ${expandables.length} expandables to click (max: ${maxClicks})`);
|
|
40
40
|
}
|
|
41
41
|
|
|
@@ -177,7 +177,14 @@ export function WithDeepAnalysis<T extends Constructor>(Base: T) {
|
|
|
177
177
|
visionCall = this.provider.processImage(visionPrompt, screenshot.toString('base64'));
|
|
178
178
|
}
|
|
179
179
|
|
|
180
|
-
|
|
180
|
+
let textRes: { text?: string } | null = null;
|
|
181
|
+
let visionRes: { text?: string } | null = null;
|
|
182
|
+
try {
|
|
183
|
+
[textRes, visionRes] = await Promise.all([textCall, visionCall]);
|
|
184
|
+
} catch (err) {
|
|
185
|
+
tag('warning').log(`Expandable discovery failed, skipping deep analysis: ${err instanceof Error ? err.message : err}`);
|
|
186
|
+
return [];
|
|
187
|
+
}
|
|
181
188
|
|
|
182
189
|
const eidxSet = new Set<string>();
|
|
183
190
|
const parseRefs = (text: string | undefined) => {
|
|
@@ -244,10 +251,16 @@ export function WithDeepAnalysis<T extends Constructor>(Base: T) {
|
|
|
244
251
|
`;
|
|
245
252
|
|
|
246
253
|
const model = this.provider.getModelForAgent('researcher');
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
254
|
+
let r: { text?: string };
|
|
255
|
+
try {
|
|
256
|
+
r = await this.provider.chat([{ role: 'user', content: prompt }], model, {
|
|
257
|
+
agentName: 'researcher',
|
|
258
|
+
telemetryFunctionId: 'researcher.selectExpandables',
|
|
259
|
+
});
|
|
260
|
+
} catch (err) {
|
|
261
|
+
tag('warning').log(`Expandable selection failed, using first ${maxClicks}: ${err instanceof Error ? err.message : err}`);
|
|
262
|
+
return expandables.slice(0, maxClicks);
|
|
263
|
+
}
|
|
251
264
|
|
|
252
265
|
const nums = (r.text || '').match(/\d+/g)?.map(Number) || [];
|
|
253
266
|
const selected = expandables.filter((_, i) => nums.includes(i + 1));
|
package/src/ai/researcher.ts
CHANGED
|
@@ -125,7 +125,7 @@ export class Researcher extends ResearcherBase implements Agent {
|
|
|
125
125
|
tag('info').log(`Researching ${displayUrl} to understand the context...`);
|
|
126
126
|
setActivity(`${this.emoji} Researching...`, 'action');
|
|
127
127
|
|
|
128
|
-
await this.ensureNavigated(
|
|
128
|
+
await this.ensureNavigated(displayUrl, screenshot && this.provider.hasVision());
|
|
129
129
|
await this.hooksRunner.runBeforeHook('researcher', state.url);
|
|
130
130
|
|
|
131
131
|
const annotatedElements = await this.explorer.annotateElements();
|
|
@@ -151,10 +151,10 @@ export class Researcher extends ResearcherBase implements Agent {
|
|
|
151
151
|
if (!deep && !force) {
|
|
152
152
|
const similar = await findSimilarResearch(combinedHtml);
|
|
153
153
|
if (similar) {
|
|
154
|
-
tag('
|
|
154
|
+
tag('substep').log('Similar research found, reusing cached result');
|
|
155
155
|
if (stateHash) saveResearch(stateHash, similar, combinedHtml);
|
|
156
156
|
tag('multiline').log(formatResearchSummary(similar));
|
|
157
|
-
tag('success').log(
|
|
157
|
+
tag('success').log('Research complete (reused)');
|
|
158
158
|
await this.hooksRunner.runAfterHook('researcher', state.url);
|
|
159
159
|
return similar;
|
|
160
160
|
}
|
|
@@ -285,7 +285,11 @@ export class Researcher extends ResearcherBase implements Agent {
|
|
|
285
285
|
}
|
|
286
286
|
|
|
287
287
|
if (!interrupted() && deep) {
|
|
288
|
-
|
|
288
|
+
try {
|
|
289
|
+
await this.performDeepAnalysis(state, result);
|
|
290
|
+
} catch (err) {
|
|
291
|
+
tag('warning').log(`Deep analysis failed, continuing with best-effort research: ${err instanceof Error ? err.message : err}`);
|
|
292
|
+
}
|
|
289
293
|
}
|
|
290
294
|
|
|
291
295
|
if (!interrupted() && data) {
|
|
@@ -311,7 +315,7 @@ export class Researcher extends ResearcherBase implements Agent {
|
|
|
311
315
|
}
|
|
312
316
|
|
|
313
317
|
tag('multiline').log(formatResearchSummary(result.text, { visionUsed: this.hasScreenshotToAnalyze }));
|
|
314
|
-
tag('success').log(
|
|
318
|
+
tag('success').log('Research complete');
|
|
315
319
|
if (researchFile) tag('substep').log(`Research file saved to: ${researchFile}`);
|
|
316
320
|
if (this.actionResult?.screenshotFile) {
|
|
317
321
|
const screenshotPath = outputPath('states', this.actionResult.screenshotFile);
|
package/src/ai/session-analyst.ts
CHANGED
|
@@ -120,6 +120,24 @@ export class SessionAnalyst implements Agent {
|
|
|
120
120
|
.slice(-30)
|
|
121
121
|
.map((entry) => ` - [${entry.type}] ${entry.content}`)
|
|
122
122
|
.join('\n');
|
|
123
|
+
const checked = test.getCheckedExpectations().join(' | ') || '(none)';
|
|
124
|
+
const remaining = test.getRemainingExpectations().join(' | ') || '(none)';
|
|
125
|
+
const notes = test
|
|
126
|
+
.getPrintableNotes()
|
|
127
|
+
.slice(-12)
|
|
128
|
+
.map((note) => ` - ${note}`)
|
|
129
|
+
.join('\n');
|
|
130
|
+
const visitedUrls = test.getVisitedUrls({ localOnly: true }).join(' | ') || '(none)';
|
|
131
|
+
const verification = test.verification
|
|
132
|
+
? dedent`
|
|
133
|
+
verification_status: ${test.verification.status || 'unknown'}
|
|
134
|
+
verification_message: ${test.verification.message || '(none)'}
|
|
135
|
+
verification_url: ${test.verification.url || '(none)'}
|
|
136
|
+
verification_page: ${test.verification.pageLabel || '(none)'}
|
|
137
|
+
verification_details:
|
|
138
|
+
${(test.verification.details.length > 0 ? test.verification.details : ['(none)']).map((detail) => ` - ${detail}`).join('\n')}
|
|
139
|
+
`
|
|
140
|
+
: 'verification_status: none';
|
|
123
141
|
|
|
124
142
|
return dedent`
|
|
125
143
|
<test ref="#${ref}">
|
|
@@ -127,6 +145,12 @@ export class SessionAnalyst implements Agent {
|
|
|
127
145
|
scenario: ${test.scenario}
|
|
128
146
|
result: ${test.result || 'unknown'}
|
|
129
147
|
expected: ${test.expected.join(' | ') || '(none)'}
|
|
148
|
+
checked_expectations: ${checked}
|
|
149
|
+
remaining_expectations: ${remaining}
|
|
150
|
+
visited_urls: ${visitedUrls}
|
|
151
|
+
${verification}
|
|
152
|
+
notes:
|
|
153
|
+
${notes || ' - (none)'}
|
|
130
154
|
log:
|
|
131
155
|
${log}
|
|
132
156
|
</test>
|
package/src/ai/tester.ts
CHANGED
|
@@ -118,7 +118,6 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
118
118
|
const state = this.explorer.getStateManager().getCurrentState();
|
|
119
119
|
if (!state) throw new Error('No state found');
|
|
120
120
|
|
|
121
|
-
tag('info').log(`Testing scenario: ${task.scenario}`);
|
|
122
121
|
setActivity(`🧪 Testing: ${task.scenario}`, 'action');
|
|
123
122
|
|
|
124
123
|
this.previousUrl = null;
|
|
@@ -678,7 +677,6 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
678
677
|
if (!task.hasFinished) {
|
|
679
678
|
task.finish(TestResult.FAILED);
|
|
680
679
|
}
|
|
681
|
-
tag('info').log(`Finished: ${task.scenario}`);
|
|
682
680
|
|
|
683
681
|
if (task.isSuccessful) {
|
|
684
682
|
tag('success').log(`Successful test: ${task.scenario}`);
|
|
@@ -882,7 +880,9 @@ export class Tester extends TaskAgent implements Agent {
|
|
|
882
880
|
await this.explorer.switchToMainFrame();
|
|
883
881
|
}
|
|
884
882
|
|
|
885
|
-
|
|
883
|
+
const currentState = this.explorer.getStateManager().getCurrentState();
|
|
884
|
+
const currentUrl = currentState?.fullUrl || currentState?.url;
|
|
885
|
+
if (currentUrl === resetUrl!) {
|
|
886
886
|
return {
|
|
887
887
|
success: false,
|
|
888
888
|
message: 'Reset failed - already on initial page!',
|
package/src/ai/tools.ts
CHANGED
|
@@ -854,12 +854,13 @@ export function createAgentTools({
|
|
|
854
854
|
}),
|
|
855
855
|
execute: async ({ reason }) => {
|
|
856
856
|
const stateManager = explorer.getStateManager();
|
|
857
|
-
const
|
|
857
|
+
const currentState = stateManager.getCurrentState();
|
|
858
|
+
const currentUrl = currentState?.fullUrl || currentState?.url;
|
|
858
859
|
const history = stateManager.getStateHistory();
|
|
859
860
|
|
|
860
861
|
let targetUrl: string | null = null;
|
|
861
862
|
for (let i = history.length - 1; i >= 0; i--) {
|
|
862
|
-
const url = history[i].toState.url;
|
|
863
|
+
const url = history[i].toState.fullUrl || history[i].toState.url;
|
|
863
864
|
if (url !== currentUrl) {
|
|
864
865
|
targetUrl = url;
|
|
865
866
|
break;
|
package/src/commands/explore-command.ts
CHANGED
|
@@ -38,6 +38,11 @@ export class ExploreCommand extends BaseCommand {
|
|
|
38
38
|
private oldTestRefs = new Set<Test>();
|
|
39
39
|
private priorityFilter?: Set<string>;
|
|
40
40
|
|
|
41
|
+
private getCurrentPageUrl(): string | undefined {
|
|
42
|
+
const state = this.explorBot.getExplorer().getStateManager().getCurrentState();
|
|
43
|
+
return state?.fullUrl || state?.url;
|
|
44
|
+
}
|
|
45
|
+
|
|
41
46
|
async execute(args: string): Promise<void> {
|
|
42
47
|
const { opts, args: remaining } = this.parseArgs(args);
|
|
43
48
|
if (opts.maxTests) {
|
|
@@ -51,7 +56,7 @@ export class ExploreCommand extends BaseCommand {
|
|
|
51
56
|
if (this.dryRun) tag('info').log('Dry-run mode: planner runs to discover new tests; test execution is skipped');
|
|
52
57
|
Stats.mode ??= 'explore';
|
|
53
58
|
Stats.focus ??= feature;
|
|
54
|
-
const mainUrl = this.
|
|
59
|
+
const mainUrl = this.getCurrentPageUrl();
|
|
55
60
|
|
|
56
61
|
if (cfg.enabled) {
|
|
57
62
|
await this.runReuseMode(mainUrl, feature, cfg);
|
package/src/components/LogPane.tsx
CHANGED
|
@@ -7,21 +7,32 @@ import { parseMarkdownToTerminal } from '../utils/markdown-terminal.js';
|
|
|
7
7
|
|
|
8
8
|
import { Box, Text } from 'ink';
|
|
9
9
|
import type { LogType, TaggedLogEntry } from '../utils/logger.js';
|
|
10
|
-
import { isDebugMode, registerLogPane,
|
|
10
|
+
import { isDebugMode, registerLogPane, unregisterLogPane } from '../utils/logger.js';
|
|
11
11
|
|
|
12
12
|
// marked.use(new markedTerminal());
|
|
13
13
|
|
|
14
|
-
type LogEntry = TaggedLogEntry;
|
|
14
|
+
type LogEntry = TaggedLogEntry & { collapsedCount?: number };
|
|
15
15
|
|
|
16
16
|
interface LogPaneProps {
|
|
17
17
|
verboseMode: boolean;
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
const LogPane: React.FC<LogPaneProps> = React.memo(({ verboseMode }) => {
|
|
21
|
-
const [logs, setLogs] = useState<
|
|
22
|
-
const pendingLogsRef = React.useRef<
|
|
21
|
+
const [logs, setLogs] = useState<LogEntry[]>([]);
|
|
22
|
+
const pendingLogsRef = React.useRef<LogEntry[]>([]);
|
|
23
23
|
const flushTimeoutRef = React.useRef<ReturnType<typeof setTimeout> | null>(null);
|
|
24
24
|
|
|
25
|
+
const MAX_MULTILINE_LINES = 16;
|
|
26
|
+
const MAX_STEP_LINES = 8;
|
|
27
|
+
const MAX_SUBSTEP_LINES = 6;
|
|
28
|
+
|
|
29
|
+
const formatCollapsedContent = useCallback((lines: string[], collapsedCount: number, label: string) => {
|
|
30
|
+
if (collapsedCount <= 0) {
|
|
31
|
+
return lines.join('\n');
|
|
32
|
+
}
|
|
33
|
+
return [`... ${collapsedCount} earlier ${label}`, ...lines].join('\n');
|
|
34
|
+
}, []);
|
|
35
|
+
|
|
25
36
|
const flushLogs = useCallback(() => {
|
|
26
37
|
if (pendingLogsRef.current.length === 0) return;
|
|
27
38
|
|
|
@@ -29,7 +40,7 @@ const LogPane: React.FC<LogPaneProps> = React.memo(({ verboseMode }) => {
|
|
|
29
40
|
pendingLogsRef.current = [];
|
|
30
41
|
flushTimeoutRef.current = null;
|
|
31
42
|
|
|
32
|
-
setLogs((prevLogs:
|
|
43
|
+
setLogs((prevLogs: LogEntry[]) => {
|
|
33
44
|
const result = [...prevLogs];
|
|
34
45
|
|
|
35
46
|
for (const logEntry of newLogs) {
|
|
@@ -43,12 +54,35 @@ const LogPane: React.FC<LogPaneProps> = React.memo(({ verboseMode }) => {
|
|
|
43
54
|
continue;
|
|
44
55
|
}
|
|
45
56
|
|
|
57
|
+
if ((logEntry.type === 'step' || logEntry.type === 'substep') && lastLog.type === logEntry.type && Math.abs((lastLog.timestamp?.getTime() || 0) - (logEntry.timestamp?.getTime() || 0)) < 1500) {
|
|
58
|
+
const currentLines = String(logEntry.content)
|
|
59
|
+
.split('\n')
|
|
60
|
+
.filter((line) => line.length > 0);
|
|
61
|
+
const previousLines = String(lastLog.content)
|
|
62
|
+
.split('\n')
|
|
63
|
+
.filter((line) => line.length > 0);
|
|
64
|
+
const visiblePreviousLines = lastLog.collapsedCount ? previousLines.slice(1) : previousLines;
|
|
65
|
+
const maxLines = logEntry.type === 'step' ? MAX_STEP_LINES : MAX_SUBSTEP_LINES;
|
|
66
|
+
const mergedLines = [...visiblePreviousLines, ...currentLines];
|
|
67
|
+
const overflow = Math.max(0, mergedLines.length - maxLines);
|
|
68
|
+
const collapsedCount = (lastLog.collapsedCount || 0) + overflow;
|
|
69
|
+
const visibleLines = mergedLines.slice(-maxLines);
|
|
70
|
+
const label = logEntry.type === 'step' ? 'steps' : 'details';
|
|
71
|
+
result[result.length - 1] = {
|
|
72
|
+
...lastLog,
|
|
73
|
+
content: formatCollapsedContent(visibleLines, collapsedCount, label),
|
|
74
|
+
timestamp: logEntry.timestamp,
|
|
75
|
+
collapsedCount,
|
|
76
|
+
};
|
|
77
|
+
continue;
|
|
78
|
+
}
|
|
79
|
+
|
|
46
80
|
result.push(logEntry);
|
|
47
81
|
}
|
|
48
82
|
|
|
49
83
|
return result;
|
|
50
84
|
});
|
|
51
|
-
}, []);
|
|
85
|
+
}, [formatCollapsedContent]);
|
|
52
86
|
|
|
53
87
|
const addLog = useCallback(
|
|
54
88
|
(logEntry: TaggedLogEntry) => {
|
|
@@ -112,12 +146,11 @@ const LogPane: React.FC<LogPaneProps> = React.memo(({ verboseMode }) => {
|
|
|
112
146
|
const cleaned = stripAnsi(dedent(log.content));
|
|
113
147
|
const parsed = parseMarkdownToTerminal(cleaned);
|
|
114
148
|
const lines = parsed.split('\n');
|
|
115
|
-
const
|
|
116
|
-
const truncated = lines.length > maxLines ? `${lines.slice(0, maxLines).join('\n')}\n... (${lines.length - maxLines} more lines)` : cleaned;
|
|
149
|
+
const truncated = lines.length > MAX_MULTILINE_LINES ? `${lines.slice(0, MAX_MULTILINE_LINES).join('\n')}\n... (${lines.length - MAX_MULTILINE_LINES} more lines)` : parsed;
|
|
117
150
|
return (
|
|
118
151
|
<Box key={index} borderStyle="classic" borderLeft={false} borderRight={false} marginY={1} padding={1} borderColor="dim" overflow="hidden">
|
|
119
152
|
<Text color="gray" dimColor>
|
|
120
|
-
{
|
|
153
|
+
{truncated}
|
|
121
154
|
</Text>
|
|
122
155
|
</Box>
|
|
123
156
|
);
|