explorbot 0.1.18 → 0.1.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,6 +43,8 @@ interface CLIOptions {
43
43
  }
44
44
 
45
45
  function buildExplorBotOptions(from: string | undefined, options: CLIOptions): ExplorBotOptions {
46
+ const sessionFile = options.session === true ? path.join(path.resolve(options.path || process.cwd()), 'output', 'session.json') : options.session;
47
+
46
48
  return {
47
49
  from,
48
50
  verbose: options.verbose || options.debug,
@@ -51,7 +53,7 @@ function buildExplorBotOptions(from: string | undefined, options: CLIOptions): E
51
53
  show: options.show,
52
54
  headless: options.headless,
53
55
  incognito: options.incognito,
54
- session: options.session === true ? 'output/session.json' : options.session,
56
+ session: sessionFile,
55
57
  } as ExplorBotOptions;
56
58
  }
57
59
 
@@ -597,6 +599,22 @@ addCommonOptions(program.command('research <url>').description('Research a page
597
599
  }
598
600
  );
599
601
 
602
+ addCommonOptions(program.command('navigate <url>').description('Navigate to a URL using the AI Navigator. Exits 0 if reachable, 1 otherwise.')).action(async (url, options) => {
603
+ try {
604
+ const explorBot = new ExplorBot(buildExplorBotOptions(url, options));
605
+ await explorBot.start();
606
+
607
+ const { NavigateCommand } = await import('../src/commands/navigate-command.js');
608
+ await new NavigateCommand(explorBot).execute(url);
609
+
610
+ await explorBot.stop();
611
+ await showStatsAndExit(0);
612
+ } catch (error) {
613
+ console.error('Failed:', error instanceof Error ? error.message : 'Unknown error');
614
+ await showStatsAndExit(1);
615
+ }
616
+ });
617
+
600
618
  addCommonOptions(
601
619
  program.command('drill <url>').alias('driller').description('Drill all components on a page to learn interactions').option('--knowledge <path>', 'Save learned interactions to knowledge file at this URL path').option('--max-components <count>', 'Maximum number of components to drill')
602
620
  ).action(async (url, options) => {
@@ -27,6 +27,7 @@ if (!process.env.EXPLORBOT_NO_BANNER) {
27
27
  console.log(`⛵ ${chalk.yellow.bold(`Explorbot v${pkgVersion}`)} ${chalk.dim('Autonomous Testing Agent')}`);
28
28
  }
29
29
  function buildExplorBotOptions(from, options) {
30
+ const sessionFile = options.session === true ? path.join(path.resolve(options.path || process.cwd()), 'output', 'session.json') : options.session;
30
31
  return {
31
32
  from,
32
33
  verbose: options.verbose || options.debug,
@@ -35,7 +36,7 @@ function buildExplorBotOptions(from, options) {
35
36
  show: options.show,
36
37
  headless: options.headless,
37
38
  incognito: options.incognito,
38
- session: options.session === true ? 'output/session.json' : options.session,
39
+ session: sessionFile,
39
40
  };
40
41
  }
41
42
  function addCommonOptions(cmd) {
@@ -543,6 +544,20 @@ addCommonOptions(program.command('research <url>').description('Research a page
543
544
  await showStatsAndExit(1);
544
545
  }
545
546
  });
547
+ addCommonOptions(program.command('navigate <url>').description('Navigate to a URL using the AI Navigator. Exits 0 if reachable, 1 otherwise.')).action(async (url, options) => {
548
+ try {
549
+ const explorBot = new ExplorBot(buildExplorBotOptions(url, options));
550
+ await explorBot.start();
551
+ const { NavigateCommand } = await import('../src/commands/navigate-command.js');
552
+ await new NavigateCommand(explorBot).execute(url);
553
+ await explorBot.stop();
554
+ await showStatsAndExit(0);
555
+ }
556
+ catch (error) {
557
+ console.error('Failed:', error instanceof Error ? error.message : 'Unknown error');
558
+ await showStatsAndExit(1);
559
+ }
560
+ });
546
561
  addCommonOptions(program.command('drill <url>').alias('driller').description('Drill all components on a page to learn interactions').option('--knowledge <path>', 'Save learned interactions to knowledge file at this URL path').option('--max-components <count>', 'Maximum number of components to drill')).action(async (url, options) => {
547
562
  try {
548
563
  const explorBot = new ExplorBot(buildExplorBotOptions(url, options));
package/dist/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "explorbot",
3
- "version": "0.1.18",
3
+ "version": "0.1.20",
4
4
  "description": "CLI app built with React Ink, CodeceptJS, and Playwright",
5
5
  "license": "Elastic-2.0",
6
6
  "type": "module",
@@ -2,7 +2,6 @@ import fs from 'node:fs';
2
2
  import { join } from 'node:path';
3
3
  import { faker } from '@faker-js/faker';
4
4
  import { context, trace } from '@opentelemetry/api';
5
- import { highlight } from 'cli-highlight';
6
5
  import { container, recorder } from 'codeceptjs';
7
6
  import * as codeceptjs from 'codeceptjs';
8
7
  import { hopeThat, retryTo, tryTo, within } from 'codeceptjs/lib/effects';
@@ -12,7 +11,7 @@ import { clearActivity, setActivity } from "./activity.js";
12
11
  import { ConfigParser, outputPath } from './config.js';
13
12
  import { Observability } from "./observability.js";
14
13
  import { htmlCombinedSnapshot, minifyHtml } from './utils/html.js';
15
- import { createDebug, log, setStepSpanParent, tag } from './utils/logger.js';
14
+ import { createDebug, setStepSpanParent, tag } from './utils/logger.js';
16
15
  import { safeFilename } from "./utils/strings.js";
17
16
  const debugLog = createDebug('explorbot:action');
18
17
  const FATAL_BROWSER_ERRORS = /Frame was detached|Target closed|Execution context was destroyed|Protocol error|Session closed/i;
@@ -257,7 +256,13 @@ class Action {
257
256
  async expect(codeOrFunction) {
258
257
  const codeString = typeof codeOrFunction === 'string' ? codeOrFunction : codeOrFunction.toString();
259
258
  this.expectation = codeString.toString();
260
- log('Expecting', highlight(codeString, { language: 'javascript' }));
259
+ const expectationPreview = sanitizeCodeBlock(codeString)
260
+ .split('\n')
261
+ .map((line) => line.trim())
262
+ .filter(Boolean)
263
+ .slice(0, 2)
264
+ .join(' ');
265
+ tag('step').log(`Expecting: ${expectationPreview || 'assertion'}`);
261
266
  try {
262
267
  debugLog('Executing expectation:', codeString);
263
268
  let codeFunction;
@@ -96,7 +96,7 @@ export class Driller extends TaskAgent {
96
96
  const sessionName = `driller_${Date.now().toString(36)}`;
97
97
  this.allResults = [];
98
98
  return Observability.run(`driller: ${currentState.url}`, { tags: ['driller'], sessionId: sessionName }, async () => {
99
- tag('info').log(`Driller starting on ${currentState.url}`);
99
+ tag('step').log(`Drilling page: ${currentState.url}`);
100
100
  await this.hooksRunner.runBeforeHook('driller', currentState.url);
101
101
  const originalState = await this.captureAnnotatedState();
102
102
  const components = await this.collectComponents(originalState, maxComponents);
@@ -68,8 +68,48 @@ class Navigator {
68
68
  this.experienceTracker = experienceTracker || new ExperienceTracker();
69
69
  this.hooksRunner = new HooksRunner(explorer, explorer.getConfig());
70
70
  }
71
+ getBaseOrigin() {
72
+ const baseUrl = this.explorer.getConfig().playwright.url;
73
+ try {
74
+ return new URL(baseUrl).origin;
75
+ }
76
+ catch {
77
+ return null;
78
+ }
79
+ }
80
+ getComparableCurrentUrl(stateManager, expectedUrl) {
81
+ const currentState = stateManager.getCurrentState();
82
+ if (!currentState)
83
+ return '';
84
+ const current = /^https?:\/\//i.test(expectedUrl) ? currentState.fullUrl || currentState.url || '' : currentState.url || '';
85
+ return current;
86
+ }
87
+ isSameExpectedOrigin(expectedUrl, stateManager) {
88
+ const currentState = stateManager.getCurrentState();
89
+ if (!currentState)
90
+ return false;
91
+ const currentFullUrl = currentState.fullUrl || currentState.url || '';
92
+ if (!currentFullUrl)
93
+ return false;
94
+ try {
95
+ const currentOrigin = new URL(currentFullUrl).origin;
96
+ if (/^https?:\/\//i.test(expectedUrl)) {
97
+ return currentOrigin === new URL(expectedUrl).origin;
98
+ }
99
+ const baseOrigin = this.getBaseOrigin();
100
+ if (!baseOrigin)
101
+ return true;
102
+ return currentOrigin === baseOrigin;
103
+ }
104
+ catch {
105
+ return !/^https?:\/\//i.test(expectedUrl);
106
+ }
107
+ }
71
108
  isOnExpectedPage(expectedUrl, stateManager) {
72
- const currentUrl = stateManager.getCurrentState()?.url || '';
109
+ if (!this.isSameExpectedOrigin(expectedUrl, stateManager)) {
110
+ return false;
111
+ }
112
+ const currentUrl = this.getComparableCurrentUrl(stateManager, expectedUrl);
73
113
  return normalizeUrl(currentUrl) === normalizeUrl(expectedUrl);
74
114
  }
75
115
  async visit(url) {
@@ -249,7 +289,8 @@ class Navigator {
249
289
  }
250
290
  }
251
291
  const freshState = await action.capturePageState();
252
- const urlMatches = normalizeUrl(freshState.url || '') === normalizeUrl(expectedUrl);
292
+ const currentUrl = /^https?:\/\//i.test(expectedUrl) ? freshState.fullUrl || freshState.url || '' : freshState.url || '';
293
+ const urlMatches = this.isSameExpectedOrigin(expectedUrl, action.stateManager) && normalizeUrl(currentUrl) === normalizeUrl(expectedUrl);
253
294
  const stateChanged = freshState.getStateHash() !== actionResult.getStateHash();
254
295
  resolved = urlMatches && stateChanged;
255
296
  if (!resolved && attemptOk) {
@@ -270,6 +270,9 @@ export class Pilot {
270
270
  overrides the others — weigh them together. Tester's record() notes are the LEAST reliable; always
271
271
  cross-check against actual actions and state. Visual screenshot analysis is strong for UI state
272
272
  (active tabs, visible counts, colors).
273
+ If the final page clearly shows an equivalent success state in a different UI form, do not fail only
274
+ because one narrow assertion targeted a specific badge, count, toast, or wording that the product
275
+ represents differently.
273
276
 
274
277
  SCENARIO TITLE defines what must happen. Action verbs require persisted evidence:
275
278
  - "Create X" → X must exist (visible, redirected to its page, or success message). Opening a form is NOT enough.
@@ -311,6 +314,8 @@ export class Pilot {
311
314
 
312
315
  GUIDANCE (required for "continue"): a specific next action on the current page — which tool, what
313
316
  to verify, how to record. Do not suggest repeating actions that already succeeded.
317
+ If progress is blocked only because the page lacks target data for the scenario, prefer precondition()
318
+ over repeated UI attempts.
314
319
  `;
315
320
  }
316
321
  buildVerdictSystemPrompt(type, task) {
@@ -64,6 +64,9 @@ export class Planner extends PlannerBase {
64
64
  get sectionOrder() {
65
65
  return ConfigParser.getInstance().getConfig().ai?.agents?.researcher?.sections || Object.keys(POSSIBLE_SECTIONS);
66
66
  }
67
+ getDefaultStartUrl(state) {
68
+ return state.fullUrl || state.url;
69
+ }
67
70
  getSystemMessage(feature) {
68
71
  const currentUrl = this.stateManager.getCurrentState()?.url;
69
72
  const customPrompt = this.provider.getSystemPromptForAgent('planner', currentUrl);
@@ -138,7 +141,6 @@ export class Planner extends PlannerBase {
138
141
  }
139
142
  this.freshStart = false;
140
143
  setActivity(`${this.emoji} Planning...`, 'action');
141
- tag('info').log(`Planning test scenarios for ${state.url}`);
142
144
  if (style)
143
145
  tag('info').log(`Planning style: ${style}`);
144
146
  const tags = ['planner'];
@@ -162,7 +164,8 @@ export class Planner extends PlannerBase {
162
164
  if (aiResult.object.scenarios.length === 0 && !this.currentPlan) {
163
165
  throw new Error('No tasks were created successfully');
164
166
  }
165
- const fromPlanning = aiResult.object.scenarios.map((s) => new Test(s.scenario, s.priority, s.expectedOutcomes, s.startUrl || state.url, s.steps || []));
167
+ const defaultStartUrl = this.getDefaultStartUrl(state);
168
+ const fromPlanning = aiResult.object.scenarios.map((s) => new Test(s.scenario, s.priority, s.expectedOutcomes, s.startUrl || defaultStartUrl, s.steps || []));
166
169
  return { tests: fromPlanning, planName: aiResult.object.planName };
167
170
  });
168
171
  const tests = result.tests;
@@ -171,7 +174,8 @@ export class Planner extends PlannerBase {
171
174
  const cached = state.url ? getRegisteredPlan(state.url) : null;
172
175
  const planName = feature || cached?.plan.title || result.planName || state.url;
173
176
  this.currentPlan = new Plan(planName);
174
- this.currentPlan.url = state.url;
177
+ this.currentPlan.url = this.getDefaultStartUrl(state);
178
+ const defaultStartUrl = this.getDefaultStartUrl(state);
175
179
  if (parentPlan)
176
180
  this.currentPlan.parentPlan = parentPlan;
177
181
  const allPreviousScenarios = this.getPreviousSessionScenarios();
@@ -182,14 +186,14 @@ export class Planner extends PlannerBase {
182
186
  if (allPreviousScenarios.has(t.scenario.toLowerCase()))
183
187
  continue;
184
188
  t.style = this.lastStyleName;
185
- t.startUrl = state.url;
189
+ t.startUrl = defaultStartUrl;
186
190
  this.currentPlan.addTest(t);
187
191
  }
188
192
  }
189
193
  else {
190
194
  tag('step').log(`Expanding plan: "${this.currentPlan.title}"`);
191
195
  this.currentPlan.nextIteration();
192
- const newTests = this.addNewTests(tests, state.url);
196
+ const newTests = this.addNewTests(tests, this.getDefaultStartUrl(state));
193
197
  if (newTests.length > 0) {
194
198
  const summary = `New scenarios:\n${newTests.map((t) => `+ [${t.priority}] ${t.scenario}`).join('\n')}`;
195
199
  tag('multiline').log(summary);
@@ -292,6 +296,13 @@ export class Planner extends PlannerBase {
292
296
  Focus on URL page change or data persistency after page reload.
293
297
  If there are subpages (pages with same URL path) plan testing of those subpages as well
294
298
  If you plan to test CRUD operations, plan them in correct order: create, read, update.
299
+ Do not invent specific route names, success messages, validation texts, badge counts, or welcome messages unless they are visible in research, visited pages, or prior observed flows.
300
+ If exact wording is unknown, describe the expected result generically, for example "an authentication error is shown" or "the user stays on the login page" instead of guessing the literal text.
301
+ If exact redirect destination is unknown, describe the destination by visible page identity, for example "the dashboard page opens" or "the current workspace home page opens" instead of inventing a URL slug.
302
+ Only propose scenarios whose prerequisites are evident from page research, visited pages, or API data preparation context.
303
+ If a scenario needs existing records, recipients, results, notifications, or other target data, propose it only when that data is visible or API preconditions can create it.
304
+ If the page appears read-only, degraded, demo-limited, maintenance-like, or lacks write controls, prefer read-only scenarios such as opening panels, inspecting visible lists, filtering, searching, or verifying current state.
305
+ Do not assume hidden data exists just because a control is present.
295
306
  DO NOT propose "verification-only" tests that merely open a UI element (modal, dropdown, panel) and check it exists.
296
307
  Every test must complete a meaningful action that changes application state or produces a business outcome.
297
308
  Opening a modal is NOT a test — performing an action INSIDE the modal IS a test.
@@ -516,10 +527,15 @@ export class Planner extends PlannerBase {
516
527
  - Good: "New suite 'My New Suite' appears in the suite list"
517
528
  - Good: "Suite appears under Starred filter tab"
518
529
  - Good: "Success message 'Suite created' is displayed"
530
+ - Good when wording is unknown: "An authentication error is displayed"
531
+ - Good when route is unknown: "The workspace home page is displayed"
519
532
  - Bad: "Modal is displayed" (just verifying existence, no business value)
520
533
  - Bad: "Dropdown menu is visible" (just verifying existence)
534
+ - Bad: "Welcome message is displayed" if no welcome message is visible in research
535
+ - Bad: "Redirected to /dashboard" if no such route was observed
521
536
  - Each outcome should be independently verifiable
522
537
  - Avoid combining multiple checks into one outcome
538
+ - Prefer durable user-facing results over fragile micro-signals
523
539
  - Expected outcomes describe WHAT TO VERIFY
524
540
 
525
541
  FORMATTING RULES:
@@ -67,7 +67,7 @@ export class Rerunner extends TaskAgent {
67
67
  tag('error').log(`Test file not found: ${absPath}`);
68
68
  return { total: 0, passed: 0, failed: 0, healed: 0 };
69
69
  }
70
- tag('info').log(`Re-running tests from: ${relative(process.cwd(), absPath)}`);
70
+ tag('step').log(`Re-running tests from: ${relative(process.cwd(), absPath)}`);
71
71
  setActivity('🔄 Re-running tests...', 'action');
72
72
  this.healedSteps = [];
73
73
  this.setupPlugins();
@@ -12,7 +12,7 @@ export function WithDeepAnalysis(Base) {
12
12
  return class extends Base {
13
13
  async performDeepAnalysis(state, result) {
14
14
  tag('info').log('Starting deep analysis of expandable elements');
15
- await this.navigateTo(state.url);
15
+ await this.navigateTo(state.fullUrl || state.url);
16
16
  let expandables = await this._discoverExpandables(result.text);
17
17
  if (expandables.length === 0) {
18
18
  tag('info').log('No expandable elements identified by AI');
@@ -21,7 +21,7 @@ export function WithDeepAnalysis(Base) {
21
21
  tag('substep').log(`Identified ${expandables.length} expandable elements`);
22
22
  const maxClicks = this.explorer.getConfig().ai?.agents?.researcher?.maxExpandableClicks ?? DEFAULT_MAX_EXPANDABLE_CLICKS;
23
23
  if (expandables.length > maxClicks) {
24
- expandables = await this._selectExpandables(expandables, state.url, maxClicks);
24
+ expandables = await this._selectExpandables(expandables, state.fullUrl || state.url, maxClicks);
25
25
  tag('substep').log(`Selected ${expandables.length} expandables to click (max: ${maxClicks})`);
26
26
  }
27
27
  const elements = expandables
@@ -144,7 +144,15 @@ export function WithDeepAnalysis(Base) {
144
144
  `;
145
145
  visionCall = this.provider.processImage(visionPrompt, screenshot.toString('base64'));
146
146
  }
147
- const [textRes, visionRes] = await Promise.all([textCall, visionCall]);
147
+ let textRes = null;
148
+ let visionRes = null;
149
+ try {
150
+ [textRes, visionRes] = await Promise.all([textCall, visionCall]);
151
+ }
152
+ catch (err) {
153
+ tag('warning').log(`Expandable discovery failed, skipping deep analysis: ${err instanceof Error ? err.message : err}`);
154
+ return [];
155
+ }
148
156
  const eidxSet = new Set();
149
157
  const parseRefs = (text) => {
150
158
  if (!text)
@@ -204,10 +212,17 @@ export function WithDeepAnalysis(Base) {
204
212
  - Respond with comma-separated numbers to keep, e.g.: 1, 3, 5
205
213
  `;
206
214
  const model = this.provider.getModelForAgent('researcher');
207
- const r = await this.provider.chat([{ role: 'user', content: prompt }], model, {
208
- agentName: 'researcher',
209
- telemetryFunctionId: 'researcher.selectExpandables',
210
- });
215
+ let r;
216
+ try {
217
+ r = await this.provider.chat([{ role: 'user', content: prompt }], model, {
218
+ agentName: 'researcher',
219
+ telemetryFunctionId: 'researcher.selectExpandables',
220
+ });
221
+ }
222
+ catch (err) {
223
+ tag('warning').log(`Expandable selection failed, using first ${maxClicks}: ${err instanceof Error ? err.message : err}`);
224
+ return expandables.slice(0, maxClicks);
225
+ }
211
226
  const nums = (r.text || '').match(/\d+/g)?.map(Number) || [];
212
227
  const selected = expandables.filter((_, i) => nums.includes(i + 1));
213
228
  return selected.length > 0 ? selected.slice(0, maxClicks) : expandables.slice(0, maxClicks);
@@ -93,7 +93,7 @@ export class Researcher extends ResearcherBase {
93
93
  const displayUrl = state.fullUrl || state.url;
94
94
  tag('info').log(`Researching ${displayUrl} to understand the context...`);
95
95
  setActivity(`${this.emoji} Researching...`, 'action');
96
- await this.ensureNavigated(state.url, screenshot && this.provider.hasVision());
96
+ await this.ensureNavigated(displayUrl, screenshot && this.provider.hasVision());
97
97
  await this.hooksRunner.runBeforeHook('researcher', state.url);
98
98
  const annotatedElements = await this.explorer.annotateElements();
99
99
  debugLog(`Annotated ${annotatedElements.length} interactive elements with eidx`);
@@ -114,11 +114,11 @@ export class Researcher extends ResearcherBase {
114
114
  if (!deep && !force) {
115
115
  const similar = await findSimilarResearch(combinedHtml);
116
116
  if (similar) {
117
- tag('info').log('Similar research found, reusing cached result');
117
+ tag('substep').log('Similar research found, reusing cached result');
118
118
  if (stateHash)
119
119
  saveResearch(stateHash, similar, combinedHtml);
120
120
  tag('multiline').log(formatResearchSummary(similar));
121
- tag('success').log(`Research complete! ${similar.length} characters (reused)`);
121
+ tag('success').log('Research complete (reused)');
122
122
  await this.hooksRunner.runAfterHook('researcher', state.url);
123
123
  return similar;
124
124
  }
@@ -236,7 +236,12 @@ export class Researcher extends ResearcherBase {
236
236
  markSectionAsFocused(result, fallback);
237
237
  }
238
238
  if (!interrupted() && deep) {
239
- await this.performDeepAnalysis(state, result);
239
+ try {
240
+ await this.performDeepAnalysis(state, result);
241
+ }
242
+ catch (err) {
243
+ tag('warning').log(`Deep analysis failed, continuing with best-effort research: ${err instanceof Error ? err.message : err}`);
244
+ }
240
245
  }
241
246
  if (!interrupted() && data) {
242
247
  const extractedData = await this.extractData(state);
@@ -257,7 +262,7 @@ export class Researcher extends ResearcherBase {
257
262
  this.experienceTracker.updateSummary(this.actionResult, summaryLine);
258
263
  }
259
264
  tag('multiline').log(formatResearchSummary(result.text, { visionUsed: this.hasScreenshotToAnalyze }));
260
- tag('success').log(`Research complete! ${result.text.length} characters`);
265
+ tag('success').log('Research complete');
261
266
  if (researchFile)
262
267
  tag('substep').log(`Research file saved to: ${researchFile}`);
263
268
  if (this.actionResult?.screenshotFile) {
@@ -105,12 +105,36 @@ export class SessionAnalyst {
105
105
  .slice(-30)
106
106
  .map((entry) => ` - [${entry.type}] ${entry.content}`)
107
107
  .join('\n');
108
+ const checked = test.getCheckedExpectations().join(' | ') || '(none)';
109
+ const remaining = test.getRemainingExpectations().join(' | ') || '(none)';
110
+ const notes = test
111
+ .getPrintableNotes()
112
+ .slice(-12)
113
+ .map((note) => ` - ${note}`)
114
+ .join('\n');
115
+ const visitedUrls = test.getVisitedUrls({ localOnly: true }).join(' | ') || '(none)';
116
+ const verification = test.verification
117
+ ? dedent `
118
+ verification_status: ${test.verification.status || 'unknown'}
119
+ verification_message: ${test.verification.message || '(none)'}
120
+ verification_url: ${test.verification.url || '(none)'}
121
+ verification_page: ${test.verification.pageLabel || '(none)'}
122
+ verification_details:
123
+ ${(test.verification.details.length > 0 ? test.verification.details : ['(none)']).map((detail) => ` - ${detail}`).join('\n')}
124
+ `
125
+ : 'verification_status: none';
108
126
  return dedent `
109
127
  <test ref="#${ref}">
110
128
  url: ${test.startUrl || '/'}
111
129
  scenario: ${test.scenario}
112
130
  result: ${test.result || 'unknown'}
113
131
  expected: ${test.expected.join(' | ') || '(none)'}
132
+ checked_expectations: ${checked}
133
+ remaining_expectations: ${remaining}
134
+ visited_urls: ${visitedUrls}
135
+ ${verification}
136
+ notes:
137
+ ${notes || ' - (none)'}
114
138
  log:
115
139
  ${log}
116
140
  </test>
@@ -92,7 +92,6 @@ export class Tester extends TaskAgent {
92
92
  const state = this.explorer.getStateManager().getCurrentState();
93
93
  if (!state)
94
94
  throw new Error('No state found');
95
- tag('info').log(`Testing scenario: ${task.scenario}`);
96
95
  setActivity(`🧪 Testing: ${task.scenario}`, 'action');
97
96
  this.previousUrl = null;
98
97
  this.previousStateHash = null;
@@ -595,7 +594,6 @@ export class Tester extends TaskAgent {
595
594
  if (!task.hasFinished) {
596
595
  task.finish(TestResult.FAILED);
597
596
  }
598
- tag('info').log(`Finished: ${task.scenario}`);
599
597
  if (task.isSuccessful) {
600
598
  tag('success').log(`Successful test: ${task.scenario}`);
601
599
  }
@@ -792,7 +790,9 @@ export class Tester extends TaskAgent {
792
790
  if (this.getCurrentState().isInsideIframe) {
793
791
  await this.explorer.switchToMainFrame();
794
792
  }
795
- if (this.explorer.getStateManager().getCurrentState()?.url === resetUrl) {
793
+ const currentState = this.explorer.getStateManager().getCurrentState();
794
+ const currentUrl = currentState?.fullUrl || currentState?.url;
795
+ if (currentUrl === resetUrl) {
796
796
  return {
797
797
  success: false,
798
798
  message: 'Reset failed - already on initial page!',
@@ -731,11 +731,12 @@ export function createAgentTools({ explorer, researcher, navigator, experienceTr
731
731
  }),
732
732
  execute: async ({ reason }) => {
733
733
  const stateManager = explorer.getStateManager();
734
- const currentUrl = stateManager.getCurrentState()?.url;
734
+ const currentState = stateManager.getCurrentState();
735
+ const currentUrl = currentState?.fullUrl || currentState?.url;
735
736
  const history = stateManager.getStateHistory();
736
737
  let targetUrl = null;
737
738
  for (let i = history.length - 1; i >= 0; i--) {
738
- const url = history[i].toState.url;
739
+ const url = history[i].toState.fullUrl || history[i].toState.url;
739
740
  if (url !== currentUrl) {
740
741
  targetUrl = url;
741
742
  break;
@@ -34,6 +34,10 @@ export class ExploreCommand extends BaseCommand {
34
34
  failedSubPages = new Set();
35
35
  oldTestRefs = new Set();
36
36
  priorityFilter;
37
+ getCurrentPageUrl() {
38
+ const state = this.explorBot.getExplorer().getStateManager().getCurrentState();
39
+ return state?.fullUrl || state?.url;
40
+ }
37
41
  async execute(args) {
38
42
  const { opts, args: remaining } = this.parseArgs(args);
39
43
  if (opts.maxTests) {
@@ -49,7 +53,7 @@ export class ExploreCommand extends BaseCommand {
49
53
  tag('info').log('Dry-run mode: planner runs to discover new tests; test execution is skipped');
50
54
  Stats.mode ??= 'explore';
51
55
  Stats.focus ??= feature;
52
- const mainUrl = this.explorBot.getExplorer().getStateManager().getCurrentState()?.url;
56
+ const mainUrl = this.getCurrentPageUrl();
53
57
  if (cfg.enabled) {
54
58
  await this.runReuseMode(mainUrl, feature, cfg);
55
59
  }
@@ -10,6 +10,15 @@ const LogPane = React.memo(({ verboseMode }) => {
10
10
  const [logs, setLogs] = useState([]);
11
11
  const pendingLogsRef = React.useRef([]);
12
12
  const flushTimeoutRef = React.useRef(null);
13
+ const MAX_MULTILINE_LINES = 16;
14
+ const MAX_STEP_LINES = 8;
15
+ const MAX_SUBSTEP_LINES = 6;
16
+ const formatCollapsedContent = useCallback((lines, collapsedCount, label) => {
17
+ if (collapsedCount <= 0) {
18
+ return lines.join('\n');
19
+ }
20
+ return [`... ${collapsedCount} earlier ${label}`, ...lines].join('\n');
21
+ }, []);
13
22
  const flushLogs = useCallback(() => {
14
23
  if (pendingLogsRef.current.length === 0)
15
24
  return;
@@ -27,11 +36,33 @@ const LogPane = React.memo(({ verboseMode }) => {
27
36
  if (lastLog.type === logEntry.type && lastLog.content === logEntry.content && Math.abs((lastLog.timestamp?.getTime() || 0) - (logEntry.timestamp?.getTime() || 0)) < 1000) {
28
37
  continue;
29
38
  }
39
+ if ((logEntry.type === 'step' || logEntry.type === 'substep') && lastLog.type === logEntry.type && Math.abs((lastLog.timestamp?.getTime() || 0) - (logEntry.timestamp?.getTime() || 0)) < 1500) {
40
+ const currentLines = String(logEntry.content)
41
+ .split('\n')
42
+ .filter((line) => line.length > 0);
43
+ const previousLines = String(lastLog.content)
44
+ .split('\n')
45
+ .filter((line) => line.length > 0);
46
+ const visiblePreviousLines = lastLog.collapsedCount ? previousLines.slice(1) : previousLines;
47
+ const maxLines = logEntry.type === 'step' ? MAX_STEP_LINES : MAX_SUBSTEP_LINES;
48
+ const mergedLines = [...visiblePreviousLines, ...currentLines];
49
+ const overflow = Math.max(0, mergedLines.length - maxLines);
50
+ const collapsedCount = (lastLog.collapsedCount || 0) + overflow;
51
+ const visibleLines = mergedLines.slice(-maxLines);
52
+ const label = logEntry.type === 'step' ? 'steps' : 'details';
53
+ result[result.length - 1] = {
54
+ ...lastLog,
55
+ content: formatCollapsedContent(visibleLines, collapsedCount, label),
56
+ timestamp: logEntry.timestamp,
57
+ collapsedCount,
58
+ };
59
+ continue;
60
+ }
30
61
  result.push(logEntry);
31
62
  }
32
63
  return result;
33
64
  });
34
- }, []);
65
+ }, [formatCollapsedContent]);
35
66
  const addLog = useCallback((logEntry) => {
36
67
  pendingLogsRef.current.push(logEntry);
37
68
  if (!flushTimeoutRef.current) {
@@ -85,10 +116,9 @@ const LogPane = React.memo(({ verboseMode }) => {
85
116
  const cleaned = stripAnsi(dedent(log.content));
86
117
  const parsed = parseMarkdownToTerminal(cleaned);
87
118
  const lines = parsed.split('\n');
88
- const maxLines = 30;
89
- const truncated = lines.length > maxLines ? `${lines.slice(0, maxLines).join('\n')}\n... (${lines.length - maxLines} more lines)` : cleaned;
119
+ const truncated = lines.length > MAX_MULTILINE_LINES ? `${lines.slice(0, MAX_MULTILINE_LINES).join('\n')}\n... (${lines.length - MAX_MULTILINE_LINES} more lines)` : parsed;
90
120
  return (React.createElement(Box, { key: index, borderStyle: "classic", borderLeft: false, borderRight: false, marginY: 1, padding: 1, borderColor: "dim", overflow: "hidden" },
91
- React.createElement(Text, { color: "gray", dimColor: true }, parsed)));
121
+ React.createElement(Text, { color: "gray", dimColor: true }, truncated)));
92
122
  }
93
123
  if (log.type === 'html') {
94
124
  // Convert HTML to markdown, then render as multiline
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "explorbot",
3
- "version": "0.1.18",
3
+ "version": "0.1.20",
4
4
  "description": "CLI app built with React Ink, CodeceptJS, and Playwright",
5
5
  "license": "Elastic-2.0",
6
6
  "type": "module",
package/src/action.ts CHANGED
@@ -2,7 +2,6 @@ import fs from 'node:fs';
2
2
  import { join } from 'node:path';
3
3
  import { faker } from '@faker-js/faker';
4
4
  import { context, trace } from '@opentelemetry/api';
5
- import { highlight } from 'cli-highlight';
6
5
  import { container, recorder } from 'codeceptjs';
7
6
  import * as codeceptjs from 'codeceptjs';
8
7
  import { hopeThat, retryTo, tryTo, within } from 'codeceptjs/lib/effects';
@@ -21,7 +20,7 @@ import type { PlaywrightRecorder } from './playwright-recorder.ts';
21
20
  import type { StateManager } from './state-manager.js';
22
21
  import { extractCodeBlocks } from './utils/code-extractor.js';
23
22
  import { htmlCombinedSnapshot, minifyHtml } from './utils/html.js';
24
- import { createDebug, log, setStepSpanParent, tag } from './utils/logger.js';
23
+ import { createDebug, setStepSpanParent, tag } from './utils/logger.js';
25
24
  import { safeFilename } from './utils/strings.ts';
26
25
  import { throttle } from './utils/throttle.ts';
27
26
 
@@ -296,7 +295,13 @@ class Action {
296
295
  async expect(codeOrFunction: string | ((I: CodeceptJS.I) => void)): Promise<Action> {
297
296
  const codeString = typeof codeOrFunction === 'string' ? codeOrFunction : codeOrFunction.toString();
298
297
  this.expectation = codeString.toString();
299
- log('Expecting', highlight(codeString, { language: 'javascript' }));
298
+ const expectationPreview = sanitizeCodeBlock(codeString)
299
+ .split('\n')
300
+ .map((line) => line.trim())
301
+ .filter(Boolean)
302
+ .slice(0, 2)
303
+ .join(' ');
304
+ tag('step').log(`Expecting: ${expectationPreview || 'assertion'}`);
300
305
  try {
301
306
  debugLog('Executing expectation:', codeString);
302
307
 
package/src/ai/driller.ts CHANGED
@@ -168,7 +168,7 @@ export class Driller extends TaskAgent implements Agent {
168
168
  this.allResults = [];
169
169
 
170
170
  return Observability.run(`driller: ${currentState.url}`, { tags: ['driller'], sessionId: sessionName }, async () => {
171
- tag('info').log(`Driller starting on ${currentState.url}`);
171
+ tag('step').log(`Drilling page: ${currentState.url}`);
172
172
  await this.hooksRunner.runBeforeHook('driller', currentState.url);
173
173
 
174
174
  const originalState = await this.captureAnnotatedState();
@@ -80,8 +80,48 @@ class Navigator implements Agent {
80
80
  this.hooksRunner = new HooksRunner(explorer, explorer.getConfig());
81
81
  }
82
82
 
83
+ private getBaseOrigin(): string | null {
84
+ const baseUrl = this.explorer.getConfig().playwright.url;
85
+ try {
86
+ return new URL(baseUrl).origin;
87
+ } catch {
88
+ return null;
89
+ }
90
+ }
91
+
92
+ private getComparableCurrentUrl(stateManager: any, expectedUrl: string): string {
93
+ const currentState = stateManager.getCurrentState();
94
+ if (!currentState) return '';
95
+ const current = /^https?:\/\//i.test(expectedUrl) ? currentState.fullUrl || currentState.url || '' : currentState.url || '';
96
+ return current;
97
+ }
98
+
99
+ private isSameExpectedOrigin(expectedUrl: string, stateManager: any): boolean {
100
+ const currentState = stateManager.getCurrentState();
101
+ if (!currentState) return false;
102
+
103
+ const currentFullUrl = currentState.fullUrl || currentState.url || '';
104
+ if (!currentFullUrl) return false;
105
+
106
+ try {
107
+ const currentOrigin = new URL(currentFullUrl).origin;
108
+ if (/^https?:\/\//i.test(expectedUrl)) {
109
+ return currentOrigin === new URL(expectedUrl).origin;
110
+ }
111
+
112
+ const baseOrigin = this.getBaseOrigin();
113
+ if (!baseOrigin) return true;
114
+ return currentOrigin === baseOrigin;
115
+ } catch {
116
+ return !/^https?:\/\//i.test(expectedUrl);
117
+ }
118
+ }
119
+
83
120
  private isOnExpectedPage(expectedUrl: string, stateManager: any): boolean {
84
- const currentUrl = stateManager.getCurrentState()?.url || '';
121
+ if (!this.isSameExpectedOrigin(expectedUrl, stateManager)) {
122
+ return false;
123
+ }
124
+ const currentUrl = this.getComparableCurrentUrl(stateManager, expectedUrl);
85
125
  return normalizeUrl(currentUrl) === normalizeUrl(expectedUrl);
86
126
  }
87
127
 
@@ -282,7 +322,8 @@ class Navigator implements Agent {
282
322
  }
283
323
  }
284
324
  const freshState = await action.capturePageState();
285
- const urlMatches = normalizeUrl(freshState.url || '') === normalizeUrl(expectedUrl);
325
+ const currentUrl = /^https?:\/\//i.test(expectedUrl) ? freshState.fullUrl || freshState.url || '' : freshState.url || '';
326
+ const urlMatches = this.isSameExpectedOrigin(expectedUrl, action.stateManager) && normalizeUrl(currentUrl) === normalizeUrl(expectedUrl);
286
327
  const stateChanged = freshState.getStateHash() !== actionResult.getStateHash();
287
328
  resolved = urlMatches && stateChanged;
288
329
 
package/src/ai/pilot.ts CHANGED
@@ -313,6 +313,9 @@ export class Pilot implements Agent {
313
313
  overrides the others — weigh them together. Tester's record() notes are the LEAST reliable; always
314
314
  cross-check against actual actions and state. Visual screenshot analysis is strong for UI state
315
315
  (active tabs, visible counts, colors).
316
+ If the final page clearly shows an equivalent success state in a different UI form, do not fail only
317
+ because one narrow assertion targeted a specific badge, count, toast, or wording that the product
318
+ represents differently.
316
319
 
317
320
  SCENARIO TITLE defines what must happen. Action verbs require persisted evidence:
318
321
  - "Create X" → X must exist (visible, redirected to its page, or success message). Opening a form is NOT enough.
@@ -355,6 +358,8 @@ export class Pilot implements Agent {
355
358
 
356
359
  GUIDANCE (required for "continue"): a specific next action on the current page — which tool, what
357
360
  to verify, how to record. Do not suggest repeating actions that already succeeded.
361
+ If progress is blocked only because the page lacks target data for the scenario, prefer precondition()
362
+ over repeated UI attempts.
358
363
  `;
359
364
  }
360
365
 
package/src/ai/planner.ts CHANGED
@@ -80,6 +80,10 @@ export class Planner extends PlannerBase implements Agent {
80
80
  return ConfigParser.getInstance().getConfig().ai?.agents?.researcher?.sections || Object.keys(POSSIBLE_SECTIONS);
81
81
  }
82
82
 
83
+ private getDefaultStartUrl(state: { url: string; fullUrl?: string }): string {
84
+ return state.fullUrl || state.url;
85
+ }
86
+
83
87
  getSystemMessage(feature?: string): string {
84
88
  const currentUrl = this.stateManager.getCurrentState()?.url;
85
89
  const customPrompt = this.provider.getSystemPromptForAgent('planner', currentUrl);
@@ -160,7 +164,6 @@ export class Planner extends PlannerBase implements Agent {
160
164
  this.freshStart = false;
161
165
 
162
166
  setActivity(`${this.emoji} Planning...`, 'action');
163
- tag('info').log(`Planning test scenarios for ${state.url}`);
164
167
  if (style) tag('info').log(`Planning style: ${style}`);
165
168
 
166
169
  const tags = ['planner'];
@@ -188,7 +191,8 @@ export class Planner extends PlannerBase implements Agent {
188
191
  throw new Error('No tasks were created successfully');
189
192
  }
190
193
 
191
- const fromPlanning = aiResult.object.scenarios.map((s: any) => new Test(s.scenario, s.priority, s.expectedOutcomes, s.startUrl || state.url, s.steps || []));
194
+ const defaultStartUrl = this.getDefaultStartUrl(state);
195
+ const fromPlanning = aiResult.object.scenarios.map((s: any) => new Test(s.scenario, s.priority, s.expectedOutcomes, s.startUrl || defaultStartUrl, s.steps || []));
192
196
 
193
197
  return { tests: fromPlanning, planName: aiResult.object.planName };
194
198
  });
@@ -200,7 +204,8 @@ export class Planner extends PlannerBase implements Agent {
200
204
  const cached = state.url ? getRegisteredPlan(state.url) : null;
201
205
  const planName = feature || cached?.plan.title || result.planName || state.url;
202
206
  this.currentPlan = new Plan(planName);
203
- this.currentPlan.url = state.url;
207
+ this.currentPlan.url = this.getDefaultStartUrl(state);
208
+ const defaultStartUrl = this.getDefaultStartUrl(state);
204
209
  if (parentPlan) this.currentPlan.parentPlan = parentPlan;
205
210
  const allPreviousScenarios = this.getPreviousSessionScenarios();
206
211
  const existingTestScenarios = this.getExistingTestFileScenarios(state.url);
@@ -208,13 +213,13 @@ export class Planner extends PlannerBase implements Agent {
208
213
  for (const t of tests) {
209
214
  if (allPreviousScenarios.has(t.scenario.toLowerCase())) continue;
210
215
  t.style = this.lastStyleName;
211
- t.startUrl = state.url;
216
+ t.startUrl = defaultStartUrl;
212
217
  this.currentPlan.addTest(t);
213
218
  }
214
219
  } else {
215
220
  tag('step').log(`Expanding plan: "${this.currentPlan.title}"`);
216
221
  this.currentPlan.nextIteration();
217
- const newTests = this.addNewTests(tests, state.url);
222
+ const newTests = this.addNewTests(tests, this.getDefaultStartUrl(state));
218
223
  if (newTests.length > 0) {
219
224
  const summary = `New scenarios:\n${newTests.map((t) => `+ [${t.priority}] ${t.scenario}`).join('\n')}`;
220
225
  tag('multiline').log(summary);
@@ -331,6 +336,13 @@ export class Planner extends PlannerBase implements Agent {
331
336
  Focus on URL page change or data persistency after page reload.
332
337
  If there are subpages (pages with same URL path) plan testing of those subpages as well
333
338
  If you plan to test CRUD operations, plan them in correct order: create, read, update.
339
+ Do not invent specific route names, success messages, validation texts, badge counts, or welcome messages unless they are visible in research, visited pages, or prior observed flows.
340
+ If exact wording is unknown, describe the expected result generically, for example "an authentication error is shown" or "the user stays on the login page" instead of guessing the literal text.
341
+ If exact redirect destination is unknown, describe the destination by visible page identity, for example "the dashboard page opens" or "the current workspace home page opens" instead of inventing a URL slug.
342
+ Only propose scenarios whose prerequisites are evident from page research, visited pages, or API data preparation context.
343
+ If a scenario needs existing records, recipients, results, notifications, or other target data, propose it only when that data is visible or API preconditions can create it.
344
+ If the page appears read-only, degraded, demo-limited, maintenance-like, or lacks write controls, prefer read-only scenarios such as opening panels, inspecting visible lists, filtering, searching, or verifying current state.
345
+ Do not assume hidden data exists just because a control is present.
334
346
  DO NOT propose "verification-only" tests that merely open a UI element (modal, dropdown, panel) and check it exists.
335
347
  Every test must complete a meaningful action that changes application state or produces a business outcome.
336
348
  Opening a modal is NOT a test — performing an action INSIDE the modal IS a test.
@@ -566,10 +578,15 @@ export class Planner extends PlannerBase implements Agent {
566
578
  - Good: "New suite 'My New Suite' appears in the suite list"
567
579
  - Good: "Suite appears under Starred filter tab"
568
580
  - Good: "Success message 'Suite created' is displayed"
581
+ - Good when wording is unknown: "An authentication error is displayed"
582
+ - Good when route is unknown: "The workspace home page is displayed"
569
583
  - Bad: "Modal is displayed" (just verifying existence, no business value)
570
584
  - Bad: "Dropdown menu is visible" (just verifying existence)
585
+ - Bad: "Welcome message is displayed" if no welcome message is visible in research
586
+ - Bad: "Redirected to /dashboard" if no such route was observed
571
587
  - Each outcome should be independently verifiable
572
588
  - Avoid combining multiple checks into one outcome
589
+ - Prefer durable user-facing results over fragile micro-signals
573
590
  - Expected outcomes describe WHAT TO VERIFY
574
591
 
575
592
  FORMATTING RULES:
@@ -87,7 +87,7 @@ export class Rerunner extends TaskAgent implements Agent {
87
87
  return { total: 0, passed: 0, failed: 0, healed: 0 };
88
88
  }
89
89
 
90
- tag('info').log(`Re-running tests from: ${relative(process.cwd(), absPath)}`);
90
+ tag('step').log(`Re-running tests from: ${relative(process.cwd(), absPath)}`);
91
91
  setActivity('🔄 Re-running tests...', 'action');
92
92
 
93
93
  this.healedSteps = [];
@@ -24,7 +24,7 @@ export function WithDeepAnalysis<T extends Constructor>(Base: T) {
24
24
 
25
25
  async performDeepAnalysis(state: WebPageState, result: ResearchResult): Promise<void> {
26
26
  tag('info').log('Starting deep analysis of expandable elements');
27
- await (this as any).navigateTo(state.url);
27
+ await (this as any).navigateTo(state.fullUrl || state.url);
28
28
 
29
29
  let expandables = await this._discoverExpandables(result.text);
30
30
  if (expandables.length === 0) {
@@ -35,7 +35,7 @@ export function WithDeepAnalysis<T extends Constructor>(Base: T) {
35
35
 
36
36
  const maxClicks = (this.explorer.getConfig().ai?.agents?.researcher as any)?.maxExpandableClicks ?? DEFAULT_MAX_EXPANDABLE_CLICKS;
37
37
  if (expandables.length > maxClicks) {
38
- expandables = await this._selectExpandables(expandables, state.url, maxClicks);
38
+ expandables = await this._selectExpandables(expandables, state.fullUrl || state.url, maxClicks);
39
39
  tag('substep').log(`Selected ${expandables.length} expandables to click (max: ${maxClicks})`);
40
40
  }
41
41
 
@@ -177,7 +177,14 @@ export function WithDeepAnalysis<T extends Constructor>(Base: T) {
177
177
  visionCall = this.provider.processImage(visionPrompt, screenshot.toString('base64'));
178
178
  }
179
179
 
180
- const [textRes, visionRes] = await Promise.all([textCall, visionCall]);
180
+ let textRes: { text?: string } | null = null;
181
+ let visionRes: { text?: string } | null = null;
182
+ try {
183
+ [textRes, visionRes] = await Promise.all([textCall, visionCall]);
184
+ } catch (err) {
185
+ tag('warning').log(`Expandable discovery failed, skipping deep analysis: ${err instanceof Error ? err.message : err}`);
186
+ return [];
187
+ }
181
188
 
182
189
  const eidxSet = new Set<string>();
183
190
  const parseRefs = (text: string | undefined) => {
@@ -244,10 +251,16 @@ export function WithDeepAnalysis<T extends Constructor>(Base: T) {
244
251
  `;
245
252
 
246
253
  const model = this.provider.getModelForAgent('researcher');
247
- const r = await this.provider.chat([{ role: 'user', content: prompt }], model, {
248
- agentName: 'researcher',
249
- telemetryFunctionId: 'researcher.selectExpandables',
250
- });
254
+ let r: { text?: string };
255
+ try {
256
+ r = await this.provider.chat([{ role: 'user', content: prompt }], model, {
257
+ agentName: 'researcher',
258
+ telemetryFunctionId: 'researcher.selectExpandables',
259
+ });
260
+ } catch (err) {
261
+ tag('warning').log(`Expandable selection failed, using first ${maxClicks}: ${err instanceof Error ? err.message : err}`);
262
+ return expandables.slice(0, maxClicks);
263
+ }
251
264
 
252
265
  const nums = (r.text || '').match(/\d+/g)?.map(Number) || [];
253
266
  const selected = expandables.filter((_, i) => nums.includes(i + 1));
@@ -125,7 +125,7 @@ export class Researcher extends ResearcherBase implements Agent {
125
125
  tag('info').log(`Researching ${displayUrl} to understand the context...`);
126
126
  setActivity(`${this.emoji} Researching...`, 'action');
127
127
 
128
- await this.ensureNavigated(state.url, screenshot && this.provider.hasVision());
128
+ await this.ensureNavigated(displayUrl, screenshot && this.provider.hasVision());
129
129
  await this.hooksRunner.runBeforeHook('researcher', state.url);
130
130
 
131
131
  const annotatedElements = await this.explorer.annotateElements();
@@ -151,10 +151,10 @@ export class Researcher extends ResearcherBase implements Agent {
151
151
  if (!deep && !force) {
152
152
  const similar = await findSimilarResearch(combinedHtml);
153
153
  if (similar) {
154
- tag('info').log('Similar research found, reusing cached result');
154
+ tag('substep').log('Similar research found, reusing cached result');
155
155
  if (stateHash) saveResearch(stateHash, similar, combinedHtml);
156
156
  tag('multiline').log(formatResearchSummary(similar));
157
- tag('success').log(`Research complete! ${similar.length} characters (reused)`);
157
+ tag('success').log('Research complete (reused)');
158
158
  await this.hooksRunner.runAfterHook('researcher', state.url);
159
159
  return similar;
160
160
  }
@@ -285,7 +285,11 @@ export class Researcher extends ResearcherBase implements Agent {
285
285
  }
286
286
 
287
287
  if (!interrupted() && deep) {
288
- await this.performDeepAnalysis(state, result);
288
+ try {
289
+ await this.performDeepAnalysis(state, result);
290
+ } catch (err) {
291
+ tag('warning').log(`Deep analysis failed, continuing with best-effort research: ${err instanceof Error ? err.message : err}`);
292
+ }
289
293
  }
290
294
 
291
295
  if (!interrupted() && data) {
@@ -311,7 +315,7 @@ export class Researcher extends ResearcherBase implements Agent {
311
315
  }
312
316
 
313
317
  tag('multiline').log(formatResearchSummary(result.text, { visionUsed: this.hasScreenshotToAnalyze }));
314
- tag('success').log(`Research complete! ${result.text.length} characters`);
318
+ tag('success').log('Research complete');
315
319
  if (researchFile) tag('substep').log(`Research file saved to: ${researchFile}`);
316
320
  if (this.actionResult?.screenshotFile) {
317
321
  const screenshotPath = outputPath('states', this.actionResult.screenshotFile);
@@ -120,6 +120,24 @@ export class SessionAnalyst implements Agent {
120
120
  .slice(-30)
121
121
  .map((entry) => ` - [${entry.type}] ${entry.content}`)
122
122
  .join('\n');
123
+ const checked = test.getCheckedExpectations().join(' | ') || '(none)';
124
+ const remaining = test.getRemainingExpectations().join(' | ') || '(none)';
125
+ const notes = test
126
+ .getPrintableNotes()
127
+ .slice(-12)
128
+ .map((note) => ` - ${note}`)
129
+ .join('\n');
130
+ const visitedUrls = test.getVisitedUrls({ localOnly: true }).join(' | ') || '(none)';
131
+ const verification = test.verification
132
+ ? dedent`
133
+ verification_status: ${test.verification.status || 'unknown'}
134
+ verification_message: ${test.verification.message || '(none)'}
135
+ verification_url: ${test.verification.url || '(none)'}
136
+ verification_page: ${test.verification.pageLabel || '(none)'}
137
+ verification_details:
138
+ ${(test.verification.details.length > 0 ? test.verification.details : ['(none)']).map((detail) => ` - ${detail}`).join('\n')}
139
+ `
140
+ : 'verification_status: none';
123
141
 
124
142
  return dedent`
125
143
  <test ref="#${ref}">
@@ -127,6 +145,12 @@ export class SessionAnalyst implements Agent {
127
145
  scenario: ${test.scenario}
128
146
  result: ${test.result || 'unknown'}
129
147
  expected: ${test.expected.join(' | ') || '(none)'}
148
+ checked_expectations: ${checked}
149
+ remaining_expectations: ${remaining}
150
+ visited_urls: ${visitedUrls}
151
+ ${verification}
152
+ notes:
153
+ ${notes || ' - (none)'}
130
154
  log:
131
155
  ${log}
132
156
  </test>
package/src/ai/tester.ts CHANGED
@@ -118,7 +118,6 @@ export class Tester extends TaskAgent implements Agent {
118
118
  const state = this.explorer.getStateManager().getCurrentState();
119
119
  if (!state) throw new Error('No state found');
120
120
 
121
- tag('info').log(`Testing scenario: ${task.scenario}`);
122
121
  setActivity(`🧪 Testing: ${task.scenario}`, 'action');
123
122
 
124
123
  this.previousUrl = null;
@@ -678,7 +677,6 @@ export class Tester extends TaskAgent implements Agent {
678
677
  if (!task.hasFinished) {
679
678
  task.finish(TestResult.FAILED);
680
679
  }
681
- tag('info').log(`Finished: ${task.scenario}`);
682
680
 
683
681
  if (task.isSuccessful) {
684
682
  tag('success').log(`Successful test: ${task.scenario}`);
@@ -882,7 +880,9 @@ export class Tester extends TaskAgent implements Agent {
882
880
  await this.explorer.switchToMainFrame();
883
881
  }
884
882
 
885
- if (this.explorer.getStateManager().getCurrentState()?.url === resetUrl!) {
883
+ const currentState = this.explorer.getStateManager().getCurrentState();
884
+ const currentUrl = currentState?.fullUrl || currentState?.url;
885
+ if (currentUrl === resetUrl!) {
886
886
  return {
887
887
  success: false,
888
888
  message: 'Reset failed - already on initial page!',
package/src/ai/tools.ts CHANGED
@@ -854,12 +854,13 @@ export function createAgentTools({
854
854
  }),
855
855
  execute: async ({ reason }) => {
856
856
  const stateManager = explorer.getStateManager();
857
- const currentUrl = stateManager.getCurrentState()?.url;
857
+ const currentState = stateManager.getCurrentState();
858
+ const currentUrl = currentState?.fullUrl || currentState?.url;
858
859
  const history = stateManager.getStateHistory();
859
860
 
860
861
  let targetUrl: string | null = null;
861
862
  for (let i = history.length - 1; i >= 0; i--) {
862
- const url = history[i].toState.url;
863
+ const url = history[i].toState.fullUrl || history[i].toState.url;
863
864
  if (url !== currentUrl) {
864
865
  targetUrl = url;
865
866
  break;
@@ -38,6 +38,11 @@ export class ExploreCommand extends BaseCommand {
38
38
  private oldTestRefs = new Set<Test>();
39
39
  private priorityFilter?: Set<string>;
40
40
 
41
+ private getCurrentPageUrl(): string | undefined {
42
+ const state = this.explorBot.getExplorer().getStateManager().getCurrentState();
43
+ return state?.fullUrl || state?.url;
44
+ }
45
+
41
46
  async execute(args: string): Promise<void> {
42
47
  const { opts, args: remaining } = this.parseArgs(args);
43
48
  if (opts.maxTests) {
@@ -51,7 +56,7 @@ export class ExploreCommand extends BaseCommand {
51
56
  if (this.dryRun) tag('info').log('Dry-run mode: planner runs to discover new tests; test execution is skipped');
52
57
  Stats.mode ??= 'explore';
53
58
  Stats.focus ??= feature;
54
- const mainUrl = this.explorBot.getExplorer().getStateManager().getCurrentState()?.url;
59
+ const mainUrl = this.getCurrentPageUrl();
55
60
 
56
61
  if (cfg.enabled) {
57
62
  await this.runReuseMode(mainUrl, feature, cfg);
@@ -7,21 +7,32 @@ import { parseMarkdownToTerminal } from '../utils/markdown-terminal.js';
7
7
 
8
8
  import { Box, Text } from 'ink';
9
9
  import type { LogType, TaggedLogEntry } from '../utils/logger.js';
10
- import { isDebugMode, registerLogPane, setVerboseMode, unregisterLogPane } from '../utils/logger.js';
10
+ import { isDebugMode, registerLogPane, unregisterLogPane } from '../utils/logger.js';
11
11
 
12
12
  // marked.use(new markedTerminal());
13
13
 
14
- type LogEntry = TaggedLogEntry;
14
+ type LogEntry = TaggedLogEntry & { collapsedCount?: number };
15
15
 
16
16
  interface LogPaneProps {
17
17
  verboseMode: boolean;
18
18
  }
19
19
 
20
20
  const LogPane: React.FC<LogPaneProps> = React.memo(({ verboseMode }) => {
21
- const [logs, setLogs] = useState<TaggedLogEntry[]>([]);
22
- const pendingLogsRef = React.useRef<TaggedLogEntry[]>([]);
21
+ const [logs, setLogs] = useState<LogEntry[]>([]);
22
+ const pendingLogsRef = React.useRef<LogEntry[]>([]);
23
23
  const flushTimeoutRef = React.useRef<ReturnType<typeof setTimeout> | null>(null);
24
24
 
25
+ const MAX_MULTILINE_LINES = 16;
26
+ const MAX_STEP_LINES = 8;
27
+ const MAX_SUBSTEP_LINES = 6;
28
+
29
+ const formatCollapsedContent = useCallback((lines: string[], collapsedCount: number, label: string) => {
30
+ if (collapsedCount <= 0) {
31
+ return lines.join('\n');
32
+ }
33
+ return [`... ${collapsedCount} earlier ${label}`, ...lines].join('\n');
34
+ }, []);
35
+
25
36
  const flushLogs = useCallback(() => {
26
37
  if (pendingLogsRef.current.length === 0) return;
27
38
 
@@ -29,7 +40,7 @@ const LogPane: React.FC<LogPaneProps> = React.memo(({ verboseMode }) => {
29
40
  pendingLogsRef.current = [];
30
41
  flushTimeoutRef.current = null;
31
42
 
32
- setLogs((prevLogs: TaggedLogEntry[]) => {
43
+ setLogs((prevLogs: LogEntry[]) => {
33
44
  const result = [...prevLogs];
34
45
 
35
46
  for (const logEntry of newLogs) {
@@ -43,12 +54,35 @@ const LogPane: React.FC<LogPaneProps> = React.memo(({ verboseMode }) => {
43
54
  continue;
44
55
  }
45
56
 
57
+ if ((logEntry.type === 'step' || logEntry.type === 'substep') && lastLog.type === logEntry.type && Math.abs((lastLog.timestamp?.getTime() || 0) - (logEntry.timestamp?.getTime() || 0)) < 1500) {
58
+ const currentLines = String(logEntry.content)
59
+ .split('\n')
60
+ .filter((line) => line.length > 0);
61
+ const previousLines = String(lastLog.content)
62
+ .split('\n')
63
+ .filter((line) => line.length > 0);
64
+ const visiblePreviousLines = lastLog.collapsedCount ? previousLines.slice(1) : previousLines;
65
+ const maxLines = logEntry.type === 'step' ? MAX_STEP_LINES : MAX_SUBSTEP_LINES;
66
+ const mergedLines = [...visiblePreviousLines, ...currentLines];
67
+ const overflow = Math.max(0, mergedLines.length - maxLines);
68
+ const collapsedCount = (lastLog.collapsedCount || 0) + overflow;
69
+ const visibleLines = mergedLines.slice(-maxLines);
70
+ const label = logEntry.type === 'step' ? 'steps' : 'details';
71
+ result[result.length - 1] = {
72
+ ...lastLog,
73
+ content: formatCollapsedContent(visibleLines, collapsedCount, label),
74
+ timestamp: logEntry.timestamp,
75
+ collapsedCount,
76
+ };
77
+ continue;
78
+ }
79
+
46
80
  result.push(logEntry);
47
81
  }
48
82
 
49
83
  return result;
50
84
  });
51
- }, []);
85
+ }, [formatCollapsedContent]);
52
86
 
53
87
  const addLog = useCallback(
54
88
  (logEntry: TaggedLogEntry) => {
@@ -112,12 +146,11 @@ const LogPane: React.FC<LogPaneProps> = React.memo(({ verboseMode }) => {
112
146
  const cleaned = stripAnsi(dedent(log.content));
113
147
  const parsed = parseMarkdownToTerminal(cleaned);
114
148
  const lines = parsed.split('\n');
115
- const maxLines = 30;
116
- const truncated = lines.length > maxLines ? `${lines.slice(0, maxLines).join('\n')}\n... (${lines.length - maxLines} more lines)` : cleaned;
149
+ const truncated = lines.length > MAX_MULTILINE_LINES ? `${lines.slice(0, MAX_MULTILINE_LINES).join('\n')}\n... (${lines.length - MAX_MULTILINE_LINES} more lines)` : parsed;
117
150
  return (
118
151
  <Box key={index} borderStyle="classic" borderLeft={false} borderRight={false} marginY={1} padding={1} borderColor="dim" overflow="hidden">
119
152
  <Text color="gray" dimColor>
120
- {parsed}
153
+ {truncated}
121
154
  </Text>
122
155
  </Box>
123
156
  );