explorbot 0.1.16 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -122,7 +122,15 @@ addCommonOptions(program.command('start [path]').description('Start web explorat
122
122
  await startTUI(explorBot);
123
123
  });
124
124
 
125
- addCommonOptions(program.command('explore <path>').description('Explore a page autonomously and run invented scenarios').option('--max-tests <count>', 'Maximum number of tests to run').option('--focus <feature>', 'Focus area for exploration')).action(async (explorePath, options) => {
125
+ addCommonOptions(
126
+ program
127
+ .command('explore <path>')
128
+ .description('Explore a page autonomously and run invented scenarios')
129
+ .option('--max-tests <count>', 'Maximum number of tests to run')
130
+ .option('--focus <feature>', 'Focus area for exploration')
131
+ .option('--configure <spec>', 'Reuse spec: keys new|from|style|subpages|pick_by|priority, e.g. "new:25%;pick_by=random;priority=critical,high"')
132
+ .option('--dry-run', 'Mark picked tests as skipped without executing or generating new ones')
133
+ ).action(async (explorePath, options) => {
126
134
  try {
127
135
  const explorBot = new ExplorBot(buildExplorBotOptions(explorePath, options));
128
136
  await explorBot.start();
@@ -130,8 +138,11 @@ addCommonOptions(program.command('explore <path>').description('Explore a page a
130
138
  const { ExploreCommand } = await import('../src/commands/explore-command.js');
131
139
  const cmd = new ExploreCommand(explorBot);
132
140
  if (options.maxTests) cmd.maxTests = Number.parseInt(options.maxTests, 10);
141
+ if (options.dryRun) cmd.dryRun = true;
133
142
  const execArgs: string[] = [];
134
143
  if (options.focus) execArgs.push('--focus', `"${options.focus}"`);
144
+ if (options.configure) execArgs.push('--configure', `"${options.configure}"`);
145
+ if (options.dryRun) execArgs.push('--dry-run');
135
146
  await cmd.execute(execArgs.join(' '));
136
147
  await explorBot.stop();
137
148
  await showStatsAndExit(0);
@@ -93,7 +93,13 @@ addCommonOptions(program.command('start [path]').description('Start web explorat
93
93
  await explorBot.start();
94
94
  await startTUI(explorBot);
95
95
  });
96
- addCommonOptions(program.command('explore <path>').description('Explore a page autonomously and run invented scenarios').option('--max-tests <count>', 'Maximum number of tests to run').option('--focus <feature>', 'Focus area for exploration')).action(async (explorePath, options) => {
96
+ addCommonOptions(program
97
+ .command('explore <path>')
98
+ .description('Explore a page autonomously and run invented scenarios')
99
+ .option('--max-tests <count>', 'Maximum number of tests to run')
100
+ .option('--focus <feature>', 'Focus area for exploration')
101
+ .option('--configure <spec>', 'Reuse spec: keys new|from|style|subpages|pick_by|priority, e.g. "new:25%;pick_by=random;priority=critical,high"')
102
+ .option('--dry-run', 'Mark picked tests as skipped without executing or generating new ones')).action(async (explorePath, options) => {
97
103
  try {
98
104
  const explorBot = new ExplorBot(buildExplorBotOptions(explorePath, options));
99
105
  await explorBot.start();
@@ -102,9 +108,15 @@ addCommonOptions(program.command('explore <path>').description('Explore a page a
102
108
  const cmd = new ExploreCommand(explorBot);
103
109
  if (options.maxTests)
104
110
  cmd.maxTests = Number.parseInt(options.maxTests, 10);
111
+ if (options.dryRun)
112
+ cmd.dryRun = true;
105
113
  const execArgs = [];
106
114
  if (options.focus)
107
115
  execArgs.push('--focus', `"${options.focus}"`);
116
+ if (options.configure)
117
+ execArgs.push('--configure', `"${options.configure}"`);
118
+ if (options.dryRun)
119
+ execArgs.push('--dry-run');
108
120
  await cmd.execute(execArgs.join(' '));
109
121
  await explorBot.stop();
110
122
  await showStatsAndExit(0);
package/dist/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "explorbot",
3
- "version": "0.1.16",
3
+ "version": "0.1.17",
4
4
  "description": "CLI app built with React Ink, CodeceptJS, and Playwright",
5
5
  "license": "Elastic-2.0",
6
6
  "type": "module",
@@ -277,14 +277,9 @@ export class Pilot {
277
277
  - "Edit X" → updated value must be persisted (visible in list/detail). Opening edit is NOT enough; redirect after save with the new value visible IS enough.
278
278
  - Negative tests ("without a name", "invalid", "duplicate", "unauthorized") → success means the system PREVENTED the action with validation/error.
279
279
 
280
- PROVENANCE for create/edit scenarios: the task prompt instructs the tester to inject the
281
- session marker "${task.sessionName ?? ''}" into newly created or edited free-text values.
282
- When that marker COULD be injected, the entity used as proof MUST contain it. A record
283
- matching the goal by text alone but missing the marker is a stale leftover from a prior
284
- run — it is NOT evidence the current scenario produced anything. Vote \`fail\`, not \`pass\`.
285
- This does not apply when the field is restricted (numeric only, enum, etc.) or when the
286
- session_log shows no fillField/type/select actions were attempted at all (in that case
287
- the scenario clearly didn't run — also vote \`fail\`).
280
+ PROVENANCE: the entity you cite as proof must appear by name in <notes> or
281
+ <session_log> tool inputs for THIS run. Name absent from tester activity = stale
282
+ coincidence, vote \`fail\`. Same if no fillField/type/select/click on a target ran.
288
283
 
289
284
  Expected results are MILESTONES, not the goal. Never fail because a milestone (toast, icon, styling)
290
285
  didn't match if the scenario goal IS accomplished.
@@ -648,6 +648,7 @@ export class Tester extends TaskAgent {
648
648
  - Use pressKey() for pressing special keys (Enter, Escape, Tab, Arrow keys) or key combinations with modifiers (Ctrl+A, Shift+Delete, etc.)
649
649
  - Use container CSS locators from <page_ui_map> to interact with elements inside sections
650
650
  - Systematically use record({ notes: ["..."] }) to write your findings, planned actions, observations, etc.
651
+ - When creating/editing/deleting a named entity, include its identifier verbatim in the note — Pilot uses it to confirm provenance.
651
652
  - Call record({ notes: ["..."], status: "success" }) when you see success/info message on a page or when expected outcome is achieved
652
653
  - Call record({ notes: ["..."], status: "fail" }) when an expected outcome cannot be achieved or has failed or you see error/alert/warning message on a page
653
654
  - NEVER call record(status: "success") if your last verify() or see() call FAILED. A failed check means the outcome is NOT confirmed — use record(status: "fail") instead, or retry with a different approach.
@@ -3,6 +3,7 @@ import { getStyles } from '../ai/planner/styles.js';
3
3
  import { outputPath } from '../config.js';
4
4
  import { normalizeUrl } from '../state-manager.js';
5
5
  import { Stats } from '../stats.js';
6
+ import { TestResult } from '../test-plan.js';
6
7
  import { getCliName } from "../utils/cli-name.js";
7
8
  import { ErrorPageError } from "../utils/error-page.js";
8
9
  import { tag } from '../utils/logger.js';
@@ -11,12 +12,15 @@ import { printNextSteps, relativeToCwd } from "../utils/next-steps.js";
11
12
  import { safeFilename } from "../utils/strings.js";
12
13
  import { BaseCommand } from './base-command.js';
13
14
  const MAX_SUB_PAGE_ATTEMPTS = 30;
15
+ const PRIORITY_ORDER = { critical: 0, important: 1, high: 2, normal: 3, low: 4 };
14
16
  export class ExploreCommand extends BaseCommand {
15
17
  name = 'explore';
16
18
  description = 'Start web exploration';
17
19
  options = [
18
20
  { flags: '--max-tests <number>', description: 'Maximum number of tests to run' },
19
21
  { flags: '--focus <feature>', description: 'Focus area for exploration' },
22
+ { flags: '--configure <spec>', description: 'Reuse spec: keys new|from|style|subpages|pick_by|priority, e.g. "new:25%;pick_by=random;priority=critical,high"' },
23
+ { flags: '--dry-run', description: 'Mark picked tests as skipped without executing or generating new ones' },
20
24
  ];
21
25
  suggestions = [
22
26
  { command: 'navigate <page>', hint: 'go to another page' },
@@ -24,93 +28,370 @@ export class ExploreCommand extends BaseCommand {
24
28
  { command: 'plan <feature>', hint: 'plan testing' },
25
29
  ];
26
30
  maxTests;
31
+ dryRun = false;
27
32
  testsRun = 0;
28
33
  completedPlans = [];
29
34
  failedSubPages = new Set();
35
+ oldTestRefs = new Set();
36
+ priorityFilter;
30
37
  async execute(args) {
31
38
  const { opts, args: remaining } = this.parseArgs(args);
32
39
  if (opts.maxTests) {
33
40
  this.maxTests = Number.parseInt(opts.maxTests, 10);
34
41
  }
35
42
  const feature = opts.focus || remaining.join(' ') || undefined;
43
+ const cfg = this.parseConfigure(opts.configure);
44
+ if (cfg.priorities)
45
+ this.priorityFilter = new Set(cfg.priorities);
46
+ if (opts.dryRun)
47
+ this.dryRun = true;
48
+ if (this.dryRun)
49
+ tag('info').log('Dry-run mode: planner runs to discover new tests; test execution is skipped');
36
50
  Stats.mode ??= 'explore';
37
51
  Stats.focus ??= feature;
38
52
  const mainUrl = this.explorBot.getExplorer().getStateManager().getCurrentState()?.url;
39
- await this.runAllStyles(mainUrl, feature);
53
+ if (cfg.enabled) {
54
+ await this.runReuseMode(mainUrl, feature, cfg);
55
+ }
56
+ else {
57
+ await this.runFreshMode(mainUrl, feature, cfg.styles);
58
+ }
59
+ const mainPlan = this.completedPlans[0];
60
+ if (mainPlan)
61
+ this.explorBot.setCurrentPlan(mainPlan);
62
+ if (this.dryRun) {
63
+ this.printResults();
64
+ return;
65
+ }
66
+ if (mainUrl)
67
+ await this.explorBot.visit(mainUrl);
68
+ const savedPath = this.explorBot.savePlans(this.completedPlans);
69
+ this.printResults();
70
+ await this.explorBot.printSessionAnalysis();
71
+ this.printNextSteps(savedPath);
72
+ }
73
+ originLabel(test) {
74
+ return this.oldTestRefs.has(test) ? 'OLD' : 'NEW';
75
+ }
76
+ printPreview(label, tests) {
77
+ if (tests.length === 0)
78
+ return;
79
+ const lines = [label];
80
+ for (let i = 0; i < tests.length; i++) {
81
+ const t = tests[i];
82
+ lines.push(` ${String(i + 1).padStart(2)}. [${this.originLabel(t)}] [${t.priority.padEnd(9)}] ${t.scenario}`);
83
+ }
84
+ tag('multiline').log(lines.join('\n'));
85
+ }
86
+ async runFreshMode(mainUrl, feature, styles) {
87
+ await this.runAllStyles(mainUrl, feature, undefined, undefined, styles);
40
88
  const mainPlan = this.explorBot.getCurrentPlan();
41
89
  if (!mainPlan)
42
90
  return;
43
91
  this.completedPlans.push(mainPlan);
44
- if (!feature && !this.isLimitReached()) {
45
- const planner = this.explorBot.agentPlanner();
46
- let attempts = 0;
47
- while (attempts < MAX_SUB_PAGE_ATTEMPTS) {
48
- attempts++;
92
+ if (feature || this.isLimitReached())
93
+ return;
94
+ await this.discoverNewSubPages(mainPlan, mainUrl, styles, new Set());
95
+ }
96
+ async runReuseMode(mainUrl, feature, cfg) {
97
+ const filename = cfg.fromPath || this.explorBot.generatePlanFilename(feature);
98
+ let loadedPlans = [];
99
+ try {
100
+ loadedPlans = this.explorBot.loadPlans(filename);
101
+ }
102
+ catch (err) {
103
+ tag('warning').log(`Reuse plan not found (${err instanceof Error ? err.message : err}); falling back to fresh planning`);
104
+ await this.runFreshMode(mainUrl, feature, cfg.styles);
105
+ return;
106
+ }
107
+ if (loadedPlans.length === 0) {
108
+ tag('warning').log('Reuse plan empty; falling back to fresh planning');
109
+ await this.runFreshMode(mainUrl, feature, cfg.styles);
110
+ return;
111
+ }
112
+ const mainPlan = loadedPlans[0];
113
+ const subPlans = loadedPlans.slice(1);
114
+ const totalCap = this.maxTests ?? Number.POSITIVE_INFINITY;
115
+ let newQuota = Number.POSITIVE_INFINITY;
116
+ let oldQuota = Number.POSITIVE_INFINITY;
117
+ if (Number.isFinite(totalCap)) {
118
+ newQuota = Math.round(totalCap * cfg.newRatio);
119
+ oldQuota = Math.max(0, totalCap - newQuota);
120
+ }
121
+ for (const p of loadedPlans) {
122
+ for (const t of p.tests)
123
+ this.oldTestRefs.add(t);
124
+ }
125
+ const allOldTests = loadedPlans.flatMap((p) => p.tests.filter((t) => t.status === 'pending'));
126
+ let matchingOldTests = allOldTests;
127
+ if (cfg.styles) {
128
+ matchingOldTests = matchingOldTests.filter((t) => !t.style || cfg.styles.includes(t.style));
129
+ }
130
+ if (this.priorityFilter) {
131
+ matchingOldTests = matchingOldTests.filter((t) => this.priorityFilter.has(t.priority));
132
+ }
133
+ const pickBy = cfg.pickBy ?? 'priority';
134
+ const orderedOldTests = matchingOldTests.slice();
135
+ if (pickBy === 'priority') {
136
+ orderedOldTests.sort((a, b) => (PRIORITY_ORDER[a.priority] ?? 99) - (PRIORITY_ORDER[b.priority] ?? 99));
137
+ }
138
+ else if (pickBy === 'random') {
139
+ for (let i = orderedOldTests.length - 1; i > 0; i--) {
140
+ const j = Math.floor(Math.random() * (i + 1));
141
+ [orderedOldTests[i], orderedOldTests[j]] = [orderedOldTests[j], orderedOldTests[i]];
142
+ }
143
+ }
144
+ let pickCount = orderedOldTests.length;
145
+ if (Number.isFinite(oldQuota))
146
+ pickCount = Math.min(oldQuota, orderedOldTests.length);
147
+ const picked = orderedOldTests.slice(0, pickCount);
148
+ const pickedSet = new Set(picked);
149
+ for (const t of allOldTests) {
150
+ if (!pickedSet.has(t))
151
+ t.enabled = false;
152
+ }
153
+ let newQuotaLabel = 'unlimited';
154
+ if (Number.isFinite(newQuota))
155
+ newQuotaLabel = String(newQuota);
156
+ let priorityNote = '';
157
+ if (this.priorityFilter)
158
+ priorityNote = `, priority=[${[...this.priorityFilter].join(',')}]`;
159
+ tag('info').log(`Reuse: loaded ${allOldTests.length} old test(s), running ${picked.length} (pick_by=${pickBy}${priorityNote}), reserving ${newQuotaLabel} for new`);
160
+ const planner = this.explorBot.agentPlanner();
161
+ for (const p of loadedPlans)
162
+ planner.registerPlanInSession(p);
163
+ this.completedPlans.push(...loadedPlans);
164
+ this.printPreview(`Picked old tests (${picked.length}):`, picked);
165
+ let currentPlanRef;
166
+ for (const test of picked) {
167
+ if (this.isLimitReached())
168
+ break;
169
+ const owningPlan = test.plan;
170
+ if (owningPlan && owningPlan !== currentPlanRef) {
171
+ this.explorBot.setCurrentPlan(owningPlan);
172
+ if (owningPlan.url && !this.dryRun)
173
+ await this.explorBot.visit(owningPlan.url);
174
+ currentPlanRef = owningPlan;
175
+ }
176
+ await this.runOneTest(test);
177
+ }
178
+ if (this.isLimitReached() || newQuota <= 0)
179
+ return;
180
+ const subpagesMode = cfg.subpages || 'both';
181
+ if (mainUrl && !this.dryRun)
182
+ await this.explorBot.visit(mainUrl);
183
+ await this.replanAndRun(mainUrl, feature, mainPlan, cfg.styles);
184
+ if (this.isLimitReached())
185
+ return;
186
+ if (subpagesMode === 'same' || subpagesMode === 'both') {
187
+ for (const subPlan of subPlans) {
49
188
  if (this.isLimitReached())
50
189
  break;
51
- const candidates = planner.collectSubPageCandidates(mainPlan, mainUrl || '/').filter((c) => !this.failedSubPages.has(normalizeUrl(c.url)));
52
- if (candidates.length === 0)
53
- break;
54
- const pick = await planner.pickNextSubPage(candidates);
55
- if (!pick)
56
- break;
57
- tag('info').log(`Exploring sub-page: ${pick.url} (${pick.reason})`);
190
+ if (!subPlan.url)
191
+ continue;
58
192
  try {
59
- await this.explorBot.visit(pick.url);
60
- await this.runAllStyles(pick.url, undefined, mainPlan, this.completedPlans);
61
- const subPlan = this.explorBot.getCurrentPlan();
62
- if (subPlan) {
63
- this.completedPlans.push(subPlan);
64
- }
193
+ if (!this.dryRun)
194
+ await this.explorBot.visit(subPlan.url);
195
+ await this.replanAndRun(subPlan.url, undefined, subPlan, cfg.styles);
65
196
  }
66
197
  catch (err) {
67
- this.failedSubPages.add(normalizeUrl(pick.url));
68
- tag('warning').log(`Sub-page exploration failed: ${err instanceof Error ? err.message : err}`);
198
+ this.failedSubPages.add(normalizeUrl(subPlan.url));
199
+ tag('warning').log(`Sub-page re-planning failed: ${err instanceof Error ? err.message : err}`);
69
200
  }
70
201
  }
71
202
  }
72
- this.explorBot.setCurrentPlan(mainPlan);
73
- if (mainUrl)
74
- await this.explorBot.visit(mainUrl);
75
- const savedPath = this.explorBot.savePlans(this.completedPlans);
76
- this.printResults();
77
- await this.explorBot.printSessionAnalysis();
78
- this.printNextSteps(savedPath);
203
+ if (this.isLimitReached())
204
+ return;
205
+ if (subpagesMode === 'new' || subpagesMode === 'both') {
206
+ const knownUrls = new Set();
207
+ for (const p of loadedPlans) {
208
+ if (p.url)
209
+ knownUrls.add(normalizeUrl(p.url));
210
+ }
211
+ await this.discoverNewSubPages(mainPlan, mainUrl, cfg.styles, knownUrls);
212
+ }
79
213
  }
80
- async runAllStyles(pageUrl, feature, parentPlan, completedPlans) {
214
+ async discoverNewSubPages(mainPlan, mainUrl, styles, knownUrls) {
215
+ const planner = this.explorBot.agentPlanner();
216
+ let attempts = 0;
217
+ while (attempts < MAX_SUB_PAGE_ATTEMPTS) {
218
+ attempts++;
219
+ if (this.isLimitReached())
220
+ break;
221
+ const candidates = planner.collectSubPageCandidates(mainPlan, mainUrl || '/').filter((c) => {
222
+ const norm = normalizeUrl(c.url);
223
+ return !this.failedSubPages.has(norm) && !knownUrls.has(norm);
224
+ });
225
+ if (candidates.length === 0)
226
+ break;
227
+ const pick = await planner.pickNextSubPage(candidates);
228
+ if (!pick)
229
+ break;
230
+ tag('info').log(`Exploring sub-page: ${pick.url} (${pick.reason})`);
231
+ try {
232
+ await this.explorBot.visit(pick.url);
233
+ await this.runAllStyles(pick.url, undefined, mainPlan, this.completedPlans, styles);
234
+ const subPlan = this.explorBot.getCurrentPlan();
235
+ if (subPlan && !this.completedPlans.includes(subPlan)) {
236
+ this.completedPlans.push(subPlan);
237
+ }
238
+ knownUrls.add(normalizeUrl(pick.url));
239
+ }
240
+ catch (err) {
241
+ this.failedSubPages.add(normalizeUrl(pick.url));
242
+ tag('warning').log(`Sub-page exploration failed: ${err instanceof Error ? err.message : err}`);
243
+ }
244
+ }
245
+ }
246
+ async replanAndRun(pageUrl, feature, existingPlan, styles) {
247
+ const styleList = styles ?? Object.keys(getStyles());
248
+ for (const style of styleList) {
249
+ if (this.isLimitReached())
250
+ break;
251
+ this.explorBot.setCurrentPlan(existingPlan);
252
+ const opts = { fresh: false, style, completedPlans: this.completedPlans };
253
+ if (this.dryRun)
254
+ opts.noSave = true;
255
+ await this.planWithRetry(feature, opts, pageUrl);
256
+ await this.runPendingTests();
257
+ }
258
+ }
259
+ async runAllStyles(pageUrl, feature, parentPlan, completedPlans, styles) {
260
+ const styleList = styles ?? Object.keys(getStyles());
81
261
  let fresh = true;
82
- for (const style of Object.keys(getStyles())) {
83
- if (!fresh && pageUrl) {
262
+ for (const style of styleList) {
263
+ if (!fresh && pageUrl && !this.dryRun) {
84
264
  await this.explorBot.visit(pageUrl);
85
265
  }
86
266
  const opts = { fresh, style, completedPlans };
87
267
  if (fresh && parentPlan)
88
268
  opts.extend = parentPlan;
269
+ if (this.dryRun)
270
+ opts.noSave = true;
89
271
  await this.planWithRetry(feature, opts, pageUrl);
90
272
  await this.runPendingTests();
91
273
  fresh = false;
92
274
  }
93
275
  }
94
276
  async planWithRetry(feature, opts, pageUrl) {
95
- await this.explorBot.plan(feature, opts);
96
- if (!this.explorBot.lastPlanError)
97
- return;
98
- if (this.explorBot.lastPlanError instanceof ErrorPageError) {
99
- throw this.explorBot.lastPlanError;
100
- }
101
- tag('info').log(`Retrying planning style '${opts.style}'...`);
102
- if (pageUrl)
103
- await this.explorBot.visit(pageUrl);
277
+ const before = new Set(this.explorBot.getCurrentPlan()?.tests ?? []);
104
278
  await this.explorBot.plan(feature, opts);
105
279
  if (this.explorBot.lastPlanError) {
106
- tag('warning').log(`Planning style '${opts.style}' failed after retry, skipping`);
280
+ if (this.explorBot.lastPlanError instanceof ErrorPageError) {
281
+ throw this.explorBot.lastPlanError;
282
+ }
283
+ tag('info').log(`Retrying planning style '${opts.style}'...`);
284
+ if (pageUrl && !this.dryRun)
285
+ await this.explorBot.visit(pageUrl);
286
+ await this.explorBot.plan(feature, opts);
287
+ if (this.explorBot.lastPlanError) {
288
+ tag('warning').log(`Planning style '${opts.style}' failed after retry, skipping`);
289
+ return;
290
+ }
291
+ }
292
+ const planAfter = this.explorBot.getCurrentPlan();
293
+ if (!planAfter)
294
+ return;
295
+ const added = planAfter.tests.filter((t) => !before.has(t));
296
+ if (added.length === 0)
297
+ return;
298
+ const urlNote = pageUrl ? ` for ${pageUrl}` : '';
299
+ this.printPreview(`Planner added ${added.length} new test(s) [style=${opts.style}]${urlNote}:`, added);
300
+ }
301
+ parseConfigure(raw) {
302
+ const cfg = { enabled: false, newRatio: 1.0 };
303
+ if (!raw)
304
+ return cfg;
305
+ const allStyles = Object.keys(getStyles());
306
+ const validSubpages = new Set(['none', 'same', 'new', 'both']);
307
+ let hasReuseSignal = false;
308
+ for (const pair of raw.split(';')) {
309
+ const trimmed = pair.trim();
310
+ if (!trimmed)
311
+ continue;
312
+ const sepMatch = trimmed.match(/^([^:=]+)\s*[:=]\s*(.*)$/);
313
+ if (!sepMatch) {
314
+ tag('warning').log(`Ignoring malformed configure pair: ${trimmed}`);
315
+ continue;
316
+ }
317
+ const key = sepMatch[1].trim().toLowerCase();
318
+ const value = sepMatch[2].trim();
319
+ if (key === 'new') {
320
+ const ratio = parseRatio(value);
321
+ if (ratio == null) {
322
+ tag('warning').log(`Ignoring invalid 'new' value: ${value}`);
323
+ continue;
324
+ }
325
+ cfg.newRatio = ratio;
326
+ hasReuseSignal = true;
327
+ continue;
328
+ }
329
+ if (key === 'from') {
330
+ cfg.fromPath = value;
331
+ hasReuseSignal = true;
332
+ continue;
333
+ }
334
+ if (key === 'style' || key === 'styles') {
335
+ const requested = value
336
+ .split(',')
337
+ .map((s) => s.trim())
338
+ .filter(Boolean);
339
+ const valid = [];
340
+ for (const s of requested) {
341
+ if (allStyles.includes(s)) {
342
+ valid.push(s);
343
+ continue;
344
+ }
345
+ tag('warning').log(`Unknown planning style: ${s}`);
346
+ }
347
+ if (valid.length)
348
+ cfg.styles = valid;
349
+ continue;
350
+ }
351
+ if (key === 'subpages') {
352
+ if (!validSubpages.has(value)) {
353
+ tag('warning').log(`Ignoring invalid 'subpages' value: ${value}`);
354
+ continue;
355
+ }
356
+ cfg.subpages = value;
357
+ continue;
358
+ }
359
+ if (key === 'pick_by' || key === 'pickby' || key === 'pick-by') {
360
+ if (value === 'priority' || value === 'random' || value === 'index') {
361
+ cfg.pickBy = value;
362
+ continue;
363
+ }
364
+ tag('warning').log(`Ignoring invalid 'pick_by' value: ${value} (use priority|random|index)`);
365
+ continue;
366
+ }
367
+ if (key === 'priority' || key === 'priorities') {
368
+ const requested = value
369
+ .split(',')
370
+ .map((s) => s.trim().toLowerCase())
371
+ .filter(Boolean);
372
+ const valid = [];
373
+ for (const p of requested) {
374
+ if (p in PRIORITY_ORDER) {
375
+ valid.push(p);
376
+ continue;
377
+ }
378
+ tag('warning').log(`Unknown priority: ${p} (use ${Object.keys(PRIORITY_ORDER).join('|')})`);
379
+ }
380
+ if (valid.length)
381
+ cfg.priorities = valid;
382
+ continue;
383
+ }
384
+ tag('warning').log(`Unknown configure key: ${key}`);
107
385
  }
386
+ cfg.enabled = hasReuseSignal;
387
+ return cfg;
108
388
  }
109
389
  printResults() {
110
- const allTests = this.completedPlans.flatMap((plan) => plan.tests.filter((t) => t.startTime != null).map((test) => ({ test, planTitle: plan.title })));
390
+ const allTests = this.completedPlans.flatMap((plan) => plan.tests.filter((t) => t.startTime != null).map((test) => ({ test, planTitle: plan.title }))).sort((a, b) => (a.test.startTime ?? 0) - (b.test.startTime ?? 0));
111
391
  if (allTests.length === 0)
112
392
  return;
113
393
  const hasSubPages = this.completedPlans.length > 1;
394
+ const hasOrigin = this.oldTestRefs.size > 0;
114
395
  const rows = allTests.map(({ test, planTitle }, index) => {
115
396
  const durationMs = test.getDurationMs();
116
397
  const duration = durationMs != null ? `${(durationMs / 1000).toFixed(1)}s` : '-';
@@ -127,12 +408,17 @@ export class ExploreCommand extends BaseCommand {
127
408
  Time: duration,
128
409
  Steps: String(Object.keys(test.notes).length),
129
410
  };
411
+ if (hasOrigin) {
412
+ row.Origin = this.originLabel(test);
413
+ }
130
414
  if (hasSubPages) {
131
415
  row.Plan = planTitle;
132
416
  }
133
417
  return row;
134
418
  });
135
419
  const columns = ['#', 'Status', 'Title', 'Priority', 'Time', 'Steps'];
420
+ if (hasOrigin)
421
+ columns.push('Origin');
136
422
  if (hasSubPages)
137
423
  columns.push('Plan');
138
424
  tag('multiline').log(jsonToTable(rows, columns));
@@ -185,11 +471,41 @@ export class ExploreCommand extends BaseCommand {
185
471
  const plan = this.explorBot.getCurrentPlan();
186
472
  if (!plan)
187
473
  return;
474
+ if (this.priorityFilter) {
475
+ for (const t of plan.getPendingTests()) {
476
+ if (!this.priorityFilter.has(t.priority))
477
+ t.enabled = false;
478
+ }
479
+ }
188
480
  for (const test of plan.getPendingTests()) {
189
481
  if (this.isLimitReached())
190
482
  break;
483
+ await this.runOneTest(test);
484
+ }
485
+ }
486
+ async runOneTest(test) {
487
+ if (this.dryRun) {
488
+ test.start();
489
+ test.finish(TestResult.SKIPPED);
490
+ }
491
+ else {
191
492
  await this.explorBot.agentTester().test(test);
192
- this.testsRun++;
193
493
  }
494
+ this.testsRun++;
495
+ }
496
+ }
497
+ function parseRatio(s) {
498
+ const trimmed = s.trim();
499
+ if (!trimmed)
500
+ return null;
501
+ if (trimmed.endsWith('%')) {
502
+ const n = Number.parseFloat(trimmed.slice(0, -1));
503
+ if (Number.isNaN(n) || n < 0 || n > 100)
504
+ return null;
505
+ return n / 100;
194
506
  }
507
+ const n = Number.parseFloat(trimmed);
508
+ if (Number.isNaN(n) || n < 0 || n > 1)
509
+ return null;
510
+ return n;
195
511
  }
@@ -24,6 +24,7 @@ import { ExperienceTracker } from "./experience-tracker.js";
24
24
  import Explorer from "./explorer.js";
25
25
  import { KnowledgeTracker } from "./knowledge-tracker.js";
26
26
  import { Plan } from "./test-plan.js";
27
+ import { parsePlansFromMarkdown } from "./utils/test-plan-markdown.js";
27
28
  import { setVerboseMode, tag } from "./utils/logger.js";
28
29
  import { relativeToCwd } from "./utils/next-steps.js";
29
30
  import { sanitizeFilename } from "./utils/strings.js";
@@ -323,7 +324,8 @@ export class ExplorBot {
323
324
  return undefined;
324
325
  return this.currentPlan;
325
326
  }
326
- this.savePlan();
327
+ if (!opts.noSave)
328
+ this.savePlan();
327
329
  return this.currentPlan;
328
330
  }
329
331
  getPlansDir() {
@@ -348,20 +350,21 @@ export class ExplorBot {
348
350
  this.lastSavedPlanPath = planPath;
349
351
  return planPath;
350
352
  }
351
- generatePlanFilename() {
353
+ generatePlanFilename(feature) {
352
354
  const state = this.explorer?.getStateManager().getCurrentState();
353
355
  const urlPath = state?.url || '/';
354
356
  const urlPart = sanitizeFilename(urlPath) || 'root';
355
357
  const suffix = '.md';
356
- if (!this.planFeature)
358
+ const f = feature ?? this.planFeature;
359
+ if (!f)
357
360
  return urlPart.slice(0, 256 - suffix.length) + suffix;
358
- const featurePart = `_${sanitizeFilename(this.planFeature)}`;
361
+ const featurePart = `_${sanitizeFilename(f)}`;
359
362
  const maxFeatureLen = 256 - suffix.length - urlPart.length;
360
363
  if (maxFeatureLen <= 1)
361
364
  return urlPart.slice(0, 256 - suffix.length) + suffix;
362
365
  return urlPart + featurePart.slice(0, maxFeatureLen) + suffix;
363
366
  }
364
- loadPlan(filename) {
367
+ resolvePlanPath(filename) {
365
368
  let planPath = filename;
366
369
  if (path.isAbsolute(filename)) {
367
370
  if (!existsSync(planPath) && !filename.endsWith('.md')) {
@@ -378,12 +381,23 @@ export class ExplorBot {
378
381
  planPath = path.join(plansDir, `${filename}.md`);
379
382
  }
380
383
  }
384
+ return planPath;
385
+ }
386
+ loadPlan(filename) {
387
+ const planPath = this.resolvePlanPath(filename);
381
388
  if (!existsSync(planPath)) {
382
389
  throw new Error(`Plan file not found: ${planPath}`);
383
390
  }
384
391
  this.setCurrentPlan(Plan.fromMarkdown(planPath));
385
392
  return this.currentPlan;
386
393
  }
394
+ loadPlans(filename) {
395
+ const planPath = this.resolvePlanPath(filename);
396
+ if (!existsSync(planPath)) {
397
+ throw new Error(`Plan file not found: ${planPath}`);
398
+ }
399
+ return parsePlansFromMarkdown(planPath);
400
+ }
387
401
  setCurrentPlan(plan) {
388
402
  this.currentPlan = plan;
389
403
  if (plan && !this.sessionPlans.includes(plan)) {
@@ -145,8 +145,15 @@ export function parsePlansFromMarkdown(filePath) {
145
145
  continue;
146
146
  if (line.startsWith('<!-- test')) {
147
147
  currentTest = null;
148
- const priorityMatch = line.match(/priority:\s*(\w+)/);
148
+ let block = line;
149
+ let j = i;
150
+ while (!block.includes('-->') && j + 1 < lines.length) {
151
+ j++;
152
+ block += `\n${lines[j].trim()}`;
153
+ }
154
+ const priorityMatch = block.match(/priority:\s*(\w+)/);
149
155
  priority = priorityMatch?.[1] || 'normal';
156
+ i = j;
150
157
  continue;
151
158
  }
152
159
  if (line.startsWith('# ') && currentTest === null) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "explorbot",
3
- "version": "0.1.16",
3
+ "version": "0.1.17",
4
4
  "description": "CLI app built with React Ink, CodeceptJS, and Playwright",
5
5
  "license": "Elastic-2.0",
6
6
  "type": "module",
package/src/ai/pilot.ts CHANGED
@@ -320,14 +320,9 @@ export class Pilot implements Agent {
320
320
  - "Edit X" → updated value must be persisted (visible in list/detail). Opening edit is NOT enough; redirect after save with the new value visible IS enough.
321
321
  - Negative tests ("without a name", "invalid", "duplicate", "unauthorized") → success means the system PREVENTED the action with validation/error.
322
322
 
323
- PROVENANCE for create/edit scenarios: the task prompt instructs the tester to inject the
324
- session marker "${task.sessionName ?? ''}" into newly created or edited free-text values.
325
- When that marker COULD be injected, the entity used as proof MUST contain it. A record
326
- matching the goal by text alone but missing the marker is a stale leftover from a prior
327
- run — it is NOT evidence the current scenario produced anything. Vote \`fail\`, not \`pass\`.
328
- This does not apply when the field is restricted (numeric only, enum, etc.) or when the
329
- session_log shows no fillField/type/select actions were attempted at all (in that case
330
- the scenario clearly didn't run — also vote \`fail\`).
323
+ PROVENANCE: the entity you cite as proof must appear by name in <notes> or
324
+ <session_log> tool inputs for THIS run. Name absent from tester activity = stale
325
+ coincidence, vote \`fail\`. Same if no fillField/type/select/click on a target ran.
331
326
 
332
327
  Expected results are MILESTONES, not the goal. Never fail because a milestone (toast, icon, styling)
333
328
  didn't match if the scenario goal IS accomplished.
package/src/ai/tester.ts CHANGED
@@ -730,6 +730,7 @@ export class Tester extends TaskAgent implements Agent {
730
730
  - Use pressKey() for pressing special keys (Enter, Escape, Tab, Arrow keys) or key combinations with modifiers (Ctrl+A, Shift+Delete, etc.)
731
731
  - Use container CSS locators from <page_ui_map> to interact with elements inside sections
732
732
  - Systematically use record({ notes: ["..."] }) to write your findings, planned actions, observations, etc.
733
+ - When creating/editing/deleting a named entity, include its identifier verbatim in the note — Pilot uses it to confirm provenance.
733
734
  - Call record({ notes: ["..."], status: "success" }) when you see success/info message on a page or when expected outcome is achieved
734
735
  - Call record({ notes: ["..."], status: "fail" }) when an expected outcome cannot be achieved or has failed or you see error/alert/warning message on a page
735
736
  - NEVER call record(status: "success") if your last verify() or see() call FAILED. A failed check means the outcome is NOT confirmed — use record(status: "fail") instead, or retry with a different approach.
@@ -3,7 +3,7 @@ import { getStyles } from '../ai/planner/styles.js';
3
3
  import { outputPath } from '../config.js';
4
4
  import { normalizeUrl } from '../state-manager.js';
5
5
  import { Stats } from '../stats.js';
6
- import type { Plan } from '../test-plan.js';
6
+ import { type Plan, type Test, TestResult } from '../test-plan.js';
7
7
  import { getCliName } from '../utils/cli-name.ts';
8
8
  import { ErrorPageError } from '../utils/error-page.ts';
9
9
  import { tag } from '../utils/logger.js';
@@ -13,6 +13,7 @@ import { safeFilename } from '../utils/strings.ts';
13
13
  import { BaseCommand, type Suggestion } from './base-command.js';
14
14
 
15
15
  const MAX_SUB_PAGE_ATTEMPTS = 30;
16
+ const PRIORITY_ORDER: Record<string, number> = { critical: 0, important: 1, high: 2, normal: 3, low: 4 };
16
17
 
17
18
  export class ExploreCommand extends BaseCommand {
18
19
  name = 'explore';
@@ -20,6 +21,8 @@ export class ExploreCommand extends BaseCommand {
20
21
  options = [
21
22
  { flags: '--max-tests <number>', description: 'Maximum number of tests to run' },
22
23
  { flags: '--focus <feature>', description: 'Focus area for exploration' },
24
+ { flags: '--configure <spec>', description: 'Reuse spec: keys new|from|style|subpages|pick_by|priority, e.g. "new:25%;pick_by=random;priority=critical,high"' },
25
+ { flags: '--dry-run', description: 'Mark picked tests as skipped without executing or generating new ones' },
23
26
  ];
24
27
  suggestions: Suggestion[] = [
25
28
  { command: 'navigate <page>', hint: 'go to another page' },
@@ -28,9 +31,12 @@ export class ExploreCommand extends BaseCommand {
28
31
  ];
29
32
 
30
33
  maxTests?: number;
34
+ dryRun = false;
31
35
  private testsRun = 0;
32
36
  private completedPlans: Plan[] = [];
33
37
  private failedSubPages = new Set<string>();
38
+ private oldTestRefs = new Set<Test>();
39
+ private priorityFilter?: Set<string>;
34
40
 
35
41
  async execute(args: string): Promise<void> {
36
42
  const { opts, args: remaining } = this.parseArgs(args);
@@ -39,86 +45,359 @@ export class ExploreCommand extends BaseCommand {
39
45
  }
40
46
 
41
47
  const feature = (opts.focus as string) || remaining.join(' ') || undefined;
48
+ const cfg = this.parseConfigure(opts.configure as string | undefined);
49
+ if (cfg.priorities) this.priorityFilter = new Set(cfg.priorities);
50
+ if (opts.dryRun) this.dryRun = true;
51
+ if (this.dryRun) tag('info').log('Dry-run mode: planner runs to discover new tests; test execution is skipped');
42
52
  Stats.mode ??= 'explore';
43
53
  Stats.focus ??= feature;
44
54
  const mainUrl = this.explorBot.getExplorer().getStateManager().getCurrentState()?.url;
45
55
 
46
- await this.runAllStyles(mainUrl, feature);
56
+ if (cfg.enabled) {
57
+ await this.runReuseMode(mainUrl, feature, cfg);
58
+ } else {
59
+ await this.runFreshMode(mainUrl, feature, cfg.styles);
60
+ }
61
+
62
+ const mainPlan = this.completedPlans[0];
63
+ if (mainPlan) this.explorBot.setCurrentPlan(mainPlan);
64
+ if (this.dryRun) {
65
+ this.printResults();
66
+ return;
67
+ }
68
+ if (mainUrl) await this.explorBot.visit(mainUrl);
69
+ const savedPath = this.explorBot.savePlans(this.completedPlans);
70
+ this.printResults();
71
+ await this.explorBot.printSessionAnalysis();
72
+ this.printNextSteps(savedPath);
73
+ }
74
+
75
+ private originLabel(test: Test): string {
76
+ return this.oldTestRefs.has(test) ? 'OLD' : 'NEW';
77
+ }
78
+
79
+ private printPreview(label: string, tests: Test[]): void {
80
+ if (tests.length === 0) return;
81
+ const lines = [label];
82
+ for (let i = 0; i < tests.length; i++) {
83
+ const t = tests[i];
84
+ lines.push(` ${String(i + 1).padStart(2)}. [${this.originLabel(t)}] [${t.priority.padEnd(9)}] ${t.scenario}`);
85
+ }
86
+ tag('multiline').log(lines.join('\n'));
87
+ }
88
+
89
+ private async runFreshMode(mainUrl: string | undefined, feature: string | undefined, styles?: string[]): Promise<void> {
90
+ await this.runAllStyles(mainUrl, feature, undefined, undefined, styles);
47
91
  const mainPlan = this.explorBot.getCurrentPlan();
48
92
  if (!mainPlan) return;
49
93
  this.completedPlans.push(mainPlan);
50
94
 
51
- if (!feature && !this.isLimitReached()) {
52
- const planner = this.explorBot.agentPlanner();
53
- let attempts = 0;
54
- while (attempts < MAX_SUB_PAGE_ATTEMPTS) {
55
- attempts++;
56
- if (this.isLimitReached()) break;
95
+ if (feature || this.isLimitReached()) return;
96
+
97
+ await this.discoverNewSubPages(mainPlan, mainUrl, styles, new Set());
98
+ }
99
+
100
+ private async runReuseMode(mainUrl: string | undefined, feature: string | undefined, cfg: ConfigureSpec): Promise<void> {
101
+ const filename = cfg.fromPath || this.explorBot.generatePlanFilename(feature);
102
+
103
+ let loadedPlans: Plan[] = [];
104
+ try {
105
+ loadedPlans = this.explorBot.loadPlans(filename);
106
+ } catch (err) {
107
+ tag('warning').log(`Reuse plan not found (${err instanceof Error ? err.message : err}); falling back to fresh planning`);
108
+ await this.runFreshMode(mainUrl, feature, cfg.styles);
109
+ return;
110
+ }
57
111
 
58
- const candidates = planner.collectSubPageCandidates(mainPlan, mainUrl || '/').filter((c) => !this.failedSubPages.has(normalizeUrl(c.url)));
59
- if (candidates.length === 0) break;
112
+ if (loadedPlans.length === 0) {
113
+ tag('warning').log('Reuse plan empty; falling back to fresh planning');
114
+ await this.runFreshMode(mainUrl, feature, cfg.styles);
115
+ return;
116
+ }
117
+
118
+ const mainPlan = loadedPlans[0];
119
+ const subPlans = loadedPlans.slice(1);
60
120
 
61
- const pick = await planner.pickNextSubPage(candidates);
62
- if (!pick) break;
121
+ const totalCap = this.maxTests ?? Number.POSITIVE_INFINITY;
122
+ let newQuota = Number.POSITIVE_INFINITY;
123
+ let oldQuota = Number.POSITIVE_INFINITY;
124
+ if (Number.isFinite(totalCap)) {
125
+ newQuota = Math.round(totalCap * cfg.newRatio);
126
+ oldQuota = Math.max(0, totalCap - newQuota);
127
+ }
63
128
 
64
- tag('info').log(`Exploring sub-page: ${pick.url} (${pick.reason})`);
129
+ for (const p of loadedPlans) {
130
+ for (const t of p.tests) this.oldTestRefs.add(t);
131
+ }
132
+
133
+ const allOldTests = loadedPlans.flatMap((p) => p.tests.filter((t) => t.status === 'pending'));
134
+ let matchingOldTests: Test[] = allOldTests;
135
+ if (cfg.styles) {
136
+ matchingOldTests = matchingOldTests.filter((t) => !t.style || cfg.styles!.includes(t.style));
137
+ }
138
+ if (this.priorityFilter) {
139
+ matchingOldTests = matchingOldTests.filter((t) => this.priorityFilter!.has(t.priority));
140
+ }
141
+ const pickBy = cfg.pickBy ?? 'priority';
142
+ const orderedOldTests = matchingOldTests.slice();
143
+ if (pickBy === 'priority') {
144
+ orderedOldTests.sort((a, b) => (PRIORITY_ORDER[a.priority] ?? 99) - (PRIORITY_ORDER[b.priority] ?? 99));
145
+ } else if (pickBy === 'random') {
146
+ for (let i = orderedOldTests.length - 1; i > 0; i--) {
147
+ const j = Math.floor(Math.random() * (i + 1));
148
+ [orderedOldTests[i], orderedOldTests[j]] = [orderedOldTests[j], orderedOldTests[i]];
149
+ }
150
+ }
151
+
152
+ let pickCount = orderedOldTests.length;
153
+ if (Number.isFinite(oldQuota)) pickCount = Math.min(oldQuota, orderedOldTests.length);
154
+ const picked = orderedOldTests.slice(0, pickCount);
155
+ const pickedSet = new Set(picked);
156
+
157
+ for (const t of allOldTests) {
158
+ if (!pickedSet.has(t)) t.enabled = false;
159
+ }
160
+
161
+ let newQuotaLabel = 'unlimited';
162
+ if (Number.isFinite(newQuota)) newQuotaLabel = String(newQuota);
163
+ let priorityNote = '';
164
+ if (this.priorityFilter) priorityNote = `, priority=[${[...this.priorityFilter].join(',')}]`;
165
+ tag('info').log(`Reuse: loaded ${allOldTests.length} old test(s), running ${picked.length} (pick_by=${pickBy}${priorityNote}), reserving ${newQuotaLabel} for new`);
166
+
167
+ const planner = this.explorBot.agentPlanner();
168
+ for (const p of loadedPlans) planner.registerPlanInSession(p);
169
+
170
+ this.completedPlans.push(...loadedPlans);
171
+
172
+ this.printPreview(`Picked old tests (${picked.length}):`, picked);
173
+
174
+ let currentPlanRef: Plan | undefined;
175
+ for (const test of picked) {
176
+ if (this.isLimitReached()) break;
177
+ const owningPlan = test.plan;
178
+ if (owningPlan && owningPlan !== currentPlanRef) {
179
+ this.explorBot.setCurrentPlan(owningPlan);
180
+ if (owningPlan.url && !this.dryRun) await this.explorBot.visit(owningPlan.url);
181
+ currentPlanRef = owningPlan;
182
+ }
183
+ await this.runOneTest(test);
184
+ }
185
+
186
+ if (this.isLimitReached() || newQuota <= 0) return;
187
+
188
+ const subpagesMode = cfg.subpages || 'both';
189
+
190
+ if (mainUrl && !this.dryRun) await this.explorBot.visit(mainUrl);
191
+ await this.replanAndRun(mainUrl, feature, mainPlan, cfg.styles);
192
+
193
+ if (this.isLimitReached()) return;
194
+
195
+ if (subpagesMode === 'same' || subpagesMode === 'both') {
196
+ for (const subPlan of subPlans) {
197
+ if (this.isLimitReached()) break;
198
+ if (!subPlan.url) continue;
65
199
  try {
66
- await this.explorBot.visit(pick.url);
67
- await this.runAllStyles(pick.url, undefined, mainPlan, this.completedPlans);
68
- const subPlan = this.explorBot.getCurrentPlan();
69
- if (subPlan) {
70
- this.completedPlans.push(subPlan);
71
- }
200
+ if (!this.dryRun) await this.explorBot.visit(subPlan.url);
201
+ await this.replanAndRun(subPlan.url, undefined, subPlan, cfg.styles);
72
202
  } catch (err) {
73
- this.failedSubPages.add(normalizeUrl(pick.url));
74
- tag('warning').log(`Sub-page exploration failed: ${err instanceof Error ? err.message : err}`);
203
+ this.failedSubPages.add(normalizeUrl(subPlan.url));
204
+ tag('warning').log(`Sub-page re-planning failed: ${err instanceof Error ? err.message : err}`);
75
205
  }
76
206
  }
77
207
  }
78
208
 
79
- this.explorBot.setCurrentPlan(mainPlan);
80
- if (mainUrl) await this.explorBot.visit(mainUrl);
81
- const savedPath = this.explorBot.savePlans(this.completedPlans);
82
- this.printResults();
83
- await this.explorBot.printSessionAnalysis();
84
- this.printNextSteps(savedPath);
209
+ if (this.isLimitReached()) return;
210
+
211
+ if (subpagesMode === 'new' || subpagesMode === 'both') {
212
+ const knownUrls = new Set<string>();
213
+ for (const p of loadedPlans) {
214
+ if (p.url) knownUrls.add(normalizeUrl(p.url));
215
+ }
216
+ await this.discoverNewSubPages(mainPlan, mainUrl, cfg.styles, knownUrls);
217
+ }
218
+ }
219
+
220
+ private async discoverNewSubPages(mainPlan: Plan, mainUrl: string | undefined, styles: string[] | undefined, knownUrls: Set<string>): Promise<void> {
221
+ const planner = this.explorBot.agentPlanner();
222
+ let attempts = 0;
223
+ while (attempts < MAX_SUB_PAGE_ATTEMPTS) {
224
+ attempts++;
225
+ if (this.isLimitReached()) break;
226
+
227
+ const candidates = planner.collectSubPageCandidates(mainPlan, mainUrl || '/').filter((c) => {
228
+ const norm = normalizeUrl(c.url);
229
+ return !this.failedSubPages.has(norm) && !knownUrls.has(norm);
230
+ });
231
+ if (candidates.length === 0) break;
232
+
233
+ const pick = await planner.pickNextSubPage(candidates);
234
+ if (!pick) break;
235
+
236
+ tag('info').log(`Exploring sub-page: ${pick.url} (${pick.reason})`);
237
+ try {
238
+ await this.explorBot.visit(pick.url);
239
+ await this.runAllStyles(pick.url, undefined, mainPlan, this.completedPlans, styles);
240
+ const subPlan = this.explorBot.getCurrentPlan();
241
+ if (subPlan && !this.completedPlans.includes(subPlan)) {
242
+ this.completedPlans.push(subPlan);
243
+ }
244
+ knownUrls.add(normalizeUrl(pick.url));
245
+ } catch (err) {
246
+ this.failedSubPages.add(normalizeUrl(pick.url));
247
+ tag('warning').log(`Sub-page exploration failed: ${err instanceof Error ? err.message : err}`);
248
+ }
249
+ }
85
250
  }
86
251
 
87
- private async runAllStyles(pageUrl?: string, feature?: string, parentPlan?: Plan, completedPlans?: Plan[]): Promise<void> {
252
+ private async replanAndRun(pageUrl: string | undefined, feature: string | undefined, existingPlan: Plan, styles?: string[]): Promise<void> {
253
+ const styleList = styles ?? Object.keys(getStyles());
254
+ for (const style of styleList) {
255
+ if (this.isLimitReached()) break;
256
+ this.explorBot.setCurrentPlan(existingPlan);
257
+ const opts: { fresh: boolean; style: string; completedPlans?: Plan[]; noSave?: boolean } = { fresh: false, style, completedPlans: this.completedPlans };
258
+ if (this.dryRun) opts.noSave = true;
259
+ await this.planWithRetry(feature, opts, pageUrl);
260
+ await this.runPendingTests();
261
+ }
262
+ }
263
+
264
+ private async runAllStyles(pageUrl?: string, feature?: string, parentPlan?: Plan, completedPlans?: Plan[], styles?: string[]): Promise<void> {
265
+ const styleList = styles ?? Object.keys(getStyles());
88
266
  let fresh = true;
89
- for (const style of Object.keys(getStyles())) {
90
- if (!fresh && pageUrl) {
267
+ for (const style of styleList) {
268
+ if (!fresh && pageUrl && !this.dryRun) {
91
269
  await this.explorBot.visit(pageUrl);
92
270
  }
93
- const opts: { fresh: boolean; style: string; extend?: Plan; completedPlans?: Plan[] } = { fresh, style, completedPlans };
271
+ const opts: { fresh: boolean; style: string; extend?: Plan; completedPlans?: Plan[]; noSave?: boolean } = { fresh, style, completedPlans };
94
272
  if (fresh && parentPlan) opts.extend = parentPlan;
273
+ if (this.dryRun) opts.noSave = true;
95
274
  await this.planWithRetry(feature, opts, pageUrl);
96
275
  await this.runPendingTests();
97
276
  fresh = false;
98
277
  }
99
278
  }
100
279
 
101
- private async planWithRetry(feature: string | undefined, opts: { fresh: boolean; style: string; extend?: Plan; completedPlans?: Plan[] }, pageUrl?: string): Promise<void> {
102
- await this.explorBot.plan(feature, opts);
103
- if (!this.explorBot.lastPlanError) return;
104
- if (this.explorBot.lastPlanError instanceof ErrorPageError) {
105
- throw this.explorBot.lastPlanError;
106
- }
280
+ private async planWithRetry(feature: string | undefined, opts: { fresh: boolean; style: string; extend?: Plan; completedPlans?: Plan[]; noSave?: boolean }, pageUrl?: string): Promise<void> {
281
+ const before = new Set(this.explorBot.getCurrentPlan()?.tests ?? []);
107
282
 
108
- tag('info').log(`Retrying planning style '${opts.style}'...`);
109
- if (pageUrl) await this.explorBot.visit(pageUrl);
110
283
  await this.explorBot.plan(feature, opts);
111
284
  if (this.explorBot.lastPlanError) {
112
- tag('warning').log(`Planning style '${opts.style}' failed after retry, skipping`);
285
+ if (this.explorBot.lastPlanError instanceof ErrorPageError) {
286
+ throw this.explorBot.lastPlanError;
287
+ }
288
+ tag('info').log(`Retrying planning style '${opts.style}'...`);
289
+ if (pageUrl && !this.dryRun) await this.explorBot.visit(pageUrl);
290
+ await this.explorBot.plan(feature, opts);
291
+ if (this.explorBot.lastPlanError) {
292
+ tag('warning').log(`Planning style '${opts.style}' failed after retry, skipping`);
293
+ return;
294
+ }
295
+ }
296
+
297
+ const planAfter = this.explorBot.getCurrentPlan();
298
+ if (!planAfter) return;
299
+ const added = planAfter.tests.filter((t) => !before.has(t));
300
+ if (added.length === 0) return;
301
+ const urlNote = pageUrl ? ` for ${pageUrl}` : '';
302
+ this.printPreview(`Planner added ${added.length} new test(s) [style=${opts.style}]${urlNote}:`, added);
303
+ }
304
+
305
+ private parseConfigure(raw: string | undefined): ConfigureSpec {
306
+ const cfg: ConfigureSpec = { enabled: false, newRatio: 1.0 };
307
+ if (!raw) return cfg;
308
+
309
+ const allStyles = Object.keys(getStyles());
310
+ const validSubpages = new Set(['none', 'same', 'new', 'both']);
311
+ let hasReuseSignal = false;
312
+
313
+ for (const pair of raw.split(';')) {
314
+ const trimmed = pair.trim();
315
+ if (!trimmed) continue;
316
+ const sepMatch = trimmed.match(/^([^:=]+)\s*[:=]\s*(.*)$/);
317
+ if (!sepMatch) {
318
+ tag('warning').log(`Ignoring malformed configure pair: ${trimmed}`);
319
+ continue;
320
+ }
321
+ const key = sepMatch[1].trim().toLowerCase();
322
+ const value = sepMatch[2].trim();
323
+
324
+ if (key === 'new') {
325
+ const ratio = parseRatio(value);
326
+ if (ratio == null) {
327
+ tag('warning').log(`Ignoring invalid 'new' value: ${value}`);
328
+ continue;
329
+ }
330
+ cfg.newRatio = ratio;
331
+ hasReuseSignal = true;
332
+ continue;
333
+ }
334
+ if (key === 'from') {
335
+ cfg.fromPath = value;
336
+ hasReuseSignal = true;
337
+ continue;
338
+ }
339
+ if (key === 'style' || key === 'styles') {
340
+ const requested = value
341
+ .split(',')
342
+ .map((s) => s.trim())
343
+ .filter(Boolean);
344
+ const valid: string[] = [];
345
+ for (const s of requested) {
346
+ if (allStyles.includes(s)) {
347
+ valid.push(s);
348
+ continue;
349
+ }
350
+ tag('warning').log(`Unknown planning style: ${s}`);
351
+ }
352
+ if (valid.length) cfg.styles = valid;
353
+ continue;
354
+ }
355
+ if (key === 'subpages') {
356
+ if (!validSubpages.has(value)) {
357
+ tag('warning').log(`Ignoring invalid 'subpages' value: ${value}`);
358
+ continue;
359
+ }
360
+ cfg.subpages = value as ConfigureSpec['subpages'];
361
+ continue;
362
+ }
363
+ if (key === 'pick_by' || key === 'pickby' || key === 'pick-by') {
364
+ if (value === 'priority' || value === 'random' || value === 'index') {
365
+ cfg.pickBy = value;
366
+ continue;
367
+ }
368
+ tag('warning').log(`Ignoring invalid 'pick_by' value: ${value} (use priority|random|index)`);
369
+ continue;
370
+ }
371
+ if (key === 'priority' || key === 'priorities') {
372
+ const requested = value
373
+ .split(',')
374
+ .map((s) => s.trim().toLowerCase())
375
+ .filter(Boolean);
376
+ const valid: string[] = [];
377
+ for (const p of requested) {
378
+ if (p in PRIORITY_ORDER) {
379
+ valid.push(p);
380
+ continue;
381
+ }
382
+ tag('warning').log(`Unknown priority: ${p} (use ${Object.keys(PRIORITY_ORDER).join('|')})`);
383
+ }
384
+ if (valid.length) cfg.priorities = valid;
385
+ continue;
386
+ }
387
+ tag('warning').log(`Unknown configure key: ${key}`);
113
388
  }
389
+
390
+ cfg.enabled = hasReuseSignal;
391
+ return cfg;
114
392
  }
115
393
 
116
394
  private printResults(): void {
117
- const allTests = this.completedPlans.flatMap((plan) => plan.tests.filter((t) => t.startTime != null).map((test) => ({ test, planTitle: plan.title })));
395
+ const allTests = this.completedPlans.flatMap((plan) => plan.tests.filter((t) => t.startTime != null).map((test) => ({ test, planTitle: plan.title }))).sort((a, b) => (a.test.startTime ?? 0) - (b.test.startTime ?? 0));
118
396
 
119
397
  if (allTests.length === 0) return;
120
398
 
121
399
  const hasSubPages = this.completedPlans.length > 1;
400
+ const hasOrigin = this.oldTestRefs.size > 0;
122
401
  const rows = allTests.map(({ test, planTitle }, index) => {
123
402
  const durationMs = test.getDurationMs();
124
403
  const duration = durationMs != null ? `${(durationMs / 1000).toFixed(1)}s` : '-';
@@ -133,12 +412,16 @@ export class ExploreCommand extends BaseCommand {
133
412
  Time: duration,
134
413
  Steps: String(Object.keys(test.notes).length),
135
414
  };
415
+ if (hasOrigin) {
416
+ row.Origin = this.originLabel(test);
417
+ }
136
418
  if (hasSubPages) {
137
419
  row.Plan = planTitle;
138
420
  }
139
421
  return row;
140
422
  });
141
423
  const columns = ['#', 'Status', 'Title', 'Priority', 'Time', 'Steps'];
424
+ if (hasOrigin) columns.push('Origin');
142
425
  if (hasSubPages) columns.push('Plan');
143
426
  tag('multiline').log(jsonToTable(rows, columns));
144
427
  tag('info').log(`${figureSet.tick} ${allTests.length} tests completed`);
@@ -197,10 +480,47 @@ export class ExploreCommand extends BaseCommand {
197
480
  private async runPendingTests(): Promise<void> {
198
481
  const plan = this.explorBot.getCurrentPlan();
199
482
  if (!plan) return;
483
+ if (this.priorityFilter) {
484
+ for (const t of plan.getPendingTests()) {
485
+ if (!this.priorityFilter.has(t.priority)) t.enabled = false;
486
+ }
487
+ }
200
488
  for (const test of plan.getPendingTests()) {
201
489
  if (this.isLimitReached()) break;
490
+ await this.runOneTest(test);
491
+ }
492
+ }
493
+
494
+ private async runOneTest(test: Test): Promise<void> {
495
+ if (this.dryRun) {
496
+ test.start();
497
+ test.finish(TestResult.SKIPPED);
498
+ } else {
202
499
  await this.explorBot.agentTester().test(test);
203
- this.testsRun++;
204
500
  }
501
+ this.testsRun++;
502
+ }
503
+ }
504
+
505
+ interface ConfigureSpec {
506
+ enabled: boolean;
507
+ newRatio: number;
508
+ fromPath?: string;
509
+ styles?: string[];
510
+ subpages?: 'none' | 'same' | 'new' | 'both';
511
+ pickBy?: 'priority' | 'random' | 'index';
512
+ priorities?: string[];
513
+ }
514
+
515
+ function parseRatio(s: string): number | null {
516
+ const trimmed = s.trim();
517
+ if (!trimmed) return null;
518
+ if (trimmed.endsWith('%')) {
519
+ const n = Number.parseFloat(trimmed.slice(0, -1));
520
+ if (Number.isNaN(n) || n < 0 || n > 100) return null;
521
+ return n / 100;
205
522
  }
523
+ const n = Number.parseFloat(trimmed);
524
+ if (Number.isNaN(n) || n < 0 || n > 1) return null;
525
+ return n;
206
526
  }
package/src/explorbot.ts CHANGED
@@ -27,6 +27,7 @@ import { KnowledgeTracker } from './knowledge-tracker.ts';
27
27
  import { WebPageState } from './state-manager.ts';
28
28
  import type { Suite } from './suite.ts';
29
29
  import { Plan, type Test } from './test-plan.ts';
30
+ import { parsePlansFromMarkdown } from './utils/test-plan-markdown.ts';
30
31
  import { setVerboseMode, tag } from './utils/logger.ts';
31
32
  import { relativeToCwd } from './utils/next-steps.ts';
32
33
  import { sanitizeFilename } from './utils/strings.ts';
@@ -349,7 +350,7 @@ export class ExplorBot {
349
350
  this.agents.planner = undefined;
350
351
  }
351
352
 
352
- async plan(feature?: string, opts: { fresh?: boolean; style?: string; extend?: Plan; completedPlans?: Plan[] } = {}) {
353
+ async plan(feature?: string, opts: { fresh?: boolean; style?: string; extend?: Plan; completedPlans?: Plan[]; noSave?: boolean } = {}) {
353
354
  this.planFeature = feature;
354
355
 
355
356
  if (opts.fresh) {
@@ -379,7 +380,7 @@ export class ExplorBot {
379
380
  return this.currentPlan;
380
381
  }
381
382
 
382
- this.savePlan();
383
+ if (!opts.noSave) this.savePlan();
383
384
 
384
385
  return this.currentPlan;
385
386
  }
@@ -409,19 +410,20 @@ export class ExplorBot {
409
410
  return planPath;
410
411
  }
411
412
 
412
- generatePlanFilename(): string {
413
+ generatePlanFilename(feature?: string): string {
413
414
  const state = this.explorer?.getStateManager().getCurrentState();
414
415
  const urlPath = state?.url || '/';
415
416
  const urlPart = sanitizeFilename(urlPath) || 'root';
416
417
  const suffix = '.md';
417
- if (!this.planFeature) return urlPart.slice(0, 256 - suffix.length) + suffix;
418
- const featurePart = `_${sanitizeFilename(this.planFeature)}`;
418
+ const f = feature ?? this.planFeature;
419
+ if (!f) return urlPart.slice(0, 256 - suffix.length) + suffix;
420
+ const featurePart = `_${sanitizeFilename(f)}`;
419
421
  const maxFeatureLen = 256 - suffix.length - urlPart.length;
420
422
  if (maxFeatureLen <= 1) return urlPart.slice(0, 256 - suffix.length) + suffix;
421
423
  return urlPart + featurePart.slice(0, maxFeatureLen) + suffix;
422
424
  }
423
425
 
424
- loadPlan(filename: string): Plan {
426
+ resolvePlanPath(filename: string): string {
425
427
  let planPath = filename;
426
428
 
427
429
  if (path.isAbsolute(filename)) {
@@ -438,14 +440,26 @@ export class ExplorBot {
438
440
  }
439
441
  }
440
442
 
443
+ return planPath;
444
+ }
445
+
446
+ loadPlan(filename: string): Plan {
447
+ const planPath = this.resolvePlanPath(filename);
441
448
  if (!existsSync(planPath)) {
442
449
  throw new Error(`Plan file not found: ${planPath}`);
443
450
  }
444
-
445
451
  this.setCurrentPlan(Plan.fromMarkdown(planPath));
446
452
  return this.currentPlan!;
447
453
  }
448
454
 
455
+ loadPlans(filename: string): Plan[] {
456
+ const planPath = this.resolvePlanPath(filename);
457
+ if (!existsSync(planPath)) {
458
+ throw new Error(`Plan file not found: ${planPath}`);
459
+ }
460
+ return parsePlansFromMarkdown(planPath);
461
+ }
462
+
449
463
  setCurrentPlan(plan?: Plan): void {
450
464
  this.currentPlan = plan;
451
465
  if (plan && !this.sessionPlans.includes(plan)) {
@@ -149,8 +149,15 @@ export function parsePlansFromMarkdown(filePath: string): Plan[] {
149
149
 
150
150
  if (line.startsWith('<!-- test')) {
151
151
  currentTest = null;
152
- const priorityMatch = line.match(/priority:\s*(\w+)/);
152
+ let block = line;
153
+ let j = i;
154
+ while (!block.includes('-->') && j + 1 < lines.length) {
155
+ j++;
156
+ block += `\n${lines[j].trim()}`;
157
+ }
158
+ const priorityMatch = block.match(/priority:\s*(\w+)/);
153
159
  priority = (priorityMatch?.[1] as 'critical' | 'important' | 'high' | 'normal' | 'low') || 'normal';
160
+ i = j;
154
161
  continue;
155
162
  }
156
163