explorbot 0.1.15 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@ import { getStyles } from '../ai/planner/styles.js';
3
3
  import { outputPath } from '../config.js';
4
4
  import { normalizeUrl } from '../state-manager.js';
5
5
  import { Stats } from '../stats.js';
6
+ import { TestResult } from '../test-plan.js';
6
7
  import { getCliName } from "../utils/cli-name.js";
7
8
  import { ErrorPageError } from "../utils/error-page.js";
8
9
  import { tag } from '../utils/logger.js';
@@ -11,12 +12,15 @@ import { printNextSteps, relativeToCwd } from "../utils/next-steps.js";
11
12
  import { safeFilename } from "../utils/strings.js";
12
13
  import { BaseCommand } from './base-command.js';
13
14
  const MAX_SUB_PAGE_ATTEMPTS = 30;
15
+ const PRIORITY_ORDER = { critical: 0, important: 1, high: 2, normal: 3, low: 4 };
14
16
  export class ExploreCommand extends BaseCommand {
15
17
  name = 'explore';
16
18
  description = 'Start web exploration';
17
19
  options = [
18
20
  { flags: '--max-tests <number>', description: 'Maximum number of tests to run' },
19
21
  { flags: '--focus <feature>', description: 'Focus area for exploration' },
22
+ { flags: '--configure <spec>', description: 'Reuse spec: keys new|from|style|subpages|pick_by|priority, e.g. "new:25%;pick_by=random;priority=critical,high"' },
23
+ { flags: '--dry-run', description: 'Mark picked tests as skipped without executing or generating new ones' },
20
24
  ];
21
25
  suggestions = [
22
26
  { command: 'navigate <page>', hint: 'go to another page' },
@@ -24,93 +28,370 @@ export class ExploreCommand extends BaseCommand {
24
28
  { command: 'plan <feature>', hint: 'plan testing' },
25
29
  ];
26
30
  maxTests;
31
+ dryRun = false;
27
32
  testsRun = 0;
28
33
  completedPlans = [];
29
34
  failedSubPages = new Set();
35
+ oldTestRefs = new Set();
36
+ priorityFilter;
30
37
  async execute(args) {
31
38
  const { opts, args: remaining } = this.parseArgs(args);
32
39
  if (opts.maxTests) {
33
40
  this.maxTests = Number.parseInt(opts.maxTests, 10);
34
41
  }
35
42
  const feature = opts.focus || remaining.join(' ') || undefined;
43
+ const cfg = this.parseConfigure(opts.configure);
44
+ if (cfg.priorities)
45
+ this.priorityFilter = new Set(cfg.priorities);
46
+ if (opts.dryRun)
47
+ this.dryRun = true;
48
+ if (this.dryRun)
49
+ tag('info').log('Dry-run mode: planner runs to discover new tests; test execution is skipped');
36
50
  Stats.mode ??= 'explore';
37
51
  Stats.focus ??= feature;
38
52
  const mainUrl = this.explorBot.getExplorer().getStateManager().getCurrentState()?.url;
39
- await this.runAllStyles(mainUrl, feature);
53
+ if (cfg.enabled) {
54
+ await this.runReuseMode(mainUrl, feature, cfg);
55
+ }
56
+ else {
57
+ await this.runFreshMode(mainUrl, feature, cfg.styles);
58
+ }
59
+ const mainPlan = this.completedPlans[0];
60
+ if (mainPlan)
61
+ this.explorBot.setCurrentPlan(mainPlan);
62
+ if (this.dryRun) {
63
+ this.printResults();
64
+ return;
65
+ }
66
+ if (mainUrl)
67
+ await this.explorBot.visit(mainUrl);
68
+ const savedPath = this.explorBot.savePlans(this.completedPlans);
69
+ this.printResults();
70
+ await this.explorBot.printSessionAnalysis();
71
+ this.printNextSteps(savedPath);
72
+ }
73
+ originLabel(test) {
74
+ return this.oldTestRefs.has(test) ? 'OLD' : 'NEW';
75
+ }
76
+ printPreview(label, tests) {
77
+ if (tests.length === 0)
78
+ return;
79
+ const lines = [label];
80
+ for (let i = 0; i < tests.length; i++) {
81
+ const t = tests[i];
82
+ lines.push(` ${String(i + 1).padStart(2)}. [${this.originLabel(t)}] [${t.priority.padEnd(9)}] ${t.scenario}`);
83
+ }
84
+ tag('multiline').log(lines.join('\n'));
85
+ }
86
+ async runFreshMode(mainUrl, feature, styles) {
87
+ await this.runAllStyles(mainUrl, feature, undefined, undefined, styles);
40
88
  const mainPlan = this.explorBot.getCurrentPlan();
41
89
  if (!mainPlan)
42
90
  return;
43
91
  this.completedPlans.push(mainPlan);
44
- if (!feature && !this.isLimitReached()) {
45
- const planner = this.explorBot.agentPlanner();
46
- let attempts = 0;
47
- while (attempts < MAX_SUB_PAGE_ATTEMPTS) {
48
- attempts++;
92
+ if (feature || this.isLimitReached())
93
+ return;
94
+ await this.discoverNewSubPages(mainPlan, mainUrl, styles, new Set());
95
+ }
96
+ async runReuseMode(mainUrl, feature, cfg) {
97
+ const filename = cfg.fromPath || this.explorBot.generatePlanFilename(feature);
98
+ let loadedPlans = [];
99
+ try {
100
+ loadedPlans = this.explorBot.loadPlans(filename);
101
+ }
102
+ catch (err) {
103
+ tag('warning').log(`Reuse plan not found (${err instanceof Error ? err.message : err}); falling back to fresh planning`);
104
+ await this.runFreshMode(mainUrl, feature, cfg.styles);
105
+ return;
106
+ }
107
+ if (loadedPlans.length === 0) {
108
+ tag('warning').log('Reuse plan empty; falling back to fresh planning');
109
+ await this.runFreshMode(mainUrl, feature, cfg.styles);
110
+ return;
111
+ }
112
+ const mainPlan = loadedPlans[0];
113
+ const subPlans = loadedPlans.slice(1);
114
+ const totalCap = this.maxTests ?? Number.POSITIVE_INFINITY;
115
+ let newQuota = Number.POSITIVE_INFINITY;
116
+ let oldQuota = Number.POSITIVE_INFINITY;
117
+ if (Number.isFinite(totalCap)) {
118
+ newQuota = Math.round(totalCap * cfg.newRatio);
119
+ oldQuota = Math.max(0, totalCap - newQuota);
120
+ }
121
+ for (const p of loadedPlans) {
122
+ for (const t of p.tests)
123
+ this.oldTestRefs.add(t);
124
+ }
125
+ const allOldTests = loadedPlans.flatMap((p) => p.tests.filter((t) => t.status === 'pending'));
126
+ let matchingOldTests = allOldTests;
127
+ if (cfg.styles) {
128
+ matchingOldTests = matchingOldTests.filter((t) => !t.style || cfg.styles.includes(t.style));
129
+ }
130
+ if (this.priorityFilter) {
131
+ matchingOldTests = matchingOldTests.filter((t) => this.priorityFilter.has(t.priority));
132
+ }
133
+ const pickBy = cfg.pickBy ?? 'priority';
134
+ const orderedOldTests = matchingOldTests.slice();
135
+ if (pickBy === 'priority') {
136
+ orderedOldTests.sort((a, b) => (PRIORITY_ORDER[a.priority] ?? 99) - (PRIORITY_ORDER[b.priority] ?? 99));
137
+ }
138
+ else if (pickBy === 'random') {
139
+ for (let i = orderedOldTests.length - 1; i > 0; i--) {
140
+ const j = Math.floor(Math.random() * (i + 1));
141
+ [orderedOldTests[i], orderedOldTests[j]] = [orderedOldTests[j], orderedOldTests[i]];
142
+ }
143
+ }
144
+ let pickCount = orderedOldTests.length;
145
+ if (Number.isFinite(oldQuota))
146
+ pickCount = Math.min(oldQuota, orderedOldTests.length);
147
+ const picked = orderedOldTests.slice(0, pickCount);
148
+ const pickedSet = new Set(picked);
149
+ for (const t of allOldTests) {
150
+ if (!pickedSet.has(t))
151
+ t.enabled = false;
152
+ }
153
+ let newQuotaLabel = 'unlimited';
154
+ if (Number.isFinite(newQuota))
155
+ newQuotaLabel = String(newQuota);
156
+ let priorityNote = '';
157
+ if (this.priorityFilter)
158
+ priorityNote = `, priority=[${[...this.priorityFilter].join(',')}]`;
159
+ tag('info').log(`Reuse: loaded ${allOldTests.length} old test(s), running ${picked.length} (pick_by=${pickBy}${priorityNote}), reserving ${newQuotaLabel} for new`);
160
+ const planner = this.explorBot.agentPlanner();
161
+ for (const p of loadedPlans)
162
+ planner.registerPlanInSession(p);
163
+ this.completedPlans.push(...loadedPlans);
164
+ this.printPreview(`Picked old tests (${picked.length}):`, picked);
165
+ let currentPlanRef;
166
+ for (const test of picked) {
167
+ if (this.isLimitReached())
168
+ break;
169
+ const owningPlan = test.plan;
170
+ if (owningPlan && owningPlan !== currentPlanRef) {
171
+ this.explorBot.setCurrentPlan(owningPlan);
172
+ if (owningPlan.url && !this.dryRun)
173
+ await this.explorBot.visit(owningPlan.url);
174
+ currentPlanRef = owningPlan;
175
+ }
176
+ await this.runOneTest(test);
177
+ }
178
+ if (this.isLimitReached() || newQuota <= 0)
179
+ return;
180
+ const subpagesMode = cfg.subpages || 'both';
181
+ if (mainUrl && !this.dryRun)
182
+ await this.explorBot.visit(mainUrl);
183
+ await this.replanAndRun(mainUrl, feature, mainPlan, cfg.styles);
184
+ if (this.isLimitReached())
185
+ return;
186
+ if (subpagesMode === 'same' || subpagesMode === 'both') {
187
+ for (const subPlan of subPlans) {
49
188
  if (this.isLimitReached())
50
189
  break;
51
- const candidates = planner.collectSubPageCandidates(mainPlan, mainUrl || '/').filter((c) => !this.failedSubPages.has(normalizeUrl(c.url)));
52
- if (candidates.length === 0)
53
- break;
54
- const pick = await planner.pickNextSubPage(candidates);
55
- if (!pick)
56
- break;
57
- tag('info').log(`Exploring sub-page: ${pick.url} (${pick.reason})`);
190
+ if (!subPlan.url)
191
+ continue;
58
192
  try {
59
- await this.explorBot.visit(pick.url);
60
- await this.runAllStyles(pick.url, undefined, mainPlan, this.completedPlans);
61
- const subPlan = this.explorBot.getCurrentPlan();
62
- if (subPlan) {
63
- this.completedPlans.push(subPlan);
64
- }
193
+ if (!this.dryRun)
194
+ await this.explorBot.visit(subPlan.url);
195
+ await this.replanAndRun(subPlan.url, undefined, subPlan, cfg.styles);
65
196
  }
66
197
  catch (err) {
67
- this.failedSubPages.add(normalizeUrl(pick.url));
68
- tag('warning').log(`Sub-page exploration failed: ${err instanceof Error ? err.message : err}`);
198
+ this.failedSubPages.add(normalizeUrl(subPlan.url));
199
+ tag('warning').log(`Sub-page re-planning failed: ${err instanceof Error ? err.message : err}`);
69
200
  }
70
201
  }
71
202
  }
72
- this.explorBot.setCurrentPlan(mainPlan);
73
- if (mainUrl)
74
- await this.explorBot.visit(mainUrl);
75
- const savedPath = this.explorBot.savePlans(this.completedPlans);
76
- this.printResults();
77
- await this.explorBot.printSessionAnalysis();
78
- this.printNextSteps(savedPath);
203
+ if (this.isLimitReached())
204
+ return;
205
+ if (subpagesMode === 'new' || subpagesMode === 'both') {
206
+ const knownUrls = new Set();
207
+ for (const p of loadedPlans) {
208
+ if (p.url)
209
+ knownUrls.add(normalizeUrl(p.url));
210
+ }
211
+ await this.discoverNewSubPages(mainPlan, mainUrl, cfg.styles, knownUrls);
212
+ }
79
213
  }
80
- async runAllStyles(pageUrl, feature, parentPlan, completedPlans) {
214
+ async discoverNewSubPages(mainPlan, mainUrl, styles, knownUrls) {
215
+ const planner = this.explorBot.agentPlanner();
216
+ let attempts = 0;
217
+ while (attempts < MAX_SUB_PAGE_ATTEMPTS) {
218
+ attempts++;
219
+ if (this.isLimitReached())
220
+ break;
221
+ const candidates = planner.collectSubPageCandidates(mainPlan, mainUrl || '/').filter((c) => {
222
+ const norm = normalizeUrl(c.url);
223
+ return !this.failedSubPages.has(norm) && !knownUrls.has(norm);
224
+ });
225
+ if (candidates.length === 0)
226
+ break;
227
+ const pick = await planner.pickNextSubPage(candidates);
228
+ if (!pick)
229
+ break;
230
+ tag('info').log(`Exploring sub-page: ${pick.url} (${pick.reason})`);
231
+ try {
232
+ await this.explorBot.visit(pick.url);
233
+ await this.runAllStyles(pick.url, undefined, mainPlan, this.completedPlans, styles);
234
+ const subPlan = this.explorBot.getCurrentPlan();
235
+ if (subPlan && !this.completedPlans.includes(subPlan)) {
236
+ this.completedPlans.push(subPlan);
237
+ }
238
+ knownUrls.add(normalizeUrl(pick.url));
239
+ }
240
+ catch (err) {
241
+ this.failedSubPages.add(normalizeUrl(pick.url));
242
+ tag('warning').log(`Sub-page exploration failed: ${err instanceof Error ? err.message : err}`);
243
+ }
244
+ }
245
+ }
246
+ async replanAndRun(pageUrl, feature, existingPlan, styles) {
247
+ const styleList = styles ?? Object.keys(getStyles());
248
+ for (const style of styleList) {
249
+ if (this.isLimitReached())
250
+ break;
251
+ this.explorBot.setCurrentPlan(existingPlan);
252
+ const opts = { fresh: false, style, completedPlans: this.completedPlans };
253
+ if (this.dryRun)
254
+ opts.noSave = true;
255
+ await this.planWithRetry(feature, opts, pageUrl);
256
+ await this.runPendingTests();
257
+ }
258
+ }
259
+ async runAllStyles(pageUrl, feature, parentPlan, completedPlans, styles) {
260
+ const styleList = styles ?? Object.keys(getStyles());
81
261
  let fresh = true;
82
- for (const style of Object.keys(getStyles())) {
83
- if (!fresh && pageUrl) {
262
+ for (const style of styleList) {
263
+ if (!fresh && pageUrl && !this.dryRun) {
84
264
  await this.explorBot.visit(pageUrl);
85
265
  }
86
266
  const opts = { fresh, style, completedPlans };
87
267
  if (fresh && parentPlan)
88
268
  opts.extend = parentPlan;
269
+ if (this.dryRun)
270
+ opts.noSave = true;
89
271
  await this.planWithRetry(feature, opts, pageUrl);
90
272
  await this.runPendingTests();
91
273
  fresh = false;
92
274
  }
93
275
  }
94
276
  async planWithRetry(feature, opts, pageUrl) {
95
- await this.explorBot.plan(feature, opts);
96
- if (!this.explorBot.lastPlanError)
97
- return;
98
- if (this.explorBot.lastPlanError instanceof ErrorPageError) {
99
- throw this.explorBot.lastPlanError;
100
- }
101
- tag('info').log(`Retrying planning style '${opts.style}'...`);
102
- if (pageUrl)
103
- await this.explorBot.visit(pageUrl);
277
+ const before = new Set(this.explorBot.getCurrentPlan()?.tests ?? []);
104
278
  await this.explorBot.plan(feature, opts);
105
279
  if (this.explorBot.lastPlanError) {
106
- tag('warning').log(`Planning style '${opts.style}' failed after retry, skipping`);
280
+ if (this.explorBot.lastPlanError instanceof ErrorPageError) {
281
+ throw this.explorBot.lastPlanError;
282
+ }
283
+ tag('info').log(`Retrying planning style '${opts.style}'...`);
284
+ if (pageUrl && !this.dryRun)
285
+ await this.explorBot.visit(pageUrl);
286
+ await this.explorBot.plan(feature, opts);
287
+ if (this.explorBot.lastPlanError) {
288
+ tag('warning').log(`Planning style '${opts.style}' failed after retry, skipping`);
289
+ return;
290
+ }
291
+ }
292
+ const planAfter = this.explorBot.getCurrentPlan();
293
+ if (!planAfter)
294
+ return;
295
+ const added = planAfter.tests.filter((t) => !before.has(t));
296
+ if (added.length === 0)
297
+ return;
298
+ const urlNote = pageUrl ? ` for ${pageUrl}` : '';
299
+ this.printPreview(`Planner added ${added.length} new test(s) [style=${opts.style}]${urlNote}:`, added);
300
+ }
301
+ parseConfigure(raw) {
302
+ const cfg = { enabled: false, newRatio: 1.0 };
303
+ if (!raw)
304
+ return cfg;
305
+ const allStyles = Object.keys(getStyles());
306
+ const validSubpages = new Set(['none', 'same', 'new', 'both']);
307
+ let hasReuseSignal = false;
308
+ for (const pair of raw.split(';')) {
309
+ const trimmed = pair.trim();
310
+ if (!trimmed)
311
+ continue;
312
+ const sepMatch = trimmed.match(/^([^:=]+)\s*[:=]\s*(.*)$/);
313
+ if (!sepMatch) {
314
+ tag('warning').log(`Ignoring malformed configure pair: ${trimmed}`);
315
+ continue;
316
+ }
317
+ const key = sepMatch[1].trim().toLowerCase();
318
+ const value = sepMatch[2].trim();
319
+ if (key === 'new') {
320
+ const ratio = parseRatio(value);
321
+ if (ratio == null) {
322
+ tag('warning').log(`Ignoring invalid 'new' value: ${value}`);
323
+ continue;
324
+ }
325
+ cfg.newRatio = ratio;
326
+ hasReuseSignal = true;
327
+ continue;
328
+ }
329
+ if (key === 'from') {
330
+ cfg.fromPath = value;
331
+ hasReuseSignal = true;
332
+ continue;
333
+ }
334
+ if (key === 'style' || key === 'styles') {
335
+ const requested = value
336
+ .split(',')
337
+ .map((s) => s.trim())
338
+ .filter(Boolean);
339
+ const valid = [];
340
+ for (const s of requested) {
341
+ if (allStyles.includes(s)) {
342
+ valid.push(s);
343
+ continue;
344
+ }
345
+ tag('warning').log(`Unknown planning style: ${s}`);
346
+ }
347
+ if (valid.length)
348
+ cfg.styles = valid;
349
+ continue;
350
+ }
351
+ if (key === 'subpages') {
352
+ if (!validSubpages.has(value)) {
353
+ tag('warning').log(`Ignoring invalid 'subpages' value: ${value}`);
354
+ continue;
355
+ }
356
+ cfg.subpages = value;
357
+ continue;
358
+ }
359
+ if (key === 'pick_by' || key === 'pickby' || key === 'pick-by') {
360
+ if (value === 'priority' || value === 'random' || value === 'index') {
361
+ cfg.pickBy = value;
362
+ continue;
363
+ }
364
+ tag('warning').log(`Ignoring invalid 'pick_by' value: ${value} (use priority|random|index)`);
365
+ continue;
366
+ }
367
+ if (key === 'priority' || key === 'priorities') {
368
+ const requested = value
369
+ .split(',')
370
+ .map((s) => s.trim().toLowerCase())
371
+ .filter(Boolean);
372
+ const valid = [];
373
+ for (const p of requested) {
374
+ if (p in PRIORITY_ORDER) {
375
+ valid.push(p);
376
+ continue;
377
+ }
378
+ tag('warning').log(`Unknown priority: ${p} (use ${Object.keys(PRIORITY_ORDER).join('|')})`);
379
+ }
380
+ if (valid.length)
381
+ cfg.priorities = valid;
382
+ continue;
383
+ }
384
+ tag('warning').log(`Unknown configure key: ${key}`);
107
385
  }
386
+ cfg.enabled = hasReuseSignal;
387
+ return cfg;
108
388
  }
109
389
  printResults() {
110
- const allTests = this.completedPlans.flatMap((plan) => plan.tests.filter((t) => t.startTime != null).map((test) => ({ test, planTitle: plan.title })));
390
+ const allTests = this.completedPlans.flatMap((plan) => plan.tests.filter((t) => t.startTime != null).map((test) => ({ test, planTitle: plan.title }))).sort((a, b) => (a.test.startTime ?? 0) - (b.test.startTime ?? 0));
111
391
  if (allTests.length === 0)
112
392
  return;
113
393
  const hasSubPages = this.completedPlans.length > 1;
394
+ const hasOrigin = this.oldTestRefs.size > 0;
114
395
  const rows = allTests.map(({ test, planTitle }, index) => {
115
396
  const durationMs = test.getDurationMs();
116
397
  const duration = durationMs != null ? `${(durationMs / 1000).toFixed(1)}s` : '-';
@@ -127,12 +408,17 @@ export class ExploreCommand extends BaseCommand {
127
408
  Time: duration,
128
409
  Steps: String(Object.keys(test.notes).length),
129
410
  };
411
+ if (hasOrigin) {
412
+ row.Origin = this.originLabel(test);
413
+ }
130
414
  if (hasSubPages) {
131
415
  row.Plan = planTitle;
132
416
  }
133
417
  return row;
134
418
  });
135
419
  const columns = ['#', 'Status', 'Title', 'Priority', 'Time', 'Steps'];
420
+ if (hasOrigin)
421
+ columns.push('Origin');
136
422
  if (hasSubPages)
137
423
  columns.push('Plan');
138
424
  tag('multiline').log(jsonToTable(rows, columns));
@@ -185,11 +471,41 @@ export class ExploreCommand extends BaseCommand {
185
471
  const plan = this.explorBot.getCurrentPlan();
186
472
  if (!plan)
187
473
  return;
474
+ if (this.priorityFilter) {
475
+ for (const t of plan.getPendingTests()) {
476
+ if (!this.priorityFilter.has(t.priority))
477
+ t.enabled = false;
478
+ }
479
+ }
188
480
  for (const test of plan.getPendingTests()) {
189
481
  if (this.isLimitReached())
190
482
  break;
483
+ await this.runOneTest(test);
484
+ }
485
+ }
486
+ async runOneTest(test) {
487
+ if (this.dryRun) {
488
+ test.start();
489
+ test.finish(TestResult.SKIPPED);
490
+ }
491
+ else {
191
492
  await this.explorBot.agentTester().test(test);
192
- this.testsRun++;
193
493
  }
494
+ this.testsRun++;
495
+ }
496
+ }
497
/**
 * Parses a ratio written either as a percentage ("25%") or as a fraction ("0.25").
 *
 * The numeric part must be a complete decimal number; input with trailing
 * garbage such as "0.5abc" or "25x%" is rejected rather than partially parsed.
 *
 * @param {string} s - Raw value; surrounding whitespace is ignored.
 * @returns {number|null} Ratio in the range [0, 1], or null when the input is
 *   not a string, is empty, is not a number, or is out of range
 *   (0–100 for percentages, 0–1 for fractions).
 */
function parseRatio(s) {
  if (typeof s !== 'string') {
    return null;
  }
  const trimmed = s.trim();
  if (!trimmed) {
    return null;
  }
  const isPercent = trimmed.endsWith('%');
  const numeric = (isPercent ? trimmed.slice(0, -1) : trimmed).trim();
  // Validate the whole token first: parseFloat would stop at the first bad
  // character and silently accept the valid prefix.
  const decimalPattern = /^[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/;
  if (!decimalPattern.test(numeric)) {
    return null;
  }
  const n = Number(numeric);
  const upperBound = isPercent ? 100 : 1;
  if (Number.isNaN(n) || n < 0 || n > upperBound) {
    return null;
  }
  return isPercent ? n / 100 : n;
}
@@ -24,6 +24,7 @@ import { ExperienceTracker } from "./experience-tracker.js";
24
24
  import Explorer from "./explorer.js";
25
25
  import { KnowledgeTracker } from "./knowledge-tracker.js";
26
26
  import { Plan } from "./test-plan.js";
27
+ import { parsePlansFromMarkdown } from "./utils/test-plan-markdown.js";
27
28
  import { setVerboseMode, tag } from "./utils/logger.js";
28
29
  import { relativeToCwd } from "./utils/next-steps.js";
29
30
  import { sanitizeFilename } from "./utils/strings.js";
@@ -323,7 +324,8 @@ export class ExplorBot {
323
324
  return undefined;
324
325
  return this.currentPlan;
325
326
  }
326
- this.savePlan();
327
+ if (!opts.noSave)
328
+ this.savePlan();
327
329
  return this.currentPlan;
328
330
  }
329
331
  getPlansDir() {
@@ -348,20 +350,21 @@ export class ExplorBot {
348
350
  this.lastSavedPlanPath = planPath;
349
351
  return planPath;
350
352
  }
351
- generatePlanFilename() {
353
+ generatePlanFilename(feature) {
352
354
  const state = this.explorer?.getStateManager().getCurrentState();
353
355
  const urlPath = state?.url || '/';
354
356
  const urlPart = sanitizeFilename(urlPath) || 'root';
355
357
  const suffix = '.md';
356
- if (!this.planFeature)
358
+ const f = feature ?? this.planFeature;
359
+ if (!f)
357
360
  return urlPart.slice(0, 256 - suffix.length) + suffix;
358
- const featurePart = `_${sanitizeFilename(this.planFeature)}`;
361
+ const featurePart = `_${sanitizeFilename(f)}`;
359
362
  const maxFeatureLen = 256 - suffix.length - urlPart.length;
360
363
  if (maxFeatureLen <= 1)
361
364
  return urlPart.slice(0, 256 - suffix.length) + suffix;
362
365
  return urlPart + featurePart.slice(0, maxFeatureLen) + suffix;
363
366
  }
364
- loadPlan(filename) {
367
+ resolvePlanPath(filename) {
365
368
  let planPath = filename;
366
369
  if (path.isAbsolute(filename)) {
367
370
  if (!existsSync(planPath) && !filename.endsWith('.md')) {
@@ -378,12 +381,23 @@ export class ExplorBot {
378
381
  planPath = path.join(plansDir, `${filename}.md`);
379
382
  }
380
383
  }
384
+ return planPath;
385
+ }
386
+ loadPlan(filename) {
387
+ const planPath = this.resolvePlanPath(filename);
381
388
  if (!existsSync(planPath)) {
382
389
  throw new Error(`Plan file not found: ${planPath}`);
383
390
  }
384
391
  this.setCurrentPlan(Plan.fromMarkdown(planPath));
385
392
  return this.currentPlan;
386
393
  }
394
+ loadPlans(filename) {
395
+ const planPath = this.resolvePlanPath(filename);
396
+ if (!existsSync(planPath)) {
397
+ throw new Error(`Plan file not found: ${planPath}`);
398
+ }
399
+ return parsePlansFromMarkdown(planPath);
400
+ }
387
401
  setCurrentPlan(plan) {
388
402
  this.currentPlan = plan;
389
403
  if (plan && !this.sessionPlans.includes(plan)) {
@@ -145,8 +145,15 @@ export function parsePlansFromMarkdown(filePath) {
145
145
  continue;
146
146
  if (line.startsWith('<!-- test')) {
147
147
  currentTest = null;
148
- const priorityMatch = line.match(/priority:\s*(\w+)/);
148
+ let block = line;
149
+ let j = i;
150
+ while (!block.includes('-->') && j + 1 < lines.length) {
151
+ j++;
152
+ block += `\n${lines[j].trim()}`;
153
+ }
154
+ const priorityMatch = block.match(/priority:\s*(\w+)/);
149
155
  priority = priorityMatch?.[1] || 'normal';
156
+ i = j;
150
157
  continue;
151
158
  }
152
159
  if (line.startsWith('# ') && currentTest === null) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "explorbot",
3
- "version": "0.1.15",
3
+ "version": "0.1.17",
4
4
  "description": "CLI app built with React Ink, CodeceptJS, and Playwright",
5
5
  "license": "Elastic-2.0",
6
6
  "type": "module",
package/src/ai/pilot.ts CHANGED
@@ -320,14 +320,9 @@ export class Pilot implements Agent {
320
320
  - "Edit X" → updated value must be persisted (visible in list/detail). Opening edit is NOT enough; redirect after save with the new value visible IS enough.
321
321
  - Negative tests ("without a name", "invalid", "duplicate", "unauthorized") → success means the system PREVENTED the action with validation/error.
322
322
 
323
- PROVENANCE for create/edit scenarios: the task prompt instructs the tester to inject the
324
- session marker "${task.sessionName ?? ''}" into newly created or edited free-text values.
325
- When that marker COULD be injected, the entity used as proof MUST contain it. A record
326
- matching the goal by text alone but missing the marker is a stale leftover from a prior
327
- run — it is NOT evidence the current scenario produced anything. Vote \`fail\`, not \`pass\`.
328
- This does not apply when the field is restricted (numeric only, enum, etc.) or when the
329
- session_log shows no fillField/type/select actions were attempted at all (in that case
330
- the scenario clearly didn't run — also vote \`fail\`).
323
+ PROVENANCE: the entity you cite as proof must appear by name in <notes> or
324
+ <session_log> tool inputs for THIS run. Name absent from tester activity = stale
325
+ coincidence, vote \`fail\`. Same if no fillField/type/select/click on a target ran.
331
326
 
332
327
  Expected results are MILESTONES, not the goal. Never fail because a milestone (toast, icon, styling)
333
328
  didn't match if the scenario goal IS accomplished.