explorbot 0.1.15 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/explorbot-cli.ts +12 -1
- package/dist/bin/explorbot-cli.js +13 -1
- package/dist/package.json +1 -1
- package/dist/src/ai/pilot.js +3 -8
- package/dist/src/ai/researcher/focus.js +51 -10
- package/dist/src/ai/researcher/sections.js +8 -4
- package/dist/src/ai/researcher.js +9 -24
- package/dist/src/ai/tester.js +8 -2
- package/dist/src/commands/explore-command.js +359 -43
- package/dist/src/explorbot.js +19 -5
- package/dist/src/utils/test-plan-markdown.js +8 -1
- package/package.json +1 -1
- package/src/ai/pilot.ts +3 -8
- package/src/ai/researcher/focus.ts +57 -8
- package/src/ai/researcher/sections.ts +7 -3
- package/src/ai/researcher.ts +8 -23
- package/src/ai/tester.ts +8 -2
- package/src/commands/explore-command.ts +362 -42
- package/src/explorbot.ts +21 -7
- package/src/utils/test-plan-markdown.ts +8 -1
|
@@ -3,6 +3,7 @@ import { getStyles } from '../ai/planner/styles.js';
|
|
|
3
3
|
import { outputPath } from '../config.js';
|
|
4
4
|
import { normalizeUrl } from '../state-manager.js';
|
|
5
5
|
import { Stats } from '../stats.js';
|
|
6
|
+
import { TestResult } from '../test-plan.js';
|
|
6
7
|
import { getCliName } from "../utils/cli-name.js";
|
|
7
8
|
import { ErrorPageError } from "../utils/error-page.js";
|
|
8
9
|
import { tag } from '../utils/logger.js';
|
|
@@ -11,12 +12,15 @@ import { printNextSteps, relativeToCwd } from "../utils/next-steps.js";
|
|
|
11
12
|
import { safeFilename } from "../utils/strings.js";
|
|
12
13
|
import { BaseCommand } from './base-command.js';
|
|
13
14
|
const MAX_SUB_PAGE_ATTEMPTS = 30;
|
|
15
|
+
const PRIORITY_ORDER = { critical: 0, important: 1, high: 2, normal: 3, low: 4 };
|
|
14
16
|
export class ExploreCommand extends BaseCommand {
|
|
15
17
|
name = 'explore';
|
|
16
18
|
description = 'Start web exploration';
|
|
17
19
|
options = [
|
|
18
20
|
{ flags: '--max-tests <number>', description: 'Maximum number of tests to run' },
|
|
19
21
|
{ flags: '--focus <feature>', description: 'Focus area for exploration' },
|
|
22
|
+
{ flags: '--configure <spec>', description: 'Reuse spec: keys new|from|style|subpages|pick_by|priority, e.g. "new:25%;pick_by=random;priority=critical,high"' },
|
|
23
|
+
{ flags: '--dry-run', description: 'Mark picked tests as skipped without executing or generating new ones' },
|
|
20
24
|
];
|
|
21
25
|
suggestions = [
|
|
22
26
|
{ command: 'navigate <page>', hint: 'go to another page' },
|
|
@@ -24,93 +28,370 @@ export class ExploreCommand extends BaseCommand {
|
|
|
24
28
|
{ command: 'plan <feature>', hint: 'plan testing' },
|
|
25
29
|
];
|
|
26
30
|
maxTests;
|
|
31
|
+
dryRun = false;
|
|
27
32
|
testsRun = 0;
|
|
28
33
|
completedPlans = [];
|
|
29
34
|
failedSubPages = new Set();
|
|
35
|
+
oldTestRefs = new Set();
|
|
36
|
+
priorityFilter;
|
|
30
37
|
async execute(args) {
|
|
31
38
|
const { opts, args: remaining } = this.parseArgs(args);
|
|
32
39
|
if (opts.maxTests) {
|
|
33
40
|
this.maxTests = Number.parseInt(opts.maxTests, 10);
|
|
34
41
|
}
|
|
35
42
|
const feature = opts.focus || remaining.join(' ') || undefined;
|
|
43
|
+
const cfg = this.parseConfigure(opts.configure);
|
|
44
|
+
if (cfg.priorities)
|
|
45
|
+
this.priorityFilter = new Set(cfg.priorities);
|
|
46
|
+
if (opts.dryRun)
|
|
47
|
+
this.dryRun = true;
|
|
48
|
+
if (this.dryRun)
|
|
49
|
+
tag('info').log('Dry-run mode: planner runs to discover new tests; test execution is skipped');
|
|
36
50
|
Stats.mode ??= 'explore';
|
|
37
51
|
Stats.focus ??= feature;
|
|
38
52
|
const mainUrl = this.explorBot.getExplorer().getStateManager().getCurrentState()?.url;
|
|
39
|
-
|
|
53
|
+
if (cfg.enabled) {
|
|
54
|
+
await this.runReuseMode(mainUrl, feature, cfg);
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
await this.runFreshMode(mainUrl, feature, cfg.styles);
|
|
58
|
+
}
|
|
59
|
+
const mainPlan = this.completedPlans[0];
|
|
60
|
+
if (mainPlan)
|
|
61
|
+
this.explorBot.setCurrentPlan(mainPlan);
|
|
62
|
+
if (this.dryRun) {
|
|
63
|
+
this.printResults();
|
|
64
|
+
return;
|
|
65
|
+
}
|
|
66
|
+
if (mainUrl)
|
|
67
|
+
await this.explorBot.visit(mainUrl);
|
|
68
|
+
const savedPath = this.explorBot.savePlans(this.completedPlans);
|
|
69
|
+
this.printResults();
|
|
70
|
+
await this.explorBot.printSessionAnalysis();
|
|
71
|
+
this.printNextSteps(savedPath);
|
|
72
|
+
}
|
|
73
|
+
originLabel(test) {
|
|
74
|
+
return this.oldTestRefs.has(test) ? 'OLD' : 'NEW';
|
|
75
|
+
}
|
|
76
|
+
printPreview(label, tests) {
|
|
77
|
+
if (tests.length === 0)
|
|
78
|
+
return;
|
|
79
|
+
const lines = [label];
|
|
80
|
+
for (let i = 0; i < tests.length; i++) {
|
|
81
|
+
const t = tests[i];
|
|
82
|
+
lines.push(` ${String(i + 1).padStart(2)}. [${this.originLabel(t)}] [${t.priority.padEnd(9)}] ${t.scenario}`);
|
|
83
|
+
}
|
|
84
|
+
tag('multiline').log(lines.join('\n'));
|
|
85
|
+
}
|
|
86
|
+
async runFreshMode(mainUrl, feature, styles) {
|
|
87
|
+
await this.runAllStyles(mainUrl, feature, undefined, undefined, styles);
|
|
40
88
|
const mainPlan = this.explorBot.getCurrentPlan();
|
|
41
89
|
if (!mainPlan)
|
|
42
90
|
return;
|
|
43
91
|
this.completedPlans.push(mainPlan);
|
|
44
|
-
if (
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
92
|
+
if (feature || this.isLimitReached())
|
|
93
|
+
return;
|
|
94
|
+
await this.discoverNewSubPages(mainPlan, mainUrl, styles, new Set());
|
|
95
|
+
}
|
|
96
|
+
async runReuseMode(mainUrl, feature, cfg) {
|
|
97
|
+
const filename = cfg.fromPath || this.explorBot.generatePlanFilename(feature);
|
|
98
|
+
let loadedPlans = [];
|
|
99
|
+
try {
|
|
100
|
+
loadedPlans = this.explorBot.loadPlans(filename);
|
|
101
|
+
}
|
|
102
|
+
catch (err) {
|
|
103
|
+
tag('warning').log(`Reuse plan not found (${err instanceof Error ? err.message : err}); falling back to fresh planning`);
|
|
104
|
+
await this.runFreshMode(mainUrl, feature, cfg.styles);
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
107
|
+
if (loadedPlans.length === 0) {
|
|
108
|
+
tag('warning').log('Reuse plan empty; falling back to fresh planning');
|
|
109
|
+
await this.runFreshMode(mainUrl, feature, cfg.styles);
|
|
110
|
+
return;
|
|
111
|
+
}
|
|
112
|
+
const mainPlan = loadedPlans[0];
|
|
113
|
+
const subPlans = loadedPlans.slice(1);
|
|
114
|
+
const totalCap = this.maxTests ?? Number.POSITIVE_INFINITY;
|
|
115
|
+
let newQuota = Number.POSITIVE_INFINITY;
|
|
116
|
+
let oldQuota = Number.POSITIVE_INFINITY;
|
|
117
|
+
if (Number.isFinite(totalCap)) {
|
|
118
|
+
newQuota = Math.round(totalCap * cfg.newRatio);
|
|
119
|
+
oldQuota = Math.max(0, totalCap - newQuota);
|
|
120
|
+
}
|
|
121
|
+
for (const p of loadedPlans) {
|
|
122
|
+
for (const t of p.tests)
|
|
123
|
+
this.oldTestRefs.add(t);
|
|
124
|
+
}
|
|
125
|
+
const allOldTests = loadedPlans.flatMap((p) => p.tests.filter((t) => t.status === 'pending'));
|
|
126
|
+
let matchingOldTests = allOldTests;
|
|
127
|
+
if (cfg.styles) {
|
|
128
|
+
matchingOldTests = matchingOldTests.filter((t) => !t.style || cfg.styles.includes(t.style));
|
|
129
|
+
}
|
|
130
|
+
if (this.priorityFilter) {
|
|
131
|
+
matchingOldTests = matchingOldTests.filter((t) => this.priorityFilter.has(t.priority));
|
|
132
|
+
}
|
|
133
|
+
const pickBy = cfg.pickBy ?? 'priority';
|
|
134
|
+
const orderedOldTests = matchingOldTests.slice();
|
|
135
|
+
if (pickBy === 'priority') {
|
|
136
|
+
orderedOldTests.sort((a, b) => (PRIORITY_ORDER[a.priority] ?? 99) - (PRIORITY_ORDER[b.priority] ?? 99));
|
|
137
|
+
}
|
|
138
|
+
else if (pickBy === 'random') {
|
|
139
|
+
for (let i = orderedOldTests.length - 1; i > 0; i--) {
|
|
140
|
+
const j = Math.floor(Math.random() * (i + 1));
|
|
141
|
+
[orderedOldTests[i], orderedOldTests[j]] = [orderedOldTests[j], orderedOldTests[i]];
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
let pickCount = orderedOldTests.length;
|
|
145
|
+
if (Number.isFinite(oldQuota))
|
|
146
|
+
pickCount = Math.min(oldQuota, orderedOldTests.length);
|
|
147
|
+
const picked = orderedOldTests.slice(0, pickCount);
|
|
148
|
+
const pickedSet = new Set(picked);
|
|
149
|
+
for (const t of allOldTests) {
|
|
150
|
+
if (!pickedSet.has(t))
|
|
151
|
+
t.enabled = false;
|
|
152
|
+
}
|
|
153
|
+
let newQuotaLabel = 'unlimited';
|
|
154
|
+
if (Number.isFinite(newQuota))
|
|
155
|
+
newQuotaLabel = String(newQuota);
|
|
156
|
+
let priorityNote = '';
|
|
157
|
+
if (this.priorityFilter)
|
|
158
|
+
priorityNote = `, priority=[${[...this.priorityFilter].join(',')}]`;
|
|
159
|
+
tag('info').log(`Reuse: loaded ${allOldTests.length} old test(s), running ${picked.length} (pick_by=${pickBy}${priorityNote}), reserving ${newQuotaLabel} for new`);
|
|
160
|
+
const planner = this.explorBot.agentPlanner();
|
|
161
|
+
for (const p of loadedPlans)
|
|
162
|
+
planner.registerPlanInSession(p);
|
|
163
|
+
this.completedPlans.push(...loadedPlans);
|
|
164
|
+
this.printPreview(`Picked old tests (${picked.length}):`, picked);
|
|
165
|
+
let currentPlanRef;
|
|
166
|
+
for (const test of picked) {
|
|
167
|
+
if (this.isLimitReached())
|
|
168
|
+
break;
|
|
169
|
+
const owningPlan = test.plan;
|
|
170
|
+
if (owningPlan && owningPlan !== currentPlanRef) {
|
|
171
|
+
this.explorBot.setCurrentPlan(owningPlan);
|
|
172
|
+
if (owningPlan.url && !this.dryRun)
|
|
173
|
+
await this.explorBot.visit(owningPlan.url);
|
|
174
|
+
currentPlanRef = owningPlan;
|
|
175
|
+
}
|
|
176
|
+
await this.runOneTest(test);
|
|
177
|
+
}
|
|
178
|
+
if (this.isLimitReached() || newQuota <= 0)
|
|
179
|
+
return;
|
|
180
|
+
const subpagesMode = cfg.subpages || 'both';
|
|
181
|
+
if (mainUrl && !this.dryRun)
|
|
182
|
+
await this.explorBot.visit(mainUrl);
|
|
183
|
+
await this.replanAndRun(mainUrl, feature, mainPlan, cfg.styles);
|
|
184
|
+
if (this.isLimitReached())
|
|
185
|
+
return;
|
|
186
|
+
if (subpagesMode === 'same' || subpagesMode === 'both') {
|
|
187
|
+
for (const subPlan of subPlans) {
|
|
49
188
|
if (this.isLimitReached())
|
|
50
189
|
break;
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
break;
|
|
54
|
-
const pick = await planner.pickNextSubPage(candidates);
|
|
55
|
-
if (!pick)
|
|
56
|
-
break;
|
|
57
|
-
tag('info').log(`Exploring sub-page: ${pick.url} (${pick.reason})`);
|
|
190
|
+
if (!subPlan.url)
|
|
191
|
+
continue;
|
|
58
192
|
try {
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
if (subPlan) {
|
|
63
|
-
this.completedPlans.push(subPlan);
|
|
64
|
-
}
|
|
193
|
+
if (!this.dryRun)
|
|
194
|
+
await this.explorBot.visit(subPlan.url);
|
|
195
|
+
await this.replanAndRun(subPlan.url, undefined, subPlan, cfg.styles);
|
|
65
196
|
}
|
|
66
197
|
catch (err) {
|
|
67
|
-
this.failedSubPages.add(normalizeUrl(
|
|
68
|
-
tag('warning').log(`Sub-page
|
|
198
|
+
this.failedSubPages.add(normalizeUrl(subPlan.url));
|
|
199
|
+
tag('warning').log(`Sub-page re-planning failed: ${err instanceof Error ? err.message : err}`);
|
|
69
200
|
}
|
|
70
201
|
}
|
|
71
202
|
}
|
|
72
|
-
this.
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
203
|
+
if (this.isLimitReached())
|
|
204
|
+
return;
|
|
205
|
+
if (subpagesMode === 'new' || subpagesMode === 'both') {
|
|
206
|
+
const knownUrls = new Set();
|
|
207
|
+
for (const p of loadedPlans) {
|
|
208
|
+
if (p.url)
|
|
209
|
+
knownUrls.add(normalizeUrl(p.url));
|
|
210
|
+
}
|
|
211
|
+
await this.discoverNewSubPages(mainPlan, mainUrl, cfg.styles, knownUrls);
|
|
212
|
+
}
|
|
79
213
|
}
|
|
80
|
-
async
|
|
214
|
+
async discoverNewSubPages(mainPlan, mainUrl, styles, knownUrls) {
|
|
215
|
+
const planner = this.explorBot.agentPlanner();
|
|
216
|
+
let attempts = 0;
|
|
217
|
+
while (attempts < MAX_SUB_PAGE_ATTEMPTS) {
|
|
218
|
+
attempts++;
|
|
219
|
+
if (this.isLimitReached())
|
|
220
|
+
break;
|
|
221
|
+
const candidates = planner.collectSubPageCandidates(mainPlan, mainUrl || '/').filter((c) => {
|
|
222
|
+
const norm = normalizeUrl(c.url);
|
|
223
|
+
return !this.failedSubPages.has(norm) && !knownUrls.has(norm);
|
|
224
|
+
});
|
|
225
|
+
if (candidates.length === 0)
|
|
226
|
+
break;
|
|
227
|
+
const pick = await planner.pickNextSubPage(candidates);
|
|
228
|
+
if (!pick)
|
|
229
|
+
break;
|
|
230
|
+
tag('info').log(`Exploring sub-page: ${pick.url} (${pick.reason})`);
|
|
231
|
+
try {
|
|
232
|
+
await this.explorBot.visit(pick.url);
|
|
233
|
+
await this.runAllStyles(pick.url, undefined, mainPlan, this.completedPlans, styles);
|
|
234
|
+
const subPlan = this.explorBot.getCurrentPlan();
|
|
235
|
+
if (subPlan && !this.completedPlans.includes(subPlan)) {
|
|
236
|
+
this.completedPlans.push(subPlan);
|
|
237
|
+
}
|
|
238
|
+
knownUrls.add(normalizeUrl(pick.url));
|
|
239
|
+
}
|
|
240
|
+
catch (err) {
|
|
241
|
+
this.failedSubPages.add(normalizeUrl(pick.url));
|
|
242
|
+
tag('warning').log(`Sub-page exploration failed: ${err instanceof Error ? err.message : err}`);
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
async replanAndRun(pageUrl, feature, existingPlan, styles) {
|
|
247
|
+
const styleList = styles ?? Object.keys(getStyles());
|
|
248
|
+
for (const style of styleList) {
|
|
249
|
+
if (this.isLimitReached())
|
|
250
|
+
break;
|
|
251
|
+
this.explorBot.setCurrentPlan(existingPlan);
|
|
252
|
+
const opts = { fresh: false, style, completedPlans: this.completedPlans };
|
|
253
|
+
if (this.dryRun)
|
|
254
|
+
opts.noSave = true;
|
|
255
|
+
await this.planWithRetry(feature, opts, pageUrl);
|
|
256
|
+
await this.runPendingTests();
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
async runAllStyles(pageUrl, feature, parentPlan, completedPlans, styles) {
|
|
260
|
+
const styleList = styles ?? Object.keys(getStyles());
|
|
81
261
|
let fresh = true;
|
|
82
|
-
for (const style of
|
|
83
|
-
if (!fresh && pageUrl) {
|
|
262
|
+
for (const style of styleList) {
|
|
263
|
+
if (!fresh && pageUrl && !this.dryRun) {
|
|
84
264
|
await this.explorBot.visit(pageUrl);
|
|
85
265
|
}
|
|
86
266
|
const opts = { fresh, style, completedPlans };
|
|
87
267
|
if (fresh && parentPlan)
|
|
88
268
|
opts.extend = parentPlan;
|
|
269
|
+
if (this.dryRun)
|
|
270
|
+
opts.noSave = true;
|
|
89
271
|
await this.planWithRetry(feature, opts, pageUrl);
|
|
90
272
|
await this.runPendingTests();
|
|
91
273
|
fresh = false;
|
|
92
274
|
}
|
|
93
275
|
}
|
|
94
276
|
async planWithRetry(feature, opts, pageUrl) {
|
|
95
|
-
|
|
96
|
-
if (!this.explorBot.lastPlanError)
|
|
97
|
-
return;
|
|
98
|
-
if (this.explorBot.lastPlanError instanceof ErrorPageError) {
|
|
99
|
-
throw this.explorBot.lastPlanError;
|
|
100
|
-
}
|
|
101
|
-
tag('info').log(`Retrying planning style '${opts.style}'...`);
|
|
102
|
-
if (pageUrl)
|
|
103
|
-
await this.explorBot.visit(pageUrl);
|
|
277
|
+
const before = new Set(this.explorBot.getCurrentPlan()?.tests ?? []);
|
|
104
278
|
await this.explorBot.plan(feature, opts);
|
|
105
279
|
if (this.explorBot.lastPlanError) {
|
|
106
|
-
|
|
280
|
+
if (this.explorBot.lastPlanError instanceof ErrorPageError) {
|
|
281
|
+
throw this.explorBot.lastPlanError;
|
|
282
|
+
}
|
|
283
|
+
tag('info').log(`Retrying planning style '${opts.style}'...`);
|
|
284
|
+
if (pageUrl && !this.dryRun)
|
|
285
|
+
await this.explorBot.visit(pageUrl);
|
|
286
|
+
await this.explorBot.plan(feature, opts);
|
|
287
|
+
if (this.explorBot.lastPlanError) {
|
|
288
|
+
tag('warning').log(`Planning style '${opts.style}' failed after retry, skipping`);
|
|
289
|
+
return;
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
const planAfter = this.explorBot.getCurrentPlan();
|
|
293
|
+
if (!planAfter)
|
|
294
|
+
return;
|
|
295
|
+
const added = planAfter.tests.filter((t) => !before.has(t));
|
|
296
|
+
if (added.length === 0)
|
|
297
|
+
return;
|
|
298
|
+
const urlNote = pageUrl ? ` for ${pageUrl}` : '';
|
|
299
|
+
this.printPreview(`Planner added ${added.length} new test(s) [style=${opts.style}]${urlNote}:`, added);
|
|
300
|
+
}
|
|
301
|
+
parseConfigure(raw) {
|
|
302
|
+
const cfg = { enabled: false, newRatio: 1.0 };
|
|
303
|
+
if (!raw)
|
|
304
|
+
return cfg;
|
|
305
|
+
const allStyles = Object.keys(getStyles());
|
|
306
|
+
const validSubpages = new Set(['none', 'same', 'new', 'both']);
|
|
307
|
+
let hasReuseSignal = false;
|
|
308
|
+
for (const pair of raw.split(';')) {
|
|
309
|
+
const trimmed = pair.trim();
|
|
310
|
+
if (!trimmed)
|
|
311
|
+
continue;
|
|
312
|
+
const sepMatch = trimmed.match(/^([^:=]+)\s*[:=]\s*(.*)$/);
|
|
313
|
+
if (!sepMatch) {
|
|
314
|
+
tag('warning').log(`Ignoring malformed configure pair: ${trimmed}`);
|
|
315
|
+
continue;
|
|
316
|
+
}
|
|
317
|
+
const key = sepMatch[1].trim().toLowerCase();
|
|
318
|
+
const value = sepMatch[2].trim();
|
|
319
|
+
if (key === 'new') {
|
|
320
|
+
const ratio = parseRatio(value);
|
|
321
|
+
if (ratio == null) {
|
|
322
|
+
tag('warning').log(`Ignoring invalid 'new' value: ${value}`);
|
|
323
|
+
continue;
|
|
324
|
+
}
|
|
325
|
+
cfg.newRatio = ratio;
|
|
326
|
+
hasReuseSignal = true;
|
|
327
|
+
continue;
|
|
328
|
+
}
|
|
329
|
+
if (key === 'from') {
|
|
330
|
+
cfg.fromPath = value;
|
|
331
|
+
hasReuseSignal = true;
|
|
332
|
+
continue;
|
|
333
|
+
}
|
|
334
|
+
if (key === 'style' || key === 'styles') {
|
|
335
|
+
const requested = value
|
|
336
|
+
.split(',')
|
|
337
|
+
.map((s) => s.trim())
|
|
338
|
+
.filter(Boolean);
|
|
339
|
+
const valid = [];
|
|
340
|
+
for (const s of requested) {
|
|
341
|
+
if (allStyles.includes(s)) {
|
|
342
|
+
valid.push(s);
|
|
343
|
+
continue;
|
|
344
|
+
}
|
|
345
|
+
tag('warning').log(`Unknown planning style: ${s}`);
|
|
346
|
+
}
|
|
347
|
+
if (valid.length)
|
|
348
|
+
cfg.styles = valid;
|
|
349
|
+
continue;
|
|
350
|
+
}
|
|
351
|
+
if (key === 'subpages') {
|
|
352
|
+
if (!validSubpages.has(value)) {
|
|
353
|
+
tag('warning').log(`Ignoring invalid 'subpages' value: ${value}`);
|
|
354
|
+
continue;
|
|
355
|
+
}
|
|
356
|
+
cfg.subpages = value;
|
|
357
|
+
continue;
|
|
358
|
+
}
|
|
359
|
+
if (key === 'pick_by' || key === 'pickby' || key === 'pick-by') {
|
|
360
|
+
if (value === 'priority' || value === 'random' || value === 'index') {
|
|
361
|
+
cfg.pickBy = value;
|
|
362
|
+
continue;
|
|
363
|
+
}
|
|
364
|
+
tag('warning').log(`Ignoring invalid 'pick_by' value: ${value} (use priority|random|index)`);
|
|
365
|
+
continue;
|
|
366
|
+
}
|
|
367
|
+
if (key === 'priority' || key === 'priorities') {
|
|
368
|
+
const requested = value
|
|
369
|
+
.split(',')
|
|
370
|
+
.map((s) => s.trim().toLowerCase())
|
|
371
|
+
.filter(Boolean);
|
|
372
|
+
const valid = [];
|
|
373
|
+
for (const p of requested) {
|
|
374
|
+
if (p in PRIORITY_ORDER) {
|
|
375
|
+
valid.push(p);
|
|
376
|
+
continue;
|
|
377
|
+
}
|
|
378
|
+
tag('warning').log(`Unknown priority: ${p} (use ${Object.keys(PRIORITY_ORDER).join('|')})`);
|
|
379
|
+
}
|
|
380
|
+
if (valid.length)
|
|
381
|
+
cfg.priorities = valid;
|
|
382
|
+
continue;
|
|
383
|
+
}
|
|
384
|
+
tag('warning').log(`Unknown configure key: ${key}`);
|
|
107
385
|
}
|
|
386
|
+
cfg.enabled = hasReuseSignal;
|
|
387
|
+
return cfg;
|
|
108
388
|
}
|
|
109
389
|
printResults() {
|
|
110
|
-
const allTests = this.completedPlans.flatMap((plan) => plan.tests.filter((t) => t.startTime != null).map((test) => ({ test, planTitle: plan.title })));
|
|
390
|
+
const allTests = this.completedPlans.flatMap((plan) => plan.tests.filter((t) => t.startTime != null).map((test) => ({ test, planTitle: plan.title }))).sort((a, b) => (a.test.startTime ?? 0) - (b.test.startTime ?? 0));
|
|
111
391
|
if (allTests.length === 0)
|
|
112
392
|
return;
|
|
113
393
|
const hasSubPages = this.completedPlans.length > 1;
|
|
394
|
+
const hasOrigin = this.oldTestRefs.size > 0;
|
|
114
395
|
const rows = allTests.map(({ test, planTitle }, index) => {
|
|
115
396
|
const durationMs = test.getDurationMs();
|
|
116
397
|
const duration = durationMs != null ? `${(durationMs / 1000).toFixed(1)}s` : '-';
|
|
@@ -127,12 +408,17 @@ export class ExploreCommand extends BaseCommand {
|
|
|
127
408
|
Time: duration,
|
|
128
409
|
Steps: String(Object.keys(test.notes).length),
|
|
129
410
|
};
|
|
411
|
+
if (hasOrigin) {
|
|
412
|
+
row.Origin = this.originLabel(test);
|
|
413
|
+
}
|
|
130
414
|
if (hasSubPages) {
|
|
131
415
|
row.Plan = planTitle;
|
|
132
416
|
}
|
|
133
417
|
return row;
|
|
134
418
|
});
|
|
135
419
|
const columns = ['#', 'Status', 'Title', 'Priority', 'Time', 'Steps'];
|
|
420
|
+
if (hasOrigin)
|
|
421
|
+
columns.push('Origin');
|
|
136
422
|
if (hasSubPages)
|
|
137
423
|
columns.push('Plan');
|
|
138
424
|
tag('multiline').log(jsonToTable(rows, columns));
|
|
@@ -185,11 +471,41 @@ export class ExploreCommand extends BaseCommand {
|
|
|
185
471
|
const plan = this.explorBot.getCurrentPlan();
|
|
186
472
|
if (!plan)
|
|
187
473
|
return;
|
|
474
|
+
if (this.priorityFilter) {
|
|
475
|
+
for (const t of plan.getPendingTests()) {
|
|
476
|
+
if (!this.priorityFilter.has(t.priority))
|
|
477
|
+
t.enabled = false;
|
|
478
|
+
}
|
|
479
|
+
}
|
|
188
480
|
for (const test of plan.getPendingTests()) {
|
|
189
481
|
if (this.isLimitReached())
|
|
190
482
|
break;
|
|
483
|
+
await this.runOneTest(test);
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
async runOneTest(test) {
|
|
487
|
+
if (this.dryRun) {
|
|
488
|
+
test.start();
|
|
489
|
+
test.finish(TestResult.SKIPPED);
|
|
490
|
+
}
|
|
491
|
+
else {
|
|
191
492
|
await this.explorBot.agentTester().test(test);
|
|
192
|
-
this.testsRun++;
|
|
193
493
|
}
|
|
494
|
+
this.testsRun++;
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
/**
 * Parses a ratio given either as a percentage ("25%") or as a fraction
 * between 0 and 1 ("0.25").
 *
 * The whole token must be a decimal number; input with trailing garbage such
 * as "0.5abc" or "25abc%" is rejected instead of being silently truncated.
 *
 * @param {string} s - Raw ratio text.
 * @returns {number|null} The ratio in the range [0, 1], or null when the
 *   input is empty, not a number, or out of range.
 */
function parseRatio(s) {
    const trimmed = s.trim();
    if (!trimmed)
        return null;
    const isPercent = trimmed.endsWith('%');
    const numericText = isPercent ? trimmed.slice(0, -1).trim() : trimmed;
    // Number.parseFloat stops at the first invalid character, so validate the
    // full token up front.
    if (!/^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i.test(numericText))
        return null;
    const n = Number.parseFloat(numericText);
    const upperBound = isPercent ? 100 : 1;
    if (Number.isNaN(n) || n < 0 || n > upperBound)
        return null;
    return isPercent ? n / 100 : n;
}
|
package/dist/src/explorbot.js
CHANGED
|
@@ -24,6 +24,7 @@ import { ExperienceTracker } from "./experience-tracker.js";
|
|
|
24
24
|
import Explorer from "./explorer.js";
|
|
25
25
|
import { KnowledgeTracker } from "./knowledge-tracker.js";
|
|
26
26
|
import { Plan } from "./test-plan.js";
|
|
27
|
+
import { parsePlansFromMarkdown } from "./utils/test-plan-markdown.js";
|
|
27
28
|
import { setVerboseMode, tag } from "./utils/logger.js";
|
|
28
29
|
import { relativeToCwd } from "./utils/next-steps.js";
|
|
29
30
|
import { sanitizeFilename } from "./utils/strings.js";
|
|
@@ -323,7 +324,8 @@ export class ExplorBot {
|
|
|
323
324
|
return undefined;
|
|
324
325
|
return this.currentPlan;
|
|
325
326
|
}
|
|
326
|
-
|
|
327
|
+
if (!opts.noSave)
|
|
328
|
+
this.savePlan();
|
|
327
329
|
return this.currentPlan;
|
|
328
330
|
}
|
|
329
331
|
getPlansDir() {
|
|
@@ -348,20 +350,21 @@ export class ExplorBot {
|
|
|
348
350
|
this.lastSavedPlanPath = planPath;
|
|
349
351
|
return planPath;
|
|
350
352
|
}
|
|
351
|
-
generatePlanFilename() {
|
|
353
|
+
generatePlanFilename(feature) {
|
|
352
354
|
const state = this.explorer?.getStateManager().getCurrentState();
|
|
353
355
|
const urlPath = state?.url || '/';
|
|
354
356
|
const urlPart = sanitizeFilename(urlPath) || 'root';
|
|
355
357
|
const suffix = '.md';
|
|
356
|
-
|
|
358
|
+
const f = feature ?? this.planFeature;
|
|
359
|
+
if (!f)
|
|
357
360
|
return urlPart.slice(0, 256 - suffix.length) + suffix;
|
|
358
|
-
const featurePart = `_${sanitizeFilename(
|
|
361
|
+
const featurePart = `_${sanitizeFilename(f)}`;
|
|
359
362
|
const maxFeatureLen = 256 - suffix.length - urlPart.length;
|
|
360
363
|
if (maxFeatureLen <= 1)
|
|
361
364
|
return urlPart.slice(0, 256 - suffix.length) + suffix;
|
|
362
365
|
return urlPart + featurePart.slice(0, maxFeatureLen) + suffix;
|
|
363
366
|
}
|
|
364
|
-
|
|
367
|
+
resolvePlanPath(filename) {
|
|
365
368
|
let planPath = filename;
|
|
366
369
|
if (path.isAbsolute(filename)) {
|
|
367
370
|
if (!existsSync(planPath) && !filename.endsWith('.md')) {
|
|
@@ -378,12 +381,23 @@ export class ExplorBot {
|
|
|
378
381
|
planPath = path.join(plansDir, `${filename}.md`);
|
|
379
382
|
}
|
|
380
383
|
}
|
|
384
|
+
return planPath;
|
|
385
|
+
}
|
|
386
|
+
loadPlan(filename) {
|
|
387
|
+
const planPath = this.resolvePlanPath(filename);
|
|
381
388
|
if (!existsSync(planPath)) {
|
|
382
389
|
throw new Error(`Plan file not found: ${planPath}`);
|
|
383
390
|
}
|
|
384
391
|
this.setCurrentPlan(Plan.fromMarkdown(planPath));
|
|
385
392
|
return this.currentPlan;
|
|
386
393
|
}
|
|
394
|
+
loadPlans(filename) {
|
|
395
|
+
const planPath = this.resolvePlanPath(filename);
|
|
396
|
+
if (!existsSync(planPath)) {
|
|
397
|
+
throw new Error(`Plan file not found: ${planPath}`);
|
|
398
|
+
}
|
|
399
|
+
return parsePlansFromMarkdown(planPath);
|
|
400
|
+
}
|
|
387
401
|
setCurrentPlan(plan) {
|
|
388
402
|
this.currentPlan = plan;
|
|
389
403
|
if (plan && !this.sessionPlans.includes(plan)) {
|
|
@@ -145,8 +145,15 @@ export function parsePlansFromMarkdown(filePath) {
|
|
|
145
145
|
continue;
|
|
146
146
|
if (line.startsWith('<!-- test')) {
|
|
147
147
|
currentTest = null;
|
|
148
|
-
|
|
148
|
+
let block = line;
|
|
149
|
+
let j = i;
|
|
150
|
+
while (!block.includes('-->') && j + 1 < lines.length) {
|
|
151
|
+
j++;
|
|
152
|
+
block += `\n${lines[j].trim()}`;
|
|
153
|
+
}
|
|
154
|
+
const priorityMatch = block.match(/priority:\s*(\w+)/);
|
|
149
155
|
priority = priorityMatch?.[1] || 'normal';
|
|
156
|
+
i = j;
|
|
150
157
|
continue;
|
|
151
158
|
}
|
|
152
159
|
if (line.startsWith('# ') && currentTest === null) {
|
package/package.json
CHANGED
package/src/ai/pilot.ts
CHANGED
|
@@ -320,14 +320,9 @@ export class Pilot implements Agent {
|
|
|
320
320
|
- "Edit X" → updated value must be persisted (visible in list/detail). Opening edit is NOT enough; redirect after save with the new value visible IS enough.
|
|
321
321
|
- Negative tests ("without a name", "invalid", "duplicate", "unauthorized") → success means the system PREVENTED the action with validation/error.
|
|
322
322
|
|
|
323
|
-
PROVENANCE
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
matching the goal by text alone but missing the marker is a stale leftover from a prior
|
|
327
|
-
run — it is NOT evidence the current scenario produced anything. Vote \`fail\`, not \`pass\`.
|
|
328
|
-
This does not apply when the field is restricted (numeric only, enum, etc.) or when the
|
|
329
|
-
session_log shows no fillField/type/select actions were attempted at all (in that case
|
|
330
|
-
the scenario clearly didn't run — also vote \`fail\`).
|
|
323
|
+
PROVENANCE: the entity you cite as proof must appear by name in <notes> or
|
|
324
|
+
<session_log> tool inputs for THIS run. Name absent from tester activity = stale
|
|
325
|
+
coincidence, vote \`fail\`. Same if no fillField/type/select/click on a target ran.
|
|
331
326
|
|
|
332
327
|
Expected results are MILESTONES, not the goal. Never fail because a milestone (toast, icon, styling)
|
|
333
328
|
didn't match if the scenario goal IS accomplished.
|