explorbot 0.1.16 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/explorbot-cli.ts +14 -1
- package/boat/doc-collector/bin/doc-collector-cli.ts +5 -0
- package/boat/doc-collector/package.json +24 -0
- package/boat/doc-collector/src/ai/documentarian.ts +184 -0
- package/boat/doc-collector/src/cli.ts +119 -0
- package/boat/doc-collector/src/config.ts +162 -0
- package/boat/doc-collector/src/docbot.ts +391 -0
- package/boat/doc-collector/src/docs-renderer.ts +187 -0
- package/boat/doc-collector/src/path-filter.ts +46 -0
- package/boat/doc-collector/src/research-navigation.ts +90 -0
- package/dist/bin/explorbot-cli.js +15 -1
- package/dist/boat/doc-collector/bin/doc-collector-cli.js +4 -0
- package/dist/boat/doc-collector/src/ai/documentarian.js +157 -0
- package/dist/boat/doc-collector/src/cli.js +104 -0
- package/dist/boat/doc-collector/src/config.js +129 -0
- package/dist/boat/doc-collector/src/docbot.js +326 -0
- package/dist/boat/doc-collector/src/docs-renderer.js +141 -0
- package/dist/boat/doc-collector/src/path-filter.js +35 -0
- package/dist/boat/doc-collector/src/research-navigation.js +71 -0
- package/dist/package.json +4 -1
- package/dist/src/ai/pilot.js +3 -8
- package/dist/src/ai/researcher/coordinates.js +1 -1
- package/dist/src/ai/researcher/parser.js +3 -0
- package/dist/src/ai/researcher.js +2 -1
- package/dist/src/ai/tester.js +1 -0
- package/dist/src/commands/explore-command.js +359 -43
- package/dist/src/config.js +10 -3
- package/dist/src/explorbot.js +19 -5
- package/dist/src/explorer.js +14 -1
- package/dist/src/state-manager.js +3 -0
- package/dist/src/utils/test-plan-markdown.js +8 -1
- package/dist/src/utils/url-matcher.js +5 -3
- package/dist/src/utils/web-element.js +3 -2
- package/package.json +4 -1
- package/src/ai/pilot.ts +3 -8
- package/src/ai/researcher/coordinates.ts +1 -1
- package/src/ai/researcher/parser.ts +3 -0
- package/src/ai/researcher.ts +2 -1
- package/src/ai/tester.ts +1 -0
- package/src/commands/explore-command.ts +362 -42
- package/src/config.ts +13 -3
- package/src/explorbot.ts +22 -7
- package/src/explorer.ts +12 -1
- package/src/state-manager.ts +4 -0
- package/src/utils/test-plan-markdown.ts +8 -1
- package/src/utils/url-matcher.ts +5 -2
- package/src/utils/web-element.ts +3 -2
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { ExplorBot } from "../../../src/explorbot.js";
|
|
4
|
+
import { normalizeUrl } from "../../../src/state-manager.js";
|
|
5
|
+
import { sanitizeFilename } from "../../../src/utils/strings.js";
|
|
6
|
+
import { tag } from "../../../src/utils/logger.js";
|
|
7
|
+
import { Documentarian } from "./ai/documentarian.js";
|
|
8
|
+
import { DocbotConfigParser } from "./config.js";
|
|
9
|
+
import { renderPageDocumentation, renderSpecIndex } from "./docs-renderer.js";
|
|
10
|
+
import { getDocPageKey, shouldCrawlDocPath } from "./path-filter.js";
|
|
11
|
+
import { extractResearchNavigationTargets } from "./research-navigation.js";
|
|
12
|
+
/**
 * Crawls pages through an ExplorBot instance, asks the Documentarian to
 * describe each page, and writes one Markdown file per page plus a `spec.md`
 * index into the configured output directory.
 */
class DocBot {
  explorBot;
  configParser;
  config = {};
  documentarian;
  options;
  // Path prefix used by the 'subtree' and 'section' scopes; recomputed by collect().
  scopeRoot = '/';

  /**
   * @param {object} [options] - CLI-level options; `startUrl`, `verbose`,
   *   `config`, `path`, `show`, `headless`, `incognito` and `session` are
   *   forwarded to ExplorBot, `docsConfig` is read later by start().
   */
  constructor(options = {}) {
    this.options = options;
    // Only an absolute startUrl yields an origin; otherwise baseUrl is undefined.
    const baseUrl = this.extractAbsoluteBaseUrl(options.startUrl || '/');
    this.explorBot = new ExplorBot({
      baseUrl,
      verbose: options.verbose,
      config: options.config,
      path: options.path,
      show: options.show,
      headless: options.headless,
      incognito: options.incognito,
      session: options.session,
    });
    this.configParser = DocbotConfigParser.getInstance();
  }

  /** Starts ExplorBot, loads the docs config and creates the output directories. */
  async start() {
    await this.explorBot.start();
    this.config = await this.configParser.loadConfig({
      config: this.options.docsConfig,
      path: this.options.path,
    });
    this.documentarian = new Documentarian(this.explorBot.getProvider(), this.config);
    this.ensureDirectory(this.configParser.getOutputDir());
    this.ensureDirectory(this.getPagesDir());
  }

  /** Stops the underlying ExplorBot instance. */
  async stop() {
    await this.explorBot.stop();
  }

  /**
   * Breadth-first crawl starting at `startPath`.
   *
   * @param {string} startPath - Absolute URL or site-relative path to start from.
   * @param {{ maxPages?: number }} [opts] - Optional page-limit override.
   * @returns {Promise<{ pages: object[], skipped: object[], indexPath: string, outputDir: string }>}
   *   Documented pages, skipped pages with reasons, and output locations.
   */
  async collect(startPath, opts = {}) {
    const effectiveStartPath = this.normalizeStartPath(startPath);
    this.scopeRoot = this.getScopeRoot(effectiveStartPath);
    const effectiveMaxPages = this.getMaxPages(opts.maxPages);
    const queue = [];
    const queued = new Set();
    const documented = new Set();
    const pages = [];
    const skipped = [];
    const baseUrl = this.explorBot.getConfig().playwright.url;
    this.enqueuePath(effectiveStartPath, queue, queued);

    while (queue.length > 0 && pages.length < effectiveMaxPages) {
      const target = queue.shift();
      if (!target) {
        continue;
      }
      const targetKey = this.getPageKey(target);
      if (documented.has(targetKey)) {
        continue;
      }
      const stateManager = this.explorBot.getExplorer().getStateManager();
      if (stateManager.hasVisitedState(target)) {
        continue;
      }
      try {
        tag('info').log(`Collecting docs for ${this.toDisplayUrl(target, baseUrl)}`);
        await this.explorBot.visit(target);
        if (stateManager.isInDeadLoop()) {
          tag('warning').log('Dead loop detected during docs crawl, stopping collection');
          skipped.push({
            url: target,
            reason: 'dead loop detected during crawl',
          });
          break;
        }
        const state = this.explorBot.getCurrentState();
        if (!state) {
          skipped.push({
            url: target,
            reason: 'page state was not captured after navigation',
          });
          continue;
        }
        // The landed URL may differ from the requested one, so the key is
        // recomputed from the captured state and checked again.
        const pageKey = this.getPageKey(state.url || target);
        if (documented.has(pageKey)) {
          continue;
        }
        const research = await this.explorBot.agentResearcher().research(state, {
          screenshot: this.shouldUseScreenshots(),
          force: true,
        });
        const documentation = await this.documentarian.document(state, research);
        const lowSignalReason = this.getLowSignalReason(documentation, research);
        if (lowSignalReason) {
          skipped.push({
            url: state.url,
            reason: lowSignalReason,
          });
          documented.add(pageKey);
          continue;
        }
        const filePath = this.savePageDocumentation(state, documentation);
        pages.push({
          url: state.url,
          title: state.title || '',
          summary: documentation.summary,
          canCount: documentation.can.length,
          mightCount: documentation.might.length,
          canActions: documentation.can.map((item) => item.action),
          mightActions: documentation.might.map((item) => item.action),
          filePath,
        });
        documented.add(pageKey);
        for (const nextPath of this.extractNextPaths(state, baseUrl, research)) {
          if (documented.has(this.getPageKey(nextPath))) {
            continue;
          }
          if (stateManager.hasVisitedState(nextPath)) {
            continue;
          }
          this.enqueuePath(nextPath, queue, queued);
        }
      } catch (error) {
        // A failure on one page is recorded and the crawl moves on.
        const reason = error instanceof Error ? error.message : String(error);
        tag('warning').log(`Skipping ${target}: ${reason}`);
        skipped.push({
          url: target,
          reason,
        });
      }
    }

    const indexPath = this.saveIndex(effectiveStartPath, pages, skipped, effectiveMaxPages);
    return {
      pages,
      skipped,
      indexPath,
      outputDir: this.configParser.getOutputDir(),
    };
  }

  /**
   * Resolves the page limit: a positive override wins, then a positive
   * `docs.maxPages` from config, then 100.
   */
  getMaxPages(override) {
    if (override > 0) {
      return override;
    }
    const configured = this.config.docs?.maxPages;
    if (configured > 0) {
      return configured;
    }
    return 100;
  }

  /** Screenshots are used unless `docs.screenshot` is explicitly `false`. */
  shouldUseScreenshots() {
    return this.config.docs?.screenshot !== false;
  }

  /**
   * Collects candidate paths from the page's links and from navigation targets
   * found in the research text, keeping only crawlable, in-scope, unique ones.
   *
   * @returns {string[]} Candidate paths in discovery order.
   */
  extractNextPaths(state, baseUrl, research) {
    const paths = [];
    const seen = new Set();
    const admit = (candidate) => {
      if (!shouldCrawlDocPath(candidate, this.config)) {
        return;
      }
      if (!this.isInScope(candidate)) {
        return;
      }
      if (seen.has(candidate)) {
        return;
      }
      seen.add(candidate);
      paths.push(candidate);
    };

    for (const link of state.links || []) {
      const nextPath = this.resolveLink(link, baseUrl);
      if (nextPath) {
        admit(nextPath);
      }
    }
    for (const target of extractResearchNavigationTargets(state, research)) {
      admit(target);
    }
    return paths;
  }

  /**
   * Resolves a link against `baseUrl` and returns its path, query and hash.
   *
   * @returns {string|null} `null` when the link has no string `url`, cannot be
   *   parsed, or points to a different origin.
   */
  resolveLink(link, baseUrl) {
    // Without this guard `new URL(undefined, base)` resolves to "/undefined".
    if (typeof link?.url !== 'string') {
      return null;
    }
    let resolved;
    try {
      resolved = new URL(link.url, baseUrl);
    } catch {
      return null;
    }
    const base = new URL(baseUrl);
    if (resolved.origin !== base.origin) {
      return null;
    }
    const pathName = resolved.pathname || '/';
    return `${pathName}${resolved.search}${resolved.hash}`;
  }

  /** Returns `target` as an absolute URL for logging, or unchanged if it cannot be resolved. */
  toDisplayUrl(target, baseUrl) {
    try {
      return new URL(target, baseUrl).toString();
    } catch {
      return target;
    }
  }

  /**
   * Adds a path to the queue unless its page key was queued before. Paths
   * without a leading slash are queued as `/` plus their normalized form.
   */
  enqueuePath(inputPath, queue, queued) {
    const normalized = normalizeUrl(inputPath);
    const pageKey = this.getPageKey(inputPath);
    if (queued.has(pageKey)) {
      return;
    }
    queued.add(pageKey);
    if (!inputPath.startsWith('/')) {
      queue.push(`/${normalized}`);
      return;
    }
    queue.push(inputPath);
  }

  /** Deduplication key for a page URL, as computed by getDocPageKey with the current config. */
  getPageKey(pageUrl) {
    return getDocPageKey(pageUrl, this.config);
  }

  /** Reduces an absolute URL to path + query + hash; other input is returned unchanged. */
  normalizeStartPath(startPath) {
    try {
      const parsed = new URL(startPath);
      return `${parsed.pathname || '/'}${parsed.search}${parsed.hash}`;
    } catch {
      return startPath;
    }
  }

  /** Returns the origin of an absolute URL, or `undefined` when the input is not absolute. */
  extractAbsoluteBaseUrl(startPath) {
    try {
      return new URL(startPath).origin;
    } catch {
      return undefined;
    }
  }

  /**
   * Decides whether a target belongs to the crawl scope from `docs.scope`:
   * 'site' (default) and unknown values accept everything, 'subtree' accepts
   * the scope root and paths below it, 'section' additionally accepts
   * siblings named `<root>-…`.
   */
  isInScope(target) {
    const normalized = this.normalizeStartPath(target);
    const scope = this.config.docs?.scope || 'site';
    if (scope !== 'subtree' && scope !== 'section') {
      return true;
    }
    // Every path lies under the site root; building the prefix "//" from a
    // root of "/" would reject everything except "/" itself.
    if (this.scopeRoot === '/') {
      return true;
    }
    const underRoot = normalized === this.scopeRoot || normalized.startsWith(`${this.scopeRoot}/`);
    if (scope === 'subtree') {
      return underRoot;
    }
    return underRoot || normalized.startsWith(`${this.scopeRoot}-`);
  }

  /** Scope root: the first (at most four) non-empty path segments of the start path, or `/`. */
  getScopeRoot(startPath) {
    const parts = this.normalizeStartPath(startPath).split('/').filter(Boolean);
    if (parts.length === 0) {
      return '/';
    }
    return `/${parts.slice(0, 4).join('/')}`;
  }

  /**
   * Returns a skip reason when a page has fewer proven actions than
   * `docs.minCanActions` (default 1) and fewer interactive elements than
   * `docs.minInteractiveElements` (default 3); otherwise `null`.
   */
  getLowSignalReason(documentation, research) {
    const minCanActions = this.config.docs?.minCanActions ?? 1;
    const minInteractiveElements = this.config.docs?.minInteractiveElements ?? 3;
    if (documentation.can.length >= minCanActions) {
      return null;
    }
    const interactiveCount = this.countInteractiveElements(research);
    if (interactiveCount >= minInteractiveElements) {
      return null;
    }
    return `low-signal page: only ${documentation.can.length} proven actions and ${interactiveCount} interactive elements`;
  }

  /** Sums every "(N element)" / "(N elements)" counter found in the research text. */
  countInteractiveElements(research) {
    let total = 0;
    for (const match of research.matchAll(/\((\d+) elements?\)/g)) {
      total += Number.parseInt(match[1], 10);
    }
    return total;
  }

  /** Writes the rendered page documentation and returns the file path. */
  savePageDocumentation(state, documentation) {
    const pagePath = this.getPageFilePath(state.url);
    writeFileSync(pagePath, renderPageDocumentation(state, documentation), 'utf8');
    return pagePath;
  }

  /** Writes `spec.md` into the output directory and returns its path. */
  saveIndex(startPath, pages, skipped, maxPages) {
    const outputDir = this.configParser.getOutputDir();
    const indexPath = path.join(outputDir, 'spec.md');
    writeFileSync(indexPath, renderSpecIndex(this.configParser.getOutputDir(), startPath, pages, skipped, maxPages), 'utf8');
    return indexPath;
  }

  /** Directory that holds the per-page Markdown files. */
  getPagesDir() {
    return path.join(this.configParser.getOutputDir(), 'pages');
  }

  /** Maps a page URL to its Markdown file path, falling back to `root.md`. */
  getPageFilePath(pageUrl) {
    const normalized = normalizeUrl(pageUrl || '/');
    const baseName = sanitizeFilename(normalized || 'root');
    return path.join(this.getPagesDir(), `${baseName || 'root'}.md`);
  }

  /** Creates the directory (and parents) when it does not exist yet. */
  ensureDirectory(dirPath) {
    if (existsSync(dirPath)) {
      return;
    }
    mkdirSync(dirPath, { recursive: true });
  }
}
|
|
326
|
+
export { DocBot };
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
/**
 * Renders the Markdown document for a single page.
 *
 * Layout: an `# <url>` heading, an optional `Title:` line, then the
 * `## Purpose`, `## User Can` and `## User Might` sections. Each action is a
 * bullet followed by its evidence line; an empty list gets a placeholder bullet.
 *
 * @param {{ url: string, title?: string }} state - Page state being documented.
 * @param {{ summary: string, can: object[], might: object[] }} documentation -
 *   Items expose `action`, `scope` and `evidence`.
 * @returns {string} Markdown ending in exactly one newline.
 */
function renderPageDocumentation(state, documentation) {
  const out = [`# ${state.url}`, ''];

  if (state.title) {
    out.push(`Title: ${normalizeInlineText(state.title)}`, '');
  }

  out.push('## Purpose', '', ensureSentence(documentation.summary), '');

  out.push('## User Can', '');
  const proven = documentation.can;
  if (proven.length === 0) {
    out.push('- No proven actions were identified from the collected research.', '');
  }
  for (const entry of proven) {
    out.push(`- ${normalizeAction(entry.action)} -> ${entry.scope}`);
    out.push(` Proof: ${ensureSentence(entry.evidence)}`);
  }
  if (proven.length > 0) {
    out.push('');
  }

  out.push('## User Might', '');
  const assumed = documentation.might;
  if (assumed.length === 0) {
    out.push('- No assumption-based actions were identified.', '');
  }
  for (const entry of assumed) {
    out.push(`- ${normalizeAction(entry.action, 'might')} -> ${entry.scope}`);
    out.push(` Signal: ${ensureSentence(entry.evidence)}`);
  }
  if (assumed.length > 0) {
    out.push('');
  }

  // Collapse trailing blank lines into a single terminating newline.
  const body = out.join('\n').trimEnd();
  return `${body}\n`;
}
|
|
42
|
+
/**
 * Renders the `spec.md` index: crawl overview, one entry per documented page
 * (linked relative to `outputDir`), and an optional list of skipped pages.
 *
 * @param {string} outputDir - Directory the index is written to; page links are made relative to it.
 * @param {string} startPath - Path the crawl started from.
 * @param {object[]} pages - Entries with `url`, `filePath`, `summary`, `canCount`,
 *   `mightCount`, `title`, `canActions`, `mightActions`.
 * @param {object[]} skipped - Entries with `url` and `reason`.
 * @param {number} maxPages - Page limit that was in effect.
 * @returns {string} Markdown ending in exactly one newline.
 */
function renderSpecIndex(outputDir, startPath, pages, skipped, maxPages) {
  const out = [
    '# Website Spec',
    '',
    '## Overview',
    '',
    `Start page: ${startPath}`,
    `Pages documented: ${pages.length}`,
    `Pages skipped: ${skipped.length}`,
    `Max pages: ${maxPages}`,
    '',
    '## Pages',
    '',
  ];

  // Emits "<heading>" plus one bullet per action; nothing for an empty list.
  const appendActions = (heading, actions, kind) => {
    if (!(actions.length > 0)) {
      return;
    }
    out.push(heading);
    for (const action of actions) {
      out.push(`- ${normalizeAction(action, kind)}`);
    }
    out.push('');
  };

  if (pages.length === 0) {
    out.push('- No pages were documented.', '');
  }

  for (const page of pages) {
    // Links always use forward slashes, including on Windows.
    const relativeFile = path.relative(outputDir, page.filePath).replaceAll('\\', '/');
    out.push(`### [${page.url}](${relativeFile})`, '');
    out.push(`Purpose: ${ensureSentence(page.summary)}`);
    out.push(`Proven actions: ${page.canCount}`);
    out.push(`Possible actions: ${page.mightCount}`);
    if (page.title) {
      out.push(`Title: ${normalizeInlineText(page.title)}`);
    }
    out.push('');
    appendActions('User Can:', page.canActions, 'can');
    appendActions('User Might:', page.mightActions, 'might');
  }

  if (skipped.length > 0) {
    out.push('## Skipped', '');
    for (const entry of skipped) {
      out.push(`- ${entry.url}. Reason: ${ensureSentence(entry.reason)}`);
    }
    out.push('');
  }

  const body = out.join('\n').trimEnd();
  return `${body}\n`;
}
|
|
95
|
+
/**
 * Rewrites an action phrase so it starts with "user can …" (kind `'can'`) or
 * "user might …" (any other kind), without trailing sentence punctuation.
 *
 * A phrase that already carries the requested prefix is returned with its
 * original casing. A phrase carrying the bare modal ("can …", "might …") or
 * the other modal has that prefix replaced. An empty phrase falls back to a
 * generic sentence that uses the requested modal.
 *
 * @param {string} action - Raw action text.
 * @param {string} [kind='can'] - `'can'` for proven actions; anything else is treated as "might".
 * @returns {string} Normalized action phrase.
 */
function normalizeAction(action, kind = 'can') {
  const modal = kind === 'can' ? 'can' : 'might';
  // Strip every trailing terminator so "Open menu..." does not keep stray dots.
  const phrase = normalizeInlineText(action ?? '').replace(/[.!?]+$/, '').trimEnd();
  if (!phrase) {
    return `user ${modal} interact with this page`;
  }

  // First entry is the prefix that is already correct; the rest get replaced.
  const prefixes = modal === 'can'
    ? ['user can ', 'can ', 'user might ']
    : ['user might ', 'might ', 'user can ', 'can '];
  const lower = phrase.toLowerCase();
  if (lower.startsWith(prefixes[0])) {
    return phrase;
  }
  for (const prefix of prefixes.slice(1)) {
    if (lower.startsWith(prefix)) {
      return `user ${modal} ${phrase.slice(prefix.length)}`;
    }
  }
  return `user ${modal} ${phrase}`;
}
|
|
128
|
+
/**
 * Normalizes text to a single line and makes sure it ends with sentence
 * punctuation.
 *
 * @param {string} text - Raw text.
 * @returns {string} Empty string for blank input; otherwise the normalized
 *   text, with a `.` appended unless it already ends in `.`, `!` or `?`.
 */
function ensureSentence(text) {
  const sentence = normalizeInlineText(text);
  if (!sentence) {
    return '';
  }
  const alreadyTerminated = ['.', '!', '?'].some((mark) => sentence.endsWith(mark));
  return alreadyTerminated ? sentence : `${sentence}.`;
}
|
|
138
|
+
/**
 * Collapses text to a single trimmed line: applies Unicode NFKC
 * normalization and replaces every whitespace run with one space.
 *
 * `null` and `undefined` are treated as empty text and other non-string
 * values are stringified, so a missing field renders as blank instead of
 * throwing.
 *
 * @param {unknown} text - Value to normalize.
 * @returns {string} Normalized single-line text.
 */
function normalizeInlineText(text) {
  return String(text ?? '').normalize('NFKC').replace(/\s+/g, ' ').trim();
}
|
|
141
|
+
export { renderPageDocumentation, renderSpecIndex, ensureSentence, normalizeAction };
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { normalizeUrl } from "../../../src/state-manager.js";
|
|
2
|
+
import { matchesUrl, generalizeUrl } from "../../../src/utils/url-matcher.js";
|
|
3
|
+
// Path segments that are skipped by default when `docs.deniedPathSegments` is not configured.
const DEFAULT_DENIED_PATH_SEGMENTS = ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'];

/**
 * Decides whether a discovered path should be crawled.
 *
 * Rules, in order:
 *  1. A path that cannot be parsed as a URL is rejected.
 *  2. When `docs.includePaths` is non-empty it is the only criterion.
 *  3. A match in `docs.excludePaths` rejects the path.
 *  4. The root path is accepted.
 *  5. Any path segment equal (case-insensitively) to a denied segment rejects the path.
 *
 * Only the pathname is inspected; query string and hash are ignored.
 *
 * @param {string} nextPath - Site-relative path or URL.
 * @param {object} [config] - Docs config; reads `docs.includePaths`,
 *   `docs.excludePaths` and `docs.deniedPathSegments`.
 * @returns {boolean} `true` when the path may be crawled.
 */
export function shouldCrawlDocPath(nextPath, config = {}) {
  let parsed;
  try {
    // The base is only a placeholder that lets relative paths parse.
    parsed = new URL(nextPath, 'http://localhost');
  } catch {
    // One malformed candidate (e.g. "//") must not abort the caller's loop.
    return false;
  }

  const normalizedPath = parsed.pathname || '/';
  const segments = parsed.pathname
    .split('/')
    .map((segment) => segment.trim().toLowerCase())
    .filter(Boolean);

  const includePaths = config.docs?.includePaths || [];
  if (includePaths.length > 0) {
    return includePaths.some((pattern) => matchesUrl(pattern, normalizedPath));
  }

  const excludePaths = config.docs?.excludePaths || [];
  if (excludePaths.some((pattern) => matchesUrl(pattern, normalizedPath))) {
    return false;
  }

  if (segments.length === 0) {
    return true;
  }

  const deniedSegments = new Set(
    (config.docs?.deniedPathSegments || DEFAULT_DENIED_PATH_SEGMENTS)
      .map((segment) => segment.trim().toLowerCase())
      .filter(Boolean),
  );
  return !segments.some((segment) => deniedSegments.has(segment));
}
|
|
28
|
+
/**
 * Builds the deduplication key for a page URL.
 *
 * The URL is normalized and given a leading slash. Unless
 * `docs.collapseDynamicPages` is explicitly `false`, it is also passed through
 * `generalizeUrl` before the final normalization.
 *
 * @param {string} pageUrl - Page URL or path; empty values are treated as `/`.
 * @param {object} [config] - Docs config; reads `docs.collapseDynamicPages`.
 * @returns {string} Key used to recognise already-handled pages.
 */
export function getDocPageKey(pageUrl, config = {}) {
  const normalized = normalizeUrl(pageUrl || '/');
  const rooted = normalized.startsWith('/') ? normalized : `/${normalized}`;
  const keepDynamicPages = config.docs?.collapseDynamicPages === false;
  return normalizeUrl(keepDynamicPages ? rooted : generalizeUrl(rooted));
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { parseResearchSections } from "../../../src/ai/researcher/parser.js";
|
|
2
|
+
// Matches "api-<digits>/tag/<slug>" inside a selector and captures the slug.
const OPEN_API_TAG_SELECTOR_PATTERN = /api-\d+\/tag\/([a-z0-9-]+)(?:["'#/\]\s]|$)/i;
// A research section is scanned only when its name contains one of these words.
const OPEN_API_NAVIGATION_SECTION_KEYWORDS = ['navigation', 'menu'];

/**
 * Extracts same-page hash targets from the navigation/menu sections of a
 * research report.
 *
 * @param {{ url?: string }} state - Current page state; `url` defaults to `/`.
 * @param {string} research - Research text parsed by `parseResearchSections`.
 * @returns {string[]} Unique targets in discovery order.
 */
export function extractResearchNavigationTargets(state, research) {
  const pageUrl = state.url || '/';
  // A Set keeps insertion order, which gives deduplication and ordering in one structure.
  const unique = new Set();

  for (const section of parseResearchSections(research)) {
    const loweredName = section.name.toLowerCase();
    const isNavigation = OPEN_API_NAVIGATION_SECTION_KEYWORDS.some((keyword) => loweredName.includes(keyword));
    if (!isNavigation) {
      continue;
    }
    for (const element of section.elements) {
      const target = extractNavigationTarget(pageUrl, element);
      if (target) {
        unique.add(target);
      }
    }
  }

  return [...unique];
}
|
|
25
|
+
/**
 * Derives a same-page hash target for one research element.
 *
 * A tag found in the element's CSS selector wins. Otherwise a tag is inferred
 * from the element label, but only when the current URL already contains
 * `#tag/`.
 *
 * @param {string} currentUrl - URL of the page being researched.
 * @param {{ css?: string, name: string }} element - Research element.
 * @returns {string|null} Target URL, or `null` when no tag can be derived.
 */
function extractNavigationTarget(currentUrl, element) {
  const hashFromCss = extractOpenApiTagHashFromCss(element.css);
  if (hashFromCss) {
    return buildSamePageHashTarget(currentUrl, hashFromCss);
  }

  const onTagPage = currentUrl.includes('#tag/');
  if (!onTagPage) {
    return null;
  }

  const slug = inferOpenApiTagSlugFromLabel(element.name);
  return slug ? buildSamePageHashTarget(currentUrl, `tag/${slug}`) : null;
}
|
|
39
|
+
/**
 * Pulls a `tag/<slug>` hash path out of a CSS selector.
 *
 * @param {string} [css] - Selector text; escaped slashes (`\/`) are unescaped first.
 * @returns {string|null} Lower-cased `tag/<slug>`, or `null` when the selector
 *   is empty or does not match the tag pattern.
 */
function extractOpenApiTagHashFromCss(css) {
  if (!css) {
    return null;
  }
  const unescaped = css.replaceAll('\\/', '/');
  const slug = OPEN_API_TAG_SELECTOR_PATTERN.exec(unescaped)?.[1];
  return slug ? `tag/${slug.toLowerCase()}` : null;
}
|
|
50
|
+
/**
 * Infers a tag slug from a navigation label such as "Users / Accounts".
 *
 * Surrounding single quotes and UI state hints ("(expanded)", "(collapsed)",
 * "open group", "close group", "show more") are removed. A label without `/`
 * is not treated as a tag. The remaining parts are lower-cased, joined with
 * `-`, and reduced to `[a-z0-9-]`.
 *
 * @param {string} name - Element label.
 * @returns {string|null} Slug, or `null` when the label is not a string, has
 *   no `/`, or yields an empty slug.
 */
function inferOpenApiTagSlugFromLabel(name) {
  // A missing label would otherwise throw on `.replace`.
  if (typeof name !== 'string') {
    return null;
  }

  const cleanedLabel = name
    .replace(/^'+|'+$/g, '')
    .replace(/\(expanded\)|\(collapsed\)|open group|close group|show more/gi, '')
    .trim();
  if (!cleanedLabel.includes('/')) {
    return null;
  }

  const slug = cleanedLabel
    .split('/')
    .map((part) => part.trim().toLowerCase())
    .filter(Boolean)
    .join('-')
    .replace(/[^a-z0-9-]+/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-|-$/g, '');
  return slug || null;
}
|
|
68
|
+
/**
 * Replaces (or adds) the hash of a URL.
 *
 * @param {string} currentUrl - URL whose path and query are kept.
 * @param {string} hashPath - New hash content, without the leading `#`.
 * @returns {string} `currentUrl` up to its first `#`, followed by `#<hashPath>`.
 */
function buildSamePageHashTarget(currentUrl, hashPath) {
  const hashIndex = currentUrl.indexOf('#');
  const withoutHash = hashIndex === -1 ? currentUrl : currentUrl.slice(0, hashIndex);
  return `${withoutHash}#${hashPath}`;
}
|
package/dist/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "explorbot",
|
|
3
|
-
"version": "0.1.16",
|
|
3
|
+
"version": "0.1.18",
|
|
4
4
|
"description": "CLI app built with React Ink, CodeceptJS, and Playwright",
|
|
5
5
|
"license": "Elastic-2.0",
|
|
6
6
|
"type": "module",
|
|
@@ -20,6 +20,9 @@
|
|
|
20
20
|
"src/**/*.tsx",
|
|
21
21
|
"bin/**/*.ts",
|
|
22
22
|
"boat/api-tester/src/**/*.ts",
|
|
23
|
+
"boat/doc-collector/src/**/*.ts",
|
|
24
|
+
"boat/doc-collector/bin/**/*.ts",
|
|
25
|
+
"boat/doc-collector/package.json",
|
|
23
26
|
"rules/",
|
|
24
27
|
"assets/sample-files/"
|
|
25
28
|
],
|
package/dist/src/ai/pilot.js
CHANGED
|
@@ -277,14 +277,9 @@ export class Pilot {
|
|
|
277
277
|
- "Edit X" → updated value must be persisted (visible in list/detail). Opening edit is NOT enough; redirect after save with the new value visible IS enough.
|
|
278
278
|
- Negative tests ("without a name", "invalid", "duplicate", "unauthorized") → success means the system PREVENTED the action with validation/error.
|
|
279
279
|
|
|
280
|
-
PROVENANCE
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
matching the goal by text alone but missing the marker is a stale leftover from a prior
|
|
284
|
-
run — it is NOT evidence the current scenario produced anything. Vote \`fail\`, not \`pass\`.
|
|
285
|
-
This does not apply when the field is restricted (numeric only, enum, etc.) or when the
|
|
286
|
-
session_log shows no fillField/type/select actions were attempted at all (in that case
|
|
287
|
-
the scenario clearly didn't run — also vote \`fail\`).
|
|
280
|
+
PROVENANCE: the entity you cite as proof must appear by name in <notes> or
|
|
281
|
+
<session_log> tool inputs for THIS run. Name absent from tester activity = stale
|
|
282
|
+
coincidence, vote \`fail\`. Same if no fillField/type/select/click on a target ran.
|
|
288
283
|
|
|
289
284
|
Expected results are MILESTONES, not the goal. Never fail because a milestone (toast, icon, styling)
|
|
290
285
|
didn't match if the scenario goal IS accomplished.
|
|
@@ -182,7 +182,7 @@ export function WithCoordinates(Base) {
|
|
|
182
182
|
const eidxWithoutCoords = [];
|
|
183
183
|
for (const section of sections) {
|
|
184
184
|
for (const el of section.elements) {
|
|
185
|
-
if (el.eidx && !el.coordinates)
|
|
185
|
+
if (el.eidx && /^e\d+$/i.test(el.eidx) && !el.coordinates)
|
|
186
186
|
eidxWithoutCoords.push(el.eidx);
|
|
187
187
|
}
|
|
188
188
|
}
|
|
@@ -40,6 +40,9 @@ export function mapRowToElement(row) {
|
|
|
40
40
|
let eidxRaw = (colMap.eidx || '').trim();
|
|
41
41
|
if (eidxRaw && /^\d+$/.test(eidxRaw))
|
|
42
42
|
eidxRaw = `e${eidxRaw}`;
|
|
43
|
+
if (eidxRaw && !/^e\d+$/i.test(eidxRaw)) {
|
|
44
|
+
eidxRaw = '';
|
|
45
|
+
}
|
|
43
46
|
const aria = parseAriaLocator(colMap.aria || '-');
|
|
44
47
|
return {
|
|
45
48
|
name,
|
|
@@ -90,7 +90,8 @@ export class Researcher extends ResearcherBase {
|
|
|
90
90
|
Stats.researches++;
|
|
91
91
|
const sessionName = `researcher: ${state.url}`;
|
|
92
92
|
return Observability.run(sessionName, { tags: ['researcher'], sessionId: stateHash }, async () => {
|
|
93
|
-
|
|
93
|
+
const displayUrl = state.fullUrl || state.url;
|
|
94
|
+
tag('info').log(`Researching ${displayUrl} to understand the context...`);
|
|
94
95
|
setActivity(`${this.emoji} Researching...`, 'action');
|
|
95
96
|
await this.ensureNavigated(state.url, screenshot && this.provider.hasVision());
|
|
96
97
|
await this.hooksRunner.runBeforeHook('researcher', state.url);
|
package/dist/src/ai/tester.js
CHANGED
|
@@ -648,6 +648,7 @@ export class Tester extends TaskAgent {
|
|
|
648
648
|
- Use pressKey() for pressing special keys (Enter, Escape, Tab, Arrow keys) or key combinations with modifiers (Ctrl+A, Shift+Delete, etc.)
|
|
649
649
|
- Use container CSS locators from <page_ui_map> to interact with elements inside sections
|
|
650
650
|
- Systematically use record({ notes: ["..."] }) to write your findings, planned actions, observations, etc.
|
|
651
|
+
- When creating/editing/deleting a named entity, include its identifier verbatim in the note — Pilot uses it to confirm provenance.
|
|
651
652
|
- Call record({ notes: ["..."], status: "success" }) when you see success/info message on a page or when expected outcome is achieved
|
|
652
653
|
- Call record({ notes: ["..."], status: "fail" }) when an expected outcome cannot be achieved or has failed or you see error/alert/warning message on a page
|
|
653
654
|
- NEVER call record(status: "success") if your last verify() or see() call FAILED. A failed check means the outcome is NOT confirmed — use record(status: "fail") instead, or retry with a different approach.
|