explorbot 0.1.16 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/bin/explorbot-cli.ts +14 -1
  2. package/boat/doc-collector/bin/doc-collector-cli.ts +5 -0
  3. package/boat/doc-collector/package.json +24 -0
  4. package/boat/doc-collector/src/ai/documentarian.ts +184 -0
  5. package/boat/doc-collector/src/cli.ts +119 -0
  6. package/boat/doc-collector/src/config.ts +162 -0
  7. package/boat/doc-collector/src/docbot.ts +391 -0
  8. package/boat/doc-collector/src/docs-renderer.ts +187 -0
  9. package/boat/doc-collector/src/path-filter.ts +46 -0
  10. package/boat/doc-collector/src/research-navigation.ts +90 -0
  11. package/dist/bin/explorbot-cli.js +15 -1
  12. package/dist/boat/doc-collector/bin/doc-collector-cli.js +4 -0
  13. package/dist/boat/doc-collector/src/ai/documentarian.js +157 -0
  14. package/dist/boat/doc-collector/src/cli.js +104 -0
  15. package/dist/boat/doc-collector/src/config.js +129 -0
  16. package/dist/boat/doc-collector/src/docbot.js +326 -0
  17. package/dist/boat/doc-collector/src/docs-renderer.js +141 -0
  18. package/dist/boat/doc-collector/src/path-filter.js +35 -0
  19. package/dist/boat/doc-collector/src/research-navigation.js +71 -0
  20. package/dist/package.json +4 -1
  21. package/dist/src/ai/pilot.js +3 -8
  22. package/dist/src/ai/researcher/coordinates.js +1 -1
  23. package/dist/src/ai/researcher/parser.js +3 -0
  24. package/dist/src/ai/researcher.js +2 -1
  25. package/dist/src/ai/tester.js +1 -0
  26. package/dist/src/commands/explore-command.js +359 -43
  27. package/dist/src/config.js +10 -3
  28. package/dist/src/explorbot.js +19 -5
  29. package/dist/src/explorer.js +14 -1
  30. package/dist/src/state-manager.js +3 -0
  31. package/dist/src/utils/test-plan-markdown.js +8 -1
  32. package/dist/src/utils/url-matcher.js +5 -3
  33. package/dist/src/utils/web-element.js +3 -2
  34. package/package.json +4 -1
  35. package/src/ai/pilot.ts +3 -8
  36. package/src/ai/researcher/coordinates.ts +1 -1
  37. package/src/ai/researcher/parser.ts +3 -0
  38. package/src/ai/researcher.ts +2 -1
  39. package/src/ai/tester.ts +1 -0
  40. package/src/commands/explore-command.ts +362 -42
  41. package/src/config.ts +13 -3
  42. package/src/explorbot.ts +22 -7
  43. package/src/explorer.ts +12 -1
  44. package/src/state-manager.ts +4 -0
  45. package/src/utils/test-plan-markdown.ts +8 -1
  46. package/src/utils/url-matcher.ts +5 -2
  47. package/src/utils/web-element.ts +3 -2
@@ -122,7 +122,15 @@ addCommonOptions(program.command('start [path]').description('Start web explorat
122
122
  await startTUI(explorBot);
123
123
  });
124
124
 
125
- addCommonOptions(program.command('explore <path>').description('Explore a page autonomously and run invented scenarios').option('--max-tests <count>', 'Maximum number of tests to run').option('--focus <feature>', 'Focus area for exploration')).action(async (explorePath, options) => {
125
+ addCommonOptions(
126
+ program
127
+ .command('explore <path>')
128
+ .description('Explore a page autonomously and run invented scenarios')
129
+ .option('--max-tests <count>', 'Maximum number of tests to run')
130
+ .option('--focus <feature>', 'Focus area for exploration')
131
+ .option('--configure <spec>', 'Reuse spec: keys new|from|style|subpages|pick_by|priority, e.g. "new:25%;pick_by=random;priority=critical,high"')
132
+ .option('--dry-run', 'Mark picked tests as skipped without executing or generating new ones')
133
+ ).action(async (explorePath, options) => {
126
134
  try {
127
135
  const explorBot = new ExplorBot(buildExplorBotOptions(explorePath, options));
128
136
  await explorBot.start();
@@ -130,8 +138,11 @@ addCommonOptions(program.command('explore <path>').description('Explore a page a
130
138
  const { ExploreCommand } = await import('../src/commands/explore-command.js');
131
139
  const cmd = new ExploreCommand(explorBot);
132
140
  if (options.maxTests) cmd.maxTests = Number.parseInt(options.maxTests, 10);
141
+ if (options.dryRun) cmd.dryRun = true;
133
142
  const execArgs: string[] = [];
134
143
  if (options.focus) execArgs.push('--focus', `"${options.focus}"`);
144
+ if (options.configure) execArgs.push('--configure', `"${options.configure}"`);
145
+ if (options.dryRun) execArgs.push('--dry-run');
135
146
  await cmd.execute(execArgs.join(' '));
136
147
  await explorBot.stop();
137
148
  await showStatsAndExit(0);
@@ -812,6 +823,8 @@ program
812
823
  });
813
824
 
814
825
  import { createApiCommands } from '../boat/api-tester/src/cli.ts';
826
+ import { createDocsCommands } from '../boat/doc-collector/src/cli.ts';
815
827
  program.addCommand(createApiCommands('api'));
828
+ program.addCommand(createDocsCommands('docs'));
816
829
 
817
830
  program.parse();
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env bun
2
+ import { createDocsCommands } from '../src/cli.ts';
3
+
4
// Standalone entry point: build the `doc-collector` command tree and let
// commander parse the process arguments.
createDocsCommands('doc-collector').parse();
@@ -0,0 +1,24 @@
1
+ {
2
+ "name": "doc-collector",
3
+ "version": "1.0.0",
4
+ "description": "AI-powered website documentation collector",
5
+ "type": "module",
6
+ "bin": {
7
+ "doc-collector": "./bin/doc-collector-cli.ts"
8
+ },
9
+ "scripts": {
10
+ "format": "biome format --write .",
11
+ "lint:fix": "biome lint --write .",
12
+ "check:fix": "biome check --write ."
13
+ },
14
+ "dependencies": {
15
+ "ai": "^6.0.6",
16
+ "commander": "^14.0.1",
17
+ "dedent": "^1.6.0",
18
+ "zod": "^4.1.8"
19
+ },
20
+ "devDependencies": {
21
+ "@biomejs/biome": "^1.5.3",
22
+ "typescript": "^5.0.0"
23
+ }
24
+ }
@@ -0,0 +1,184 @@
1
+ import dedent from 'dedent';
2
+ import { z } from 'zod';
3
+ import type { AIProvider } from '../../../../src/ai/provider.ts';
4
+ import type { WebPageState } from '../../../../src/state-manager.ts';
5
+ import type { DocbotConfig } from '../config.ts';
6
+
7
/**
 * Turns free-form UI research about a page into structured documentation
 * (`summary`, `can`, `might`) by asking the AI provider for an object that
 * matches `pageDocumentationSchema`.
 */
class Documentarian {
  /**
   * @param provider AI provider used to generate the structured object.
   * @param config Doc collector configuration; only `docs.prompt` is read here.
   */
  constructor(
    private provider: AIProvider,
    private config: DocbotConfig = {}
  ) {}

  /**
   * Generates documentation for a page.
   *
   * The first attempt uses the research text as-is. If it fails with an error
   * recognised by `shouldRetryWithSanitizedResearch`, one more attempt is made
   * with the sanitized research and the fallback note enabled. Any other error
   * is rethrown unchanged.
   *
   * @param state Current page state (URL, title, headings, links).
   * @param research Research text describing the page.
   * @returns The structured page documentation returned by the provider.
   */
  async document(state: WebPageState, research: string): Promise<PageDocumentation> {
    try {
      return await this.generateDocumentation(state, research);
    } catch (failure) {
      if (this.shouldRetryWithSanitizedResearch(failure)) {
        return this.generateDocumentation(state, this.sanitizeResearch(research), true);
      }
      throw failure;
    }
  }

  /** Builds the system prompt; `docs.prompt` from the config is appended when set. */
  private getSystemPrompt(): string {
    const promptSuffix = this.config.docs?.prompt || '';

    return dedent`
      <role>
      You are a product analyst preparing functional website documentation from UI research.
      </role>

      <task>
      Convert exploratory UI research into a precise spec of what users can do on the current page.
      Distinguish proven capabilities from assumptions.
      Prefer accuracy over coverage.
      </task>

      <rules>
      Only list capabilities that are grounded in the provided page research.
      Put actions into "can" only when there is direct evidence in the page context.
      Put actions into "might" only when the UI strongly suggests a capability but proof is incomplete.
      Describe each action from the end-user perspective.
      Be explicit about scope:
      - one item
      - list of items
      - bulk operations
      - all items
      - page-level
      Avoid implementation details, selectors, and QA wording.
      Avoid duplicate actions with different phrasing.
      </rules>

      ${promptSuffix}
    `;
  }

  /**
   * Builds the user prompt from the page state and research text.
   *
   * @param state Page state; headings h1-h4 and at most the first 50 links are included.
   * @param research Research text embedded verbatim in the `<research>` section.
   * @param simplified When true, a `<fallback_mode>` note is added to the prompt.
   */
  private buildPrompt(state: WebPageState, research: string, simplified = false): string {
    const headingValues = [state.h1, state.h2, state.h3, state.h4].filter(Boolean);
    const headings = headingValues.join(' | ');

    const linkLines: string[] = [];
    for (const link of (state.links || []).slice(0, 50)) {
      linkLines.push(`- ${link.title}: ${link.url}`);
    }
    const links = linkLines.join('\n');

    let simplificationNote = '';
    if (simplified) {
      simplificationNote = dedent`
        <fallback_mode>
        The research text was simplified because the original formatting was noisy.
        Ignore malformed table syntax and rely only on clear, repeated signals.
        Prefer fewer actions over speculative coverage.
        </fallback_mode>
      `;
    }

    return dedent`
      <page>
      URL: ${state.url}
      Title: ${state.title || ''}
      Headings: ${headings}
      </page>

      <navigation_links>
      ${links}
      </navigation_links>

      <research>
      ${research}
      </research>

      ${simplificationNote}

      <output_requirements>
      Return structured data.
      summary: short page purpose statement.
      can: actions you are 100% sure are available on page.
      might: actions that look possible but are not fully proven.
      For each action provide:
      - action: concise user-facing capability phrased as "user can ..."
      - scope: one of one item, list of items, bulk operations, all items, page-level
      - evidence: short reason based on visible UI or research
      </output_requirements>
    `;
  }

  /**
   * Sends the system and user prompts to the provider and returns the
   * generated object.
   */
  private async generateDocumentation(state: WebPageState, research: string, simplified = false): Promise<PageDocumentation> {
    const systemMessage = { role: 'system' as const, content: this.getSystemPrompt() };
    const userMessage = { role: 'user' as const, content: this.buildPrompt(state, research, simplified) };

    const response = await this.provider.generateObject([systemMessage, userMessage], pageDocumentationSchema, undefined, {
      agentName: 'documentarian',
    });

    return response.object as PageDocumentation;
  }

  /**
   * Decides whether a failed generation is worth one retry: true when the
   * error text contains one of the known JSON-generation failure markers.
   */
  private shouldRetryWithSanitizedResearch(error: unknown): boolean {
    const text = error instanceof Error ? error.message : String(error);
    return ['Failed to generate JSON', 'failed_generation'].some((marker) => text.includes(marker));
  }

  /**
   * Removes pipe-containing lines that do not look like well-formed table
   * rows; every other line is kept in its original order.
   */
  private sanitizeResearch(research: string): string {
    return research
      .split('\n')
      .filter((line) => Documentarian.isUsableResearchLine(line))
      .join('\n');
  }

  /**
   * Line filter used by `sanitizeResearch`.
   *
   * Kept: blank lines, lines without `|`, separator rows containing `|------`
   * (with at least two pipes), and rows that start with `|` and contain at
   * least four pipes. Everything else is dropped.
   */
  private static isUsableResearchLine(line: string): boolean {
    const trimmed = line.trim();
    if (trimmed === '' || !line.includes('|')) {
      return true;
    }

    const pipeTotal = line.split('|').length - 1;
    if (pipeTotal < 2) {
      return false;
    }

    if (line.includes('|------')) {
      return true;
    }

    return trimmed.startsWith('|') && pipeTotal >= 4;
  }
}
168
+
169
/** Scope labels a capability may carry; the same list is spelled out in the prompts. */
const capabilityScopes = ['one item', 'list of items', 'bulk operations', 'all items', 'page-level'] as const;

/** One user-facing capability: what the user can do, at which scope, and why we think so. */
const capabilitySchema = z.object({
  action: z.string(),
  scope: z.enum(capabilityScopes),
  evidence: z.string(),
});

/** Structured documentation for a single page: a summary plus proven (`can`) and probable (`might`) capabilities. */
const pageDocumentationSchema = z.object({
  summary: z.string(),
  can: z.array(capabilitySchema),
  might: z.array(capabilitySchema),
});

/** Static type inferred from `pageDocumentationSchema`. */
type PageDocumentation = z.infer<typeof pageDocumentationSchema>;
182
+
183
+ export { Documentarian };
184
+ export type { PageDocumentation };
@@ -0,0 +1,119 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { Command } from 'commander';
4
+ import { setPreserveConsoleLogs } from '../../../src/utils/logger.ts';
5
+ import { DocBot, type DocbotOptions } from './docbot.ts';
6
+
7
/**
 * Maps parsed commander options onto `DocbotOptions`.
 *
 * `--session` given without a file name arrives as `true` and is replaced by
 * the default session file `output/session.json`; `--debug` implies verbose.
 *
 * @param options Parsed CLI options object.
 * @returns Options for constructing a `DocBot`.
 */
function buildOptions(options: any): DocbotOptions {
  const { config, path: workingPath, show, headless, incognito, docsConfig } = options;
  const session = options.session === true ? 'output/session.json' : options.session;

  return {
    verbose: options.verbose || options.debug,
    config,
    path: workingPath,
    show,
    headless,
    incognito,
    session,
    docsConfig,
  };
}
24
+
25
/**
 * Registers the options shared by doc collector subcommands on `cmd`.
 *
 * Options are added in the listed order by chaining `option()` calls, and the
 * result of the final call is returned.
 *
 * @param cmd Command to extend.
 * @returns The command returned by the last `option()` call.
 */
function addCommonOptions(cmd: Command): Command {
  const sharedOptions: Array<[flags: string, description: string]> = [
    ['-v, --verbose', 'Enable verbose logging'],
    ['--debug', 'Enable debug logging'],
    ['-c, --config <path>', 'Path to explorbot configuration file'],
    ['--docs-config <path>', 'Path to doc collector configuration file'],
    ['-p, --path <path>', 'Working directory path'],
    ['-s, --show', 'Show browser window'],
    ['--headless', 'Run browser in headless mode'],
    ['--incognito', 'Run without recording experiences'],
    ['--session [file]', 'Save/restore browser session from file'],
  ];

  return sharedOptions.reduce((current, [flags, description]) => current.option(flags, description), cmd);
}
37
+
38
/**
 * Builds the doc collector command group.
 *
 * Subcommands:
 * - `collect <path>`: starts a `DocBot`, collects documentation beginning at
 *   `<path>`, prints a short report, and exits the process (0 on success, 1 on
 *   failure).
 * - `init`: writes a starter `docbot.config.ts` into the current directory or
 *   the directory given with `--path`.
 *
 * @param name Name of the command group (defaults to `docs`).
 * @returns The configured commander `Command`.
 */
export function createDocsCommands(name = 'docs'): Command {
  const cmd = new Command(name);
  cmd.description('AI-powered website documentation collector');

  addCommonOptions(
    cmd
      .command('collect <path>')
      .description('Crawl pages and generate documentation spec')
      .option('--max-pages <count>', 'Maximum number of pages to document')
  ).action(async (startPath, options) => {
    setPreserveConsoleLogs(true);

    try {
      // Validate CLI input before starting the bot so a typo fails fast
      // instead of passing NaN on to collect().
      let maxPages: number | undefined;
      if (options.maxPages) {
        maxPages = Number.parseInt(options.maxPages, 10);
        if (Number.isNaN(maxPages)) {
          throw new Error(`Invalid --max-pages value: ${options.maxPages}`);
        }
      }

      const bot = new DocBot({
        ...buildOptions(options),
        startUrl: startPath,
      });
      await bot.start();

      const result = await bot.collect(startPath, { maxPages });

      console.log(`\nDocumented ${result.pages.length} page(s)`);
      console.log(`Skipped ${result.skipped.length} page(s)`);
      console.log(`Spec index: ${result.indexPath}`);
      console.log(`Pages dir: ${path.join(result.outputDir, 'pages')}`);

      await bot.stop();
      process.exit(0);
    } catch (error) {
      // Keep whatever detail a non-Error throwable carries instead of hiding it.
      console.error('Failed:', error instanceof Error ? error.message : String(error));
      process.exit(1);
    }
  });

  cmd
    .command('init')
    .description('Initialize doc collector configuration')
    .option('-f, --force', 'Overwrite existing config file')
    .option('-p, --path <path>', 'Working directory for initialization')
    .action(async (options) => {
      // Resolve the target directory explicitly rather than changing the
      // process-wide working directory and restoring it afterwards.
      const targetDir = options.path ? path.resolve(options.path) : process.cwd();
      if (options.path) {
        fs.mkdirSync(targetDir, { recursive: true });
        console.log(`Working in: ${targetDir}`);
      }

      const configPath = path.join(targetDir, 'docbot.config.ts');
      if (fs.existsSync(configPath) && !options.force) {
        console.log(`Config file already exists: ${configPath}`);
        console.log('Use --force to overwrite.');
        process.exit(1);
      }

      const configContent = `export default {
  docs: {
    maxPages: 100,
    output: 'docs',
    screenshot: true,
    collapseDynamicPages: true,
    scope: 'site',
    includePaths: [],
    excludePaths: [],
    deniedPathSegments: ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'],
    minCanActions: 1,
    minInteractiveElements: 3,
    // prompt: 'Add domain-specific documentation guidance here',
  },
};
`;

      fs.writeFileSync(configPath, configContent, 'utf8');
      console.log(`Created: ${configPath}`);
    });

  return cmd;
}
@@ -0,0 +1,162 @@
1
+ import { existsSync, readFileSync } from 'node:fs';
2
+ import path, { resolve } from 'node:path';
3
+ import { parseEnv } from 'node:util';
4
+ import { ConfigParser } from '../../../src/config.ts';
5
+
6
/**
 * Singleton that loads the doc collector configuration, merges it over the
 * built-in defaults, and remembers the result.
 */
class DocbotConfigParser {
  private static instance: DocbotConfigParser;
  private config: DocbotConfig | null = null;
  private configPath: string | null = null;

  private constructor() {}

  /** Returns the shared parser instance, creating it on first use. */
  static getInstance(): DocbotConfigParser {
    if (!DocbotConfigParser.instance) {
      DocbotConfigParser.instance = new DocbotConfigParser();
    }
    return DocbotConfigParser.instance;
  }

  /**
   * Copies the variables of an env file into `process.env`. A missing file is
   * ignored; variables that are already set are overwritten.
   *
   * @param filePath Env file path, resolved against the current working directory.
   */
  static loadEnv(filePath: string): void {
    const resolved = resolve(filePath);
    if (!existsSync(resolved)) return;
    Object.assign(process.env, parseEnv(readFileSync(resolved, 'utf8')));
  }

  /**
   * Loads the configuration and merges it over the defaults.
   *
   * Without `config`/`path` a previously loaded configuration is returned
   * as-is. When `path` is given the process temporarily switches to that
   * directory: `.env` is read from there, a relative `config` is resolved
   * against it, and the config file lookup happens there. The original working
   * directory is restored afterwards, also when loading fails.
   *
   * @param options.config Explicit config file; relative paths are resolved
   *   against the working directory in effect (that is, `options.path` when given).
   * @param options.path Working directory to load from.
   * @returns The merged configuration (defaults only when no file is found).
   */
  async loadConfig(options?: { config?: string; path?: string }): Promise<DocbotConfig> {
    if (this.config && !options?.config && !options?.path) {
      return this.config;
    }

    const originalCwd = process.cwd();
    if (options?.path) {
      process.chdir(resolve(options.path));
    }

    try {
      // Inside the try so a failing env file cannot leave the process in the
      // temporary working directory.
      DocbotConfigParser.loadEnv('.env');

      // import()/require resolve relative specifiers against the importing
      // module, not the working directory, so make the path absolute here.
      const resolvedPath = options?.config ? resolve(options.config) : this.findConfigFile();
      if (!resolvedPath) {
        this.config = this.mergeWithDefaults({});
        this.configPath = null;
        return this.config;
      }

      const configModule = await this.loadConfigModule(resolvedPath);
      const loadedConfig = configModule.default || configModule;
      this.config = this.mergeWithDefaults(loadedConfig || {});
      this.configPath = resolvedPath;
      return this.config;
    } finally {
      if (options?.path && originalCwd !== process.cwd()) {
        process.chdir(originalCwd);
      }
    }
  }

  /** Returns the loaded configuration, or the defaults when nothing has been loaded yet. */
  getConfig(): DocbotConfig {
    return this.config ?? this.mergeWithDefaults({});
  }

  /** Returns the absolute path of the loaded config file, or `null` when defaults are in use. */
  getConfigPath(): string | null {
    return this.configPath;
  }

  /** Returns the docs output directory: `docs.output` (or `docs`) under the main output directory. */
  getOutputDir(): string {
    const outputDir = ConfigParser.getInstance().getOutputDir();
    return path.join(outputDir, this.getConfig().docs?.output || 'docs');
  }

  /** Looks for `docbot.config.{js,mjs,ts}` in the current working directory, in that order. */
  private findConfigFile(): string | null {
    const possiblePaths = ['docbot.config.js', 'docbot.config.mjs', 'docbot.config.ts'];

    for (const candidate of possiblePaths) {
      const fullPath = resolve(process.cwd(), candidate);
      if (existsSync(fullPath)) {
        return fullPath;
      }
    }

    return null;
  }

  /**
   * Loads a config file by extension: `.ts` via `import()` with a `require`
   * fallback, `.js`/`.mjs` via `import()`, anything else as JSON.
   *
   * @param configPath Absolute path of the config file.
   */
  private async loadConfigModule(configPath: string): Promise<any> {
    const ext = path.extname(configPath).slice(1);

    if (ext === 'ts') {
      try {
        return await import(configPath);
      } catch {
        // Fall back to require. The specifier is absolute, so anchoring the
        // require function at the config file does not affect resolution.
        const { createRequire } = await import('node:module');
        return createRequire(configPath)(configPath);
      }
    }

    if (ext === 'js' || ext === 'mjs') {
      return await import(configPath);
    }

    return JSON.parse(readFileSync(configPath, 'utf8'));
  }

  /** Merges a user configuration over the built-in defaults. */
  private mergeWithDefaults(config: Partial<DocbotConfig>): DocbotConfig {
    return this.deepMerge(
      {
        docs: {
          maxPages: 100,
          output: 'docs',
          screenshot: true,
          collapseDynamicPages: true,
          scope: 'site',
          includePaths: [],
          excludePaths: [],
          deniedPathSegments: ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'],
          minCanActions: 1,
          minInteractiveElements: 3,
        },
      },
      config
    );
  }

  /**
   * Returns a copy of `target` with `source` merged in. Plain objects are
   * merged recursively; arrays and all other values replace the target value.
   * Only own keys of `source` are read, and keys that would rewrite the
   * prototype chain are skipped.
   */
  private deepMerge(target: any, source: any): any {
    const result = { ...target };

    for (const key of Object.keys(source ?? {})) {
      if (key === '__proto__' || key === 'constructor' || key === 'prototype') {
        continue;
      }

      const value = source[key];
      if (DocbotConfigParser.isPlainObject(value)) {
        // Only merge into an existing plain object; spreading a string or an
        // array as the merge base would produce index keys.
        const base = DocbotConfigParser.isPlainObject(result[key]) ? result[key] : {};
        result[key] = this.deepMerge(base, value);
        continue;
      }
      result[key] = value;
    }

    return result;
  }

  /** True for non-null, non-array objects whose constructor is `Object`. */
  private static isPlainObject(value: unknown): boolean {
    return Boolean(value) && typeof value === 'object' && !Array.isArray(value) && (value as object).constructor === Object;
  }
}
144
+
145
/**
 * Settings of the `docs` section. Every field is optional; the defaults
 * quoted below are the ones `DocbotConfigParser` merges in.
 */
interface DocbotDocsSettings {
  /** Maximum number of pages to document (default 100). */
  maxPages?: number;
  /** Directory name joined under the main output directory (default `docs`). */
  output?: string;
  /** Default `true`. */
  screenshot?: boolean;
  /** Extra guidance appended to the documentarian system prompt; unset by default. */
  prompt?: string;
  /** Default `true`. */
  collapseDynamicPages?: boolean;
  /** Default `site`. */
  scope?: 'site' | 'section' | 'subtree';
  /** Default `[]`. */
  includePaths?: string[];
  /** Default `[]`. */
  excludePaths?: string[];
  /** Defaults to callback, logout/sign-out, and destroy/delete/remove style segments. */
  deniedPathSegments?: string[];
  /** Default 1. */
  minCanActions?: number;
  /** Default 3. */
  minInteractiveElements?: number;
}

/** Root shape of a doc collector configuration file. */
interface DocbotConfig {
  docs?: DocbotDocsSettings;
}
160
+
161
+ export { DocbotConfigParser };
162
+ export type { DocbotConfig };