explorbot 0.1.16 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47):
  1. package/bin/explorbot-cli.ts +14 -1
  2. package/boat/doc-collector/bin/doc-collector-cli.ts +5 -0
  3. package/boat/doc-collector/package.json +24 -0
  4. package/boat/doc-collector/src/ai/documentarian.ts +184 -0
  5. package/boat/doc-collector/src/cli.ts +119 -0
  6. package/boat/doc-collector/src/config.ts +162 -0
  7. package/boat/doc-collector/src/docbot.ts +391 -0
  8. package/boat/doc-collector/src/docs-renderer.ts +187 -0
  9. package/boat/doc-collector/src/path-filter.ts +46 -0
  10. package/boat/doc-collector/src/research-navigation.ts +90 -0
  11. package/dist/bin/explorbot-cli.js +15 -1
  12. package/dist/boat/doc-collector/bin/doc-collector-cli.js +4 -0
  13. package/dist/boat/doc-collector/src/ai/documentarian.js +157 -0
  14. package/dist/boat/doc-collector/src/cli.js +104 -0
  15. package/dist/boat/doc-collector/src/config.js +129 -0
  16. package/dist/boat/doc-collector/src/docbot.js +326 -0
  17. package/dist/boat/doc-collector/src/docs-renderer.js +141 -0
  18. package/dist/boat/doc-collector/src/path-filter.js +35 -0
  19. package/dist/boat/doc-collector/src/research-navigation.js +71 -0
  20. package/dist/package.json +4 -1
  21. package/dist/src/ai/pilot.js +3 -8
  22. package/dist/src/ai/researcher/coordinates.js +1 -1
  23. package/dist/src/ai/researcher/parser.js +3 -0
  24. package/dist/src/ai/researcher.js +2 -1
  25. package/dist/src/ai/tester.js +1 -0
  26. package/dist/src/commands/explore-command.js +359 -43
  27. package/dist/src/config.js +10 -3
  28. package/dist/src/explorbot.js +19 -5
  29. package/dist/src/explorer.js +14 -1
  30. package/dist/src/state-manager.js +3 -0
  31. package/dist/src/utils/test-plan-markdown.js +8 -1
  32. package/dist/src/utils/url-matcher.js +5 -3
  33. package/dist/src/utils/web-element.js +3 -2
  34. package/package.json +4 -1
  35. package/src/ai/pilot.ts +3 -8
  36. package/src/ai/researcher/coordinates.ts +1 -1
  37. package/src/ai/researcher/parser.ts +3 -0
  38. package/src/ai/researcher.ts +2 -1
  39. package/src/ai/tester.ts +1 -0
  40. package/src/commands/explore-command.ts +362 -42
  41. package/src/config.ts +13 -3
  42. package/src/explorbot.ts +22 -7
  43. package/src/explorer.ts +12 -1
  44. package/src/state-manager.ts +4 -0
  45. package/src/utils/test-plan-markdown.ts +8 -1
  46. package/src/utils/url-matcher.ts +5 -2
  47. package/src/utils/web-element.ts +3 -2
@@ -93,7 +93,13 @@ addCommonOptions(program.command('start [path]').description('Start web explorat
93
93
  await explorBot.start();
94
94
  await startTUI(explorBot);
95
95
  });
96
- addCommonOptions(program.command('explore <path>').description('Explore a page autonomously and run invented scenarios').option('--max-tests <count>', 'Maximum number of tests to run').option('--focus <feature>', 'Focus area for exploration')).action(async (explorePath, options) => {
96
+ addCommonOptions(program
97
+ .command('explore <path>')
98
+ .description('Explore a page autonomously and run invented scenarios')
99
+ .option('--max-tests <count>', 'Maximum number of tests to run')
100
+ .option('--focus <feature>', 'Focus area for exploration')
101
+ .option('--configure <spec>', 'Reuse spec: keys new|from|style|subpages|pick_by|priority, e.g. "new:25%;pick_by=random;priority=critical,high"')
102
+ .option('--dry-run', 'Mark picked tests as skipped without executing or generating new ones')).action(async (explorePath, options) => {
97
103
  try {
98
104
  const explorBot = new ExplorBot(buildExplorBotOptions(explorePath, options));
99
105
  await explorBot.start();
@@ -102,9 +108,15 @@ addCommonOptions(program.command('explore <path>').description('Explore a page a
102
108
  const cmd = new ExploreCommand(explorBot);
103
109
  if (options.maxTests)
104
110
  cmd.maxTests = Number.parseInt(options.maxTests, 10);
111
+ if (options.dryRun)
112
+ cmd.dryRun = true;
105
113
  const execArgs = [];
106
114
  if (options.focus)
107
115
  execArgs.push('--focus', `"${options.focus}"`);
116
+ if (options.configure)
117
+ execArgs.push('--configure', `"${options.configure}"`);
118
+ if (options.dryRun)
119
+ execArgs.push('--dry-run');
108
120
  await cmd.execute(execArgs.join(' '));
109
121
  await explorBot.stop();
110
122
  await showStatsAndExit(0);
@@ -736,5 +748,7 @@ program
736
748
  });
737
749
  });
738
750
  import { createApiCommands } from "../boat/api-tester/src/cli.js";
751
+ import { createDocsCommands } from "../boat/doc-collector/src/cli.js";
739
752
  program.addCommand(createApiCommands('api'));
753
+ program.addCommand(createDocsCommands('docs'));
740
754
  program.parse();
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env bun
2
+ import { createDocsCommands } from "../src/cli.js";
3
// Standalone entry point: build the `doc-collector` command tree and let it
// parse the current process arguments.
createDocsCommands('doc-collector').parse();
@@ -0,0 +1,157 @@
1
+ import dedent from 'dedent';
2
+ import { z } from 'zod';
3
/**
 * Turns exploratory UI research about a single page into structured,
 * user-facing documentation by prompting an AI provider.
 *
 * The provider must expose `generateObject(messages, schema, model, options)`
 * and resolve to a value whose `.object` property holds the parsed result.
 */
class Documentarian {
    provider;
    config;

    /**
     * @param {object} provider - AI provider used for structured generation.
     * @param {object} [config={}] - Configuration; only `docs.prompt` is read by this class.
     */
    constructor(provider, config = {}) {
        this.provider = provider;
        this.config = config;
    }

    /**
     * Generates documentation for a page, retrying once with sanitized research
     * when the provider reports that it could not produce valid JSON.
     *
     * @param {object} state - Page state (`url`, `title`, `h1`..`h4`, `links` are read).
     * @param {string} research - Free-form research text for the page.
     * @returns {Promise<object>} The object produced by the provider.
     * @throws Re-throws any provider error that is not a JSON-generation failure.
     */
    async document(state, research) {
        try {
            return await this.generateDocumentation(state, research);
        }
        catch (error) {
            if (!this.shouldRetryWithSanitizedResearch(error)) {
                throw error;
            }
            return this.generateDocumentation(state, this.sanitizeResearch(research), true);
        }
    }

    /**
     * Builds the system prompt, appending `config.docs.prompt` when it is set.
     *
     * @returns {string} The system prompt text.
     */
    getSystemPrompt() {
        const promptSuffix = this.config.docs?.prompt || '';
        return dedent `
            <role>
            You are a product analyst preparing functional website documentation from UI research.
            </role>

            <task>
            Convert exploratory UI research into a precise spec of what users can do on the current page.
            Distinguish proven capabilities from assumptions.
            Prefer accuracy over coverage.
            </task>

            <rules>
            Only list capabilities that are grounded in the provided page research.
            Put actions into "can" only when there is direct evidence in the page context.
            Put actions into "might" only when the UI strongly suggests a capability but proof is incomplete.
            Describe each action from the end-user perspective.
            Be explicit about scope:
            - one item
            - list of items
            - bulk operations
            - all items
            - page-level
            Avoid implementation details, selectors, and QA wording.
            Avoid duplicate actions with different phrasing.
            </rules>

            ${promptSuffix}
        `;
    }

    /**
     * Renders up to 50 navigation links as a markdown bullet list.
     * A link without a title is labelled with its URL so the prompt never
     * contains the literal text "undefined".
     *
     * @param {Array<{title?: string, url: string}>} [links] - Links taken from the page state.
     * @returns {string} One `- title: url` line per link, joined with newlines.
     */
    formatLinks(links) {
        return (links || [])
            .slice(0, 50)
            .map((link) => `- ${link.title || link.url}: ${link.url}`)
            .join('\n');
    }

    /**
     * Builds the user prompt from the page state and research text.
     *
     * @param {object} state - Page state (`url`, `title`, `h1`..`h4`, `links` are read).
     * @param {string} research - Research text embedded verbatim in the prompt.
     * @param {boolean} [simplified=false] - Adds the fallback-mode note when true.
     * @returns {string} The user prompt text.
     */
    buildPrompt(state, research, simplified = false) {
        const headings = [state.h1, state.h2, state.h3, state.h4].filter(Boolean).join(' | ');
        const links = this.formatLinks(state.links);
        const simplificationNote = simplified
            ? dedent `
                <fallback_mode>
                The research text was simplified because the original formatting was noisy.
                Ignore malformed table syntax and rely only on clear, repeated signals.
                Prefer fewer actions over speculative coverage.
                </fallback_mode>
            `
            : '';
        return dedent `
            <page>
            URL: ${state.url}
            Title: ${state.title || ''}
            Headings: ${headings}
            </page>

            <navigation_links>
            ${links}
            </navigation_links>

            <research>
            ${research}
            </research>

            ${simplificationNote}

            <output_requirements>
            Return structured data.
            summary: short page purpose statement.
            can: actions you are 100% sure are available on page.
            might: actions that look possible but are not fully proven.
            For each action provide:
            - action: concise user-facing capability phrased as "user can ..."
            - scope: one of one item, list of items, bulk operations, all items, page-level
            - evidence: short reason based on visible UI or research
            </output_requirements>
        `;
    }

    /**
     * Sends the system and user prompts to the provider and returns the parsed object.
     *
     * @param {object} state - Page state forwarded to `buildPrompt`.
     * @param {string} research - Research text forwarded to `buildPrompt`.
     * @param {boolean} [simplified=false] - Forwarded to `buildPrompt`.
     * @returns {Promise<object>} `response.object` from the provider.
     */
    async generateDocumentation(state, research, simplified = false) {
        const messages = [
            {
                role: 'system',
                content: this.getSystemPrompt(),
            },
            {
                role: 'user',
                content: this.buildPrompt(state, research, simplified),
            },
        ];
        const response = await this.provider.generateObject(messages, pageDocumentationSchema, undefined, {
            agentName: 'documentarian',
        });
        return response.object;
    }

    /**
     * Tells whether an error message marks a JSON-generation failure, which is
     * the only failure that triggers the sanitized retry.
     *
     * @param {unknown} error - The value thrown by the provider.
     * @returns {boolean} True when the message contains one of the known markers.
     */
    shouldRetryWithSanitizedResearch(error) {
        const message = error instanceof Error ? error.message : String(error);
        return message.includes('Failed to generate JSON') || message.includes('failed_generation');
    }

    /**
     * Removes table-like lines that look malformed while keeping prose intact.
     *
     * Kept: blank lines, lines without `|`, separator rows containing `|------`,
     * and rows that start with `|` and contain at least four pipes.
     * Dropped: every other line that contains a pipe.
     *
     * @param {string} research - Research text; nullish values are treated as empty.
     * @returns {string} The filtered text, joined with `\n`.
     */
    sanitizeResearch(research) {
        // This runs inside the retry path: a non-string must not raise a
        // TypeError here, because that would hide the original provider error.
        const text = typeof research === 'string' ? research : String(research ?? '');
        const sanitized = [];
        for (const line of text.split('\n')) {
            if (!line.trim() || !line.includes('|')) {
                sanitized.push(line);
                continue;
            }
            const pipeCount = (line.match(/\|/g) || []).length;
            if (pipeCount < 2) {
                continue;
            }
            const isSeparatorRow = line.includes('|------');
            const isWideTableRow = line.trim().startsWith('|') && pipeCount >= 4;
            if (isSeparatorRow || isWideTableRow) {
                sanitized.push(line);
            }
        }
        return sanitized.join('\n');
    }
}
147
// Scopes a documented capability may apply to; these match the list spelled
// out in the system and user prompts.
const capabilityScopes = ['one item', 'list of items', 'bulk operations', 'all items', 'page-level'];

// One documented user capability: what the user can do, at which scope, and why we believe it.
const capabilitySchema = z.object({
    action: z.string(),
    scope: z.enum(capabilityScopes),
    evidence: z.string(),
});

// Structured result requested from the provider for a single page:
// a summary plus proven ("can") and suspected ("might") capabilities.
const capabilityList = () => z.array(capabilitySchema);
const pageDocumentationSchema = z.object({
    summary: z.string(),
    can: capabilityList(),
    might: capabilityList(),
});
157
+ export { Documentarian };
@@ -0,0 +1,104 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { Command } from 'commander';
4
+ import { setPreserveConsoleLogs } from "../../../src/utils/logger.js";
5
+ import { DocBot } from "./docbot.js";
6
/**
 * Maps parsed CLI options onto the option object handed to `DocBot`.
 *
 * `--session` given without a file name arrives as `true` and is replaced by
 * the default session file path; any other value is passed through unchanged.
 *
 * @param {object} options - Options object produced by the CLI parser.
 * @returns {object} Options with `verbose`, `config`, `path`, `show`,
 *   `headless`, `incognito`, `session` and `docsConfig` keys.
 */
function buildOptions(options) {
    const { verbose, debug, config, path: workingDir, show, headless, incognito, docsConfig } = options;
    const session = options.session === true ? 'output/session.json' : options.session;
    return {
        verbose: verbose || debug,
        config,
        path: workingDir,
        show,
        headless,
        incognito,
        session,
        docsConfig,
    };
}
22
/**
 * Registers the options shared by the doc collector sub-commands.
 *
 * Each option is added by calling `.option(flags, description)` on the value
 * returned by the previous call, starting from `cmd`.
 *
 * @param {object} cmd - Command-like object exposing a chainable `option` method.
 * @returns {object} Whatever the final `.option(...)` call returns.
 */
function addCommonOptions(cmd) {
    const sharedOptions = [
        ['-v, --verbose', 'Enable verbose logging'],
        ['--debug', 'Enable debug logging'],
        ['-c, --config <path>', 'Path to explorbot configuration file'],
        ['--docs-config <path>', 'Path to doc collector configuration file'],
        ['-p, --path <path>', 'Working directory path'],
        ['-s, --show', 'Show browser window'],
        ['--headless', 'Run browser in headless mode'],
        ['--incognito', 'Run without recording experiences'],
        ['--session [file]', 'Save/restore browser session from file'],
    ];
    return sharedOptions.reduce((chain, [flags, description]) => chain.option(flags, description), cmd);
}
34
/**
 * Builds the doc collector command group.
 *
 * Sub-commands:
 * - `collect <path>`: starts a `DocBot`, crawls from `<path>` and prints where
 *   the generated spec was written. Exits the process with 0 on success and 1
 *   on failure.
 * - `init`: writes a starter `docbot.config.ts` into the working directory
 *   (or into `--path`), refusing to overwrite an existing file without `--force`.
 *
 * @param {string} [name='docs'] - Name under which the command group is registered.
 * @returns {Command} The configured command group.
 */
export function createDocsCommands(name = 'docs') {
    const cmd = new Command(name);
    cmd.description('AI-powered website documentation collector');

    addCommonOptions(cmd
        .command('collect <path>')
        .description('Crawl pages and generate documentation spec')
        .option('--max-pages <count>', 'Maximum number of pages to document')).action(async (startPath, options) => {
        setPreserveConsoleLogs(true);
        let bot;
        try {
            // Validate before launching anything: an unparsable count would
            // otherwise reach the crawler as NaN.
            let maxPages;
            if (options.maxPages !== undefined) {
                maxPages = Number.parseInt(options.maxPages, 10);
                if (!Number.isInteger(maxPages) || maxPages < 1) {
                    throw new Error(`Invalid --max-pages value: ${options.maxPages}`);
                }
            }
            bot = new DocBot({
                ...buildOptions(options),
                startUrl: startPath,
            });
            await bot.start();
            const result = await bot.collect(startPath, { maxPages });
            console.log(`\nDocumented ${result.pages.length} page(s)`);
            console.log(`Skipped ${result.skipped.length} page(s)`);
            console.log(`Spec index: ${result.indexPath}`);
            console.log(`Pages dir: ${path.join(result.outputDir, 'pages')}`);
            await bot.stop();
            process.exit(0);
        }
        catch (error) {
            console.error('Failed:', error instanceof Error ? error.message : 'Unknown error');
            // Best-effort shutdown so a failed run does not leave the bot's
            // resources behind; a failure here must not hide the error
            // reported above.
            try {
                await bot?.stop();
            }
            catch {
                // Deliberately ignored: the process exits with a failure code below.
            }
            process.exit(1);
        }
    });

    cmd
        .command('init')
        .description('Initialize doc collector configuration')
        .option('-f, --force', 'Overwrite existing config file')
        .option('-p, --path <path>', 'Working directory for initialization')
        .action(async (options) => {
        // Resolve the target directory explicitly instead of changing the
        // process-wide working directory, which would stay changed if the
        // write below threw.
        const targetDir = options.path ? path.resolve(options.path) : process.cwd();
        if (options.path) {
            fs.mkdirSync(targetDir, { recursive: true });
            console.log(`Working in: ${targetDir}`);
        }
        const configPath = path.join(targetDir, 'docbot.config.ts');
        if (fs.existsSync(configPath) && !options.force) {
            console.log(`Config file already exists: ${configPath}`);
            console.log('Use --force to overwrite.');
            process.exit(1);
        }
        const configContent = `export default {
  docs: {
    maxPages: 100,
    output: 'docs',
    screenshot: true,
    collapseDynamicPages: true,
    scope: 'site',
    includePaths: [],
    excludePaths: [],
    deniedPathSegments: ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'],
    minCanActions: 1,
    minInteractiveElements: 3,
    // prompt: 'Add domain-specific documentation guidance here',
  },
};
`;
        fs.writeFileSync(configPath, configContent, 'utf8');
        console.log(`Created: ${configPath}`);
    });

    return cmd;
}
@@ -0,0 +1,129 @@
1
/**
 * Rewrites the TypeScript extension of a relative import specifier to its
 * JavaScript counterpart.
 * NOTE(review): this has the shape of a compiler-emitted helper — confirm
 * before hand-editing in a build output.
 *
 * - Values that are not strings, or that do not start with `./` or `../`, are returned unchanged.
 * - `.tsx` becomes `.jsx` when `preserveJsx` is truthy, otherwise `.js`.
 * - `.ts` / `.mts` / `.cts` become `.js` / `.mjs` / `.cjs` (case-insensitive match).
 * - Declaration-style names (`.d.ts`, `.d.mts`, ...) are left untouched unless they
 *   carry both an inner extension and a module-kind letter.
 *
 * @param {*} path - Import specifier.
 * @param {boolean} [preserveJsx] - Keep JSX (`.jsx`) for `.tsx` inputs.
 * @returns {*} The rewritten specifier, or the input unchanged.
 */
var __rewriteRelativeImportExtension = (this && this.__rewriteRelativeImportExtension) || function (path, preserveJsx) {
    const isRelativeSpecifier = typeof path === "string" && /^\.\.?\//.test(path);
    if (!isRelativeSpecifier) {
        return path;
    }
    return path.replace(/\.(tsx)$|((?:\.d)?)((?:\.[^./]+?)?)\.([cm]?)ts$/i, function (match, tsx, declaration, innerExt, moduleKind) {
        if (tsx) {
            return preserveJsx ? ".jsx" : ".js";
        }
        if (declaration && (!innerExt || !moduleKind)) {
            return match;
        }
        return declaration + innerExt + "." + moduleKind.toLowerCase() + "js";
    });
};
9
+ import { existsSync, readFileSync } from 'node:fs';
10
+ import path, { resolve } from 'node:path';
11
+ import { parseEnv } from 'node:util';
12
+ import { ConfigParser } from "../../../src/config.js";
13
+ class DocbotConfigParser {
14
+ static instance;
15
+ config = null;
16
+ configPath = null;
17
+ constructor() { }
18
+ static getInstance() {
19
+ if (!DocbotConfigParser.instance) {
20
+ DocbotConfigParser.instance = new DocbotConfigParser();
21
+ }
22
+ return DocbotConfigParser.instance;
23
+ }
24
+ static loadEnv(filePath) {
25
+ const resolved = resolve(filePath);
26
+ if (!existsSync(resolved))
27
+ return;
28
+ Object.assign(process.env, parseEnv(readFileSync(resolved, 'utf8')));
29
+ }
30
+ async loadConfig(options) {
31
+ if (this.config && !options?.config && !options?.path) {
32
+ return this.config;
33
+ }
34
+ const originalCwd = process.cwd();
35
+ if (options?.path) {
36
+ process.chdir(resolve(options.path));
37
+ }
38
+ DocbotConfigParser.loadEnv('.env');
39
+ try {
40
+ const resolvedPath = options?.config || this.findConfigFile();
41
+ if (!resolvedPath) {
42
+ this.config = this.mergeWithDefaults({});
43
+ this.configPath = null;
44
+ return this.config;
45
+ }
46
+ const configModule = await this.loadConfigModule(resolvedPath);
47
+ const loadedConfig = configModule.default || configModule;
48
+ this.config = this.mergeWithDefaults(loadedConfig || {});
49
+ this.configPath = resolvedPath;
50
+ return this.config;
51
+ }
52
+ finally {
53
+ if (options?.path && originalCwd !== process.cwd()) {
54
+ process.chdir(originalCwd);
55
+ }
56
+ }
57
+ }
58
+ getConfig() {
59
+ if (this.config) {
60
+ return this.config;
61
+ }
62
+ return this.mergeWithDefaults({});
63
+ }
64
+ getConfigPath() {
65
+ return this.configPath;
66
+ }
67
+ getOutputDir() {
68
+ const outputDir = ConfigParser.getInstance().getOutputDir();
69
+ const docsOutput = this.getConfig().docs?.output;
70
+ if (!docsOutput) {
71
+ return path.join(outputDir, 'docs');
72
+ }
73
+ return path.join(outputDir, docsOutput);
74
+ }
75
+ findConfigFile() {
76
+ const possiblePaths = ['docbot.config.js', 'docbot.config.mjs', 'docbot.config.ts'];
77
+ for (const candidate of possiblePaths) {
78
+ const fullPath = resolve(process.cwd(), candidate);
79
+ if (existsSync(fullPath)) {
80
+ return fullPath;
81
+ }
82
+ }
83
+ return null;
84
+ }
85
+ async loadConfigModule(configPath) {
86
+ const ext = configPath.split('.').pop();
87
+ if (ext === 'ts') {
88
+ try {
89
+ return await import(__rewriteRelativeImportExtension(configPath));
90
+ }
91
+ catch {
92
+ const require = (await import('node:module')).createRequire(import.meta.url);
93
+ return require(configPath);
94
+ }
95
+ }
96
+ if (ext === 'js' || ext === 'mjs') {
97
+ return await import(__rewriteRelativeImportExtension(configPath));
98
+ }
99
+ return JSON.parse(readFileSync(configPath, 'utf8'));
100
+ }
101
+ mergeWithDefaults(config) {
102
+ return this.deepMerge({
103
+ docs: {
104
+ maxPages: 100,
105
+ output: 'docs',
106
+ screenshot: true,
107
+ collapseDynamicPages: true,
108
+ scope: 'site',
109
+ includePaths: [],
110
+ excludePaths: [],
111
+ deniedPathSegments: ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'],
112
+ minCanActions: 1,
113
+ minInteractiveElements: 3,
114
+ },
115
+ }, config);
116
+ }
117
+ deepMerge(target, source) {
118
+ const result = { ...target };
119
+ for (const key in source) {
120
+ if (source[key] && typeof source[key] === 'object' && !Array.isArray(source[key]) && source[key].constructor === Object) {
121
+ result[key] = this.deepMerge(result[key] || {}, source[key]);
122
+ continue;
123
+ }
124
+ result[key] = source[key];
125
+ }
126
+ return result;
127
+ }
128
+ }
129
+ export { DocbotConfigParser };