explorbot 0.1.17 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/bin/explorbot-cli.ts +2 -0
  2. package/boat/doc-collector/bin/doc-collector-cli.ts +5 -0
  3. package/boat/doc-collector/package.json +24 -0
  4. package/boat/doc-collector/src/ai/documentarian.ts +184 -0
  5. package/boat/doc-collector/src/cli.ts +119 -0
  6. package/boat/doc-collector/src/config.ts +162 -0
  7. package/boat/doc-collector/src/docbot.ts +391 -0
  8. package/boat/doc-collector/src/docs-renderer.ts +187 -0
  9. package/boat/doc-collector/src/path-filter.ts +46 -0
  10. package/boat/doc-collector/src/research-navigation.ts +90 -0
  11. package/dist/bin/explorbot-cli.js +2 -0
  12. package/dist/boat/doc-collector/bin/doc-collector-cli.js +4 -0
  13. package/dist/boat/doc-collector/src/ai/documentarian.js +157 -0
  14. package/dist/boat/doc-collector/src/cli.js +104 -0
  15. package/dist/boat/doc-collector/src/config.js +129 -0
  16. package/dist/boat/doc-collector/src/docbot.js +326 -0
  17. package/dist/boat/doc-collector/src/docs-renderer.js +141 -0
  18. package/dist/boat/doc-collector/src/path-filter.js +35 -0
  19. package/dist/boat/doc-collector/src/research-navigation.js +71 -0
  20. package/dist/package.json +4 -1
  21. package/dist/src/ai/researcher/coordinates.js +1 -1
  22. package/dist/src/ai/researcher/parser.js +3 -0
  23. package/dist/src/ai/researcher.js +2 -1
  24. package/dist/src/config.js +10 -3
  25. package/dist/src/explorer.js +14 -1
  26. package/dist/src/state-manager.js +3 -0
  27. package/dist/src/utils/url-matcher.js +5 -3
  28. package/dist/src/utils/web-element.js +3 -2
  29. package/package.json +4 -1
  30. package/src/ai/researcher/coordinates.ts +1 -1
  31. package/src/ai/researcher/parser.ts +3 -0
  32. package/src/ai/researcher.ts +2 -1
  33. package/src/config.ts +13 -3
  34. package/src/explorbot.ts +1 -0
  35. package/src/explorer.ts +12 -1
  36. package/src/state-manager.ts +4 -0
  37. package/src/utils/url-matcher.ts +5 -2
  38. package/src/utils/web-element.ts +3 -2
@@ -748,5 +748,7 @@ program
748
748
  });
749
749
  });
750
750
  import { createApiCommands } from "../boat/api-tester/src/cli.js";
751
+ import { createDocsCommands } from "../boat/doc-collector/src/cli.js";
751
752
  program.addCommand(createApiCommands('api'));
753
+ program.addCommand(createDocsCommands('docs'));
752
754
  program.parse();
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env bun
2
+ import { createDocsCommands } from "../src/cli.js";
3
// Standalone entry point: build the command tree under the `doc-collector`
// name and hand process.argv to it straight away.
createDocsCommands('doc-collector').parse();
@@ -0,0 +1,157 @@
1
+ import dedent from 'dedent';
2
+ import { z } from 'zod';
3
/**
 * Turns UI research for a single page into structured documentation by
 * prompting an AI provider.
 *
 * The provider is expected to expose
 * `generateObject(messages, schema, undefined, { agentName })` and resolve to
 * a value with an `object` property.
 */
class Documentarian {
    provider;
    config;

    /**
     * @param provider AI provider used to generate the structured output.
     * @param config Configuration object; `config.docs.prompt` (optional) is
     *   appended to the system prompt.
     */
    constructor(provider, config = {}) {
        this.provider = provider;
        this.config = config;
    }

    /**
     * Generates documentation for a page. When generation fails with a
     * JSON-generation error, the research text is sanitized and generation
     * is attempted one more time in simplified mode. Any other error is
     * rethrown untouched.
     *
     * @param state Page state (url, title, h1-h4, links).
     * @param research Research text for the page.
     * @returns The `object` produced by the provider.
     */
    async document(state, research) {
        try {
            return await this.generateDocumentation(state, research);
        }
        catch (failure) {
            if (this.shouldRetryWithSanitizedResearch(failure)) {
                const cleanedResearch = this.sanitizeResearch(research);
                return this.generateDocumentation(state, cleanedResearch, true);
            }
            throw failure;
        }
    }

    /**
     * Builds the system prompt, ending with the custom `docs.prompt` from the
     * configuration when one is set.
     */
    getSystemPrompt() {
        const promptSuffix = this.config.docs?.prompt || '';
        return dedent `
<role>
You are a product analyst preparing functional website documentation from UI research.
</role>

<task>
Convert exploratory UI research into a precise spec of what users can do on the current page.
Distinguish proven capabilities from assumptions.
Prefer accuracy over coverage.
</task>

<rules>
Only list capabilities that are grounded in the provided page research.
Put actions into "can" only when there is direct evidence in the page context.
Put actions into "might" only when the UI strongly suggests a capability but proof is incomplete.
Describe each action from the end-user perspective.
Be explicit about scope:
- one item
- list of items
- bulk operations
- all items
- page-level
Avoid implementation details, selectors, and QA wording.
Avoid duplicate actions with different phrasing.
</rules>

${promptSuffix}
`;
    }

    /**
     * Builds the user prompt from the page state and the research text.
     *
     * @param state Page state; reads `url`, `title`, `h1`..`h4` and `links`.
     * @param research Research text embedded verbatim into the prompt.
     * @param simplified When true, a fallback-mode note is added.
     */
    buildPrompt(state, research, simplified = false) {
        const headingValues = [state.h1, state.h2, state.h3, state.h4];
        const headings = headingValues.filter(Boolean).join(' | ');

        // Only the first 50 links are listed in the prompt.
        const linkLines = [];
        for (const link of (state.links || []).slice(0, 50)) {
            linkLines.push(`- ${link.title}: ${link.url}`);
        }
        const links = linkLines.join('\n');

        let simplificationNote = '';
        if (simplified) {
            simplificationNote = dedent `
<fallback_mode>
The research text was simplified because the original formatting was noisy.
Ignore malformed table syntax and rely only on clear, repeated signals.
Prefer fewer actions over speculative coverage.
</fallback_mode>
`;
        }

        return dedent `
<page>
URL: ${state.url}
Title: ${state.title || ''}
Headings: ${headings}
</page>

<navigation_links>
${links}
</navigation_links>

<research>
${research}
</research>

${simplificationNote}

<output_requirements>
Return structured data.
summary: short page purpose statement.
can: actions you are 100% sure are available on page.
might: actions that look possible but are not fully proven.
For each action provide:
- action: concise user-facing capability phrased as "user can ..."
- scope: one of one item, list of items, bulk operations, all items, page-level
- evidence: short reason based on visible UI or research
</output_requirements>
`;
    }

    /**
     * Sends the system and user prompts to the provider and returns the
     * structured object from its response.
     */
    async generateDocumentation(state, research, simplified = false) {
        const systemMessage = { role: 'system', content: this.getSystemPrompt() };
        const userMessage = { role: 'user', content: this.buildPrompt(state, research, simplified) };
        const { object } = await this.provider.generateObject([systemMessage, userMessage], pageDocumentationSchema, undefined, {
            agentName: 'documentarian',
        });
        return object;
    }

    /**
     * Decides whether a failed generation is worth retrying with sanitized
     * research: true only when the error text mentions a JSON-generation
     * failure.
     */
    shouldRetryWithSanitizedResearch(error) {
        const text = error instanceof Error ? error.message : String(error);
        const retryMarkers = ['Failed to generate JSON', 'failed_generation'];
        return retryMarkers.some((marker) => text.includes(marker));
    }

    /**
     * Filters pipe-containing lines out of the research text.
     *
     * Kept: blank lines, lines without `|`, lines containing `|------`, and
     * lines that start with `|` and contain at least four pipes. Every other
     * line that contains a pipe is dropped.
     */
    sanitizeResearch(research) {
        const keepLine = (line) => {
            if (!line.trim() || !line.includes('|')) {
                return true;
            }
            const pipeCount = line.split('|').length - 1;
            if (pipeCount < 2) {
                return false;
            }
            if (line.includes('|------')) {
                return true;
            }
            return line.trim().startsWith('|') && pipeCount >= 4;
        };
        return research.split('\n').filter(keepLine).join('\n');
    }
}
147
// Allowed values for a capability's scope.
const CAPABILITY_SCOPES = ['one item', 'list of items', 'bulk operations', 'all items', 'page-level'];

// A single documented capability: what the user can do, at which scope, and
// the evidence backing the claim.
const capabilitySchema = z.object({
    action: z.string(),
    scope: z.enum(CAPABILITY_SCOPES),
    evidence: z.string(),
});

// Structured documentation for one page: a summary plus proven ("can") and
// unproven ("might") capabilities.
const capabilityListSchema = z.array(capabilitySchema);
const pageDocumentationSchema = z.object({
    summary: z.string(),
    can: capabilityListSchema,
    might: capabilityListSchema,
});
157
+ export { Documentarian };
@@ -0,0 +1,104 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { Command } from 'commander';
4
+ import { setPreserveConsoleLogs } from "../../../src/utils/logger.js";
5
+ import { DocBot } from "./docbot.js";
6
/**
 * Maps parsed CLI options onto the option object handed to DocBot.
 *
 * `verbose` is enabled by either `--verbose` or `--debug`. A bare `--session`
 * flag (parsed as `true`) is replaced with the default session file path.
 *
 * @param options Parsed commander options.
 * @returns Options object for the bot.
 */
function buildOptions(options) {
    const { verbose, debug, config, path: workingPath, show, headless, incognito, docsConfig } = options;
    const session = options.session === true ? 'output/session.json' : options.session;
    return {
        verbose: verbose || debug,
        config,
        path: workingPath,
        show,
        headless,
        incognito,
        session,
        docsConfig,
    };
}
22
/**
 * Registers the options shared by doc collector commands.
 *
 * @param cmd Command to extend; its `option(flags, description)` method is
 *   called once per option, chaining on each returned value.
 * @returns The value returned by the last `option` call.
 */
function addCommonOptions(cmd) {
    const commonOptions = [
        ['-v, --verbose', 'Enable verbose logging'],
        ['--debug', 'Enable debug logging'],
        ['-c, --config <path>', 'Path to explorbot configuration file'],
        ['--docs-config <path>', 'Path to doc collector configuration file'],
        ['-p, --path <path>', 'Working directory path'],
        ['-s, --show', 'Show browser window'],
        ['--headless', 'Run browser in headless mode'],
        ['--incognito', 'Run without recording experiences'],
        ['--session [file]', 'Save/restore browser session from file'],
    ];
    let current = cmd;
    for (const [flags, description] of commonOptions) {
        current = current.option(flags, description);
    }
    return current;
}
34
/**
 * Builds the doc collector command group.
 *
 * Sub-commands:
 * - `collect <path>`: starts a DocBot, collects documentation beginning at
 *   `<path>`, prints a summary, and exits with 0 on success or 1 on failure.
 * - `init`: writes a starter `docbot.config.ts` into the working directory.
 *
 * @param name Name of the root command (defaults to `docs`).
 * @returns The configured commander `Command`.
 */
export function createDocsCommands(name = 'docs') {
    const cmd = new Command(name);
    cmd.description('AI-powered website documentation collector');

    const collectCommand = cmd
        .command('collect <path>')
        .description('Crawl pages and generate documentation spec')
        .option('--max-pages <count>', 'Maximum number of pages to document');

    addCommonOptions(collectCommand).action(async (startPath, options) => {
        setPreserveConsoleLogs(true);
        let bot = null;
        let exitCode = 0;
        try {
            // Validate before launching anything: a non-numeric value would
            // otherwise reach collect() as NaN.
            let maxPages;
            if (options.maxPages) {
                maxPages = Number.parseInt(options.maxPages, 10);
                if (Number.isNaN(maxPages) || maxPages < 0) {
                    throw new Error(`Invalid --max-pages value: ${options.maxPages}`);
                }
            }
            bot = new DocBot({
                ...buildOptions(options),
                startUrl: startPath,
            });
            await bot.start();
            const result = await bot.collect(startPath, { maxPages });
            console.log(`\nDocumented ${result.pages.length} page(s)`);
            console.log(`Skipped ${result.skipped.length} page(s)`);
            console.log(`Spec index: ${result.indexPath}`);
            console.log(`Pages dir: ${path.join(result.outputDir, 'pages')}`);
        }
        catch (error) {
            console.error('Failed:', error instanceof Error ? error.message : 'Unknown error');
            exitCode = 1;
        }
        finally {
            // Always stop the bot once it was created, including after a
            // failed start/collect, so it is not left running on the error path.
            if (bot) {
                try {
                    await bot.stop();
                }
                catch (stopError) {
                    // When the run already failed, the original error has been
                    // reported; only surface a stop failure after a clean run.
                    if (exitCode === 0) {
                        console.error('Failed:', stopError instanceof Error ? stopError.message : 'Unknown error');
                        exitCode = 1;
                    }
                }
            }
        }
        process.exit(exitCode);
    });

    cmd
        .command('init')
        .description('Initialize doc collector configuration')
        .option('-f, --force', 'Overwrite existing config file')
        .option('-p, --path <path>', 'Working directory for initialization')
        .action(async (options) => {
        // Build absolute paths instead of changing the process working
        // directory, so there is no cwd to restore on any exit path.
        const workingDir = options.path ? path.resolve(options.path) : process.cwd();
        const configPath = path.join(workingDir, 'docbot.config.ts');
        try {
            if (options.path) {
                fs.mkdirSync(workingDir, { recursive: true });
                console.log(`Working in: ${workingDir}`);
            }
            if (fs.existsSync(configPath) && !options.force) {
                console.log(`Config file already exists: ${configPath}`);
                console.log('Use --force to overwrite.');
                process.exit(1);
            }
            const configContent = `export default {
docs: {
maxPages: 100,
output: 'docs',
screenshot: true,
collapseDynamicPages: true,
scope: 'site',
includePaths: [],
excludePaths: [],
deniedPathSegments: ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'],
minCanActions: 1,
minInteractiveElements: 3,
// prompt: 'Add domain-specific documentation guidance here',
},
};
`;
            fs.writeFileSync(configPath, configContent, 'utf8');
            console.log(`Created: ${configPath}`);
        }
        catch (error) {
            // Report file-system failures the same way `collect` reports errors
            // instead of leaving an unhandled rejection.
            console.error('Failed:', error instanceof Error ? error.message : 'Unknown error');
            process.exit(1);
        }
    });
    return cmd;
}
@@ -0,0 +1,129 @@
1
// Rewrites the TypeScript extension of a relative import specifier
// ("./..." or "../...") to its JavaScript counterpart:
//   .ts -> .js, .mts -> .mjs, .cts -> .cjs, .tsx -> .js (or .jsx when
//   preserveJsx is truthy).
// Declaration-style matches (".d.ts" and friends) and every non-relative or
// non-string value are returned unchanged.
var __rewriteRelativeImportExtension = (this && this.__rewriteRelativeImportExtension) || function (path, preserveJsx) {
    var isRelativeSpecifier = typeof path === "string" && /^\.\.?\//.test(path);
    if (!isRelativeSpecifier) {
        return path;
    }
    return path.replace(/\.(tsx)$|((?:\.d)?)((?:\.[^./]+?)?)\.([cm]?)ts$/i, function (match, tsx, d, ext, cm) {
        if (tsx) {
            return preserveJsx ? ".jsx" : ".js";
        }
        if (d && (!ext || !cm)) {
            return match;
        }
        return d + ext + "." + cm.toLowerCase() + "js";
    });
};
9
+ import { existsSync, readFileSync } from 'node:fs';
10
+ import path, { resolve } from 'node:path';
11
+ import { parseEnv } from 'node:util';
12
+ import { ConfigParser } from "../../../src/config.js";
13
+ class DocbotConfigParser {
14
+ static instance;
15
+ config = null;
16
+ configPath = null;
17
+ constructor() { }
18
+ static getInstance() {
19
+ if (!DocbotConfigParser.instance) {
20
+ DocbotConfigParser.instance = new DocbotConfigParser();
21
+ }
22
+ return DocbotConfigParser.instance;
23
+ }
24
+ static loadEnv(filePath) {
25
+ const resolved = resolve(filePath);
26
+ if (!existsSync(resolved))
27
+ return;
28
+ Object.assign(process.env, parseEnv(readFileSync(resolved, 'utf8')));
29
+ }
30
+ async loadConfig(options) {
31
+ if (this.config && !options?.config && !options?.path) {
32
+ return this.config;
33
+ }
34
+ const originalCwd = process.cwd();
35
+ if (options?.path) {
36
+ process.chdir(resolve(options.path));
37
+ }
38
+ DocbotConfigParser.loadEnv('.env');
39
+ try {
40
+ const resolvedPath = options?.config || this.findConfigFile();
41
+ if (!resolvedPath) {
42
+ this.config = this.mergeWithDefaults({});
43
+ this.configPath = null;
44
+ return this.config;
45
+ }
46
+ const configModule = await this.loadConfigModule(resolvedPath);
47
+ const loadedConfig = configModule.default || configModule;
48
+ this.config = this.mergeWithDefaults(loadedConfig || {});
49
+ this.configPath = resolvedPath;
50
+ return this.config;
51
+ }
52
+ finally {
53
+ if (options?.path && originalCwd !== process.cwd()) {
54
+ process.chdir(originalCwd);
55
+ }
56
+ }
57
+ }
58
+ getConfig() {
59
+ if (this.config) {
60
+ return this.config;
61
+ }
62
+ return this.mergeWithDefaults({});
63
+ }
64
+ getConfigPath() {
65
+ return this.configPath;
66
+ }
67
+ getOutputDir() {
68
+ const outputDir = ConfigParser.getInstance().getOutputDir();
69
+ const docsOutput = this.getConfig().docs?.output;
70
+ if (!docsOutput) {
71
+ return path.join(outputDir, 'docs');
72
+ }
73
+ return path.join(outputDir, docsOutput);
74
+ }
75
+ findConfigFile() {
76
+ const possiblePaths = ['docbot.config.js', 'docbot.config.mjs', 'docbot.config.ts'];
77
+ for (const candidate of possiblePaths) {
78
+ const fullPath = resolve(process.cwd(), candidate);
79
+ if (existsSync(fullPath)) {
80
+ return fullPath;
81
+ }
82
+ }
83
+ return null;
84
+ }
85
+ async loadConfigModule(configPath) {
86
+ const ext = configPath.split('.').pop();
87
+ if (ext === 'ts') {
88
+ try {
89
+ return await import(__rewriteRelativeImportExtension(configPath));
90
+ }
91
+ catch {
92
+ const require = (await import('node:module')).createRequire(import.meta.url);
93
+ return require(configPath);
94
+ }
95
+ }
96
+ if (ext === 'js' || ext === 'mjs') {
97
+ return await import(__rewriteRelativeImportExtension(configPath));
98
+ }
99
+ return JSON.parse(readFileSync(configPath, 'utf8'));
100
+ }
101
+ mergeWithDefaults(config) {
102
+ return this.deepMerge({
103
+ docs: {
104
+ maxPages: 100,
105
+ output: 'docs',
106
+ screenshot: true,
107
+ collapseDynamicPages: true,
108
+ scope: 'site',
109
+ includePaths: [],
110
+ excludePaths: [],
111
+ deniedPathSegments: ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'],
112
+ minCanActions: 1,
113
+ minInteractiveElements: 3,
114
+ },
115
+ }, config);
116
+ }
117
+ deepMerge(target, source) {
118
+ const result = { ...target };
119
+ for (const key in source) {
120
+ if (source[key] && typeof source[key] === 'object' && !Array.isArray(source[key]) && source[key].constructor === Object) {
121
+ result[key] = this.deepMerge(result[key] || {}, source[key]);
122
+ continue;
123
+ }
124
+ result[key] = source[key];
125
+ }
126
+ return result;
127
+ }
128
+ }
129
+ export { DocbotConfigParser };