explorbot 0.1.17 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/bin/explorbot-cli.ts +2 -0
  2. package/boat/doc-collector/bin/doc-collector-cli.ts +5 -0
  3. package/boat/doc-collector/package.json +24 -0
  4. package/boat/doc-collector/src/ai/documentarian.ts +184 -0
  5. package/boat/doc-collector/src/cli.ts +119 -0
  6. package/boat/doc-collector/src/config.ts +162 -0
  7. package/boat/doc-collector/src/docbot.ts +391 -0
  8. package/boat/doc-collector/src/docs-renderer.ts +187 -0
  9. package/boat/doc-collector/src/path-filter.ts +46 -0
  10. package/boat/doc-collector/src/research-navigation.ts +90 -0
  11. package/dist/bin/explorbot-cli.js +2 -0
  12. package/dist/boat/doc-collector/bin/doc-collector-cli.js +4 -0
  13. package/dist/boat/doc-collector/src/ai/documentarian.js +157 -0
  14. package/dist/boat/doc-collector/src/cli.js +104 -0
  15. package/dist/boat/doc-collector/src/config.js +129 -0
  16. package/dist/boat/doc-collector/src/docbot.js +326 -0
  17. package/dist/boat/doc-collector/src/docs-renderer.js +141 -0
  18. package/dist/boat/doc-collector/src/path-filter.js +35 -0
  19. package/dist/boat/doc-collector/src/research-navigation.js +71 -0
  20. package/dist/package.json +4 -1
  21. package/dist/src/ai/researcher/coordinates.js +1 -1
  22. package/dist/src/ai/researcher/parser.js +3 -0
  23. package/dist/src/ai/researcher.js +2 -1
  24. package/dist/src/config.js +10 -3
  25. package/dist/src/explorer.js +14 -1
  26. package/dist/src/state-manager.js +3 -0
  27. package/dist/src/utils/url-matcher.js +5 -3
  28. package/dist/src/utils/web-element.js +3 -2
  29. package/package.json +4 -1
  30. package/src/ai/researcher/coordinates.ts +1 -1
  31. package/src/ai/researcher/parser.ts +3 -0
  32. package/src/ai/researcher.ts +2 -1
  33. package/src/config.ts +13 -3
  34. package/src/explorbot.ts +1 -0
  35. package/src/explorer.ts +12 -1
  36. package/src/state-manager.ts +4 -0
  37. package/src/utils/url-matcher.ts +5 -2
  38. package/src/utils/web-element.ts +3 -2
@@ -0,0 +1,391 @@
1
+ import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
2
+ import path from 'node:path';
3
+ import { ExplorBot, type ExplorBotOptions } from '../../../src/explorbot.ts';
4
+ import type { Link, WebPageState } from '../../../src/state-manager.ts';
5
+ import { normalizeUrl } from '../../../src/state-manager.ts';
6
+ import { sanitizeFilename } from '../../../src/utils/strings.ts';
7
+ import { tag } from '../../../src/utils/logger.ts';
8
+ import { Documentarian, type PageDocumentation } from './ai/documentarian.ts';
9
+ import { type DocbotConfig, DocbotConfigParser } from './config.ts';
10
+ import { type DocumentedPage, renderPageDocumentation, renderSpecIndex, type SkippedPage } from './docs-renderer.ts';
11
+ import { getDocPageKey, shouldCrawlDocPath } from './path-filter.ts';
12
+ import { extractResearchNavigationTargets } from './research-navigation.ts';
13
+
14
/**
 * Crawls a web application through ExplorBot and writes Markdown
 * documentation for every page that yields enough signal.
 *
 * Typical use: construct, `await start()`, `await collect(startPath)`, then
 * `await stop()`.
 */
class DocBot {
  private explorBot: ExplorBot;
  private configParser: DocbotConfigParser;
  private config: DocbotConfig = {};
  // Assigned in start(), once the docbot configuration has been loaded.
  private documentarian!: Documentarian;
  private options: DocbotOptions;
  // Pathname prefix that bounds the crawl for the 'subtree' and 'section' scopes.
  private scopeRoot = '/';

  /**
   * Builds the underlying ExplorBot. When `options.startUrl` is an absolute
   * URL its origin is passed as `baseUrl`; otherwise `baseUrl` is undefined.
   */
  constructor(options: DocbotOptions = {}) {
    this.options = options;
    const baseUrl = this.extractAbsoluteBaseUrl(options.startUrl || '/');
    this.explorBot = new ExplorBot({
      baseUrl,
      verbose: options.verbose,
      config: options.config,
      path: options.path,
      show: options.show,
      headless: options.headless,
      incognito: options.incognito,
      session: options.session,
    });
    this.configParser = DocbotConfigParser.getInstance();
  }

  /**
   * Starts ExplorBot, loads the docbot configuration, creates the
   * Documentarian and makes sure the output directories exist.
   */
  async start(): Promise<void> {
    await this.explorBot.start();
    this.config = await this.configParser.loadConfig({
      config: this.options.docsConfig,
      path: this.options.path,
    });
    this.documentarian = new Documentarian(this.explorBot.getProvider(), this.config);
    this.ensureDirectory(this.configParser.getOutputDir());
    this.ensureDirectory(this.getPagesDir());
  }

  /** Stops the underlying ExplorBot. */
  async stop(): Promise<void> {
    await this.explorBot.stop();
  }

  /**
   * Crawls breadth-first from `startPath`, documenting pages until the queue
   * is empty or the page limit is reached, then writes the `spec.md` index.
   *
   * @param startPath - Path or absolute URL to start from.
   * @param opts - `maxPages` overrides the configured page limit when positive.
   * @returns Documented pages, skipped pages, the index file path and the output directory.
   */
  async collect(startPath: string, opts: CollectOptions = {}): Promise<CollectionResult> {
    const effectiveStartPath = this.normalizeStartPath(startPath);
    this.scopeRoot = this.getScopeRoot(effectiveStartPath);
    const effectiveMaxPages = this.getMaxPages(opts.maxPages);
    const queue: string[] = [];
    const queued = new Set<string>();
    const documented = new Set<string>();
    const pages: DocumentedPage[] = [];
    const skipped: SkippedPage[] = [];
    const baseUrl = this.explorBot.getConfig().playwright.url;

    this.enqueuePath(effectiveStartPath, queue, queued);

    // Only successfully documented pages count towards the limit.
    while (queue.length > 0 && pages.length < effectiveMaxPages) {
      const target = queue.shift();
      if (!target) {
        continue;
      }

      const targetKey = this.getPageKey(target);
      if (documented.has(targetKey)) {
        continue;
      }

      const stateManager = this.explorBot.getExplorer().getStateManager();
      if (stateManager.hasVisitedState(target)) {
        continue;
      }

      try {
        tag('info').log(`Collecting docs for ${this.toDisplayUrl(target, baseUrl)}`);
        await this.explorBot.visit(target);

        if (stateManager.isInDeadLoop()) {
          tag('warning').log('Dead loop detected during docs crawl, stopping collection');
          skipped.push({
            url: target,
            reason: 'dead loop detected during crawl',
          });
          break;
        }

        const state = this.explorBot.getCurrentState();
        if (!state) {
          skipped.push({
            url: target,
            reason: 'page state was not captured after navigation',
          });
          continue;
        }

        // The page may have redirected, so re-check using the URL we actually landed on.
        const pageKey = this.getPageKey(state.url || target);
        if (documented.has(pageKey)) {
          continue;
        }

        const research = await this.explorBot.agentResearcher().research(state, {
          screenshot: this.shouldUseScreenshots(),
          force: true,
        });
        const documentation = await this.documentarian.document(state, research);
        const lowSignalReason = this.getLowSignalReason(documentation, research);
        if (lowSignalReason) {
          skipped.push({
            url: state.url,
            reason: lowSignalReason,
          });
          // Mark as handled so the same page key is not researched again.
          documented.add(pageKey);
          continue;
        }
        const filePath = this.savePageDocumentation(state, documentation);

        pages.push({
          url: state.url,
          title: state.title || '',
          summary: documentation.summary,
          canCount: documentation.can.length,
          mightCount: documentation.might.length,
          canActions: documentation.can.map((item) => item.action),
          mightActions: documentation.might.map((item) => item.action),
          filePath,
        });
        documented.add(pageKey);

        for (const nextPath of this.extractNextPaths(state, baseUrl, research)) {
          if (documented.has(this.getPageKey(nextPath))) {
            continue;
          }
          if (stateManager.hasVisitedState(nextPath)) {
            continue;
          }
          this.enqueuePath(nextPath, queue, queued);
        }
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        tag('warning').log(`Skipping ${target}: ${reason}`);
        skipped.push({
          url: target,
          reason,
        });
      }
    }

    const indexPath = this.saveIndex(effectiveStartPath, pages, skipped, effectiveMaxPages);

    return {
      pages,
      skipped,
      indexPath,
      outputDir: this.configParser.getOutputDir(),
    };
  }

  /** Resolves the page limit: positive override, then positive `docs.maxPages`, then 100. */
  private getMaxPages(override?: number): number {
    if (override && override > 0) {
      return override;
    }

    const configured = this.config.docs?.maxPages;
    if (configured && configured > 0) {
      return configured;
    }

    return 100;
  }

  /** Screenshots are used unless `docs.screenshot` is explicitly `false`. */
  private shouldUseScreenshots(): boolean {
    return this.config.docs?.screenshot !== false;
  }

  /**
   * Collects crawl candidates from the page's links and from navigation
   * targets found in the research text, in that order, without duplicates.
   * Candidates must pass the path filter and be inside the crawl scope.
   */
  private extractNextPaths(state: WebPageState, baseUrl: string, research: string): string[] {
    const accepted = new Set<string>();

    const consider = (candidate: string): void => {
      if (!shouldCrawlDocPath(candidate, this.config)) {
        return;
      }
      if (!this.isInScope(candidate)) {
        return;
      }
      // A Set keeps first-insertion order, which is the order callers receive.
      accepted.add(candidate);
    };

    for (const link of state.links || []) {
      const nextPath = this.resolveLink(link, baseUrl);
      if (nextPath) {
        consider(nextPath);
      }
    }

    for (const target of extractResearchNavigationTargets(state, research)) {
      consider(target);
    }

    return [...accepted];
  }

  /**
   * Resolves a link against `baseUrl` and returns its path, query and hash.
   * Returns null for unparsable links and for links to another origin.
   */
  private resolveLink(link: Link, baseUrl: string): string | null {
    let resolved: URL;

    try {
      resolved = new URL(link.url, baseUrl);
    } catch {
      return null;
    }

    const base = new URL(baseUrl);
    if (resolved.origin !== base.origin) {
      return null;
    }

    const pathName = resolved.pathname || '/';
    return `${pathName}${resolved.search}${resolved.hash}`;
  }

  /** Absolute form of `target` for log output; falls back to `target` when it cannot be resolved. */
  private toDisplayUrl(target: string, baseUrl: string): string {
    try {
      return new URL(target, baseUrl).toString();
    } catch {
      return target;
    }
  }

  /** Adds a path to the queue unless a path with the same page key was already queued. */
  private enqueuePath(inputPath: string, queue: string[], queued: Set<string>): void {
    const normalized = normalizeUrl(inputPath);
    const pageKey = this.getPageKey(inputPath);
    if (queued.has(pageKey)) {
      return;
    }
    queued.add(pageKey);
    if (!inputPath.startsWith('/')) {
      queue.push(`/${normalized}`);
      return;
    }
    queue.push(inputPath);
  }

  /** Deduplication key for a page URL, derived with the current docbot config. */
  private getPageKey(pageUrl: string): string {
    return getDocPageKey(pageUrl, this.config);
  }

  /** Reduces an absolute URL to path + query + hash; other input is returned unchanged. */
  private normalizeStartPath(startPath: string): string {
    try {
      const parsed = new URL(startPath);
      return `${parsed.pathname || '/'}${parsed.search}${parsed.hash}`;
    } catch {
      return startPath;
    }
  }

  /** Origin of `startPath` when it is an absolute URL, otherwise undefined. */
  private extractAbsoluteBaseUrl(startPath: string): string | undefined {
    try {
      const parsed = new URL(startPath);
      return parsed.origin;
    } catch {
      return undefined;
    }
  }

  /** Pathname of a path or absolute URL, with any query string and hash removed. */
  private getScopePath(target: string): string {
    const [pathOnly] = this.normalizeStartPath(target).split(/[?#]/, 1);
    return pathOnly || '/';
  }

  /**
   * Checks whether `target` lies inside the configured crawl scope.
   * 'site' (the default) and unknown scope values accept everything;
   * 'subtree' accepts the scope root and its descendants; 'section'
   * additionally accepts sibling paths named `<root>-…`.
   */
  private isInScope(target: string): boolean {
    const scope = this.config.docs?.scope || 'site';
    if (scope !== 'subtree' && scope !== 'section') {
      return true;
    }

    // A root of '/' contains every path; without this guard the prefix test
    // below would look for '//' and reject everything but '/' itself.
    if (this.scopeRoot === '/') {
      return true;
    }

    // Compare pathnames only so that '/docs?page=2' and '/docs#intro' stay inside '/docs'.
    const targetPath = this.getScopePath(target);
    if (targetPath === this.scopeRoot || targetPath.startsWith(`${this.scopeRoot}/`)) {
      return true;
    }

    return scope === 'section' && targetPath.startsWith(`${this.scopeRoot}-`);
  }

  /** Scope root for a start path: its first four pathname segments at most, or '/'. */
  private getScopeRoot(startPath: string): string {
    const parts = this.getScopePath(startPath).split('/').filter(Boolean);
    if (parts.length === 0) {
      return '/';
    }
    return `/${parts.slice(0, 4).join('/')}`;
  }

  /**
   * Returns a skip reason when a page has fewer proven actions than
   * `docs.minCanActions` (default 1) and fewer interactive elements than
   * `docs.minInteractiveElements` (default 3); otherwise null.
   */
  private getLowSignalReason(documentation: PageDocumentation, research: string): string | null {
    const minCanActions = this.config.docs?.minCanActions ?? 1;
    const minInteractiveElements = this.config.docs?.minInteractiveElements ?? 3;

    if (documentation.can.length >= minCanActions) {
      return null;
    }

    const interactiveCount = this.countInteractiveElements(research);
    if (interactiveCount >= minInteractiveElements) {
      return null;
    }

    return `low-signal page: only ${documentation.can.length} proven actions and ${interactiveCount} interactive elements`;
  }

  /** Sums every "(N element)" / "(N elements)" count that appears in the research text. */
  private countInteractiveElements(research: string): number {
    const matches = [...research.matchAll(/\((\d+) elements?\)/g)];
    return matches.reduce((sum, match) => sum + Number.parseInt(match[1], 10), 0);
  }

  /** Writes the rendered page documentation and returns the file path. */
  private savePageDocumentation(state: WebPageState, documentation: PageDocumentation): string {
    const pagePath = this.getPageFilePath(state.url);
    writeFileSync(pagePath, renderPageDocumentation(state, documentation), 'utf8');
    return pagePath;
  }

  /** Writes `spec.md` into the output directory and returns its path. */
  private saveIndex(startPath: string, pages: DocumentedPage[], skipped: SkippedPage[], maxPages: number): string {
    const indexPath = path.join(this.configParser.getOutputDir(), 'spec.md');
    writeFileSync(indexPath, renderSpecIndex(this.configParser.getOutputDir(), startPath, pages, skipped, maxPages), 'utf8');
    return indexPath;
  }

  /** Directory that holds the per-page Markdown files. */
  private getPagesDir(): string {
    return path.join(this.configParser.getOutputDir(), 'pages');
  }

  /** File path for a page's Markdown, falling back to `root.md` when no name can be derived. */
  private getPageFilePath(pageUrl: string): string {
    const normalized = normalizeUrl(pageUrl || '/');
    const baseName = sanitizeFilename(normalized || 'root');
    if (baseName) {
      return path.join(this.getPagesDir(), `${baseName}.md`);
    }
    return path.join(this.getPagesDir(), 'root.md');
  }

  /** Creates `dirPath` (and missing parents) when nothing exists at that path yet. */
  private ensureDirectory(dirPath: string): void {
    if (existsSync(dirPath)) {
      return;
    }
    mkdirSync(dirPath, { recursive: true });
  }
}
373
+
374
/**
 * Options accepted by the DocBot constructor: every ExplorBot option plus two
 * docs-specific ones. `docsConfig` is handed to the docbot config loader as
 * its `config` value; when `startUrl` is an absolute URL, its origin becomes
 * the ExplorBot base URL.
 */
+ interface DocbotOptions extends ExplorBotOptions {
375
+ docsConfig?: string;
376
+ startUrl?: string;
377
+ }
378
+
379
/**
 * Per-call options for DocBot.collect(). A positive `maxPages` takes
 * precedence over the configured `docs.maxPages` limit.
 */
+ interface CollectOptions {
380
+ maxPages?: number;
381
+ }
382
+
383
/**
 * Result of DocBot.collect(): the pages that were documented, the pages that
 * were skipped (each with a reason), the path of the written `spec.md` index
 * and the output directory.
 */
+ interface CollectionResult {
384
+ pages: DocumentedPage[];
385
+ skipped: SkippedPage[];
386
+ indexPath: string;
387
+ outputDir: string;
388
+ }
389
+
390
+ export { DocBot };
391
+ export type { DocbotOptions, CollectOptions, CollectionResult, DocumentedPage, SkippedPage };
@@ -0,0 +1,187 @@
1
+ import path from 'node:path';
2
+ import type { WebPageState } from '../../../src/state-manager.ts';
3
+ import type { PageDocumentation } from './ai/documentarian.ts';
4
+
5
/**
 * Renders the Markdown document for a single page: a heading with the URL,
 * an optional title line, the purpose, and the "User Can" / "User Might"
 * action lists. Empty lists are replaced by a placeholder bullet.
 *
 * @returns Markdown text ending in exactly one newline.
 */
function renderPageDocumentation(state: WebPageState, documentation: PageDocumentation): string {
  const out: string[] = [`# ${state.url}`, ''];

  if (state.title) {
    out.push(`Title: ${normalizeInlineText(state.title)}`, '');
  }

  out.push('## Purpose', '', ensureSentence(documentation.summary), '', '## User Can', '');

  if (documentation.can.length === 0) {
    out.push('- No proven actions were identified from the collected research.');
  } else {
    for (const entry of documentation.can) {
      out.push(`- ${normalizeAction(entry.action)} -> ${entry.scope}`, ` Proof: ${ensureSentence(entry.evidence)}`);
    }
  }
  // Both the placeholder and the populated list are followed by one blank line.
  out.push('');

  out.push('## User Might', '');

  if (documentation.might.length === 0) {
    out.push('- No assumption-based actions were identified.');
  } else {
    for (const entry of documentation.might) {
      out.push(`- ${normalizeAction(entry.action, 'might')} -> ${entry.scope}`, ` Signal: ${ensureSentence(entry.evidence)}`);
    }
  }
  out.push('');

  return `${out.join('\n').trimEnd()}\n`;
}
55
+
56
/**
 * Renders the `spec.md` index: an overview block, one section per documented
 * page (linked relative to `outputDir`) and, when present, the skipped pages
 * with their reasons.
 *
 * @returns Markdown text ending in exactly one newline.
 */
function renderSpecIndex(outputDir: string, startPath: string, pages: DocumentedPage[], skipped: SkippedPage[], maxPages: number): string {
  const out: string[] = [
    '# Website Spec',
    '',
    '## Overview',
    '',
    `Start page: ${startPath}`,
    `Pages documented: ${pages.length}`,
    `Pages skipped: ${skipped.length}`,
    `Max pages: ${maxPages}`,
    '',
    '## Pages',
    '',
  ];

  if (pages.length === 0) {
    out.push('- No pages were documented.', '');
  }

  for (const entry of pages) {
    // Links always use forward slashes, whatever separator the platform produced.
    const link = path.relative(outputDir, entry.filePath).split('\\').join('/');
    out.push(`### [${entry.url}](${link})`, '');
    out.push(`Purpose: ${ensureSentence(entry.summary)}`);
    out.push(`Proven actions: ${entry.canCount}`);
    out.push(`Possible actions: ${entry.mightCount}`);
    if (entry.title) {
      out.push(`Title: ${normalizeInlineText(entry.title)}`);
    }
    out.push('');

    if (entry.canActions.length > 0) {
      out.push('User Can:', ...entry.canActions.map((action) => `- ${normalizeAction(action, 'can')}`), '');
    }

    if (entry.mightActions.length > 0) {
      out.push('User Might:', ...entry.mightActions.map((action) => `- ${normalizeAction(action, 'might')}`), '');
    }
  }

  if (skipped.length > 0) {
    out.push('## Skipped', '');
    out.push(...skipped.map((entry) => `- ${entry.url}. Reason: ${ensureSentence(entry.reason)}`));
    out.push('');
  }

  return `${out.join('\n').trimEnd()}\n`;
}
117
+
118
/** Leading phrases an action may already carry; removed before the canonical prefix is applied. */
const ACTION_PREFIXES = ['user can ', 'user might ', 'can ', 'might '];

/**
 * Rewrites an action description so that it starts with "user can " or
 * "user might ", according to `kind`, and has no terminal punctuation.
 *
 * Any of the leading phrases "user can", "user might", "can" or "might"
 * (case-insensitive) is replaced by the prefix for `kind`. Text that already
 * starts with the right prefix is returned with its original casing.
 *
 * @param action - Free-form action text.
 * @param kind - Which prefix to enforce; defaults to 'can'.
 * @returns The normalized action, or a generic fallback for blank input.
 */
function normalizeAction(action: string, kind: 'can' | 'might' = 'can'): string {
  const trimmed = normalizeInlineText(action);
  if (!trimmed) {
    return `user ${kind} interact with this page`;
  }

  // ensureSentence guarantees exactly one terminal mark, which is then dropped.
  const normalized = ensureSentence(trimmed).slice(0, -1);
  const lower = normalized.toLowerCase();
  const wantedPrefix = `user ${kind} `;

  if (lower.startsWith(wantedPrefix)) {
    return normalized;
  }

  const existingPrefix = ACTION_PREFIXES.find((prefix) => lower.startsWith(prefix));
  const remainder = existingPrefix ? normalized.slice(existingPrefix.length) : normalized;
  return `${wantedPrefix}${remainder}`;
}

/**
 * Collapses whitespace and makes sure non-empty text ends with '.', '!' or '?'.
 *
 * @returns The sentence, or an empty string for blank input.
 */
function ensureSentence(text: string): string {
  const trimmed = normalizeInlineText(text);
  if (!trimmed) {
    return '';
  }
  if (/[.!?]$/.test(trimmed)) {
    return trimmed;
  }
  return `${trimmed}.`;
}

/** Applies NFKC normalization, collapses whitespace runs to one space and trims both ends. */
function normalizeInlineText(text: string): string {
  return text.normalize('NFKC').replace(/\s+/g, ' ').trim();
}
169
+
170
/**
 * Index entry for one documented page. The spec index renders `url` as the
 * link text, `filePath` (made relative to the output directory) as the link
 * target, `summary` as the purpose line, the two counts as "Proven actions"
 * and "Possible actions", and the action lists as "User Can" / "User Might"
 * bullets. An empty `title` is omitted from the index.
 */
+ interface DocumentedPage {
171
+ url: string;
172
+ title: string;
173
+ summary: string;
174
+ canCount: number;
175
+ mightCount: number;
176
+ canActions: string[];
177
+ mightActions: string[];
178
+ filePath: string;
179
+ }
180
+
181
/**
 * A page that was not documented, listed in the "Skipped" section of the
 * spec index as "<url>. Reason: <reason>".
 */
+ interface SkippedPage {
182
+ url: string;
183
+ reason: string;
184
+ }
185
+
186
+ export { renderPageDocumentation, renderSpecIndex, ensureSentence, normalizeAction };
187
+ export type { DocumentedPage, SkippedPage };
@@ -0,0 +1,46 @@
1
+ import { normalizeUrl } from '../../../src/state-manager.ts';
2
+ import { matchesUrl, generalizeUrl } from '../../../src/utils/url-matcher.ts';
3
+ import type { DocbotConfig } from './config.ts';
4
+
5
/** Path segments that block crawling when `docs.deniedPathSegments` is not configured. */
const DEFAULT_DENIED_PATH_SEGMENTS = ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'];

/**
 * Decides whether the docs crawler may visit `nextPath`.
 *
 * Rules, in order:
 * 1. A target that cannot be parsed as a URL is rejected.
 * 2. When `docs.includePaths` is non-empty, the pathname must match one of
 *    its patterns, and that match alone decides the result.
 * 3. A pathname matching any `docs.excludePaths` pattern is rejected.
 * 4. A pathname containing a denied segment (case-insensitive, from
 *    `docs.deniedPathSegments` or the defaults) is rejected.
 *
 * @param nextPath - Path (optionally with query/hash) or absolute URL.
 * @param config - Docbot configuration; defaults to an empty config.
 */
export function shouldCrawlDocPath(nextPath: string, config: DocbotConfig = {}): boolean {
  let parsed: URL;
  try {
    // The base only makes relative paths parseable; just the pathname is used.
    parsed = new URL(nextPath, 'http://localhost');
  } catch {
    // A malformed target can never be visited, so reject it rather than
    // letting the exception abort link extraction for the whole page.
    return false;
  }

  const segments = parsed.pathname
    .split('/')
    .map((segment) => segment.trim().toLowerCase())
    .filter(Boolean);
  const normalizedPath = parsed.pathname || '/';

  const includePaths = config.docs?.includePaths || [];
  if (includePaths.length > 0) {
    return includePaths.some((pattern) => matchesUrl(pattern, normalizedPath));
  }

  const excludePaths = config.docs?.excludePaths || [];
  if (excludePaths.some((pattern) => matchesUrl(pattern, normalizedPath))) {
    return false;
  }

  if (segments.length === 0) {
    return true;
  }

  const terminalActions = new Set((config.docs?.deniedPathSegments || DEFAULT_DENIED_PATH_SEGMENTS).map((segment) => segment.trim().toLowerCase()).filter(Boolean));
  return !segments.some((segment) => terminalActions.has(segment));
}
36
+
37
/**
 * Builds the key used to decide whether two URLs refer to the same page.
 * The URL is normalized and given a leading slash; unless
 * `docs.collapseDynamicPages` is explicitly `false`, it is also passed
 * through `generalizeUrl` before the final normalization.
 */
export function getDocPageKey(pageUrl: string, config: DocbotConfig = {}): string {
  const normalized = normalizeUrl(pageUrl || '/');
  const rooted = normalized.startsWith('/') ? normalized : `/${normalized}`;
  const collapseDynamicPages = config.docs?.collapseDynamicPages !== false;

  return normalizeUrl(collapseDynamicPages ? generalizeUrl(rooted) : rooted);
}
@@ -0,0 +1,90 @@
1
+ import type { WebPageState } from '../../../src/state-manager.ts';
2
+ import { parseResearchSections, type ResearchElement } from '../../../src/ai/researcher/parser.ts';
3
+
4
// Case-insensitive match for `api-<digits>/tag/<slug>`; group 1 captures the
// slug (letters, digits, hyphens). The slug must be followed by a quote, '#',
// '/', ']', whitespace, or the end of the string.
+ const OPEN_API_TAG_SELECTOR_PATTERN = /api-\d+\/tag\/([a-z0-9-]+)(?:["'#/\]\s]|$)/i;
5
// Substrings that mark a research section as navigation when they occur in
// its lower-cased name.
+ const OPEN_API_NAVIGATION_SECTION_KEYWORDS = ['navigation', 'menu'];
6
+
7
/**
 * Scans the research text for navigation-like sections and returns the
 * same-page hash targets derived from their elements, without duplicates
 * and in first-seen order.
 *
 * @param state - Current page state; `state.url` (or '/') is the base for targets.
 * @param research - Research text to parse into sections.
 */
export function extractResearchNavigationTargets(state: WebPageState, research: string): string[] {
  const pageUrl = state.url || '/';
  // A Set preserves insertion order, so it doubles as the ordered result list.
  const found = new Set<string>();

  for (const section of parseResearchSections(research)) {
    const loweredName = section.name.toLowerCase();
    const isNavigationSection = OPEN_API_NAVIGATION_SECTION_KEYWORDS.some((keyword) => loweredName.includes(keyword));
    if (!isNavigationSection) {
      continue;
    }

    for (const element of section.elements) {
      const candidate = extractNavigationTarget(pageUrl, element);
      if (candidate) {
        found.add(candidate);
      }
    }
  }

  return [...found];
}
32
+
33
/**
 * Derives a same-page hash target for one research element.
 * A tag found in the element's CSS selector wins. Otherwise, and only when
 * the current URL already contains '#tag/', a tag slug is inferred from the
 * element's label. Returns null when neither source yields a tag.
 */
function extractNavigationTarget(currentUrl: string, element: ResearchElement): string | null {
  const hashFromCss = extractOpenApiTagHashFromCss(element.css);
  if (hashFromCss) {
    return buildSamePageHashTarget(currentUrl, hashFromCss);
  }

  // Label inference is only attempted on pages that already use '#tag/' anchors.
  const slugFromLabel = currentUrl.includes('#tag/') ? inferOpenApiTagSlugFromLabel(element.name) : null;
  return slugFromLabel ? buildSamePageHashTarget(currentUrl, `tag/${slugFromLabel}`) : null;
}
50
+
51
/**
 * Extracts a `tag/<slug>` hash path from a CSS selector that references an
 * `api-<digits>/tag/<slug>` anchor. Escaped slashes (`\/`) in the selector are
 * unescaped first; the slug is lower-cased.
 *
 * @returns The hash path, or null when the selector is empty or has no tag.
 */
function extractOpenApiTagHashFromCss(css: string | null): string | null {
  if (!css) {
    return null;
  }

  const unescaped = css.split('\\/').join('/');
  const slug = OPEN_API_TAG_SELECTOR_PATTERN.exec(unescaped)?.[1];

  return slug ? `tag/${slug.toLowerCase()}` : null;
}
64
+
65
/**
 * Turns a slash-separated label such as "Users / Create" into a hyphenated
 * slug ("users-create"). Surrounding single quotes and UI state words
 * ("(expanded)", "open group", "show more", ...) are removed first.
 *
 * @returns The slug, or null when the label has no '/' or nothing usable remains.
 */
function inferOpenApiTagSlugFromLabel(name: string): string | null {
  const withoutQuotes = name.replace(/^'+|'+$/g, '');
  const label = withoutQuotes.replace(/\(expanded\)|\(collapsed\)|open group|close group|show more/gi, '').trim();

  if (label.indexOf('/') === -1) {
    return null;
  }

  const pieces: string[] = [];
  for (const rawPiece of label.split('/')) {
    const piece = rawPiece.trim().toLowerCase();
    if (piece) {
      pieces.push(piece);
    }
  }

  // Replace anything outside [a-z0-9-], collapse hyphen runs, then strip edge hyphens.
  const hyphenated = pieces.join('-').replace(/[^a-z0-9-]+/g, '-');
  const collapsed = hyphenated.replace(/-+/g, '-');
  const slug = collapsed.replace(/^-|-$/g, '');

  return slug === '' ? null : slug;
}
86
+
87
/**
 * Replaces the hash of `currentUrl` with `hashPath`, keeping the path and
 * query string. A URL without a hash simply gets one appended.
 */
function buildSamePageHashTarget(currentUrl: string, hashPath: string): string {
  const hashIndex = currentUrl.indexOf('#');
  const withoutHash = hashIndex === -1 ? currentUrl : currentUrl.slice(0, hashIndex);
  return withoutHash + '#' + hashPath;
}