@gulibs/safe-coder 0.0.23 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/README.md +351 -15
  2. package/dist/documentation/checkpoint-manager.d.ts +38 -0
  3. package/dist/documentation/checkpoint-manager.d.ts.map +1 -0
  4. package/dist/documentation/checkpoint-manager.js +101 -0
  5. package/dist/documentation/checkpoint-manager.js.map +1 -0
  6. package/dist/documentation/doc-crawler.d.ts +77 -2
  7. package/dist/documentation/doc-crawler.d.ts.map +1 -1
  8. package/dist/documentation/doc-crawler.js +752 -179
  9. package/dist/documentation/doc-crawler.js.map +1 -1
  10. package/dist/documentation/llms-txt/detector.d.ts +31 -0
  11. package/dist/documentation/llms-txt/detector.d.ts.map +1 -0
  12. package/dist/documentation/llms-txt/detector.js +77 -0
  13. package/dist/documentation/llms-txt/detector.js.map +1 -0
  14. package/dist/documentation/llms-txt/downloader.d.ts +30 -0
  15. package/dist/documentation/llms-txt/downloader.d.ts.map +1 -0
  16. package/dist/documentation/llms-txt/downloader.js +84 -0
  17. package/dist/documentation/llms-txt/downloader.js.map +1 -0
  18. package/dist/documentation/llms-txt/index.d.ts +4 -0
  19. package/dist/documentation/llms-txt/index.d.ts.map +1 -0
  20. package/dist/documentation/llms-txt/index.js +4 -0
  21. package/dist/documentation/llms-txt/index.js.map +1 -0
  22. package/dist/documentation/llms-txt/parser.d.ts +43 -0
  23. package/dist/documentation/llms-txt/parser.d.ts.map +1 -0
  24. package/dist/documentation/llms-txt/parser.js +177 -0
  25. package/dist/documentation/llms-txt/parser.js.map +1 -0
  26. package/dist/documentation/skill-generator.d.ts +38 -2
  27. package/dist/documentation/skill-generator.d.ts.map +1 -1
  28. package/dist/documentation/skill-generator.js +331 -62
  29. package/dist/documentation/skill-generator.js.map +1 -1
  30. package/dist/index.js +0 -0
  31. package/dist/server/mcp-server.d.ts.map +1 -1
  32. package/dist/server/mcp-server.js +152 -9
  33. package/dist/server/mcp-server.js.map +1 -1
  34. package/package.json +10 -11
package/dist/documentation/doc-crawler.js
@@ -1,5 +1,10 @@
+ import { HttpClient } from '../utils/http-client.js';
  import { logger } from '../utils/logger.js';
  import { WebDocumentationBrowser } from './web-doc-browser.js';
+ import { LlmsTxtDetector, LlmsTxtDownloader, LlmsTxtParser } from './llms-txt/index.js';
+ import { CheckpointManager } from './checkpoint-manager.js';
+ import { join } from 'path';
+ import { tmpdir } from 'os';
  export class DocumentationCrawler {
  browser;
  visitedUrls;
@@ -9,6 +14,8 @@ export class DocumentationCrawler {
  options;
  baseUrl;
  linkDiscoveryStats;
+ checkpointManager;
+ pagesSinceLastCheckpoint;
  DOCUMENTATION_PATTERNS = [
  /\/docs?\//i,
  /\/documentation/i,
@@ -39,6 +46,7 @@ export class DocumentationCrawler {
  this.crawledPages = [];
  this.errors = [];
  this.options = {
+ crawlStrategy: 'bfs', // Default to breadth-first search
  maxDepth: 3,
  maxPages: 50,
  includePaths: [],
@@ -46,12 +54,15 @@ export class DocumentationCrawler {
  rateLimit: 500, // 500ms default delay
  maxRetries: 2, // Default 2 retries
  retryDelay: 1000, // Default 1 second delay before retry
+ useBrowserAutomation: false, // Default to HTTP-only for backward compatibility
+ skipLlmsTxt: false, // Enable llms.txt detection by default
+ workers: 1, // Default to single-threaded crawling
  };
  this.baseUrl = new URL('https://example.com');
  this.linkDiscoveryStats = {
  totalLinksFound: 0,
  linksFiltered: {
- notDocumentation: 0,
+ notContent: 0,
  externalDomain: 0,
  alreadyVisited: 0,
  excludedPattern: 0,
@@ -61,15 +72,19 @@ export class DocumentationCrawler {
  pagesDiscovered: 0,
  pagesCrawled: 0,
  };
+ this.pagesSinceLastCheckpoint = 0;
  }
  /**
  * Crawl documentation starting from a root URL
  * Uses HTTP client (axios) exclusively - no browser automation
  * For SPA sites that require JavaScript rendering, use Cursor/Claude's built-in browser tools
+ * Supports both BFS (breadth-first) and DFS (depth-first) crawl strategies
  */
  async crawl(rootUrl, options = {}) {
+ const strategy = options.crawlStrategy || 'bfs';
  logger.info('Starting documentation crawl using HTTP client (axios)', {
  url: rootUrl,
+ strategy,
  method: 'HTTP GET',
  client: 'axios/HttpClient',
  note: 'For SPA sites, use Cursor/Claude browser tools to get rendered content first',
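
The defaults added above (crawlStrategy, useBrowserAutomation, skipLlmsTxt, workers) pair with the crawl(rootUrl, options) signature in this hunk. A minimal sketch of a call using the new options, assuming crawl() merges them into the defaults (the merge itself is outside this hunk), that the constructor takes no required arguments, and that the import path matches the published layout:

// Illustrative sketch only - option names come from this diff, everything else is assumed.
import { DocumentationCrawler } from './documentation/doc-crawler.js';

const crawler = new DocumentationCrawler();
const result = await crawler.crawl('https://example.com/docs', {
  crawlStrategy: 'dfs', // 'bfs' (default) or 'dfs'
  workers: 4,           // values > 1 switch to crawlWithWorkers()
  maxPages: 100,
  skipLlmsTxt: false,   // keep llms.txt detection enabled
});
console.log(result.totalPages, result.maxDepthReached);
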
@@ -82,7 +97,7 @@ export class DocumentationCrawler {
  this.linkDiscoveryStats = {
  totalLinksFound: 0,
  linksFiltered: {
- notDocumentation: 0,
+ notContent: 0,
  externalDomain: 0,
  alreadyVisited: 0,
  excludedPattern: 0,
@@ -104,12 +119,28 @@ export class DocumentationCrawler {
  catch (error) {
  throw new Error(`Invalid root URL: ${rootUrl}`);
  }
- // Check if root URL is documentation
- logger.debug('Checking if URL is documentation page (HTTP request)', { url: rootUrl });
- const isDoc = await this.browser.isDocumentationPage(rootUrl);
- if (!isDoc) {
- throw new Error(`The provided URL does not appear to be a documentation page: ${rootUrl}\n` +
- `Note: For SPA sites that require JavaScript rendering, use Cursor/Claude's browser tools to get rendered HTML first, then process it.`);
+ // No longer require documentation-only pages - allow any website with extractable content
+ logger.debug('Starting crawl from URL (permissive mode)', { url: rootUrl });
+ // Setup checkpoint manager if enabled
+ if (this.options.checkpoint?.enabled) {
+ const checkpointFile = this.options.checkpoint.file ||
+ join(tmpdir(), `safe-coder-checkpoint-${this.sanitizeFilename(rootUrl)}.json`);
+ this.checkpointManager = new CheckpointManager(checkpointFile);
+ // Try to resume from checkpoint if requested
+ if (this.options.resume) {
+ const loaded = await this.loadCheckpoint();
+ if (loaded) {
+ logger.info('Resumed from checkpoint', {
+ pagesCrawled: this.crawledPages.length,
+ pendingUrls: this.urlQueue.length,
+ visitedUrls: this.visitedUrls.size,
+ });
+ }
+ }
+ }
+ // Try to detect and use llms.txt if available (unless explicitly disabled)
+ if (!this.options.skipLlmsTxt) {
+ await this.tryLlmsTxt(rootUrl);
  }
  // Detect SPA and provide warning
  try {
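
The checkpoint block above reads options.checkpoint.enabled, options.checkpoint.file, and options.resume; options.checkpoint.interval is read later in processPage(). A resumable-crawl sketch reusing the crawler instance from the earlier example (the file path is illustrative):

// Checkpoint/resume sketch - option names are taken from this diff, the path is made up.
const resumed = await crawler.crawl('https://example.com/docs', {
  checkpoint: {
    enabled: true,
    interval: 10,                          // pages between checkpoint saves (default used in processPage)
    file: '/tmp/my-crawl-checkpoint.json', // omit to fall back to the tmpdir() default above
  },
  resume: true,                            // restore visited URLs and the pending queue from a prior run
});
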
@@ -140,12 +171,82 @@ export class DocumentationCrawler {
  // Start crawling from root
  this.urlQueue.push({ url: rootUrl, depth: 0 });
  let maxDepthReached = 0;
- // Process queue
+ // Process queue - use parallel workers if specified
  const startTime = Date.now();
+ const workerCount = this.options.workers || 1;
+ if (workerCount > 1) {
+ logger.info('Using parallel crawling', { workers: workerCount });
+ maxDepthReached = await this.crawlWithWorkers(startTime);
+ }
+ else {
+ maxDepthReached = await this.crawlSequential(startTime);
+ }
+ // Update final statistics
+ this.linkDiscoveryStats.pagesDiscovered = this.visitedUrls.size;
+ // Calculate final statistics
+ const totalTime = ((Date.now() - startTime) / 1000).toFixed(2);
+ const avgTimePerPage = this.crawledPages.length > 0
+ ? ((Date.now() - startTime) / this.crawledPages.length / 1000).toFixed(2)
+ : '0';
+ const successRate = this.linkDiscoveryStats.pagesDiscovered > 0
+ ? ((this.crawledPages.length / this.linkDiscoveryStats.pagesDiscovered) * 100).toFixed(1)
+ : '0';
+ // Log crawl completion with comprehensive statistics
+ logger.info('Documentation crawl completed using HTTP client (axios)', {
+ totalPages: this.crawledPages.length,
+ maxDepthReached,
+ errors: this.errors.length,
+ totalTimeSeconds: totalTime,
+ avgTimePerPageSeconds: avgTimePerPage,
+ successRate: `${successRate}%`,
+ method: 'HTTP GET',
+ client: 'axios/HttpClient',
+ linkStats: {
+ totalLinksFound: this.linkDiscoveryStats.totalLinksFound,
+ linksQueued: this.linkDiscoveryStats.linksQueued,
+ linksFiltered: this.linkDiscoveryStats.linksFiltered,
+ pagesDiscovered: this.linkDiscoveryStats.pagesDiscovered,
+ pagesCrawled: this.linkDiscoveryStats.pagesCrawled,
+ },
+ errorBreakdown: this.getErrorBreakdown(),
+ });
+ // Validate if content is sufficient for skill generation
+ const validation = this.canGenerateSkill(this.crawledPages);
+ const abandoned = !validation.canGenerate;
+ const abandonReason = validation.reason;
+ if (abandoned) {
+ logger.warn('Crawl completed but content is insufficient for skill generation', {
+ reason: abandonReason,
+ pagesCrawled: this.crawledPages.length,
+ suggestion: 'Consider crawling more pages or a different website',
+ });
+ }
+ // Clear checkpoint after successful completion
+ if (this.checkpointManager && !abandoned) {
+ await this.clearCheckpoint();
+ }
+ return {
+ pages: this.crawledPages,
+ totalPages: this.crawledPages.length,
+ maxDepthReached,
+ errors: this.errors,
+ linkDiscoveryStats: this.linkDiscoveryStats,
+ abandoned,
+ abandonReason,
+ };
+ }
+ /**
+ * Sequential crawling (single-threaded)
+ */
+ async crawlSequential(startTime) {
+ let maxDepthReached = 0;
  let lastProgressLog = Date.now();
  const PROGRESS_LOG_INTERVAL = 5000; // Log progress every 5 seconds
  while (this.urlQueue.length > 0 && this.crawledPages.length < this.options.maxPages) {
- const queued = this.urlQueue.shift();
+ // Use different strategies for getting next URL
+ // BFS: shift() - take from front (queue behavior)
+ // DFS: pop() - take from back (stack behavior)
+ const queued = this.options.crawlStrategy === 'dfs' ? this.urlQueue.pop() : this.urlQueue.shift();
  if (!queued)
  break;
  const { url, depth } = queued;
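
The return value assembled above now carries abandoned and abandonReason next to pages, errors, and linkDiscoveryStats; the possible reason strings are defined in canGenerateSkill() further down. A short sketch of consuming that result:

// Result-handling sketch - field names and reason strings come from this diff.
const { pages, totalPages, errors, abandoned, abandonReason } = await crawler.crawl('https://example.com/docs');
if (abandoned) {
  // abandonReason is one of: 'empty_pages', 'media_only', 'insufficient_content', 'no_structured_content'
  console.warn(`Content insufficient for skill generation: ${abandonReason}`);
} else {
  console.log(`Crawled ${totalPages} pages (${errors.length} errors)`);
}
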
@@ -177,144 +278,177 @@ export class DocumentationCrawler {
  // Mark as visited
  this.visitedUrls.add(url);
  maxDepthReached = Math.max(maxDepthReached, depth);
- try {
- // Crawl the page using HTTP GET with retry logic
- logger.debug('Fetching page via HTTP GET', { url, depth, method: 'HTTP GET', client: 'axios' });
- const page = await this.fetchPageWithRetry(url);
- // Check if page has minimal content (possible SPA issue)
- const contentLength = page.content.length;
- const linksCount = page.navigationLinks.length;
- if (contentLength < 200 && linksCount < 3) {
- logger.warn('Page has minimal content - may be SPA', {
- url,
- contentLength,
- linksCount,
- suggestion: 'This page may require JavaScript rendering. Consider using browser automation tools.',
- });
+ await this.processPage(url, depth);
+ // Rate limiting
+ if (this.options.rateLimit > 0 && this.urlQueue.length > 0) {
+ await this.delay(this.options.rateLimit);
+ }
+ }
+ return maxDepthReached;
+ }
+ /**
+ * Parallel crawling with multiple workers
+ */
+ async crawlWithWorkers(startTime) {
+ let maxDepthReached = 0;
+ let lastProgressLog = Date.now();
+ const PROGRESS_LOG_INTERVAL = 5000;
+ const workerCount = this.options.workers || 1;
+ while (this.urlQueue.length > 0 && this.crawledPages.length < this.options.maxPages) {
+ // Log progress periodically
+ const now = Date.now();
+ if (now - lastProgressLog >= PROGRESS_LOG_INTERVAL) {
+ const elapsed = ((now - startTime) / 1000).toFixed(1);
+ const pagesPerSecond = (this.crawledPages.length / elapsed).toFixed(2);
+ logger.info('Crawl progress (parallel)', {
+ pagesCrawled: this.crawledPages.length,
+ pagesRemaining: this.urlQueue.length,
+ maxPages: this.options.maxPages,
+ errors: this.errors.length,
+ elapsedSeconds: elapsed,
+ pagesPerSecond,
+ workers: workerCount,
+ });
+ lastProgressLog = now;
+ }
+ // Get batch of URLs to process in parallel
+ const batch = [];
+ const batchSize = Math.min(workerCount, this.urlQueue.length, this.options.maxPages - this.crawledPages.length);
+ for (let i = 0; i < batchSize; i++) {
+ const queued = this.options.crawlStrategy === 'dfs' ? this.urlQueue.pop() : this.urlQueue.shift();
+ if (!queued)
+ break;
+ // Skip if already visited
+ if (this.visitedUrls.has(queued.url)) {
+ continue;
  }
- // Convert to CrawledPage format
- const crawledPage = {
- url: page.url,
- title: page.title,
- content: page.content,
- depth,
- sections: page.sections,
- navigationLinks: page.navigationLinks,
- headings: page.headings,
- codeSamples: page.codeSamples,
- };
- this.crawledPages.push(crawledPage);
- this.linkDiscoveryStats.pagesCrawled++;
- const totalLinksOnPage = page.navigationLinks.length;
- this.linkDiscoveryStats.totalLinksFound += totalLinksOnPage;
- logger.debug('Page fetched and parsed successfully', {
+ // Check depth limit
+ if (queued.depth > this.options.maxDepth) {
+ continue;
+ }
+ // Mark as visited
+ this.visitedUrls.add(queued.url);
+ maxDepthReached = Math.max(maxDepthReached, queued.depth);
+ batch.push(queued);
+ }
+ if (batch.length === 0) {
+ break;
+ }
+ // Process batch in parallel
+ await Promise.all(batch.map(async (queued) => {
+ await this.processPage(queued.url, queued.depth);
+ // Rate limiting (per worker)
+ if (this.options.rateLimit > 0) {
+ await this.delay(this.options.rateLimit);
+ }
+ }));
+ }
+ return maxDepthReached;
+ }
+ /**
+ * Process a single page (shared by both sequential and parallel crawling)
+ */
+ async processPage(url, depth) {
+ try {
+ // Crawl the page using HTTP GET with retry logic
+ logger.debug('Fetching page via HTTP GET', { url, depth, method: 'HTTP GET', client: 'axios' });
+ const page = await this.fetchPageWithRetry(url);
+ // Check if page has minimal content (possible SPA issue)
+ const contentLength = page.content.length;
+ const linksCount = page.navigationLinks.length;
+ if (contentLength < 200 && linksCount < 3) {
+ logger.warn('Page has minimal content - may be SPA', {
  url,
- title: page.title.substring(0, 50),
- linksFound: totalLinksOnPage,
- depth,
+ contentLength,
+ linksCount,
+ suggestion: 'This page may require JavaScript rendering. Consider using browser automation tools.',
  });
- // Discover and queue new URLs
- if (depth < this.options.maxDepth) {
- const discoveryResult = this.discoverDocumentationLinks(page, depth + 1);
- const newUrls = discoveryResult.discovered;
- logger.debug('Link discovery completed', {
- url,
- totalLinksOnPage,
- discovered: newUrls.length,
- filtered: discoveryResult.filtered,
- alreadyVisited: discoveryResult.alreadyVisited,
- notDocumentation: discoveryResult.notDocumentation,
- externalDomain: discoveryResult.externalDomain,
- excludedPattern: discoveryResult.excludedPattern,
- queueLengthBefore: this.urlQueue.length,
- });
- let queuedCount = 0;
- let skippedAlreadyVisited = 0;
- for (const newUrl of newUrls) {
- if (!this.visitedUrls.has(newUrl.url)) {
- // Also check if it's already in the queue to avoid duplicates
- const alreadyInQueue = this.urlQueue.some(q => q.url === newUrl.url);
- if (!alreadyInQueue) {
- this.urlQueue.push(newUrl);
- this.linkDiscoveryStats.linksQueued++;
- queuedCount++;
- }
- else {
- skippedAlreadyVisited++;
- }
+ }
+ // Convert to CrawledPage format
+ const crawledPage = {
+ url: page.url,
+ title: page.title,
+ content: page.content,
+ depth,
+ sections: page.sections,
+ navigationLinks: page.navigationLinks,
+ headings: page.headings,
+ codeSamples: page.codeSamples,
+ };
+ this.crawledPages.push(crawledPage);
+ this.linkDiscoveryStats.pagesCrawled++;
+ this.pagesSinceLastCheckpoint++;
+ // Save checkpoint if interval reached
+ if (this.checkpointManager && this.options.checkpoint?.enabled) {
+ const interval = this.options.checkpoint.interval || 10;
+ if (this.pagesSinceLastCheckpoint >= interval) {
+ await this.saveCheckpoint();
+ this.pagesSinceLastCheckpoint = 0;
+ }
+ }
+ const totalLinksOnPage = page.navigationLinks.length;
+ this.linkDiscoveryStats.totalLinksFound += totalLinksOnPage;
+ logger.debug('Page fetched and parsed successfully', {
+ url,
+ title: page.title.substring(0, 50),
+ linksFound: totalLinksOnPage,
+ depth,
+ });
+ // Discover and queue new URLs
+ if (depth < this.options.maxDepth) {
+ const discoveryResult = this.discoverDocumentationLinks(page, depth + 1);
+ const newUrls = discoveryResult.discovered;
+ logger.debug('Link discovery completed', {
+ url,
+ totalLinksOnPage,
+ discovered: newUrls.length,
+ filtered: discoveryResult.filtered,
+ });
+ let queuedCount = 0;
+ let skippedAlreadyVisited = 0;
+ for (const newUrl of newUrls) {
+ if (!this.visitedUrls.has(newUrl.url)) {
+ // Also check if it's already in the queue to avoid duplicates
+ const alreadyInQueue = this.urlQueue.some(q => q.url === newUrl.url);
+ if (!alreadyInQueue) {
+ this.urlQueue.push(newUrl);
+ this.linkDiscoveryStats.linksQueued++;
+ queuedCount++;
  }
  else {
  skippedAlreadyVisited++;
  }
  }
- logger.debug('Links queued', {
- url,
- queued: queuedCount,
- skippedAlreadyVisited,
- queueLengthAfter: this.urlQueue.length,
- });
- }
- else {
- this.linkDiscoveryStats.linksFiltered.depthLimit += totalLinksOnPage;
- }
- // Rate limiting
- if (this.options.rateLimit > 0 && this.urlQueue.length > 0) {
- await this.delay(this.options.rateLimit);
+ else {
+ skippedAlreadyVisited++;
+ }
  }
- }
- catch (error) {
- const errorMessage = error instanceof Error ? error.message : String(error);
- const errorType = this.classifyError(error);
- this.errors.push({
+ logger.debug('Links queued', {
  url,
- error: `${errorType}: ${errorMessage}`,
- });
- logger.warn('Page crawl failed', {
- url,
- error: errorMessage,
- errorType,
- depth,
- willContinue: true,
+ queued: queuedCount,
+ skippedAlreadyVisited,
+ queueLengthAfter: this.urlQueue.length,
  });
- // Continue crawling other pages
+ }
+ else {
+ this.linkDiscoveryStats.linksFiltered.depthLimit += totalLinksOnPage;
  }
  }
- // Update final statistics
- this.linkDiscoveryStats.pagesDiscovered = this.visitedUrls.size;
- // Calculate final statistics
- const totalTime = ((Date.now() - startTime) / 1000).toFixed(2);
- const avgTimePerPage = this.crawledPages.length > 0
- ? ((Date.now() - startTime) / this.crawledPages.length / 1000).toFixed(2)
- : '0';
- const successRate = this.linkDiscoveryStats.pagesDiscovered > 0
- ? ((this.crawledPages.length / this.linkDiscoveryStats.pagesDiscovered) * 100).toFixed(1)
- : '0';
- // Log crawl completion with comprehensive statistics
- logger.info('Documentation crawl completed using HTTP client (axios)', {
- totalPages: this.crawledPages.length,
- maxDepthReached,
- errors: this.errors.length,
- totalTimeSeconds: totalTime,
- avgTimePerPageSeconds: avgTimePerPage,
- successRate: `${successRate}%`,
- method: 'HTTP GET',
- client: 'axios/HttpClient',
- linkStats: {
- totalLinksFound: this.linkDiscoveryStats.totalLinksFound,
- linksQueued: this.linkDiscoveryStats.linksQueued,
- linksFiltered: this.linkDiscoveryStats.linksFiltered,
- pagesDiscovered: this.linkDiscoveryStats.pagesDiscovered,
- pagesCrawled: this.linkDiscoveryStats.pagesCrawled,
- },
- errorBreakdown: this.getErrorBreakdown(),
- });
- return {
- pages: this.crawledPages,
- totalPages: this.crawledPages.length,
- maxDepthReached,
- errors: this.errors,
- linkDiscoveryStats: this.linkDiscoveryStats,
- };
+ catch (error) {
+ const errorMessage = error instanceof Error ? error.message : String(error);
+ const errorType = this.classifyError(error);
+ this.errors.push({
+ url,
+ error: `${errorType}: ${errorMessage}`,
+ });
+ logger.warn('Page crawl failed', {
+ url,
+ error: errorMessage,
+ errorType,
+ depth,
+ willContinue: true,
+ });
+ }
  }
  /**
  * Discover documentation links from a crawled page
@@ -322,7 +456,7 @@ export class DocumentationCrawler {
  discoverDocumentationLinks(page, nextDepth) {
  const discovered = [];
  const filtered = {
- notDocumentation: 0,
+ notContent: 0, // Renamed from notDocumentation
  externalDomain: 0,
  alreadyVisited: 0,
  excludedPattern: 0,
@@ -353,11 +487,11 @@ export class DocumentationCrawler {
  linkDetails.push({ url: link.url, reason: 'already_visited' });
  continue;
  }
- // Check if it's a documentation path
+ // Check if it's a valid content path (permissive - only exclude clearly non-content)
  if (!this.isDocumentationPath(linkUrl.pathname)) {
- filtered.notDocumentation++;
- this.linkDiscoveryStats.linksFiltered.notDocumentation++;
- linkDetails.push({ url: link.url, reason: 'not_documentation_path', pathname: linkUrl.pathname });
+ filtered.notContent++;
+ this.linkDiscoveryStats.linksFiltered.notContent++;
+ linkDetails.push({ url: link.url, reason: 'not_content_path', pathname: linkUrl.pathname });
  continue;
  }
  // Check exclude patterns
@@ -371,8 +505,8 @@ export class DocumentationCrawler {
  if (this.options.includePaths.length > 0) {
  const matchesInclude = this.options.includePaths.some(pattern => linkUrl.pathname.includes(pattern));
  if (!matchesInclude) {
- filtered.notDocumentation++;
- this.linkDiscoveryStats.linksFiltered.notDocumentation++;
+ filtered.notContent++;
+ this.linkDiscoveryStats.linksFiltered.notContent++;
  linkDetails.push({ url: link.url, reason: 'not_in_include_paths', pathname: linkUrl.pathname });
  continue;
  }
@@ -413,7 +547,7 @@ export class DocumentationCrawler {
  totalLinks: page.navigationLinks.length,
  discovered: discovered.length,
  filtered: {
- notDocumentation: filtered.notDocumentation,
+ notContent: filtered.notContent,
  externalDomain: filtered.externalDomain,
  alreadyVisited: filtered.alreadyVisited,
  excludedPattern: filtered.excludedPattern,
@@ -428,52 +562,42 @@ export class DocumentationCrawler {
  discovered,
  filtered,
  alreadyVisited: filtered.alreadyVisited,
- notDocumentation: filtered.notDocumentation,
+ notContent: filtered.notContent,
  externalDomain: filtered.externalDomain,
  excludedPattern: filtered.excludedPattern,
  };
  }
  /**
- * Check if a path is a documentation path
+ * Check if a path should be crawled (permissive - only exclude clearly non-content paths)
  */
  isDocumentationPath(pathname) {
- // Check against documentation patterns
- const matchesPattern = this.DOCUMENTATION_PATTERNS.some(pattern => pattern.test(pathname));
- // If it matches a pattern, it's definitely documentation
- if (matchesPattern) {
+ // Exclude clearly non-content pages
+ if (this.shouldExclude(pathname)) {
+ return false;
+ }
+ // Exclude static resources
+ const looksLikeStaticResource = /\.(?:css|js|json|xml|png|jpg|jpeg|gif|svg|ico|woff|woff2|ttf|eot|pdf|zip|exe|dmg)$/i.test(pathname);
+ if (looksLikeStaticResource) {
+ return false;
+ }
+ // Exclude API endpoints that are clearly not content (unless they're documentation APIs)
+ // Keep API endpoints that might be documentation (e.g., /api/docs, /docs/api)
+ const looksLikeApiEndpoint = /^\/api\/[^/]+$/i.test(pathname);
+ if (looksLikeApiEndpoint && !pathname.includes('/docs') && !pathname.includes('/documentation')) {
+ return false;
+ }
+ // Allow root path
+ if (pathname === '/' || pathname === '') {
  return true;
  }
- // Additional check: if the base URL is a documentation page (which we verified at start),
- // then paths on the same domain are likely documentation too (unless they match excluded patterns)
- // This helps with sites that have documentation at root level or non-standard paths
- // Only apply this if the path doesn't match excluded patterns
- if (!this.shouldExclude(pathname)) {
- // If pathname is just "/" or empty, it's the root - check if base URL was documentation
- if (pathname === '/' || pathname === '') {
- return true; // Root of a documentation site is documentation
- }
- // For documentation sites, be more permissive:
- // 1. If path contains common documentation keywords
- // 2. If path looks like a documentation structure (no file extensions like .html, .php, etc.)
- // 3. If path doesn't look like an API endpoint or static resource
- const hasFileExtension = /\.[a-z]{2,4}$/i.test(pathname.split('?')[0]);
- const looksLikeStaticResource = /\.(?:css|js|json|xml|png|jpg|jpeg|gif|svg|ico|woff|woff2|ttf|eot)$/i.test(pathname);
- const looksLikeApiEndpoint = /^\/api\/[^/]+$/i.test(pathname) && !/\/docs\/api\//i.test(pathname);
- // If it's a static resource or API endpoint (not docs), exclude it
- if (looksLikeStaticResource || (looksLikeApiEndpoint && !pathname.includes('/docs'))) {
- return false;
- }
- // If it has a file extension (but not a static resource), be conservative
- if (hasFileExtension) {
- return false;
- }
- // For paths without file extensions, check if they contain documentation keywords
- // OR if they're under common documentation paths
- const looksLikeDoc = /(?:doc|guide|tutorial|api|reference|manual|help|about|getting-started|examples?)/i.test(pathname);
- const isUnderDocPath = /^\/(?:docs?|documentation|guides?|tutorials?|api|reference|manual|help|examples?)/i.test(pathname);
- return looksLikeDoc || isUnderDocPath;
+ // Exclude paths with file extensions (unless they're HTML pages)
+ const hasFileExtension = /\.[a-z]{2,4}$/i.test(pathname.split('?')[0]);
+ if (hasFileExtension && !pathname.match(/\.(html?|htm)$/i)) {
+ return false;
  }
- return false;
+ // Permissive: allow any path that doesn't match exclusion patterns
+ // This allows crawling any website, not just documentation
+ return true;
  }
  /**
  * Check if a path should be excluded
@@ -481,11 +605,143 @@ export class DocumentationCrawler {
  shouldExclude(pathname) {
  return this.EXCLUDED_PATTERNS.some(pattern => pattern.test(pathname));
  }
+ /**
+ * Check if crawled content is sufficient for skill generation
+ * Enhanced with multi-dimensional quality metrics
+ */
+ canGenerateSkill(pages) {
+ if (pages.length === 0) {
+ return { canGenerate: false, reason: 'empty_pages' };
+ }
+ const metrics = this.evaluateContentQuality(pages);
+ // All pages are media-only
+ if (metrics.mediaOnlyPages === pages.length && !metrics.hasTextContent) {
+ return { canGenerate: false, reason: 'media_only' };
+ }
+ // No pages have sufficient content
+ if (!metrics.hasSufficientContent) {
+ return { canGenerate: false, reason: 'insufficient_content' };
+ }
+ // No structured content (headings, sections)
+ if (!metrics.hasStructuredContent) {
+ return { canGenerate: false, reason: 'no_structured_content' };
+ }
+ return { canGenerate: true };
+ }
+ /**
+ * Evaluate content quality with multi-dimensional metrics
+ */
+ evaluateContentQuality(pages) {
+ const MIN_CONTENT_LENGTH = 100;
+ let hasSufficientContent = false;
+ let hasStructuredContent = false;
+ let hasTextContent = false;
+ let mediaOnlyCount = 0;
+ let totalContentLength = 0;
+ let totalCodeSamples = 0;
+ // Track content diversity
+ const urlPatterns = new Set();
+ const titlePatterns = new Set();
+ for (const page of pages) {
+ const contentLength = (page.content || '').trim().length;
+ const hasHeadings = page.headings && page.headings.length > 0;
+ const hasText = contentLength > 0;
+ totalContentLength += contentLength;
+ totalCodeSamples += (page.codeSamples || []).length;
+ // Check if page is media-only
+ const hasImages = /<img[^>]*>/i.test(page.content || '');
+ const hasMedia = hasImages || (page.codeSamples && page.codeSamples.length > 0);
+ if (hasMedia && contentLength < MIN_CONTENT_LENGTH) {
+ mediaOnlyCount++;
+ }
+ if (contentLength >= MIN_CONTENT_LENGTH) {
+ hasSufficientContent = true;
+ }
+ if (hasHeadings) {
+ hasStructuredContent = true;
+ }
+ if (hasText) {
+ hasTextContent = true;
+ }
+ // Track diversity
+ try {
+ const urlPath = new URL(page.url).pathname;
+ const pathSegments = urlPath.split('/').filter(s => s);
+ if (pathSegments.length > 0) {
+ urlPatterns.add(pathSegments[0]);
+ }
+ }
+ catch {
+ // Invalid URL, skip
+ }
+ // Track title diversity
+ const titleWords = page.title.toLowerCase().split(/\s+/).slice(0, 3);
+ titlePatterns.add(titleWords.join(' '));
+ }
+ // Calculate diversity score (0-1)
+ const contentDiversity = Math.min(1, (urlPatterns.size + titlePatterns.size) / (pages.length * 0.5));
+ // Calculate API coverage score (0-1)
+ const pagesWithCode = pages.filter(p => p.codeSamples && p.codeSamples.length > 0).length;
+ const apiCoverage = pages.length > 0 ? pagesWithCode / pages.length : 0;
+ const avgContentLength = pages.length > 0 ? totalContentLength / pages.length : 0;
+ return {
+ hasSufficientContent,
+ hasStructuredContent,
+ hasTextContent,
+ mediaOnlyPages: mediaOnlyCount,
+ contentDiversity,
+ apiCoverage,
+ avgContentLength,
+ totalCodeSamples,
+ };
+ }
+ /**
+ * Check if should continue crawling based on content quality
+ */
+ shouldContinueCrawling(currentPages, maxPages) {
+ if (currentPages >= maxPages) {
+ return false;
+ }
+ // Evaluate quality every 10 pages
+ if (currentPages % 10 === 0 && currentPages > 0) {
+ const metrics = this.evaluateContentQuality(this.crawledPages);
+ // High quality content - can stop early if we have enough
+ if (metrics.hasSufficientContent &&
+ metrics.contentDiversity > 0.7 &&
+ metrics.apiCoverage > 0.5 &&
+ currentPages >= maxPages * 0.5) {
+ logger.info('High quality content detected, considering early stop', {
+ currentPages,
+ maxPages,
+ diversity: metrics.contentDiversity.toFixed(2),
+ apiCoverage: metrics.apiCoverage.toFixed(2),
+ });
+ // Continue but log the possibility
+ }
+ // Low quality warning
+ if (currentPages >= maxPages * 0.8 && !metrics.hasSufficientContent) {
+ logger.warn('Approaching page limit but content quality is low', {
+ currentPages,
+ maxPages,
+ diversity: metrics.contentDiversity.toFixed(2),
+ apiCoverage: metrics.apiCoverage.toFixed(2),
+ suggestion: 'Consider increasing maxPages or refining includePaths',
+ });
+ }
+ }
+ return currentPages < maxPages;
+ }
  /**
  * Fetch a page with retry logic
+ * Supports both HTML pages and Markdown files
  */
  async fetchPageWithRetry(url, retryCount = 0) {
  try {
+ // Check if this is a Markdown file
+ if (url.endsWith('.md') || url.includes('.md?') || url.includes('.md#')) {
+ return await this.extractMarkdownContent(url);
+ }
+ // Regular HTML page
  return await this.browser.browsePage(url);
  }
  catch (error) {
@@ -507,6 +763,166 @@ export class DocumentationCrawler {
  throw error;
  }
  }
+ /**
+ * Extract content from Markdown file
+ * Converts Markdown structure to WebDocumentationPage format
+ */
+ async extractMarkdownContent(url) {
+ logger.debug('Extracting Markdown content', { url });
+ // Fetch raw markdown content
+ const httpClient = new HttpClient();
+ const response = await httpClient.get(url, {
+ responseType: 'text',
+ timeout: 30000,
+ });
+ const markdownContent = response.data;
+ // Parse markdown structure
+ const parsed = this.parseMarkdown(markdownContent, url);
+ return {
+ url,
+ title: parsed.title,
+ content: parsed.content,
+ searchableContent: parsed.content, // Add searchable content for consistency
+ sections: parsed.sections,
+ navigationLinks: parsed.links,
+ headings: parsed.headings,
+ codeSamples: parsed.codeSamples,
+ isDocumentation: true,
+ };
+ }
+ /**
+ * Parse Markdown content into structured data
+ */
+ parseMarkdown(content, url) {
+ const lines = content.split('\n');
+ let title = '';
+ const headings = [];
+ const codeSamples = [];
+ const sections = [];
+ const links = [];
+ const contentLines = [];
+ // Extract title from first h1
+ for (const line of lines) {
+ if (line.startsWith('# ')) {
+ title = line.substring(2).trim();
+ break;
+ }
+ }
+ // Extract headings (h2-h6)
+ const headingRegex = /^(#{2,6})\s+(.+)$/;
+ for (const line of lines) {
+ const match = line.match(headingRegex);
+ if (match) {
+ const level = match[1].length;
+ const text = match[2].trim();
+ const id = text.toLowerCase().replace(/[^\w\s-]/g, '').replace(/\s+/g, '-');
+ headings.push({
+ level: `h${level}`,
+ text,
+ id,
+ });
+ }
+ }
+ // Extract code blocks
+ const codeBlockRegex = /```(\w+)?\n([\s\S]*?)```/g;
+ let match;
+ while ((match = codeBlockRegex.exec(content)) !== null) {
+ const language = match[1] || 'text';
+ const code = match[2].trim();
+ if (code.length > 10) {
+ codeSamples.push({
+ code,
+ language,
+ });
+ }
+ }
+ // Extract content (remove code blocks and headings)
+ let contentWithoutCode = content.replace(codeBlockRegex, '');
+ contentWithoutCode = contentWithoutCode.replace(/^#{1,6}\s+.+$/gm, '');
+ for (const para of contentWithoutCode.split('\n\n')) {
+ const trimmed = para.trim();
+ if (trimmed.length > 20) {
+ contentLines.push(trimmed);
+ }
+ }
+ // Extract links (markdown format)
+ const linkRegex = /\[([^\]]*)\]\(([^)]+)\)/g;
+ while ((match = linkRegex.exec(content)) !== null) {
+ const text = match[1];
+ const linkUrl = match[2].trim();
+ // Skip anchors
+ if (linkUrl.startsWith('#')) {
+ continue;
+ }
+ // Resolve relative URLs
+ let absoluteUrl;
+ try {
+ if (linkUrl.startsWith('http://') || linkUrl.startsWith('https://')) {
+ absoluteUrl = linkUrl;
+ }
+ else {
+ absoluteUrl = new URL(linkUrl, url).href;
+ }
+ // Remove fragment
+ absoluteUrl = absoluteUrl.split('#')[0];
+ // Only include .md URLs to avoid client-side rendered HTML pages
+ if (absoluteUrl.endsWith('.md') || absoluteUrl.includes('.md?')) {
+ const linkOrigin = new URL(absoluteUrl).origin;
+ const baseOrigin = this.baseUrl.origin;
+ links.push({
+ text,
+ url: absoluteUrl,
+ isInternal: linkOrigin === baseOrigin,
+ });
+ }
+ }
+ catch (error) {
+ // Invalid URL, skip
+ logger.debug('Invalid URL in markdown link', { url: linkUrl });
+ }
+ }
+ // Build sections from headings
+ let currentSection = null;
+ let currentContent = [];
+ for (const line of lines) {
+ const headerMatch = line.match(headingRegex);
+ if (headerMatch) {
+ // Save previous section
+ if (currentSection) {
+ currentSection.content = currentContent.join('\n').trim();
+ if (currentSection.content.length > 0) {
+ sections.push(currentSection);
+ }
+ }
+ // Start new section
+ const text = headerMatch[2].trim();
+ currentSection = {
+ title: text,
+ content: '',
+ anchor: text.toLowerCase().replace(/[^\w\s-]/g, '').replace(/\s+/g, '-'),
+ };
+ currentContent = [];
+ }
+ else if (currentSection) {
+ currentContent.push(line);
+ }
+ }
+ // Save last section
+ if (currentSection) {
+ currentSection.content = currentContent.join('\n').trim();
+ if (currentSection.content.length > 0) {
+ sections.push(currentSection);
+ }
+ }
+ return {
+ title: title || 'Untitled',
+ content: contentLines.join('\n\n'),
+ headings,
+ codeSamples,
+ sections,
+ links,
+ };
+ }
  /**
  * Classify error type for better error messages
  */
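
fetchPageWithRetry() now routes URLs ending in .md through extractMarkdownContent() and parseMarkdown() instead of browsePage(), and parseMarkdown() only keeps further .md links. A small sketch of the parsed shape, calling the parser directly on a made-up document purely for illustration:

// parseMarkdown() output sketch - behaviour follows the code above, the input and values are illustrative.
const md = [
  '# Getting Started',
  '',
  '## Install',
  '',
  'Run the command below to install the CLI.',
  '',
  '```bash',
  'npm i @gulibs/safe-coder',
  '```',
].join('\n');
const parsed = crawler.parseMarkdown(md, 'https://example.com/docs/index.md');
// parsed.title       -> 'Getting Started'
// parsed.headings    -> [{ level: 'h2', text: 'Install', id: 'install' }]
// parsed.codeSamples -> [{ code: 'npm i @gulibs/safe-coder', language: 'bash' }]
// parsed.content     -> 'Run the command below to install the CLI.'
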
@@ -579,6 +995,163 @@ export class DocumentationCrawler {
  }
  return breakdown;
  }
+ /**
+ * Try to detect and use llms.txt for optimized crawling
+ */
+ async tryLlmsTxt(rootUrl) {
+ logger.info('Checking for llms.txt files', { url: rootUrl });
+ try {
+ const detector = new LlmsTxtDetector(rootUrl);
+ const variants = await detector.detectAll();
+ if (variants.length === 0) {
+ logger.info('No llms.txt files found, proceeding with normal crawl');
+ return;
+ }
+ logger.info('Found llms.txt variants', {
+ count: variants.length,
+ variants: variants.map(v => v.variant),
+ });
+ // Download all variants
+ const downloader = new LlmsTxtDownloader();
+ const downloaded = await downloader.downloadAll(variants);
+ if (downloaded.length === 0) {
+ logger.warn('Failed to download any llms.txt variants');
+ return;
+ }
+ // Use the largest variant (most comprehensive)
+ const largest = downloaded.reduce((prev, current) => current.size > prev.size ? current : prev);
+ logger.info('Using llms.txt for URL extraction', {
+ variant: largest.variant,
+ size: largest.size,
+ });
+ // Parse URLs from llms.txt
+ const parser = new LlmsTxtParser(largest.content, rootUrl);
+ const extractedUrls = parser.extractUrls();
+ if (extractedUrls.length > 0) {
+ logger.info('Extracted URLs from llms.txt', {
+ count: extractedUrls.length,
+ });
+ // Add URLs to queue with depth 0
+ for (const url of extractedUrls) {
+ if (this.isValidUrl(url) && !this.visitedUrls.has(url)) {
+ this.urlQueue.push({ url, depth: 0 });
+ }
+ }
+ logger.info('Added llms.txt URLs to crawl queue', {
+ added: this.urlQueue.length,
+ });
+ }
+ else {
+ logger.info('No URLs extracted from llms.txt, using normal crawl');
+ }
+ }
+ catch (error) {
+ const errorMessage = error instanceof Error ? error.message : String(error);
+ logger.warn('llms.txt detection failed, continuing with normal crawl', {
+ error: errorMessage,
+ });
+ // Continue with normal crawling if llms.txt fails
+ }
+ }
+ /**
+ * Check if a URL is valid for crawling
+ */
+ isValidUrl(url) {
+ try {
+ const parsed = new URL(url);
+ // Must be same origin as base URL
+ if (parsed.origin !== this.baseUrl.origin) {
+ return false;
+ }
+ // Must be http or https
+ if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
+ return false;
+ }
+ return true;
+ }
+ catch {
+ return false;
+ }
+ }
+ /**
+ * Save checkpoint
+ */
+ async saveCheckpoint() {
+ if (!this.checkpointManager) {
+ return;
+ }
+ const checkpointData = {
+ config: this.options,
+ visitedUrls: Array.from(this.visitedUrls),
+ pendingUrls: this.urlQueue,
+ pagesCrawled: this.crawledPages.length,
+ lastUpdated: new Date().toISOString(),
+ baseUrl: this.baseUrl.href,
+ };
+ try {
+ await this.checkpointManager.saveCheckpoint(checkpointData);
+ }
+ catch (error) {
+ logger.warn('Failed to save checkpoint', {
+ error: error instanceof Error ? error.message : String(error),
+ });
+ }
+ }
+ /**
+ * Load checkpoint and restore state
+ */
+ async loadCheckpoint() {
+ if (!this.checkpointManager) {
+ return false;
+ }
+ try {
+ const data = await this.checkpointManager.loadCheckpoint();
+ if (!data) {
+ logger.info('No checkpoint found to resume from');
+ return false;
+ }
+ // Restore state
+ this.visitedUrls = new Set(data.visitedUrls);
+ this.urlQueue = data.pendingUrls;
+ // Note: crawledPages are not restored as they will be regenerated
+ logger.info('State restored from checkpoint', {
+ visitedUrls: this.visitedUrls.size,
+ pendingUrls: this.urlQueue.length,
+ lastUpdated: data.lastUpdated,
+ });
+ return true;
+ }
+ catch (error) {
+ logger.warn('Failed to load checkpoint', {
+ error: error instanceof Error ? error.message : String(error),
+ });
+ return false;
+ }
+ }
+ /**
+ * Clear checkpoint after successful crawl
+ */
+ async clearCheckpoint() {
+ if (this.checkpointManager) {
+ try {
+ await this.checkpointManager.clearCheckpoint();
+ }
+ catch (error) {
+ logger.debug('Failed to clear checkpoint', {
+ error: error instanceof Error ? error.message : String(error),
+ });
+ }
+ }
+ }
+ /**
+ * Sanitize filename for checkpoint
+ */
+ sanitizeFilename(url) {
+ return url
+ .replace(/[^a-z0-9]/gi, '-')
+ .replace(/-+/g, '-')
+ .substring(0, 64);
+ }
  /**
  * Delay helper for rate limiting
  */
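
tryLlmsTxt() above probes for llms.txt variants with LlmsTxtDetector, downloads them, and seeds the crawl queue from the largest one, while isValidUrl() keeps only same-origin http(s) links. Detection runs by default; a one-line opt-out sketch using the skipLlmsTxt flag defined near the top of this diff:

// Opt-out sketch - skipLlmsTxt comes from this diff, the URL is illustrative.
await crawler.crawl('https://example.com', { skipLlmsTxt: true });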