@gulibs/safe-coder 0.0.23 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/README.md +351 -15
  2. package/dist/documentation/checkpoint-manager.d.ts +38 -0
  3. package/dist/documentation/checkpoint-manager.d.ts.map +1 -0
  4. package/dist/documentation/checkpoint-manager.js +101 -0
  5. package/dist/documentation/checkpoint-manager.js.map +1 -0
  6. package/dist/documentation/doc-crawler.d.ts +77 -2
  7. package/dist/documentation/doc-crawler.d.ts.map +1 -1
  8. package/dist/documentation/doc-crawler.js +752 -179
  9. package/dist/documentation/doc-crawler.js.map +1 -1
  10. package/dist/documentation/llms-txt/detector.d.ts +31 -0
  11. package/dist/documentation/llms-txt/detector.d.ts.map +1 -0
  12. package/dist/documentation/llms-txt/detector.js +77 -0
  13. package/dist/documentation/llms-txt/detector.js.map +1 -0
  14. package/dist/documentation/llms-txt/downloader.d.ts +30 -0
  15. package/dist/documentation/llms-txt/downloader.d.ts.map +1 -0
  16. package/dist/documentation/llms-txt/downloader.js +84 -0
  17. package/dist/documentation/llms-txt/downloader.js.map +1 -0
  18. package/dist/documentation/llms-txt/index.d.ts +4 -0
  19. package/dist/documentation/llms-txt/index.d.ts.map +1 -0
  20. package/dist/documentation/llms-txt/index.js +4 -0
  21. package/dist/documentation/llms-txt/index.js.map +1 -0
  22. package/dist/documentation/llms-txt/parser.d.ts +43 -0
  23. package/dist/documentation/llms-txt/parser.d.ts.map +1 -0
  24. package/dist/documentation/llms-txt/parser.js +177 -0
  25. package/dist/documentation/llms-txt/parser.js.map +1 -0
  26. package/dist/documentation/skill-generator.d.ts +38 -2
  27. package/dist/documentation/skill-generator.d.ts.map +1 -1
  28. package/dist/documentation/skill-generator.js +331 -62
  29. package/dist/documentation/skill-generator.js.map +1 -1
  30. package/dist/index.js +0 -0
  31. package/dist/server/mcp-server.d.ts.map +1 -1
  32. package/dist/server/mcp-server.js +152 -9
  33. package/dist/server/mcp-server.js.map +1 -1
  34. package/package.json +10 -11
@@ -1,5 +1,6 @@
1
1
  import { HttpClient } from '../utils/http-client.js';
2
2
  export interface CrawlOptions {
3
+ crawlStrategy?: 'bfs' | 'dfs';
3
4
  maxDepth?: number;
4
5
  maxPages?: number;
5
6
  includePaths?: string[];
@@ -7,6 +8,15 @@ export interface CrawlOptions {
7
8
  rateLimit?: number;
8
9
  maxRetries?: number;
9
10
  retryDelay?: number;
11
+ useBrowserAutomation?: boolean;
12
+ skipLlmsTxt?: boolean;
13
+ workers?: number;
14
+ checkpoint?: {
15
+ enabled: boolean;
16
+ interval: number;
17
+ file?: string;
18
+ };
19
+ resume?: boolean;
10
20
  }
11
21
  export interface CrawledPage {
12
22
  url: string;
@@ -36,7 +46,7 @@ export interface CrawledPage {
36
46
  export interface LinkDiscoveryStats {
37
47
  totalLinksFound: number;
38
48
  linksFiltered: {
39
- notDocumentation: number;
49
+ notContent: number;
40
50
  externalDomain: number;
41
51
  alreadyVisited: number;
42
52
  excludedPattern: number;
@@ -46,6 +56,7 @@ export interface LinkDiscoveryStats {
46
56
  pagesDiscovered: number;
47
57
  pagesCrawled: number;
48
58
  }
59
+ export type AbandonReason = 'insufficient_content' | 'media_only' | 'empty_pages' | 'no_structured_content';
49
60
  export interface CrawlResult {
50
61
  pages: CrawledPage[];
51
62
  totalPages: number;
@@ -55,6 +66,8 @@ export interface CrawlResult {
55
66
  error: string;
56
67
  }>;
57
68
  linkDiscoveryStats: LinkDiscoveryStats;
69
+ abandoned?: boolean;
70
+ abandonReason?: AbandonReason;
58
71
  }
59
72
  export declare class DocumentationCrawler {
60
73
  private browser;
@@ -65,6 +78,8 @@ export declare class DocumentationCrawler {
65
78
  private options;
66
79
  private baseUrl;
67
80
  private linkDiscoveryStats;
81
+ private checkpointManager?;
82
+ private pagesSinceLastCheckpoint;
68
83
  private readonly DOCUMENTATION_PATTERNS;
69
84
  private readonly EXCLUDED_PATTERNS;
70
85
  constructor(httpClient?: HttpClient);
@@ -72,24 +87,60 @@ export declare class DocumentationCrawler {
72
87
  * Crawl documentation starting from a root URL
73
88
  * Uses HTTP client (axios) exclusively - no browser automation
74
89
  * For SPA sites that require JavaScript rendering, use Cursor/Claude's built-in browser tools
90
+ * Supports both BFS (breadth-first) and DFS (depth-first) crawl strategies
75
91
  */
76
92
  crawl(rootUrl: string, options?: CrawlOptions): Promise<CrawlResult>;
93
+ /**
94
+ * Sequential crawling (single-threaded)
95
+ */
96
+ private crawlSequential;
97
+ /**
98
+ * Parallel crawling with multiple workers
99
+ */
100
+ private crawlWithWorkers;
101
+ /**
102
+ * Process a single page (shared by both sequential and parallel crawling)
103
+ */
104
+ private processPage;
77
105
  /**
78
106
  * Discover documentation links from a crawled page
79
107
  */
80
108
  private discoverDocumentationLinks;
81
109
  /**
82
- * Check if a path is a documentation path
110
+ * Check if a path should be crawled (permissive - only exclude clearly non-content paths)
83
111
  */
84
112
  private isDocumentationPath;
85
113
  /**
86
114
  * Check if a path should be excluded
87
115
  */
88
116
  private shouldExclude;
117
+ /**
118
+ * Check if crawled content is sufficient for skill generation
119
+ * Enhanced with multi-dimensional quality metrics
120
+ */
121
+ private canGenerateSkill;
122
+ /**
123
+ * Evaluate content quality with multi-dimensional metrics
124
+ */
125
+ private evaluateContentQuality;
126
+ /**
127
+ * Check if should continue crawling based on content quality
128
+ */
129
+ private shouldContinueCrawling;
89
130
  /**
90
131
  * Fetch a page with retry logic
132
+ * Supports both HTML pages and Markdown files
91
133
  */
92
134
  private fetchPageWithRetry;
135
+ /**
136
+ * Extract content from Markdown file
137
+ * Converts Markdown structure to WebDocumentationPage format
138
+ */
139
+ private extractMarkdownContent;
140
+ /**
141
+ * Parse Markdown content into structured data
142
+ */
143
+ private parseMarkdown;
93
144
  /**
94
145
  * Classify error type for better error messages
95
146
  */
@@ -102,6 +153,30 @@ export declare class DocumentationCrawler {
102
153
  * Get error breakdown by type
103
154
  */
104
155
  private getErrorBreakdown;
156
+ /**
157
+ * Try to detect and use llms.txt for optimized crawling
158
+ */
159
+ private tryLlmsTxt;
160
+ /**
161
+ * Check if a URL is valid for crawling
162
+ */
163
+ private isValidUrl;
164
+ /**
165
+ * Save checkpoint
166
+ */
167
+ private saveCheckpoint;
168
+ /**
169
+ * Load checkpoint and restore state
170
+ */
171
+ private loadCheckpoint;
172
+ /**
173
+ * Clear checkpoint after successful crawl
174
+ */
175
+ private clearCheckpoint;
176
+ /**
177
+ * Sanitize filename for checkpoint
178
+ */
179
+ private sanitizeFilename;
105
180
  /**
106
181
  * Delay helper for rate limiting
107
182
  */
@@ -1 +1 @@
1
- {"version":3,"file":"doc-crawler.d.ts","sourceRoot":"","sources":["../../src/documentation/doc-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAIrD,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,KAAK,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,MAAM,CAAC;QAChB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC,CAAC;IACH,eAAe,EAAE,KAAK,CAAC;QACrB,IAAI,EAAE,MAAM,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,UAAU,EAAE,OAAO,CAAC;KACrB,CAAC,CAAC;IACH,QAAQ,EAAE,KAAK,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,EAAE,CAAC,EAAE,MAAM,CAAC;KACb,CAAC,CAAC;IACH,WAAW,EAAE,KAAK,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,kBAAkB;IACjC,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE;QACb,gBAAgB,EAAE,MAAM,CAAC;QACzB,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,eAAe,EAAE,MAAM,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;IACF,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,WAAW,EAAE,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,MAAM,CAAC;IACxB,MAAM,EAAE,KAAK,CAAC;QACZ,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;IACH,kBAAkB,EAAE,kBAAkB,CAAC;CACxC;AAOD,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,OAAO,CAA0B;IACzC,OAAO,CAAC,WAAW,CAAc;IACjC,OAAO,CAAC,QAAQ,CAAc;IAC9B,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,MAAM,CAAwC;IACtD,OAAO,CAAC,OAAO,CAAyB;IACxC,OAAO,CAAC,OAAO,CAAM;IACrB,OAAO,CAAC,kBAAkB,CAAqB;IAC/C,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAWrC;IACF,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAUhC;gBAEU,UAAU,CAAC,EAAE,UAAU;IAgCnC;;;;OAIG;IACG,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,WAAW,CAAC;IAqR9E;;OAEG;IACH,OAAO,CAAC,0BAA0B;IAiJlC;;OAEG;
IACH,OAAO,CAAC,mBAAmB;IAgD3B;;OAEG;IACH,OAAO,CAAC,aAAa;IAIrB;;OAEG;YACW,kBAAkB;IA0BhC;;OAEG;IACH,OAAO,CAAC,aAAa;IA0CrB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAmBxB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAWzB;;OAEG;IACH,OAAO,CAAC,KAAK;CAGd"}
1
+ {"version":3,"file":"doc-crawler.d.ts","sourceRoot":"","sources":["../../src/documentation/doc-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAQrD,MAAM,WAAW,YAAY;IAC3B,aAAa,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC;IAC9B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAC/B,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE;QACX,OAAO,EAAE,OAAO,CAAC;QACjB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,CAAC,EAAE,MAAM,CAAC;KACf,CAAC;IACF,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,KAAK,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,MAAM,CAAC;QAChB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC,CAAC;IACH,eAAe,EAAE,KAAK,CAAC;QACrB,IAAI,EAAE,MAAM,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,UAAU,EAAE,OAAO,CAAC;KACrB,CAAC,CAAC;IACH,QAAQ,EAAE,KAAK,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,EAAE,CAAC,EAAE,MAAM,CAAC;KACb,CAAC,CAAC;IACH,WAAW,EAAE,KAAK,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,kBAAkB;IACjC,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE;QACb,UAAU,EAAE,MAAM,CAAC;QACnB,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,eAAe,EAAE,MAAM,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;IACF,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,MAAM,aAAa,GACrB,sBAAsB,GACtB,YAAY,GACZ,aAAa,GACb,uBAAuB,CAAC;AAE5B,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,WAAW,EAAE,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,MAAM,CAAC;IACxB,MAAM,EAAE,KAAK,CAAC;QACZ,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;IACH,kBAAkB,EAAE,kBAAkB,CAAC;IACvC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,aAAa,CAAC,EAAE,aAAa,CAAC;CAC/B;AAOD,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,OAAO,CAA0B;IACzC,OAAO,CAAC,WAA
W,CAAc;IACjC,OAAO,CAAC,QAAQ,CAAc;IAC9B,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,MAAM,CAAwC;IACtD,OAAO,CAAC,OAAO,CAYb;IACF,OAAO,CAAC,OAAO,CAAM;IACrB,OAAO,CAAC,kBAAkB,CAAqB;IAC/C,OAAO,CAAC,iBAAiB,CAAC,CAAoB;IAC9C,OAAO,CAAC,wBAAwB,CAAS;IACzC,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAWrC;IACF,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAUhC;gBAEU,UAAU,CAAC,EAAE,UAAU;IAqCnC;;;;;OAKG;IACG,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,WAAW,CAAC;IA4K9E;;OAEG;YACW,eAAe;IAyD7B;;OAEG;YACW,gBAAgB;IAoE9B;;OAEG;YACW,WAAW;IAgHzB;;OAEG;IACH,OAAO,CAAC,0BAA0B;IAiJlC;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAmC3B;;OAEG;IACH,OAAO,CAAC,aAAa;IAIrB;;;OAGG;IACH,OAAO,CAAC,gBAAgB;IAyBxB;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAoF9B;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAsC9B;;;OAGG;YACW,kBAAkB;IAgChC;;;OAGG;YACW,sBAAsB;IA4BpC;;OAEG;IACH,OAAO,CAAC,aAAa;IAyJrB;;OAEG;IACH,OAAO,CAAC,aAAa;IA0CrB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAmBxB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAWzB;;OAEG;YACW,UAAU;IAmExB;;OAEG;IACH,OAAO,CAAC,UAAU;IAiBlB;;OAEG;YACW,cAAc;IAuB5B;;OAEG;YACW,cAAc;IAgC5B;;OAEG;YACW,eAAe;IAY7B;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAOxB;;OAEG;IACH,OAAO,CAAC,KAAK;CAGd"}