@gulibs/safe-coder 0.0.23 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +351 -15
- package/dist/documentation/checkpoint-manager.d.ts +38 -0
- package/dist/documentation/checkpoint-manager.d.ts.map +1 -0
- package/dist/documentation/checkpoint-manager.js +101 -0
- package/dist/documentation/checkpoint-manager.js.map +1 -0
- package/dist/documentation/doc-crawler.d.ts +77 -2
- package/dist/documentation/doc-crawler.d.ts.map +1 -1
- package/dist/documentation/doc-crawler.js +752 -179
- package/dist/documentation/doc-crawler.js.map +1 -1
- package/dist/documentation/llms-txt/detector.d.ts +31 -0
- package/dist/documentation/llms-txt/detector.d.ts.map +1 -0
- package/dist/documentation/llms-txt/detector.js +77 -0
- package/dist/documentation/llms-txt/detector.js.map +1 -0
- package/dist/documentation/llms-txt/downloader.d.ts +30 -0
- package/dist/documentation/llms-txt/downloader.d.ts.map +1 -0
- package/dist/documentation/llms-txt/downloader.js +84 -0
- package/dist/documentation/llms-txt/downloader.js.map +1 -0
- package/dist/documentation/llms-txt/index.d.ts +4 -0
- package/dist/documentation/llms-txt/index.d.ts.map +1 -0
- package/dist/documentation/llms-txt/index.js +4 -0
- package/dist/documentation/llms-txt/index.js.map +1 -0
- package/dist/documentation/llms-txt/parser.d.ts +43 -0
- package/dist/documentation/llms-txt/parser.d.ts.map +1 -0
- package/dist/documentation/llms-txt/parser.js +177 -0
- package/dist/documentation/llms-txt/parser.js.map +1 -0
- package/dist/documentation/skill-generator.d.ts +38 -2
- package/dist/documentation/skill-generator.d.ts.map +1 -1
- package/dist/documentation/skill-generator.js +331 -62
- package/dist/documentation/skill-generator.js.map +1 -1
- package/dist/index.js +0 -0
- package/dist/server/mcp-server.d.ts.map +1 -1
- package/dist/server/mcp-server.js +152 -9
- package/dist/server/mcp-server.js.map +1 -1
- package/package.json +10 -11
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { HttpClient } from '../utils/http-client.js';
|
|
2
2
|
export interface CrawlOptions {
|
|
3
|
+
crawlStrategy?: 'bfs' | 'dfs';
|
|
3
4
|
maxDepth?: number;
|
|
4
5
|
maxPages?: number;
|
|
5
6
|
includePaths?: string[];
|
|
@@ -7,6 +8,15 @@ export interface CrawlOptions {
|
|
|
7
8
|
rateLimit?: number;
|
|
8
9
|
maxRetries?: number;
|
|
9
10
|
retryDelay?: number;
|
|
11
|
+
useBrowserAutomation?: boolean;
|
|
12
|
+
skipLlmsTxt?: boolean;
|
|
13
|
+
workers?: number;
|
|
14
|
+
checkpoint?: {
|
|
15
|
+
enabled: boolean;
|
|
16
|
+
interval: number;
|
|
17
|
+
file?: string;
|
|
18
|
+
};
|
|
19
|
+
resume?: boolean;
|
|
10
20
|
}
|
|
11
21
|
export interface CrawledPage {
|
|
12
22
|
url: string;
|
|
@@ -36,7 +46,7 @@ export interface CrawledPage {
|
|
|
36
46
|
export interface LinkDiscoveryStats {
|
|
37
47
|
totalLinksFound: number;
|
|
38
48
|
linksFiltered: {
|
|
39
|
-
|
|
49
|
+
notContent: number;
|
|
40
50
|
externalDomain: number;
|
|
41
51
|
alreadyVisited: number;
|
|
42
52
|
excludedPattern: number;
|
|
@@ -46,6 +56,7 @@ export interface LinkDiscoveryStats {
|
|
|
46
56
|
pagesDiscovered: number;
|
|
47
57
|
pagesCrawled: number;
|
|
48
58
|
}
|
|
59
|
+
export type AbandonReason = 'insufficient_content' | 'media_only' | 'empty_pages' | 'no_structured_content';
|
|
49
60
|
export interface CrawlResult {
|
|
50
61
|
pages: CrawledPage[];
|
|
51
62
|
totalPages: number;
|
|
@@ -55,6 +66,8 @@ export interface CrawlResult {
|
|
|
55
66
|
error: string;
|
|
56
67
|
}>;
|
|
57
68
|
linkDiscoveryStats: LinkDiscoveryStats;
|
|
69
|
+
abandoned?: boolean;
|
|
70
|
+
abandonReason?: AbandonReason;
|
|
58
71
|
}
|
|
59
72
|
export declare class DocumentationCrawler {
|
|
60
73
|
private browser;
|
|
@@ -65,6 +78,8 @@ export declare class DocumentationCrawler {
|
|
|
65
78
|
private options;
|
|
66
79
|
private baseUrl;
|
|
67
80
|
private linkDiscoveryStats;
|
|
81
|
+
private checkpointManager?;
|
|
82
|
+
private pagesSinceLastCheckpoint;
|
|
68
83
|
private readonly DOCUMENTATION_PATTERNS;
|
|
69
84
|
private readonly EXCLUDED_PATTERNS;
|
|
70
85
|
constructor(httpClient?: HttpClient);
|
|
@@ -72,24 +87,60 @@ export declare class DocumentationCrawler {
|
|
|
72
87
|
* Crawl documentation starting from a root URL
|
|
73
88
|
* Uses HTTP client (axios) exclusively - no browser automation
|
|
74
89
|
* For SPA sites that require JavaScript rendering, use Cursor/Claude's built-in browser tools
|
|
90
|
+
* Supports both BFS (breadth-first) and DFS (depth-first) crawl strategies
|
|
75
91
|
*/
|
|
76
92
|
crawl(rootUrl: string, options?: CrawlOptions): Promise<CrawlResult>;
|
|
93
|
+
/**
|
|
94
|
+
* Sequential crawling (single-threaded)
|
|
95
|
+
*/
|
|
96
|
+
private crawlSequential;
|
|
97
|
+
/**
|
|
98
|
+
* Parallel crawling with multiple workers
|
|
99
|
+
*/
|
|
100
|
+
private crawlWithWorkers;
|
|
101
|
+
/**
|
|
102
|
+
* Process a single page (shared by both sequential and parallel crawling)
|
|
103
|
+
*/
|
|
104
|
+
private processPage;
|
|
77
105
|
/**
|
|
78
106
|
* Discover documentation links from a crawled page
|
|
79
107
|
*/
|
|
80
108
|
private discoverDocumentationLinks;
|
|
81
109
|
/**
|
|
82
|
-
* Check if a path
|
|
110
|
+
* Check if a path should be crawled (permissive - only exclude clearly non-content paths)
|
|
83
111
|
*/
|
|
84
112
|
private isDocumentationPath;
|
|
85
113
|
/**
|
|
86
114
|
* Check if a path should be excluded
|
|
87
115
|
*/
|
|
88
116
|
private shouldExclude;
|
|
117
|
+
/**
|
|
118
|
+
* Check if crawled content is sufficient for skill generation
|
|
119
|
+
* Enhanced with multi-dimensional quality metrics
|
|
120
|
+
*/
|
|
121
|
+
private canGenerateSkill;
|
|
122
|
+
/**
|
|
123
|
+
* Evaluate content quality with multi-dimensional metrics
|
|
124
|
+
*/
|
|
125
|
+
private evaluateContentQuality;
|
|
126
|
+
/**
|
|
127
|
+
* Check if should continue crawling based on content quality
|
|
128
|
+
*/
|
|
129
|
+
private shouldContinueCrawling;
|
|
89
130
|
/**
|
|
90
131
|
* Fetch a page with retry logic
|
|
132
|
+
* Supports both HTML pages and Markdown files
|
|
91
133
|
*/
|
|
92
134
|
private fetchPageWithRetry;
|
|
135
|
+
/**
|
|
136
|
+
* Extract content from Markdown file
|
|
137
|
+
* Converts Markdown structure to WebDocumentationPage format
|
|
138
|
+
*/
|
|
139
|
+
private extractMarkdownContent;
|
|
140
|
+
/**
|
|
141
|
+
* Parse Markdown content into structured data
|
|
142
|
+
*/
|
|
143
|
+
private parseMarkdown;
|
|
93
144
|
/**
|
|
94
145
|
* Classify error type for better error messages
|
|
95
146
|
*/
|
|
@@ -102,6 +153,30 @@ export declare class DocumentationCrawler {
|
|
|
102
153
|
* Get error breakdown by type
|
|
103
154
|
*/
|
|
104
155
|
private getErrorBreakdown;
|
|
156
|
+
/**
|
|
157
|
+
* Try to detect and use llms.txt for optimized crawling
|
|
158
|
+
*/
|
|
159
|
+
private tryLlmsTxt;
|
|
160
|
+
/**
|
|
161
|
+
* Check if a URL is valid for crawling
|
|
162
|
+
*/
|
|
163
|
+
private isValidUrl;
|
|
164
|
+
/**
|
|
165
|
+
* Save checkpoint
|
|
166
|
+
*/
|
|
167
|
+
private saveCheckpoint;
|
|
168
|
+
/**
|
|
169
|
+
* Load checkpoint and restore state
|
|
170
|
+
*/
|
|
171
|
+
private loadCheckpoint;
|
|
172
|
+
/**
|
|
173
|
+
* Clear checkpoint after successful crawl
|
|
174
|
+
*/
|
|
175
|
+
private clearCheckpoint;
|
|
176
|
+
/**
|
|
177
|
+
* Sanitize filename for checkpoint
|
|
178
|
+
*/
|
|
179
|
+
private sanitizeFilename;
|
|
105
180
|
/**
|
|
106
181
|
* Delay helper for rate limiting
|
|
107
182
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"doc-crawler.d.ts","sourceRoot":"","sources":["../../src/documentation/doc-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"doc-crawler.d.ts","sourceRoot":"","sources":["../../src/documentation/doc-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAQrD,MAAM,WAAW,YAAY;IAC3B,aAAa,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC;IAC9B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAC/B,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE;QACX,OAAO,EAAE,OAAO,CAAC;QACjB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,CAAC,EAAE,MAAM,CAAC;KACf,CAAC;IACF,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,KAAK,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,MAAM,CAAC;QAChB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC,CAAC;IACH,eAAe,EAAE,KAAK,CAAC;QACrB,IAAI,EAAE,MAAM,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,UAAU,EAAE,OAAO,CAAC;KACrB,CAAC,CAAC;IACH,QAAQ,EAAE,KAAK,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,EAAE,CAAC,EAAE,MAAM,CAAC;KACb,CAAC,CAAC;IACH,WAAW,EAAE,KAAK,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,kBAAkB;IACjC,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE;QACb,UAAU,EAAE,MAAM,CAAC;QACnB,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,eAAe,EAAE,MAAM,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;IACF,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,MAAM,aAAa,GACrB,sBAAsB,GACtB,YAAY,GACZ,aAAa,GACb,uBAAuB,CAAC;AAE5B,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,WAAW,EAAE,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,MAAM,CAAC;IACxB,MAAM,EAAE,KAAK,CAAC;QACZ,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;IACH,kBAAkB,EAAE,kBAAkB,CAAC;IACvC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,aAAa,CAAC,EAAE,aAAa,CAAC;CAC/B;AAOD,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,OAAO,CAA0B;IACzC,OAAO,CAAC,WAAW,CAAc;IACjC,OAAO,CAAC,QAAQ,CAAc;IAC9B,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,MAAM,CAAwC;IACtD,OAAO,CAAC,OAAO,CAYb;IACF,OAAO,CAAC,OAAO,CAAM;IACrB,OAAO,CAAC,kBAAkB,CAAqB;IAC/C,OAAO,CAAC,iBAAiB,CAAC,CAAoB;IAC9C,OAAO,CAAC,wBAAwB,CAAS;IACzC,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAWrC;IACF,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAUhC;gBAEU,UAAU,CAAC,EAAE,UAAU;IAqCnC;;;;;OAKG;IACG,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,WAAW,CAAC;IA4K9E;;OAEG;YACW,eAAe;IAyD7B;;OAEG;YACW,gBAAgB;IAoE9B;;OAEG;YACW,WAAW;IAgHzB;;OAEG;IACH,OAAO,CAAC,0BAA0B;IAiJlC;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAmC3B;;OAEG;IACH,OAAO,CAAC,aAAa;IAIrB;;;OAGG;IACH,OAAO,CAAC,gBAAgB;IAyBxB;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAoF9B;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAsC9B;;;OAGG;YACW,kBAAkB;IAgChC;;;OAGG;YACW,sBAAsB;IA4BpC;;OAEG;IACH,OAAO,CAAC,aAAa;IAyJrB;;OAEG;IACH,OAAO,CAAC,aAAa;IA0CrB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAmBxB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAWzB;;OAEG;YACW,UAAU;IAmExB;;OAEG;IACH,OAAO,CAAC,UAAU;IAiBlB;;OAEG;YACW,cAAc;IAuB5B;;OAEG;YACW,cAAc;IAgC5B;;OAEG;YACW,eAAe;IAY7B;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAOxB;;OAEG;IACH,OAAO,CAAC,KAAK;CAGd"}
|