@cosmocoder/mcp-web-docs 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +132 -10
- package/build/crawler/auth.d.ts +17 -6
- package/build/crawler/auth.js +166 -31
- package/build/crawler/auth.js.map +1 -1
- package/build/crawler/auth.test.js +197 -33
- package/build/crawler/auth.test.js.map +1 -1
- package/build/index.js +68 -16
- package/build/index.js.map +1 -1
- package/build/index.test.js +134 -0
- package/build/index.test.js.map +1 -1
- package/build/storage/storage.d.ts +17 -0
- package/build/storage/storage.js +102 -5
- package/build/storage/storage.js.map +1 -1
- package/build/storage/storage.test.js +91 -0
- package/build/storage/storage.test.js.map +1 -1
- package/build/types.d.ts +4 -0
- package/build/util/security.js +8 -0
- package/build/util/security.js.map +1 -1
- package/build/util/security.test.js +18 -0
- package/build/util/security.test.js.map +1 -1
- package/package.json +11 -2
- package/build/crawler/cheerio.d.ts +0 -11
- package/build/crawler/cheerio.js +0 -134
- package/build/crawler/cheerio.js.map +0 -1
- package/build/crawler/chromium.d.ts +0 -21
- package/build/crawler/chromium.js +0 -596
- package/build/crawler/chromium.js.map +0 -1
- package/build/crawler/default.d.ts +0 -11
- package/build/crawler/default.js +0 -138
- package/build/crawler/default.js.map +0 -1
- package/build/crawler/factory.d.ts +0 -6
- package/build/crawler/factory.js +0 -83
- package/build/crawler/factory.js.map +0 -1
- package/build/crawler/puppeteer.d.ts +0 -16
- package/build/crawler/puppeteer.js +0 -191
- package/build/crawler/puppeteer.js.map +0 -1
- package/build/embeddings/openai.d.ts +0 -8
- package/build/embeddings/openai.js +0 -56
- package/build/embeddings/openai.js.map +0 -1
- package/build/rag/cache.d.ts +0 -10
- package/build/rag/cache.js +0 -10
- package/build/rag/cache.js.map +0 -1
- package/build/rag/code-generator.d.ts +0 -11
- package/build/rag/code-generator.js +0 -30
- package/build/rag/code-generator.js.map +0 -1
- package/build/rag/context-assembler.d.ts +0 -23
- package/build/rag/context-assembler.js +0 -113
- package/build/rag/context-assembler.js.map +0 -1
- package/build/rag/docs-search.d.ts +0 -55
- package/build/rag/docs-search.js +0 -380
- package/build/rag/docs-search.js.map +0 -1
- package/build/rag/pipeline.d.ts +0 -26
- package/build/rag/pipeline.js +0 -91
- package/build/rag/pipeline.js.map +0 -1
- package/build/rag/query-processor.d.ts +0 -14
- package/build/rag/query-processor.js +0 -57
- package/build/rag/query-processor.js.map +0 -1
- package/build/rag/reranker.d.ts +0 -55
- package/build/rag/reranker.js +0 -210
- package/build/rag/reranker.js.map +0 -1
- package/build/rag/response-generator.d.ts +0 -20
- package/build/rag/response-generator.js +0 -101
- package/build/rag/response-generator.js.map +0 -1
- package/build/rag/retriever.d.ts +0 -19
- package/build/rag/retriever.js +0 -111
- package/build/rag/retriever.js.map +0 -1
- package/build/rag/validator.d.ts +0 -22
- package/build/rag/validator.js +0 -128
- package/build/rag/validator.js.map +0 -1
- package/build/rag/version-manager.d.ts +0 -23
- package/build/rag/version-manager.js +0 -98
- package/build/rag/version-manager.js.map +0 -1
- package/build/types/rag.d.ts +0 -27
- package/build/types/rag.js +0 -2
- package/build/types/rag.js.map +0 -1
- package/build/util/content-utils.d.ts +0 -31
- package/build/util/content-utils.js +0 -120
- package/build/util/content-utils.js.map +0 -1
- package/build/util/content.d.ts +0 -1
- package/build/util/content.js +0 -16
- package/build/util/content.js.map +0 -1
- package/build/util/site-detector.d.ts +0 -22
- package/build/util/site-detector.js +0 -42
- package/build/util/site-detector.js.map +0 -1
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
import { CrawlResult } from '../types.js';
|
|
2
|
-
import { BaseCrawler } from './base.js';
|
|
3
|
-
/**
 * Crawler that downloads pages with plain HTTP requests, working through the
 * URL queue in parallel batches and yielding one {@link CrawlResult} per page
 * that was fetched successfully.
 */
export declare class DefaultCrawler extends BaseCrawler {
    /** Upper bound on how many queued URLs are taken into a single batch. */
    private readonly BATCH_SIZE;
    /** Time in milliseconds after which a single page request is aborted. */
    private readonly FETCH_TIMEOUT;
    /**
     * All three arguments are passed straight through to {@link BaseCrawler}.
     *
     * @param maxDepth - Link depth limit; the implementation defaults it to 4.
     * @param maxRequestsPerCrawl - Page budget; the implementation defaults it to 1000.
     * @param onProgress - Optional progress callback.
     */
    constructor(
        maxDepth?: number,
        maxRequestsPerCrawl?: number,
        onProgress?: (progress: number, description: string) => void
    );
    /**
     * Crawls breadth-first from `url`.
     *
     * @param url - Absolute start URL.
     * @returns An async generator of crawled pages.
     */
    crawl(url: string): AsyncGenerator<CrawlResult, void, unknown>;
    /** Downloads one page and extracts its links; failures are reported in the result. */
    private processPageWithRetry;
    /** Collects the crawlable, normalised anchor targets of an HTML document. */
    private extractLinks;
    /** Reads the `<title>` text of an HTML document, falling back to `'Untitled'`. */
    private extractTitle;
}
|
package/build/crawler/default.js
DELETED
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
import { URL } from 'url';
|
|
2
|
-
import * as cheerio from 'cheerio';
|
|
3
|
-
import { BaseCrawler } from './base.js';
|
|
4
|
-
/**
 * Crawler that downloads pages with `fetch` and parses them with cheerio.
 *
 * Pages are processed breadth-first in parallel batches. Every successfully
 * fetched page is yielded as `{ url, path, content, title }`, and its links are
 * queued one level deeper until `maxDepth` or `maxRequestsPerCrawl` is reached.
 */
export class DefaultCrawler extends BaseCrawler {
    /** Maximum number of queued URLs fetched in parallel per batch. */
    BATCH_SIZE = 50;
    /** A single page download is aborted after this many milliseconds. */
    FETCH_TIMEOUT = 30000; // 30 seconds
    /**
     * @param maxDepth Link depth limit, forwarded to BaseCrawler.
     * @param maxRequestsPerCrawl Page budget, forwarded to BaseCrawler.
     * @param onProgress Optional progress callback, forwarded to BaseCrawler.
     */
    constructor(maxDepth = 4, maxRequestsPerCrawl = 1000, onProgress) {
        super(maxDepth, maxRequestsPerCrawl, onProgress);
    }
    /**
     * Crawls breadth-first starting at `url`.
     *
     * @param url Absolute start URL; `new URL(url)` throws if it is not parseable.
     * @yields `{ url, path, content, title }` for every page fetched successfully.
     */
    async *crawl(url) {
        console.debug(`[${this.constructor.name}] Starting crawl from: ${url}`);
        if (this.isAborting) {
            console.debug('[DefaultCrawler] Crawl aborted');
            return;
        }
        const startUrl = new URL(url);
        const baseUrl = this.normalizeUrl(startUrl.toString());
        // Pending pages: URL -> depth at which the URL was discovered.
        const pagesToCrawl = new Map();
        pagesToCrawl.set(baseUrl, 0);
        // Every URL that has ever been queued during this crawl. The queue alone
        // is not enough for de-duplication: a URL leaves the queue as soon as its
        // batch starts, so a sibling page in the same batch (or a page that failed
        // to download) could otherwise put it straight back and it would be
        // fetched and yielded a second time.
        const enqueued = new Set([baseUrl]);
        while (pagesToCrawl.size > 0 && !this.isAborting) {
            // Do not download more pages than the remaining budget allows, but
            // always take at least one so the loop keeps making progress.
            const remaining = this.maxRequestsPerCrawl - this.seenUrls.size;
            const batchLimit = Number.isNaN(remaining)
                ? this.BATCH_SIZE
                : Math.max(1, Math.min(this.BATCH_SIZE, remaining));
            const batchEntries = Array.from(pagesToCrawl.entries()).slice(0, batchLimit);
            const batch = new Map(batchEntries);
            // Remove batch from queue
            for (const [queuedUrl] of batchEntries) {
                pagesToCrawl.delete(queuedUrl);
            }
            try {
                // Download the whole batch in parallel; each request first waits
                // for the rate limiter.
                const results = await Promise.all(batchEntries.map(async ([pageUrl]) => {
                    await this.rateLimit();
                    const result = await this.processPageWithRetry(pageUrl);
                    return { pageUrl, ...result };
                }));
                for (const { pageUrl, content, links, error } of results) {
                    if (this.isAborting) {
                        break;
                    }
                    if (error || !content) {
                        // Surface the failure instead of dropping the page silently.
                        console.debug(`[DefaultCrawler] Skipping ${pageUrl}:`, error ?? 'empty response body');
                        continue;
                    }
                    this.markUrlAsSeen(pageUrl);
                    yield {
                        url: pageUrl,
                        path: this.getPathFromUrl(pageUrl),
                        content,
                        title: this.extractTitle(content)
                    };
                    // Add new links to queue if within depth limit
                    const currentDepth = batch.get(pageUrl) ?? 0;
                    if (currentDepth < this.maxDepth) {
                        for (const link of links) {
                            const normalizedLink = this.normalizeUrl(link);
                            if (this.shouldCrawl(normalizedLink) && !enqueued.has(normalizedLink)) {
                                enqueued.add(normalizedLink);
                                pagesToCrawl.set(normalizedLink, currentDepth + 1);
                            }
                        }
                    }
                    // Check if we've hit the request limit
                    if (this.seenUrls.size >= this.maxRequestsPerCrawl) {
                        console.debug('[DefaultCrawler] Max requests reached');
                        return;
                    }
                }
                // Add delay between batches
                await new Promise(resolve => setTimeout(resolve, 1000));
            }
            catch (e) {
                // A failing batch is logged and dropped; the crawl carries on with
                // whatever is still queued.
                console.error('[DefaultCrawler] Error processing batch:', e);
            }
        }
        console.debug('[DefaultCrawler] Crawl completed');
    }
    /**
     * Downloads one page and extracts its links.
     *
     * Never rejects because of a download problem: failures are reported as
     * `{ content: null, links: [], error }`.
     *
     * NOTE(review): because the error is converted into a result object inside
     * the callback, `retryWithBackoff` never observes a rejection from it —
     * confirm whether retries are expected to trigger for failed downloads.
     *
     * @param url Absolute URL of the page to download.
     * @returns `{ content, links }` on success, `{ content: null, links: [], error }` on failure.
     */
    async processPageWithRetry(url) {
        return this.retryWithBackoff(async () => {
            const controller = new AbortController();
            const timeoutId = setTimeout(() => controller.abort(), this.FETCH_TIMEOUT);
            try {
                const response = await fetch(url, {
                    signal: controller.signal,
                    headers: {
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                    }
                });
                if (!response.ok) {
                    throw new Error(`HTTP error! status: ${response.status}`);
                }
                // The timer is still armed here, so a stalled body download is
                // aborted as well instead of blocking the whole batch.
                const content = await response.text();
                const links = this.extractLinks(content, new URL(url));
                return { content, links };
            }
            catch (e) {
                const error = e instanceof Error ? e : new Error('Unknown error occurred');
                return { content: null, links: [], error };
            }
            finally {
                // Always release the timer, including when fetch itself failed.
                clearTimeout(timeoutId);
            }
        });
    }
    /**
     * Collects the targets of all `<a href>` elements in `html`.
     *
     * Each href is resolved against `baseUrl`, normalised, and kept only when
     * `shouldCrawl` accepts it. Duplicates are removed.
     *
     * @param html Raw HTML of the page.
     * @param baseUrl URL object used to resolve relative hrefs.
     * @returns Array of unique normalised URLs; empty if the document cannot be parsed.
     */
    extractLinks(html, baseUrl) {
        try {
            const $ = cheerio.load(html);
            const links = new Set();
            // Find all links, including those in navigation elements
            $('a').each((_, element) => {
                const href = $(element).attr('href');
                if (!href)
                    return;
                try {
                    const url = new URL(href, baseUrl);
                    const normalizedUrl = this.normalizeUrl(url.toString());
                    // Use BaseCrawler's URL validation
                    if (this.shouldCrawl(normalizedUrl)) {
                        links.add(normalizedUrl);
                    }
                }
                catch (e) {
                    console.debug(`[DefaultCrawler] Invalid URL ${href}:`, e);
                }
            });
            return Array.from(links);
        }
        catch (e) {
            console.error('[DefaultCrawler] Error extracting links:', e);
            return [];
        }
    }
    /**
     * Returns the trimmed text of the document's `<title>` element.
     *
     * @param html Raw HTML of the page.
     * @returns The title, or `'Untitled'` when it is missing, empty, or the HTML cannot be parsed.
     */
    extractTitle(html) {
        try {
            const $ = cheerio.load(html);
            return $('title').text().trim() || 'Untitled';
        }
        catch (e) {
            console.error('[DefaultCrawler] Error extracting title:', e);
            return 'Untitled';
        }
    }
}
|
|
138
|
-
//# sourceMappingURL=default.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"default.js","sourceRoot":"","sources":["../../src/crawler/default.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,KAAK,CAAC;AAC1B,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAExC,MAAM,OAAO,cAAe,SAAQ,WAAW;IAC5B,UAAU,GAAG,EAAE,CAAC;IAChB,aAAa,GAAG,KAAK,CAAC,CAAC,aAAa;IAErD,YACE,WAAmB,CAAC,EACpB,sBAA8B,IAAI,EAClC,UAA4D;QAE5D,KAAK,CAAC,QAAQ,EAAE,mBAAmB,EAAE,UAAU,CAAC,CAAC;IACnD,CAAC;IAED,KAAK,CAAC,CAAC,KAAK,CAAC,GAAW;QACtB,OAAO,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,0BAA0B,GAAG,EAAE,CAAC,CAAC;QAExE,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;YAChD,OAAO;QACT,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC9B,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,CAAC;QAEvD,yBAAyB;QACzB,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC,CAAC,eAAe;QAC/D,YAAY,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QAE7B,OAAO,YAAY,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;YACjD,+BAA+B;YAC/B,MAAM,YAAY,GAAG,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;YAClF,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;YAEpC,0BAA0B;YAC1B,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;YAE1D,IAAI,CAAC;gBACH,2DAA2D;gBAC3D,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAC/B,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,OAAO,CAAC,EAAE,EAAE;oBAClD,sBAAsB;oBACtB,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;oBACvB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC;oBACxD,OAAO,EAAE,OAAO,EAAE,GAAG,MAAM,EAAE,CAAC;gBAChC,CAAC,CAAC,CACH,CAAC;gBAEF,iBAAiB;gBACjB,KAAK,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,OAAO,EAAE,CAAC;oBACzD,IAAI,KAAK,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,UAAU;wBAAE,SAAS;oBAEnD,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;oBAE5B,MAAM;wBACJ,GAAG,EAAE,OAAO;wBACZ,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC;wBAClC,OAAO;wBACP,KAAK,EAAE,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC;qBAClC,CAAC;oBAEF,+CAA+C;oBAC/C,MAAM,YAAY
,GAAG,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;oBAC7C,IAAI,YAAY,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;wBACjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;4BACzB,MAAM,cAAc,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;4BAC/C,IAAI,IAAI,CAAC,WAAW,CAAC,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC;gCAC1E,YAAY,CAAC,GAAG,CAAC,cAAc,EAAE,YAAY,GAAG,CAAC,CAAC,CAAC;4BACrD,CAAC;wBACH,CAAC;oBACH,CAAC;oBAED,uCAAuC;oBACvC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,IAAI,CAAC,mBAAmB,EAAE,CAAC;wBACnD,OAAO,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;wBACvD,OAAO;oBACT,CAAC;gBACH,CAAC;gBAED,4BAA4B;gBAC5B,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;YAC1D,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO,CAAC,KAAK,CAAC,0CAA0C,EAAE,CAAC,CAAC,CAAC;YAC/D,CAAC;QACH,CAAC;QAED,OAAO,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;IACpD,CAAC;IAEO,KAAK,CAAC,oBAAoB,CAAC,GAAW;QAK5C,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,IAAI,EAAE;YACtC,IAAI,CAAC;gBACH,oCAAoC;gBACpC,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;gBACzC,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;gBAE3E,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;oBAChC,MAAM,EAAE,UAAU,CAAC,MAAM;oBACzB,OAAO,EAAE;wBACP,YAAY,EAAE,qHAAqH;qBACpI;iBACF,CAAC,CAAC;gBAEH,YAAY,CAAC,SAAS,CAAC,CAAC;gBAExB,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;oBACjB,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;gBAC5D,CAAC;gBAED,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACtC,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;gBAEvD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;YAC5B,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,IAAI,CAAC,YAAY,KAAK,EAAE,CAAC;oBACvB,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;gBAChD,CAAC;gBACD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,EAAE,IAAI,KAAK,CAAC,wBAAwB,CAAC,EAAE,CAAC;YAClF,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,YAAY,CAAC,IAAY,EAAE,OAAY;QAC7C,IAAI,CAAC;YACH,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7B,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;YAEhC,yDAAyD;YACzD,CA
AC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;gBACzB,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBACrC,IAAI,CAAC,IAAI;oBAAE,OAAO;gBAElB,IAAI,CAAC;oBACH,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;oBACnC,MAAM,aAAa,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;oBAExD,mCAAmC;oBACnC,IAAI,IAAI,CAAC,WAAW,CAAC,aAAa,CAAC,EAAE,CAAC;wBACpC,KAAK,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;oBAC3B,CAAC;gBACH,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACX,OAAO,CAAC,KAAK,CAAC,gCAAgC,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC;gBAC5D,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC3B,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,0CAA0C,EAAE,CAAC,CAAC,CAAC;YAC7D,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAEO,YAAY,CAAC,IAAY;QAC/B,IAAI,CAAC;YACH,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7B,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,UAAU,CAAC;QAChD,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,0CAA0C,EAAE,CAAC,CAAC,CAAC;YAC7D,OAAO,UAAU,CAAC;QACpB,CAAC;IACH,CAAC;CACF"}
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
import { BaseCrawler } from './base.js';
|
|
2
|
-
/**
 * Picks a crawler implementation for a site by inspecting its start page.
 */
export declare class CrawlerFactory {
    /** Framework names that are searched for in the start page's HTML. */
    private static readonly JS_FRAMEWORK_INDICATORS;
    /** Downloads the start page and reports whether it appears to be JavaScript-driven. */
    private static detectSiteType;
    /**
     * Creates the crawler to use for `url`.
     *
     * A Chromium-based crawler is returned when the start page looks
     * JavaScript-heavy. Otherwise the default crawler and then the Cheerio
     * crawler are tried, and the first one whose first crawled page has
     * content is returned.
     *
     * @param url - Absolute start URL.
     * @param maxRequestsPerCrawl - Page budget handed to the crawler; the implementation defaults it to 1000.
     * @param maxDepth - Link depth limit handed to the crawler; the implementation defaults it to 4.
     * @param onProgress - Optional progress callback handed to the crawler.
     * @returns The selected crawler instance.
     * @throws Error when no crawler produces a first page with content.
     */
    static createCrawler(
        url: string,
        maxRequestsPerCrawl?: number,
        maxDepth?: number,
        onProgress?: (progress: number, description: string) => void
    ): Promise<BaseCrawler>;
}
|
package/build/crawler/factory.js
DELETED
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
import { URL } from 'url';
|
|
2
|
-
import { DefaultCrawler } from './default.js';
|
|
3
|
-
import { ChromiumCrawler } from './chromium.js';
|
|
4
|
-
import { CheerioCrawler } from './cheerio.js';
|
|
5
|
-
/**
 * Picks a crawler implementation for a site.
 *
 * The start page is downloaded once and inspected for signs of client-side
 * rendering. JavaScript-driven sites get the Chromium crawler; everything else
 * is tried with the default crawler and then the Cheerio crawler.
 */
export class CrawlerFactory {
    // Common JavaScript framework identifiers
    static JS_FRAMEWORK_INDICATORS = [
        'react',
        'vue',
        'angular',
        'next',
        'nuxt',
        'gatsby',
        'docusaurus',
        'vuepress',
        'gridsome',
        'svelte'
    ];
    /** The detection request is aborted after this many milliseconds. */
    static DETECTION_TIMEOUT_MS = 30000;
    /**
     * Classifies a page's HTML without any network access.
     *
     * Framework names are only looked for inside `<script ...>` opening tags and
     * `<meta ... generator ...>` tags, and only as whole tokens. Directive markers
     * must appear at the start of an attribute. Plain substring checks such as
     * `includes('v-')`, `includes('ng-')` or `includes('next')` match ordinary
     * markup and copy ("nav-bar", "padding-top", "Next page", "revue") and would
     * classify almost every site as JavaScript-heavy.
     *
     * @param html Raw HTML of the start page; non-string input is treated as empty.
     * @returns `{ isJsHeavy, hasFramework }`
     */
    static analyzeHtml(html) {
        const source = typeof html === 'string' ? html : '';
        // Markup that identifies the tooling behind a page: script tags (bundle
        // paths, element ids) and the generator meta tag.
        const toolingMarkup = (source.match(/<script\b[^>]*>|<meta\b[^>]*>/gi) ?? [])
            .filter(tag => /^<script/i.test(tag) || /generator/i.test(tag))
            .join('\n')
            .toLowerCase();
        const hasFramework = CrawlerFactory.JS_FRAMEWORK_INDICATORS.some(framework => {
            const escaped = framework.toLowerCase().replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            // Token match: "/_next/static" and "react-dom.js" count, "nextpage" does not.
            return new RegExp(`(?<![a-z0-9])${escaped}(?![a-z0-9])`).test(toolingMarkup);
        });
        // Check for JavaScript-heavy indicators
        const runtimeMarkers = [
            /data-react/,
            // Angular attributes/classes: ng-app, ng-version, "... ng-scope"
            /\sng-[a-z]/i,
            // Vue directives and the scoped-style / mount attributes Vue emits
            /\sv-(?:if|else-if|else|for|show|bind|on|model|html|text|cloak|slot|pre|once)(?=[\s=:>\/.])/,
            /\sdata-v-(?:app|[0-9a-f]{8})/,
            /__NEXT_DATA__/,
            /nuxt/,
            /id="___gatsby"/
        ];
        const isJsHeavy = runtimeMarkers.some(marker => marker.test(source));
        return { isJsHeavy, hasFramework };
    }
    /**
     * Downloads `url` and classifies the returned HTML.
     *
     * Any failure (network error, timeout, non-2xx status) is logged and
     * reported as "not JavaScript-heavy" so that crawler selection can continue.
     *
     * @param url Absolute URL of the start page.
     * @returns `{ isJsHeavy, hasFramework }`
     */
    static async detectSiteType(url) {
        // Without a timeout an unresponsive server would block crawler creation forever.
        const controller = new AbortController();
        const timeoutId = setTimeout(() => controller.abort(), CrawlerFactory.DETECTION_TIMEOUT_MS);
        try {
            const response = await fetch(url, {
                signal: controller.signal,
                headers: {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                }
            });
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            const html = await response.text();
            return CrawlerFactory.analyzeHtml(html);
        }
        catch (e) {
            console.error('[CrawlerFactory] Error detecting site type:', e);
            return { isJsHeavy: false, hasFramework: false };
        }
        finally {
            clearTimeout(timeoutId);
        }
    }
    /**
     * Runs a crawler just far enough to see whether it can produce a first page.
     *
     * The probe generator is always closed afterwards so that any cleanup in the
     * crawler's `crawl()` runs instead of the generator staying suspended forever.
     * NOTE(review): the probe still goes through the crawler's normal bookkeeping
     * (for example marking the start URL as seen) — confirm callers are fine with
     * the returned instance carrying that state.
     *
     * @param crawler Crawler instance to probe.
     * @param url Absolute start URL.
     * @returns `true` when the first yielded page has content.
     */
    static async probeCrawler(crawler, url) {
        const generator = crawler.crawl(url);
        try {
            const { value: firstPage, done } = await generator.next();
            return !done && Boolean(firstPage?.content);
        }
        finally {
            try {
                await generator.return(undefined);
            }
            catch (e) {
                console.debug('[CrawlerFactory] Error closing probe crawl:', e);
            }
        }
    }
    /**
     * Creates the crawler to use for `url`.
     *
     * @param url Absolute start URL; `new URL(url)` throws if it is not parseable.
     * @param maxRequestsPerCrawl Page budget handed to the crawler.
     * @param maxDepth Link depth limit handed to the crawler.
     * @param onProgress Optional progress callback handed to the crawler.
     * @returns A ChromiumCrawler for JavaScript-heavy sites, otherwise the first
     *          of DefaultCrawler / CheerioCrawler whose first page has content.
     * @throws Error when neither fallback crawler yields a first page with content.
     */
    static async createCrawler(url, maxRequestsPerCrawl = 1000, maxDepth = 4, onProgress) {
        const startUrl = new URL(url);
        console.debug(`[CrawlerFactory] Creating crawler for ${startUrl}`);
        // Check if site is JavaScript-heavy first
        const { isJsHeavy, hasFramework } = await CrawlerFactory.detectSiteType(url);
        // Try Chromium for JavaScript-heavy sites
        if (isJsHeavy || hasFramework) {
            console.debug(`[CrawlerFactory] Site appears to be JavaScript-heavy, using Chromium crawler`);
            return new ChromiumCrawler(maxDepth, maxRequestsPerCrawl, onProgress);
        }
        // Try default crawler; a failure here only means "use the next one".
        try {
            console.debug(`[CrawlerFactory] Attempting default crawler for ${url}`);
            const defaultCrawler = new DefaultCrawler(maxDepth, maxRequestsPerCrawl, onProgress);
            if (await CrawlerFactory.probeCrawler(defaultCrawler, url)) {
                console.debug('[CrawlerFactory] Successfully created default crawler');
                return defaultCrawler;
            }
        }
        catch (e) {
            console.debug('[CrawlerFactory] Default crawler failed:', e);
        }
        // Fall back to Cheerio crawler; errors from this last attempt propagate.
        console.debug(`[CrawlerFactory] Attempting Cheerio crawler for ${url}`);
        const cheerioCrawler = new CheerioCrawler(maxDepth, maxRequestsPerCrawl, onProgress);
        if (await CrawlerFactory.probeCrawler(cheerioCrawler, url)) {
            console.debug('[CrawlerFactory] Successfully created Cheerio crawler');
            return cheerioCrawler;
        }
        console.error(`[CrawlerFactory] All crawlers failed for ${url}`);
        throw new Error(`Failed to create crawler for ${url}`);
    }
}
|
|
83
|
-
//# sourceMappingURL=factory.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"factory.js","sourceRoot":"","sources":["../../src/crawler/factory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,KAAK,CAAC;AAE1B,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAE9C,MAAM,OAAO,cAAc;IACzB,0CAA0C;IAClC,MAAM,CAAU,uBAAuB,GAAG;QAChD,OAAO;QACP,KAAK;QACL,SAAS;QACT,MAAM;QACN,MAAM;QACN,QAAQ;QACR,YAAY;QACZ,UAAU;QACV,UAAU;QACV,QAAQ;KACT,CAAC;IAEM,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,GAAW;QAI7C,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO,EAAE;oBACP,YAAY,EAAE,qHAAqH;iBACpI;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAC5D,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEnC,kCAAkC;YAClC,MAAM,YAAY,GAAG,cAAc,CAAC,uBAAuB,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAC3E,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CACvC,CAAC;YAEF,wCAAwC;YACxC,MAAM,SAAS,GAAG,CAChB,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC;gBAC3B,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;gBACpB,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;gBACnB,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC;gBAC9B,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;gBACrB,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAChC,CAAC;YAEF,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,CAAC;QACrC,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,6CAA6C,EAAE,CAAC,CAAC,CAAC;YAChE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC;QACnD,CAAC;IACH,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,aAAa,CACxB,GAAW,EACX,sBAA8B,IAAI,EAClC,WAAmB,CAAC,EACpB,UAA4D;QAE5D,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC9B,OAAO,CAAC,KAAK,CAAC,yCAAyC,QAAQ,EAAE,CAAC,CAAC;QAEnE,0CAA0C;QAC1C,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,GAAG,MAAM,cAAc,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;QAE7E,0CAA0C;QAC1C,IAAI,SAAS,IAAI,YAAY,EAAE,CAAC;YAC9B,OAAO,CAAC,KAAK,CAAC,8EAA8E,CAAC,CAAC;YAC9F,OAAO,IAAI,eAAe,CAAC,QAAQ,EAAE,mBAAmB,EAAE,UAAU,CAAC,CAAC;QACxE,CAAC;QAED,sBAAsB;QACtB,IAAI,CAAC;YACH,OAAO,CAAC,KAAK,CAAC,mDAAmD,GAAG,EAAE,CAAC,CAAC;YACxE,MAAM,cAAc,GAAG,IAAI,cAAc,CAAC,QAAQ,EAAE,mBAAmB,EAAE,UAAU,CAAC,CA
AC;YACrF,MAAM,SAAS,GAAG,cAAc,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAC5C,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,GAAG,MAAM,SAAS,CAAC,IAAI,EAAE,CAAC;YAE1D,IAAI,CAAC,IAAI,IAAI,SAAS,EAAE,OAAO,EAAE,CAAC;gBAChC,OAAO,CAAC,KAAK,CAAC,uDAAuD,CAAC,CAAC;gBACvE,OAAO,cAAc,CAAC;YACxB,CAAC;QACH,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,0CAA0C,EAAE,CAAC,CAAC,CAAC;QAC/D,CAAC;QAED,+BAA+B;QAC/B,OAAO,CAAC,KAAK,CAAC,mDAAmD,GAAG,EAAE,CAAC,CAAC;QACxE,MAAM,cAAc,GAAG,IAAI,cAAc,CAAC,QAAQ,EAAE,mBAAmB,EAAE,UAAU,CAAC,CAAC;QACrF,MAAM,SAAS,GAAG,cAAc,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC5C,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,GAAG,MAAM,SAAS,CAAC,IAAI,EAAE,CAAC;QAE1D,IAAI,CAAC,IAAI,IAAI,SAAS,EAAE,OAAO,EAAE,CAAC;YAChC,OAAO,CAAC,KAAK,CAAC,uDAAuD,CAAC,CAAC;YACvE,OAAO,cAAc,CAAC;QACxB,CAAC;QAED,OAAO,CAAC,KAAK,CAAC,4CAA4C,GAAG,EAAE,CAAC,CAAC;QACjE,MAAM,IAAI,KAAK,CAAC,gCAAgC,GAAG,EAAE,CAAC,CAAC;IACzD,CAAC"}
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import { CrawlResult } from '../types.js';
|
|
2
|
-
import { BaseCrawler } from './base.js';
|
|
3
|
-
/**
 * Crawler that renders pages in a headless browser via Puppeteer and yields
 * one {@link CrawlResult} per page it manages to process.
 */
export declare class PuppeteerCrawler extends BaseCrawler {
    /** Browser instance launched by `crawl()`; unset until a crawl starts. */
    private browser?;
    /** User-Agent string applied to the page used for crawling. */
    private readonly userAgent;
    /** Number of links placed in each group by `groupLinks`. */
    private readonly LINK_GROUP_SIZE;
    /** Count of pages processed so far, compared against the request budget. */
    private curCrawlCount;
    /**
     * Launches a browser and crawls depth-first from `url`; the browser is
     * closed when the generator finishes.
     *
     * @param url - Absolute start URL.
     * @returns An async generator of crawled pages.
     */
    crawl(url: string): AsyncGenerator<CrawlResult, void, unknown>;
    /** Applies user agent, viewport, request blocking and logging hooks to a page. */
    private setupPage;
    /** Visits one URL, yields its result and recurses into its links. */
    private crawlSitePages;
    /** Navigates to a URL and waits for follow-up redirects to settle. */
    private gotoPageAndHandleRedirects;
    /** Extracts readable content, title and links from the loaded page. */
    private processPage;
    /** Returns the de-duplicated same-host links found on the page. */
    private getLinksFromPage;
    /** Splits a link list into groups of `LINK_GROUP_SIZE`. */
    private groupLinks;
    /** Aborts the crawl and closes the browser. */
    abort(): void;
}
|
|
@@ -1,191 +0,0 @@
|
|
|
1
|
-
import puppeteer from 'puppeteer';
|
|
2
|
-
import { BaseCrawler } from './base.js';
|
|
3
|
-
import { JSDOM } from 'jsdom';
|
|
4
|
-
import { Readability } from '@mozilla/readability';
|
|
5
|
-
/**
 * Crawler that renders pages in headless Chromium through Puppeteer.
 *
 * A single page object is reused for the whole crawl. Pages are visited
 * depth-first; the readable content is extracted with Readability and yielded
 * as `{ url, path, content, title }`.
 */
export class PuppeteerCrawler extends BaseCrawler {
    /** Browser launched by `crawl()`; undefined while no crawl is running. */
    browser;
    userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36';
    /** Number of links per group produced by `groupLinks`. */
    LINK_GROUP_SIZE = 2;
    /** Pages processed in the current crawl; compared against `maxRequestsPerCrawl`. */
    curCrawlCount = 0;
    /**
     * Launches a browser and crawls depth-first from `url`.
     *
     * @param url Absolute start URL; `new URL(url)` throws if it is not parseable.
     * @yields `{ url, path, content, title }` for every page processed successfully.
     */
    async *crawl(url) {
        // The budget is per crawl, so a reused instance must start from zero.
        this.curCrawlCount = 0;
        try {
            const browser = await puppeteer.launch({
                headless: true,
                args: [
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-gpu',
                    '--window-size=1280,800'
                ]
            });
            this.browser = browser;
            // Use the local handle: abort() may clear this.browser concurrently.
            const page = await browser.newPage();
            await this.setupPage(page);
            const visitedUrls = new Set();
            yield* this.crawlSitePages(page, new URL(url), 0, visitedUrls);
        }
        finally {
            await this.closeBrowser();
        }
    }
    /**
     * Closes the current browser, if any, and forgets the handle.
     *
     * Safe to call more than once. A failure to close is logged rather than
     * thrown so it cannot mask the error that ended the crawl or surface as an
     * unhandled rejection from `abort()`.
     */
    async closeBrowser() {
        const browser = this.browser;
        this.browser = undefined;
        if (!browser) {
            return;
        }
        try {
            await browser.close();
        }
        catch (error) {
            console.debug('Error closing browser:', error);
        }
    }
    /**
     * Prepares the page used for crawling: user agent, viewport, blocking of
     * image/media/font requests, and forwarding of page errors and console
     * warnings to the process log.
     *
     * @param page Puppeteer page to configure.
     */
    async setupPage(page) {
        await page.setUserAgent(this.userAgent);
        await page.setViewport({ width: 1280, height: 800 });
        // Block only unnecessary resources
        await page.setRequestInterception(true);
        page.on('request', request => {
            const blocked = ['image', 'media', 'font'].includes(request.resourceType());
            // Both calls return promises; a rejection (e.g. the request was
            // already handled or the page went away) must not go unhandled.
            const handled = blocked ? request.abort() : request.continue();
            void Promise.resolve(handled).catch(error => {
                console.debug('Request interception error:', error);
            });
        });
        // Handle JavaScript errors
        page.on('pageerror', error => {
            console.warn('Page error:', error);
        });
        // Handle console messages
        page.on('console', (msg) => {
            const type = msg.type();
            if (type === 'error' || type === 'warn') {
                console.debug(`Console ${type}:`, msg.text());
            }
        });
    }
    /**
     * Visits `curUrl`, yields its content and recurses into its links.
     *
     * Errors while loading or processing a page are logged and the page is
     * skipped; they do not end the crawl.
     *
     * @param page Puppeteer page reused for every navigation.
     * @param curUrl URL object of the page to visit.
     * @param depth Link depth of `curUrl` relative to the start URL.
     * @param visitedUrls Set of URL strings already attempted in this crawl.
     */
    async *crawlSitePages(page, curUrl, depth, visitedUrls) {
        const urlStr = curUrl.toString();
        if (this.isAborting || visitedUrls.has(urlStr) || !this.shouldCrawl(urlStr) || depth > this.maxDepth) {
            return;
        }
        // Record the attempt up front so a page that fails to load is not
        // retried from every other page that links to it.
        visitedUrls.add(urlStr);
        try {
            // Rate limiting
            await this.rateLimit();
            // Navigate to page with proper redirect handling
            await this.gotoPageAndHandleRedirects(page, urlStr);
            // Extract content
            const { content, title, links } = await this.processPage(page, curUrl);
            this.markUrlAsSeen(urlStr);
            this.curCrawlCount++;
            yield {
                url: urlStr,
                path: this.getPathFromUrl(urlStr),
                content,
                title
            };
            // Follow links while depth and request budget allow
            if (depth < this.maxDepth && this.curCrawlCount < this.maxRequestsPerCrawl) {
                for (const linkGroup of this.groupLinks(links)) {
                    for (const link of linkGroup) {
                        if (this.isAborting || this.curCrawlCount >= this.maxRequestsPerCrawl) {
                            return;
                        }
                        yield* this.crawlSitePages(page, new URL(link), depth + 1, visitedUrls);
                    }
                }
            }
        }
        catch (error) {
            console.error(`Error crawling ${urlStr}:`, error);
        }
    }
    /**
     * Navigates to `url` and then waits briefly for client-side redirects.
     *
     * After the initial navigation, the method waits 500 ms. If no response
     * event was seen in that window it returns; otherwise it waits up to
     * `MAX_PAGE_WAIT_MS` more, or until a follow-up navigation completes.
     *
     * @param page Puppeteer page to navigate.
     * @param url Absolute URL string.
     * @throws If the navigation fails or does not settle within the timeout.
     */
    async gotoPageAndHandleRedirects(page, url) {
        const MAX_PAGE_WAIT_MS = 5000;
        // A timeout of 0 disables Puppeteer's navigation timeout, so a page that
        // never reaches network idle would stall the crawl indefinitely.
        const NAVIGATION_TIMEOUT_MS = 60000;
        await page.goto(url, {
            timeout: NAVIGATION_TIMEOUT_MS,
            waitUntil: 'networkidle2'
        });
        let responseEventOccurred = false;
        const responseHandler = () => {
            responseEventOccurred = true;
        };
        const timers = [];
        const responseWatcher = new Promise((resolve) => {
            timers.push(setTimeout(() => {
                if (!responseEventOccurred) {
                    resolve();
                }
                else {
                    timers.push(setTimeout(() => resolve(), MAX_PAGE_WAIT_MS));
                }
            }, 500));
        });
        page.on('response', responseHandler);
        try {
            await Promise.race([responseWatcher, page.waitForNavigation()]);
        }
        finally {
            // Detach the listener and stop the timers on every exit path so
            // nothing accumulates on the reused page object.
            page.off('response', responseHandler);
            timers.forEach(timer => clearTimeout(timer));
        }
    }
    /**
     * Extracts readable text, title and outgoing links from the loaded page.
     *
     * @param page Puppeteer page that has finished navigating.
     * @param url URL object of the loaded page.
     * @returns `{ content, title, links }`
     * @throws Error('Failed to parse page content') when Readability finds no article.
     */
    async processPage(page, url) {
        // Wait for dynamic content
        try {
            await page.waitForFunction(() => {
                const mainContent = document.querySelector('main') || document.querySelector('.content') || document.querySelector('#content');
                return mainContent && mainContent.children.length > 0;
            }, { timeout: 5000 });
        }
        catch {
            console.warn('Timeout waiting for main content, proceeding anyway');
        }
        // Extract content using Readability
        const html = await page.content();
        const dom = new JSDOM(html, { url: url.toString() });
        let article;
        try {
            const reader = new Readability(dom.window.document, {
                charThreshold: 20,
                nbTopCandidates: 5,
                maxElemsToParse: 10000
            });
            article = reader.parse();
        }
        finally {
            // Release the jsdom window; one is created per page, so leaving them
            // open grows memory with every crawled page.
            dom.window.close();
        }
        if (!article) {
            throw new Error('Failed to parse page content');
        }
        // Extract links
        const links = await this.getLinksFromPage(page, url);
        return {
            content: article.textContent,
            title: article.title,
            links
        };
    }
    /**
     * Collects the links on the page that are worth following.
     *
     * A link is kept when it parses as a URL, is on the same hostname as
     * `curUrl`, has a pathname that starts with `curUrl`'s pathname, and (after
     * its hash is removed) differs from `curUrl.href`.
     *
     * @param page Puppeteer page that has finished navigating.
     * @param curUrl URL object of the loaded page.
     * @returns Array of unique link strings without hash fragments.
     */
    async getLinksFromPage(page, curUrl) {
        const hrefs = await page.$$eval('a', (anchors) => anchors.map((a) => a.href));
        const accepted = new Set();
        for (const href of hrefs) {
            let target;
            try {
                target = new URL(href);
            }
            catch {
                continue;
            }
            target.hash = ''; // Remove hash
            const link = target.href;
            if (target.pathname.startsWith(curUrl.pathname) &&
                target.hostname === curUrl.hostname &&
                link !== curUrl.href) {
                accepted.add(link);
            }
        }
        return Array.from(accepted);
    }
    /**
     * Splits `links` into consecutive groups of `LINK_GROUP_SIZE`.
     *
     * @param links Array of link strings.
     * @returns Array of groups; empty when `links` is empty.
     */
    groupLinks(links) {
        const groups = [];
        for (let start = 0; start < links.length; start += this.LINK_GROUP_SIZE) {
            groups.push(links.slice(start, start + this.LINK_GROUP_SIZE));
        }
        return groups;
    }
    /**
     * Aborts the crawl and closes the browser without waiting for it.
     */
    abort() {
        super.abort();
        void this.closeBrowser();
    }
}
|
|
191
|
-
//# sourceMappingURL=puppeteer.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"puppeteer.js","sourceRoot":"","sources":["../../src/crawler/puppeteer.ts"],"names":[],"mappings":"AAAA,OAAO,SAA4C,MAAM,WAAW,CAAC;AAErE,OAAO,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AACxC,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAEnD,MAAM,OAAO,gBAAiB,SAAQ,WAAW;IACvC,OAAO,CAAW;IACT,SAAS,GAAG,qHAAqH,CAAC;IAClI,eAAe,GAAG,CAAC,CAAC;IAC7B,aAAa,GAAG,CAAC,CAAC;IAE1B,KAAK,CAAC,CAAC,KAAK,CAAC,GAAW;QACtB,IAAI,CAAC;YACH,IAAI,CAAC,OAAO,GAAG,MAAM,SAAS,CAAC,MAAM,CAAC;gBACpC,QAAQ,EAAE,IAAI;gBACd,IAAI,EAAE;oBACJ,cAAc;oBACd,0BAA0B;oBAC1B,yBAAyB;oBACzB,eAAe;oBACf,wBAAwB;iBACzB;aACF,CAAC,CAAC;YAEH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YAC1C,MAAM,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YAE3B,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;YACtC,KAAK,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,IAAI,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,WAAW,CAAC,CAAC;QACjE,CAAC;gBAAS,CAAC;YACT,MAAM,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,CAAC;QAC9B,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,SAAS,CAAC,IAAU;QAChC,MAAM,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,IAAI,CAAC,WAAW,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;QAErD,mCAAmC;QACnC,MAAM,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAC;QACxC,IAAI,CAAC,EAAE,CAAC,SAAS,EAAE,OAAO,CAAC,EAAE;YAC3B,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;YAC5C,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;gBACtD,OAAO,CAAC,KAAK,EAAE,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,QAAQ,EAAE,CAAC;YACrB,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,2BAA2B;QAC3B,IAAI,CAAC,EAAE,CAAC,WAAW,EAAE,KAAK,CAAC,EAAE;YAC3B,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;QAEH,0BAA0B;QAC1B,IAAI,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,GAAmB,EAAE,EAAE;YACzC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;YACxB,IAAI,IAAI,KAAK,OAAO,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;gBACxC,OAAO,CAAC,KAAK,CAAC,WAAW,IAAI,GAAG,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;YAChD,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,CAAC,cAAc,CAC3B,IAAU,EACV,MAAW,EACX,KAAa,EACb,WAAwB;QAExB,MAAM,MA
AM,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;QAEjC,IAAI,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,IAAI,KAAK,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClF,OAAO;QACT,CAAC;QAED,IAAI,CAAC;YACH,gBAAgB;YAChB,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;YAEvB,iDAAiD;YACjD,MAAM,IAAI,CAAC,0BAA0B,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAEpD,kBAAkB;YAClB,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAEvE,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACxB,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YAC3B,IAAI,CAAC,aAAa,EAAE,CAAC;YAErB,MAAM;gBACJ,GAAG,EAAE,MAAM;gBACX,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC;gBACjC,OAAO;gBACP,KAAK;aACN,CAAC;YAEF,2BAA2B;YAC3B,IAAI,KAAK,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC,mBAAmB,EAAE,CAAC;gBAC3E,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;gBAC1C,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;oBACnC,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;wBAC7B,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,mBAAmB,EAAE,CAAC;4BACnD,OAAO;wBACT,CAAC;wBACD,KAAK,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,IAAI,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,GAAG,CAAC,EAAE,WAAW,CAAC,CAAC;oBAC1E,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,kBAAkB,MAAM,GAAG,EAAE,KAAK,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,0BAA0B,CAAC,IAAU,EAAE,GAAW;QAC9D,MAAM,gBAAgB,GAAG,IAAI,CAAC;QAE9B,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;YACnB,OAAO,EAAE,CAAC;YACV,SAAS,EAAE,cAAc;SAC1B,CAAC,CAAC;QAEH,IAAI,qBAAqB,GAAG,KAAK,CAAC;QAClC,MAAM,eAAe,GAAG,GAAG,EAAE,CAAC,qBAAqB,GAAG,IAAI,CAAC;QAE3D,MAAM,eAAe,GAAG,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;YACpD,UAAU,CAAC,GAAG,EAAE;gBACd,IAAI,CAAC,qBAAqB,EAAE,CAAC;oBAC3B,OAAO,EAAE,CAAC;gBACZ,CAAC;qBAAM,CAAC;oBACN,UAAU,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,EAAE,gBAAgB,CAAC,CAAC;gBAChD,CAAC;YACH,CAAC,EAAE,GAAG,CAAC,CAAC;QACV,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,EAAE,CAAC,UAAU,EAAE,eAAe,CAAC,CAAC;QACrC,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC,eAAe,EAAE,IAAI,CAAC,iBAAiB,EAAE,CAAC,CAAC,CAAC;QAChE,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,eAAe,CAAC,CAAC;IACxC,CAAC;IAEO,KAAK,CAAC,WAAW,C
AAC,IAAU,EAAE,GAAQ;QAC5C,2BAA2B;QAC3B,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,eAAe,CAAC,GAAG,EAAE;gBAC9B,MAAM,WAAW,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,UAAU,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;gBAC/H,OAAO,WAAW,IAAI,WAAW,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;YACxD,CAAC,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QACxB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,qDAAqD,CAAC,CAAC;QACtE,CAAC;QAED,oCAAoC;QACpC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;QAClC,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE;YAClD,aAAa,EAAE,EAAE;YACjB,eAAe,EAAE,CAAC;YAClB,eAAe,EAAE,KAAK;SACvB,CAAC,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAE/B,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;QAClD,CAAC;QAED,gBAAgB;QAChB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAErD,OAAO;YACL,OAAO,EAAE,OAAO,CAAC,WAAW;YAC5B,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,KAAK;SACN,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,gBAAgB,CAAC,IAAU,EAAE,MAAW;QACpD,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAE1E,MAAM,YAAY,GAAG,KAAK;aACvB,GAAG,CAAC,IAAI,CAAC,EAAE;YACV,IAAI,CAAC;gBACH,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;gBAC1B,GAAG,CAAC,IAAI,GAAG,EAAE,CAAC,CAAC,cAAc;gBAC7B,OAAO,GAAG,CAAC,IAAI,CAAC;YAClB,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAC;aACD,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE;YAC/B,IAAI,CAAC,IAAI;gBAAE,OAAO,KAAK,CAAC;YACxB,IAAI,CAAC;gBACH,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;gBAC1B,OAAO,CACL,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC,QAAQ,CAAC;oBACxC,GAAG,CAAC,QAAQ,KAAK,MAAM,CAAC,QAAQ;oBAChC,IAAI,KAAK,MAAM,CAAC,IAAI,CACrB,CAAC;YACJ,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC,CAAC,CAAC;QAEL,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC;IAC3C,CAAC;IAEO,UAAU,CAAC,KAAe;QAChC,OAAO,KAAK,CAAC,
MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,EAAE;YACnC,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,IAAI,CAAC,eAAe,CAAC,CAAC;YACxD,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrB,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACf,CAAC;YACD,GAAG,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC3B,OAAO,GAAG,CAAC;QACb,CAAC,EAAE,EAAgB,CAAC,CAAC;IACvB,CAAC;IAED,KAAK;QACH,KAAK,CAAC,KAAK,EAAE,CAAC;QACd,KAAK,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,CAAC;IAC7B,CAAC;CACF"}
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
import OpenAI from 'openai';
|
|
2
|
-
import { logger } from '../util/logger.js';
|
|
3
|
-
/**
 * Embedding provider backed by the OpenAI `text-embedding-3-small` model,
 * with a small in-memory cache of previously computed embeddings.
 */
export class OpenAIEmbeddings {
    /** OpenAI SDK client created from the API key passed to the constructor. */
    openai;
    /** Map from trimmed input text to the embedding vector returned for it. */
    cache;
    /** Vector size requested from the API. */
    dimensions = 1536; // text-embedding-3-small dimensions
    /**
     * @param {string} apiKey OpenAI API key.
     * @throws {Error} If `apiKey` is missing or empty.
     */
    constructor(apiKey) {
        if (!apiKey) {
            throw new Error('OpenAI API key is required');
        }
        this.openai = new OpenAI({ apiKey });
        this.cache = new Map();
    }
    /**
     * Returns the embedding for `text`, served from the cache when the same
     * (trimmed) text was embedded before.
     *
     * @param {string} text Text to embed; must be a non-empty string that is
     *   still non-empty after trimming.
     * @returns {Promise<number[]>} The embedding vector reported by the API.
     * @throws {Error} On invalid input, when the API response carries no
     *   embedding, or when the API call itself fails (rethrown after logging).
     */
    async embed(text) {
        // Ensure input is a string and not empty
        if (!text || typeof text !== 'string') {
            throw new Error('Input text must be a non-empty string');
        }
        const cleanText = text.trim();
        if (!cleanText) {
            throw new Error('Input text is empty after trimming');
        }
        // The cache is keyed by the exact text that is sent to the API. Keying
        // by a prefix (the previous behaviour) made two different inputs that
        // share their first 1000 characters return each other's embedding.
        // Memory stays bounded because the cache is capped at 1000 entries.
        const cached = this.cache.get(cleanText);
        if (cached) {
            return cached;
        }
        try {
            const response = await this.openai.embeddings.create({
                model: "text-embedding-3-small",
                input: cleanText,
                dimensions: this.dimensions
            });
            const embedding = response.data?.[0]?.embedding;
            if (!embedding) {
                throw new Error('No embedding returned from OpenAI');
            }
            this.cache.set(cleanText, embedding);
            // Limit cache size to prevent memory issues: a Map iterates in
            // insertion order, so the first key is the oldest entry.
            if (this.cache.size > 1000) {
                const oldestKey = this.cache.keys().next().value;
                if (oldestKey !== undefined) {
                    this.cache.delete(oldestKey);
                }
            }
            return embedding;
        }
        catch (error) {
            logger.debug('Error generating embedding:', error);
            throw error;
        }
    }
}
|
|
56
|
-
//# sourceMappingURL=openai.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"openai.js","sourceRoot":"","sources":["../../src/embeddings/openai.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C,MAAM,OAAO,gBAAgB;IACnB,MAAM,CAAS;IACf,KAAK,CAAwB;IAC5B,UAAU,GAAG,IAAI,CAAC,CAAC,oCAAoC;IAEhE,YAAY,MAAc;QACxB,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAChD,CAAC;QACD,IAAI,CAAC,MAAM,GAAG,IAAI,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QACrC,IAAI,CAAC,KAAK,GAAG,IAAI,GAAG,EAAE,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,yCAAyC;QACzC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAC3D,CAAC;QAED,oBAAoB;QACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,uBAAuB;QAC7D,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACxC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9B,IAAI,CAAC,SAAS,EAAE,CAAC;gBACf,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;YACxD,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC;gBACnD,KAAK,EAAE,wBAAwB;gBAC/B,KAAK,EAAE,SAAS;gBAChB,UAAU,EAAE,IAAI,CAAC,UAAU;aAC5B,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC;gBACnC,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YACvD,CAAC;YAED,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAE7C,mBAAmB;YACnB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YAEpC,4CAA4C;YAC5C,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,EAAE,CAAC;gBAC3B,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;gBAChD,IAAI,QAAQ,EAAE,CAAC;oBACb,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;gBAC9B,CAAC;YACH,CAAC;YAED,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,6BAA6B,EAAE,KAAK,CAAC,CAAC;YACnD,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;CACF"}
|
package/build/rag/cache.d.ts
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
import { GeneratedResponse } from "./response-generator.js";
|
|
2
|
-
/**
 * Shape of a cache entry: a query string together with the generated
 * response associated with it.
 */
export interface CachedResponse {
    /** Query string the entry belongs to. */
    query: string;
    /** Generated response associated with `query`. */
    response: GeneratedResponse;
}
|
|
6
|
-
/**
 * Declaration of a cache for generated responses, addressed by query string.
 *
 * Only the public surface is declared here; how entries are stored, matched
 * or expired is not visible from this file.
 */
export declare class RAGCache {
    /** Backing store; its concrete type is not exposed by this declaration. */
    private cache;
    /**
     * Asynchronously retrieves the cache entry for a query.
     *
     * @param query - Query string to look up.
     * @returns A promise resolving to a {@link CachedResponse} or `null`
     *   (presumably `null` means nothing is cached for `query` — confirm
     *   against the implementation).
     */
    getCachedResponse(query: string): Promise<CachedResponse | null>;
    /**
     * Asynchronously records a generated response for a query.
     *
     * @param query - Query string the response belongs to.
     * @param response - Generated response to record.
     * @returns A promise that resolves with no value.
     */
    cacheResponse(query: string, response: GeneratedResponse): Promise<void>;
}
|
package/build/rag/cache.js
DELETED
package/build/rag/cache.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../../src/rag/cache.ts"],"names":[],"mappings":"AAOA,MAAM,OAAO,QAAQ;IACX,KAAK,GAAgC,IAAI,GAAG,EAAE,CAAC;IAEvD,KAAK,CAAC,iBAAiB,CAAC,KAAa;QACnC,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,KAAa,EAAE,QAA2B;QAC5D,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC7C,CAAC;CACF"}
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
/**
 * Text fragments that together make up a generated code example.
 * All three parts are plain strings.
 */
export interface CodeExample {
    /** Import statement text for the example. */
    imports: string;
    /** Textual rendering of the component's props. */
    props: string;
    /** Usage snippet text for the component. */
    usage: string;
}
|
|
6
|
-
/**
 * Declaration of the generator that turns component information into a
 * {@link CodeExample}.
 */
export declare class CodeGenerator {
    /**
     * Asynchronously builds a code example for a component.
     *
     * @param component - Component information; untyped (`any`) in this
     *   declaration, so the expected shape must be checked against the
     *   implementation.
     * @param context - Additional context for generation; also untyped here.
     * @returns A promise resolving to the import, props and usage strings.
     */
    generateCodeExample(component: any, context: any): Promise<CodeExample>;
    /** Private helper; its signature is not exposed by this declaration. */
    private generateImports;
    /** Private helper; its signature is not exposed by this declaration. */
    private generateProps;
    /** Private helper; its signature is not exposed by this declaration. */
    private generateUsage;
}
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
/**
 * Builds small code snippets for a component: an import line, a textual
 * prop list and a self-closing usage tag.
 */
export class CodeGenerator {
    /**
     * Generates the import statement, prop list and usage tag for a component.
     *
     * Missing information falls back to defaults: the component name becomes
     * `'Example'`, the props become `{}` and the import path becomes
     * `'example'`. A `null` or `undefined` `component` / `context` is treated
     * the same as an object without those fields instead of raising a
     * `TypeError`.
     *
     * @param {{ name?: string, props?: Record<string, unknown> } | null | undefined} component
     *   Component information.
     * @param {{ importPath?: string } | null | undefined} context
     *   Generation context; only `importPath` is read.
     * @returns {Promise<{ imports: string, props: string, usage: string }>}
     *   The three generated fragments.
     */
    async generateCodeExample(component, context) {
        // Extract component name and props from the component info
        const componentName = component?.name || 'Example';
        const componentProps = component?.props || {};
        const imports = this.generateImports(componentName, context);
        const props = this.generateProps(componentProps);
        const usage = this.generateUsage(componentName, props);
        return { imports, props, usage };
    }
    /**
     * Builds a default-import statement for the component.
     *
     * @param {string} componentName Identifier to import.
     * @param {{ importPath?: string } | null | undefined} context
     *   Source of the module path; `'example'` is used when it is absent.
     * @returns {string} The import statement.
     */
    generateImports(componentName, context) {
        const importPath = context?.importPath || 'example';
        return `import ${componentName} from '${importPath}';`;
    }
    /**
     * Renders props as `key: <JSON value>` pairs joined by `', '`.
     *
     * @param {Record<string, unknown>} props Prop names mapped to values.
     * @returns {string} The joined list; empty string when there are no props.
     */
    generateProps(props) {
        return Object.entries(props)
            .map(([key, value]) => `${key}: ${JSON.stringify(value)}`)
            .join(', ');
    }
    /**
     * Renders a self-closing tag for the component, embedding the prop list
     * verbatim when it is non-empty.
     *
     * NOTE(review): the prop list uses `key: value, ...` syntax, which is not
     * valid attribute syntax inside a tag — confirm whether consumers treat
     * `usage` as display text only before changing the format.
     *
     * @param {string} componentName Tag name.
     * @param {string} props Prop list produced by `generateProps`.
     * @returns {string} The usage snippet.
     */
    generateUsage(componentName, props) {
        return props
            ? `<${componentName} ${props} />`
            : `<${componentName} />`;
    }
}
|
|
30
|
-
//# sourceMappingURL=code-generator.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"code-generator.js","sourceRoot":"","sources":["../../src/rag/code-generator.ts"],"names":[],"mappings":"AAMA,MAAM,OAAO,aAAa;IACxB,KAAK,CAAC,mBAAmB,CAAC,SAAc,EAAE,OAAY;QACpD,2DAA2D;QAC3D,MAAM,aAAa,GAAG,SAAS,CAAC,IAAI,IAAI,SAAS,CAAC;QAClD,MAAM,cAAc,GAAG,SAAS,CAAC,KAAK,IAAI,EAAE,CAAC;QAE7C,oCAAoC;QACpC,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QAE7D,wBAAwB;QACxB,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,CAAC;QAEjD,yBAAyB;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;QAEvD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IACnC,CAAC;IAEO,eAAe,CAAC,aAAqB,EAAE,OAAY;QACzD,kCAAkC;QAClC,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,SAAS,CAAC;QACnD,OAAO,UAAU,aAAa,UAAU,UAAU,IAAI,CAAC;IACzD,CAAC;IAEO,aAAa,CAAC,KAA0B;QAC9C,OAAO,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC;aACzB,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;aACzD,IAAI,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC;IAEO,aAAa,CAAC,aAAqB,EAAE,KAAa;QACxD,OAAO,KAAK;YACV,CAAC,CAAC,IAAI,aAAa,IAAI,KAAK,KAAK;YACjC,CAAC,CAAC,IAAI,aAAa,KAAK,CAAC;IAC7B,CAAC;CACF"}
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import { EnhancedChunk } from "../types/rag.js";
|
|
2
|
-
import { ComponentRelationship } from "../crawler/content-extractor-types.js";
|
|
3
|
-
/**
 * Result produced by `ContextAssembler.assembleContext`: the chunks that
 * form the context, the component relationships, and descriptive metadata.
 */
export interface AssembledContext {
    /** Chunks included in the context (ordering semantics are not visible here). */
    hierarchicalContext: EnhancedChunk[];
    /** Component relationships attached to the context. */
    relationships: ComponentRelationship[];
    /** Descriptive information about the context as a whole. */
    metadata: {
        /** Summary text. */
        summary: string;
        /** Topic labels. */
        topics: string[];
        /** Difficulty level; one of the three listed literals. */
        complexity: 'basic' | 'intermediate' | 'advanced';
        /** Prerequisite labels. */
        prerequisites: string[];
        /** Framework names. */
        frameworks: string[];
        /** Language names. */
        languages: string[];
    };
}
|
|
15
|
-
/**
 * Declaration of the class that turns a list of chunks into an
 * {@link AssembledContext}.
 *
 * Only `assembleContext` is public. The private members below are declared
 * without signatures, so their behavior cannot be determined from this file.
 */
export declare class ContextAssembler {
    /**
     * Asynchronously assembles a context from the given chunks.
     *
     * @param chunks - Chunks to assemble.
     * @returns A promise resolving to the assembled context.
     */
    assembleContext(chunks: EnhancedChunk[]): Promise<AssembledContext>;
    /** Private helper; signature not exposed by this declaration. */
    private groupChunksByType;
    /** Private helper; signature not exposed by this declaration. */
    private extractRelationships;
    /** Private helper; signature not exposed by this declaration. */
    private buildHierarchy;
    /** Private helper; signature not exposed by this declaration. */
    private consolidateMetadata;
    /** Private helper; signature not exposed by this declaration. */
    private generateSummary;
    /** Private helper; signature not exposed by this declaration. */
    private deduplicateRelationships;
}
|