codesummary 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -213
- package/README.md +61 -395
- package/features.md +25 -386
- package/package.json +13 -17
- package/src/ai/errors.js +85 -0
- package/src/ai/featureFlags.js +8 -0
- package/src/ai/promptTemplates.js +337 -0
- package/src/ai/providerClient.js +81 -0
- package/src/ai/providers/ollama.js +92 -0
- package/src/ai/providers/openaiCompatible.js +96 -0
- package/src/analysis/repositorySignals.js +196 -0
- package/src/cli.js +819 -77
- package/src/configManager.js +21 -0
- package/src/graph/adapters/baseAdapter.js +24 -0
- package/src/graph/adapters/javascriptAdapter.js +53 -0
- package/src/graph/adapters/pythonAdapter.js +77 -0
- package/src/graph/graphEngine.js +151 -0
- package/src/graph/graphMetrics.js +79 -0
- package/src/graph/graphSchema.js +30 -0
- package/src/graph/universalExtractor.js +29 -0
- package/src/llmGenerator.js +723 -8
- package/src/pdfGenerator.js +1189 -275
- package/src/renderers/llmSummaryRenderer.js +14 -0
- package/src/renderers/pdfThemeRenderer.js +685 -0
- package/src/scanner.js +115 -8
- package/rag-schema.json +0 -114
- package/src/ragConfig.js +0 -369
- package/src/ragGenerator.js +0 -1740
package/src/scanner.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import fs from 'fs-extra';
|
|
2
2
|
import path from 'path';
|
|
3
3
|
import chalk from 'chalk';
|
|
4
|
+
import ignore from 'ignore';
|
|
4
5
|
import ErrorHandler from './errorHandler.js';
|
|
5
6
|
import { formatFileSize, getExtensionDescription, matchesGlobPattern } from './utils.js';
|
|
6
7
|
|
|
@@ -14,6 +15,7 @@ export class Scanner {
|
|
|
14
15
|
this.allowedExtensions = new Set(config.allowedExtensions.map(ext => ext.toLowerCase()));
|
|
15
16
|
this.excludeDirs = new Set(config.excludeDirs);
|
|
16
17
|
this.excludeFiles = config.excludeFiles || [];
|
|
18
|
+
this.csIgnoreMatchers = new Map();
|
|
17
19
|
}
|
|
18
20
|
|
|
19
21
|
/**
|
|
@@ -40,6 +42,8 @@ export class Scanner {
|
|
|
40
42
|
}
|
|
41
43
|
|
|
42
44
|
console.log(chalk.gray(`Scanning directory: ${resolvedRoot}`));
|
|
45
|
+
this.csIgnoreMatchers.clear();
|
|
46
|
+
await this.preloadCsIgnoreFiles(resolvedRoot, scanWarnings);
|
|
43
47
|
|
|
44
48
|
const filesByExtension = {};
|
|
45
49
|
const scannedFiles = new Set(); // Prevent duplicates
|
|
@@ -51,7 +55,7 @@ export class Scanner {
|
|
|
51
55
|
processedFiles: 0
|
|
52
56
|
};
|
|
53
57
|
|
|
54
|
-
await this.walkDirectory(resolvedRoot, resolvedRoot, filesByExtension, scannedFiles, scanContext);
|
|
58
|
+
await this.walkDirectory(resolvedRoot, resolvedRoot, filesByExtension, scannedFiles, scanContext, []);
|
|
55
59
|
|
|
56
60
|
// Sort files within each extension group
|
|
57
61
|
Object.keys(filesByExtension).forEach(ext => {
|
|
@@ -80,8 +84,9 @@ export class Scanner {
|
|
|
80
84
|
* @param {Set} scannedFiles - Set to track processed files and avoid duplicates
|
|
81
85
|
* @param {object} scanContext - Context object to track scan statistics
|
|
82
86
|
*/
|
|
83
|
-
async walkDirectory(currentPath, rootPath, filesByExtension, scannedFiles, scanContext) {
|
|
87
|
+
async walkDirectory(currentPath, rootPath, filesByExtension, scannedFiles, scanContext, inheritedIgnoreChain = []) {
|
|
84
88
|
try {
|
|
89
|
+
const ignoreChain = await this.buildIgnoreChainForDirectory(currentPath, inheritedIgnoreChain);
|
|
85
90
|
const entries = await fs.readdir(currentPath, { withFileTypes: true });
|
|
86
91
|
|
|
87
92
|
for (const entry of entries) {
|
|
@@ -90,16 +95,16 @@ export class Scanner {
|
|
|
90
95
|
|
|
91
96
|
if (entry.isDirectory()) {
|
|
92
97
|
// Skip excluded directories and hidden directories (unless explicitly allowed)
|
|
93
|
-
if (this.shouldSkipDirectory(entry.name, relativePath)) {
|
|
98
|
+
if (this.shouldSkipDirectory(entry.name, relativePath, fullPath, ignoreChain)) {
|
|
94
99
|
scanContext.skippedDirectories++;
|
|
95
100
|
continue;
|
|
96
101
|
}
|
|
97
102
|
|
|
98
103
|
// Recursively scan subdirectory
|
|
99
|
-
await this.walkDirectory(fullPath, rootPath, filesByExtension, scannedFiles, scanContext);
|
|
104
|
+
await this.walkDirectory(fullPath, rootPath, filesByExtension, scannedFiles, scanContext, ignoreChain);
|
|
100
105
|
} else if (entry.isFile()) {
|
|
101
106
|
// Process file if it matches criteria
|
|
102
|
-
await this.processFile(fullPath, rootPath, filesByExtension, scannedFiles, scanContext);
|
|
107
|
+
await this.processFile(fullPath, rootPath, filesByExtension, scannedFiles, scanContext, ignoreChain);
|
|
103
108
|
} else if (entry.isSymbolicLink()) {
|
|
104
109
|
// Handle symbolic links with caution
|
|
105
110
|
scanContext.warnings.push(`Skipped symbolic link: ${relativePath}`);
|
|
@@ -130,7 +135,7 @@ export class Scanner {
|
|
|
130
135
|
* @param {Set} scannedFiles - Set of already processed files
|
|
131
136
|
* @param {object} scanContext - Context object to track scan statistics
|
|
132
137
|
*/
|
|
133
|
-
async processFile(fullPath, rootPath, filesByExtension, scannedFiles, scanContext) {
|
|
138
|
+
async processFile(fullPath, rootPath, filesByExtension, scannedFiles, scanContext, ignoreChain = []) {
|
|
134
139
|
try {
|
|
135
140
|
const relativePath = path.relative(rootPath, fullPath);
|
|
136
141
|
|
|
@@ -140,6 +145,11 @@ export class Scanner {
|
|
|
140
145
|
}
|
|
141
146
|
scannedFiles.add(fullPath);
|
|
142
147
|
|
|
148
|
+
if (this.shouldSkipByCsIgnore(fullPath, false, ignoreChain)) {
|
|
149
|
+
scanContext.skippedFiles++;
|
|
150
|
+
return;
|
|
151
|
+
}
|
|
152
|
+
|
|
143
153
|
const extension = path.extname(relativePath).toLowerCase();
|
|
144
154
|
|
|
145
155
|
// Skip files without extensions or not in allowed list
|
|
@@ -215,7 +225,11 @@ export class Scanner {
|
|
|
215
225
|
* @param {string} relativePath - Relative path from root
|
|
216
226
|
* @returns {boolean} True if directory should be skipped
|
|
217
227
|
*/
|
|
218
|
-
shouldSkipDirectory(dirName, relativePath) {
|
|
228
|
+
shouldSkipDirectory(dirName, relativePath, fullPath, ignoreChain = []) {
|
|
229
|
+
if (this.shouldSkipByCsIgnore(fullPath, true, ignoreChain)) {
|
|
230
|
+
return true;
|
|
231
|
+
}
|
|
232
|
+
|
|
219
233
|
// Skip directories in exclude list
|
|
220
234
|
if (this.excludeDirs.has(dirName)) {
|
|
221
235
|
return true;
|
|
@@ -239,6 +253,99 @@ export class Scanner {
|
|
|
239
253
|
return false;
|
|
240
254
|
}
|
|
241
255
|
|
|
256
|
+
/**
|
|
257
|
+
* Preload every .csignore file under scan root.
|
|
258
|
+
* @param {string} rootPath
|
|
259
|
+
* @param {Array<string>} warnings
|
|
260
|
+
*/
|
|
261
|
+
async preloadCsIgnoreFiles(rootPath, warnings = []) {
|
|
262
|
+
const stack = [rootPath];
|
|
263
|
+
|
|
264
|
+
while (stack.length > 0) {
|
|
265
|
+
const current = stack.pop();
|
|
266
|
+
if (!current) continue;
|
|
267
|
+
|
|
268
|
+
let entries = [];
|
|
269
|
+
try {
|
|
270
|
+
entries = await fs.readdir(current, { withFileTypes: true });
|
|
271
|
+
} catch {
|
|
272
|
+
continue;
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
for (const entry of entries) {
|
|
276
|
+
const fullPath = path.join(current, entry.name);
|
|
277
|
+
if (entry.isDirectory()) {
|
|
278
|
+
if (this.excludeDirs.has(entry.name)) continue;
|
|
279
|
+
if (entry.name.startsWith('.') && !this.isAllowedHiddenDirectory(entry.name)) continue;
|
|
280
|
+
stack.push(fullPath);
|
|
281
|
+
continue;
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
if (!entry.isFile() || entry.name !== '.csignore') continue;
|
|
285
|
+
|
|
286
|
+
try {
|
|
287
|
+
const content = await fs.readFile(fullPath, 'utf8');
|
|
288
|
+
const matcher = ignore();
|
|
289
|
+
matcher.add(content);
|
|
290
|
+
const negationMatcher = ignore();
|
|
291
|
+
const negationPatterns = content
|
|
292
|
+
.split(/\r\n|\r|\n/)
|
|
293
|
+
.map(line => line.trim())
|
|
294
|
+
.filter(line => line.startsWith('!') && line.length > 1)
|
|
295
|
+
.map(line => line.slice(1).trim())
|
|
296
|
+
.filter(line => line.length > 0 && !line.startsWith('#'));
|
|
297
|
+
if (negationPatterns.length > 0) {
|
|
298
|
+
negationMatcher.add(negationPatterns);
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
this.csIgnoreMatchers.set(current, { matcher, negationMatcher, hasNegations: negationPatterns.length > 0 });
|
|
302
|
+
console.log(chalk.gray(`Using .csignore rules from ${fullPath}`));
|
|
303
|
+
} catch (error) {
|
|
304
|
+
warnings.push(`Failed to read .csignore (${fullPath}): ${error.message}`);
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
/**
|
|
311
|
+
* Build ignore chain from scan root to current directory.
|
|
312
|
+
* @param {string} directoryPath
|
|
313
|
+
* @param {Array<object>} inheritedIgnoreChain
|
|
314
|
+
* @returns {Promise<Array<object>>}
|
|
315
|
+
*/
|
|
316
|
+
async buildIgnoreChainForDirectory(directoryPath, inheritedIgnoreChain = []) {
|
|
317
|
+
const layer = this.csIgnoreMatchers.get(directoryPath);
|
|
318
|
+
if (!layer) return inheritedIgnoreChain;
|
|
319
|
+
return [...inheritedIgnoreChain, { baseDir: directoryPath, ...layer }];
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
/**
|
|
323
|
+
* Check whether a path is ignored by .csignore rules using gitignore-like
|
|
324
|
+
* precedence across ancestor directories.
|
|
325
|
+
* @param {string} absolutePath - Absolute path to file or directory
|
|
326
|
+
* @param {boolean} isDirectory - Whether path is a directory
|
|
327
|
+
* @param {Array<object>} ignoreChain - Ordered list of ancestor ignore matchers
|
|
328
|
+
* @returns {boolean}
|
|
329
|
+
*/
|
|
330
|
+
shouldSkipByCsIgnore(absolutePath, isDirectory = false, ignoreChain = []) {
|
|
331
|
+
if (!ignoreChain || ignoreChain.length === 0) return false;
|
|
332
|
+
let ignored = false;
|
|
333
|
+
|
|
334
|
+
for (const layer of ignoreChain) {
|
|
335
|
+
const relative = path.relative(layer.baseDir, absolutePath).replace(/\\/g, '/');
|
|
336
|
+
if (!relative || relative.startsWith('..')) continue;
|
|
337
|
+
const candidate = isDirectory ? `${relative}/` : relative;
|
|
338
|
+
const result = layer.matcher.test(candidate);
|
|
339
|
+
if (result.ignored) ignored = true;
|
|
340
|
+
if (result.unignored) ignored = false;
|
|
341
|
+
if (!result.ignored && !result.unignored && layer.hasNegations && layer.negationMatcher.ignores(candidate)) {
|
|
342
|
+
ignored = false;
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
return ignored;
|
|
347
|
+
}
|
|
348
|
+
|
|
242
349
|
/**
|
|
243
350
|
* Check if a file should be excluded based on patterns
|
|
244
351
|
* @param {string} fileName - File name to check
|
|
@@ -384,4 +491,4 @@ export class Scanner {
|
|
|
384
491
|
}
|
|
385
492
|
}
|
|
386
493
|
|
|
387
|
-
export default Scanner;
|
|
494
|
+
export default Scanner;
|
package/rag-schema.json
DELETED
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
-
"$id": "https://github.com/skamoll/CodeSummary/schemas/rag-output.json",
|
|
4
|
-
"title": "CodeSummary RAG Output Schema",
|
|
5
|
-
"description": "Schema for CodeSummary RAG-optimized JSON output format",
|
|
6
|
-
"type": "object",
|
|
7
|
-
"required": ["metadata", "files", "index"],
|
|
8
|
-
"properties": {
|
|
9
|
-
"metadata": {
|
|
10
|
-
"type": "object",
|
|
11
|
-
"required": ["projectName", "generatedAt", "version", "generator"],
|
|
12
|
-
"properties": {
|
|
13
|
-
"projectName": { "type": "string" },
|
|
14
|
-
"generatedAt": { "type": "string", "format": "date-time" },
|
|
15
|
-
"version": { "type": "string", "pattern": "^\\d+\\.\\d+\\.\\d+$" },
|
|
16
|
-
"generator": { "type": "string" },
|
|
17
|
-
"scanPath": { "type": "string" },
|
|
18
|
-
"config": { "type": "object" },
|
|
19
|
-
"summary": { "type": "object" }
|
|
20
|
-
}
|
|
21
|
-
},
|
|
22
|
-
"files": {
|
|
23
|
-
"type": "array",
|
|
24
|
-
"items": {
|
|
25
|
-
"type": "object",
|
|
26
|
-
"required": ["id", "path", "extension", "language", "hash", "chunks"],
|
|
27
|
-
"properties": {
|
|
28
|
-
"id": { "type": "string" },
|
|
29
|
-
"path": { "type": "string" },
|
|
30
|
-
"extension": { "type": "string" },
|
|
31
|
-
"language": { "type": "string" },
|
|
32
|
-
"size": { "type": "number", "minimum": 0 },
|
|
33
|
-
"hash": { "type": "string", "pattern": "^sha256-[a-f0-9]{64}$" },
|
|
34
|
-
"modified": { "type": "string", "format": "date-time" },
|
|
35
|
-
"tags": { "type": "array", "items": { "type": "string" } },
|
|
36
|
-
"chunks": {
|
|
37
|
-
"type": "array",
|
|
38
|
-
"items": {
|
|
39
|
-
"type": "object",
|
|
40
|
-
"required": ["id", "content", "tokenEstimate"],
|
|
41
|
-
"properties": {
|
|
42
|
-
"id": { "type": "string" },
|
|
43
|
-
"content": { "type": "string" },
|
|
44
|
-
"tokenEstimate": { "type": "number", "minimum": 0 },
|
|
45
|
-
"lineStart": { "type": "number", "minimum": 1 },
|
|
46
|
-
"lineEnd": { "type": "number", "minimum": 1 },
|
|
47
|
-
"chunkingMethod": { "type": "string" },
|
|
48
|
-
"imports": { "type": "array", "items": { "type": "string" } },
|
|
49
|
-
"calls": { "type": "array", "items": { "type": "string" } }
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
}
|
|
55
|
-
},
|
|
56
|
-
"index": {
|
|
57
|
-
"type": "object",
|
|
58
|
-
"required": ["version", "generatedAt", "summary", "chunkOffsets", "fileOffsets", "statistics"],
|
|
59
|
-
"properties": {
|
|
60
|
-
"version": { "type": "string", "pattern": "^\\d+\\.\\d+\\.\\d+$" },
|
|
61
|
-
"generatedAt": { "type": "string", "format": "date-time" },
|
|
62
|
-
"summary": {
|
|
63
|
-
"type": "object",
|
|
64
|
-
"required": ["fileCount", "chunkCount", "totalBytes"],
|
|
65
|
-
"properties": {
|
|
66
|
-
"fileCount": { "type": "number", "minimum": 0 },
|
|
67
|
-
"chunkCount": { "type": "number", "minimum": 0 },
|
|
68
|
-
"totalBytes": { "type": "number", "minimum": 0 },
|
|
69
|
-
"languages": { "type": "array", "items": { "type": "string" } },
|
|
70
|
-
"extensions": { "type": "array", "items": { "type": "string" } }
|
|
71
|
-
}
|
|
72
|
-
},
|
|
73
|
-
"chunkOffsets": {
|
|
74
|
-
"type": "object",
|
|
75
|
-
"patternProperties": {
|
|
76
|
-
"^chunk_[a-f0-9]+_\\d+$": {
|
|
77
|
-
"type": "object",
|
|
78
|
-
"required": ["jsonStart", "jsonEnd", "contentStart", "contentEnd", "filePath"],
|
|
79
|
-
"properties": {
|
|
80
|
-
"jsonStart": { "type": "number", "minimum": 0 },
|
|
81
|
-
"jsonEnd": { "type": "number", "minimum": 0 },
|
|
82
|
-
"contentStart": { "type": "number", "minimum": 0 },
|
|
83
|
-
"contentEnd": { "type": "number", "minimum": 0 },
|
|
84
|
-
"filePath": { "type": "string" }
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
},
|
|
89
|
-
"fileOffsets": {
|
|
90
|
-
"type": "object",
|
|
91
|
-
"patternProperties": {
|
|
92
|
-
"^[a-f0-9]+$": {
|
|
93
|
-
"type": "array",
|
|
94
|
-
"items": { "type": "number", "minimum": 0 },
|
|
95
|
-
"minItems": 2,
|
|
96
|
-
"maxItems": 2
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
},
|
|
100
|
-
"statistics": {
|
|
101
|
-
"type": "object",
|
|
102
|
-
"required": ["processingTimeMs", "bytesWritten", "totalFiles"],
|
|
103
|
-
"properties": {
|
|
104
|
-
"processingTimeMs": { "type": "number", "minimum": 1 },
|
|
105
|
-
"bytesPerSecond": { "type": "number", "minimum": 0 },
|
|
106
|
-
"bytesWritten": { "type": "number", "minimum": 0 },
|
|
107
|
-
"totalFiles": { "type": "number", "minimum": 0 },
|
|
108
|
-
"emptyFiles": { "type": "number", "minimum": 0 }
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
}
|
package/src/ragConfig.js
DELETED
|
@@ -1,369 +0,0 @@
|
|
|
1
|
-
import fs from "fs-extra";
|
|
2
|
-
import yaml from "js-yaml";
|
|
3
|
-
import path from "path";
|
|
4
|
-
import { matchesGlobPattern } from "./utils.js";
|
|
5
|
-
|
|
6
|
-
/**
 * RAG Configuration Manager
 * Loads and validates configuration from raggen.config.yaml
 *
 * Holds the built-in defaults, the path of the config file in use (if any)
 * and the merged configuration produced by the last successful load.
 */
export class RagConfigManager {
  constructor() {
    this.defaultConfig = this.getDefaultConfig();
    this.configPath = null;
    this.loadedConfig = null;
  }

  /**
   * Load configuration from YAML file
   *
   * Falls back to the defaults when no file is found or when reading/parsing
   * fails; in the failure case a warning is printed instead of throwing.
   *
   * @param {string} configPath - Path to config file (optional)
   * @returns {Promise<object>} Merged configuration, or the defaults
   */
  async loadConfig(configPath = null) {
    // Explicit path wins; otherwise probe the well-known locations.
    this.configPath = configPath || (await this.findConfigFile());

    if (!this.configPath || !(await fs.pathExists(this.configPath))) {
      console.log(`📋 No RAG config found, using defaults`);
      return this.defaultConfig;
    }

    try {
      const yamlContent = await fs.readFile(this.configPath, "utf8");
      // NOTE(review): yaml.load parses a user-supplied file. Confirm the
      // installed js-yaml version restricts `load` to a safe schema.
      const userConfig = yaml.load(yamlContent);

      // Merge with defaults
      this.loadedConfig = this.mergeConfigs(this.defaultConfig, userConfig);

      console.log(`📋 RAG config loaded from: ${this.configPath}`);
      return this.loadedConfig;
    } catch (error) {
      console.warn(`⚠️ Error loading RAG config: ${error.message}`);
      console.log(`📋 Using default RAG configuration`);
      return this.defaultConfig;
    }
  }

  /**
   * Find configuration file in common locations
   *
   * Candidates are relative paths, checked in order; the first existing one
   * is returned resolved to an absolute path.
   *
   * @returns {Promise<string|null>} Path to config file or null
   */
  async findConfigFile() {
    const searchPaths = [
      "raggen.config.yaml",
      "raggen.config.yml",
      ".raggen.config.yaml",
      ".raggen.config.yml",
      "config/raggen.yaml",
      "config/raggen.yml",
    ];

    for (const searchPath of searchPaths) {
      if (await fs.pathExists(searchPath)) {
        return path.resolve(searchPath);
      }
    }

    return null;
  }

  /**
   * Get default configuration
   * @returns {object} Default config
   */
  getDefaultConfig() {
    return {
      extensions: {
        include: [
          ".json",
          ".ts",
          ".js",
          ".jsx",
          ".tsx",
          ".xml",
          ".html",
          ".css",
          ".scss",
          ".md",
          ".txt",
          ".py",
          ".java",
          ".cs",
          ".cpp",
          ".c",
          ".h",
          ".yaml",
          ".yml",
          ".sh",
          ".bat",
        ],
      },
      chunking: {
        maxTokens: 1000,
        overlap: 200,
        tokenEstimation: "ceil(length/4)",
      },
      handlers: {
        code: {
          splitByFunction: true,
          detectImports: true,
          detectCalls: true,
          complexityAnalysis: true,
        },
        markup: {
          splitByElement: true,
          preserveStructure: true,
        },
        styling: {
          splitByRule: true,
          detectImports: true,
        },
        config: {
          splitBySection: true,
          validateSyntax: false,
        },
      },
      paths: {
        exclude: [
          "node_modules",
          ".git",
          "dist",
          "build",
          "coverage",
          "out",
          "__pycache__",
          ".next",
          ".nuxt",
          ".cache",
          "tmp",
          "temp",
          "logs",
          "bower_components",
          "vendor",
        ],
      },
      files: {
        exclude: [
          "*-lock.json",
          "*.lock",
          "composer.lock",
          "Pipfile.lock",
          "*.min.js",
          "*.min.css",
          "*.map",
          ".DS_Store",
          "Thumbs.db",
          "*-lock.yaml",
        ],
      },
      performance: {
        maxWorkers: 1,
        batchSize: 50,
        maxFileSize: "100MB",
        streamingThreshold: "10MB",
      },
      output: {
        format: "json",
        compression: false,
        validation: true,
        indexing: true,
      },
      metadata: {
        calculateHashes: true,
        extractTags: true,
        trackRelationships: true,
        includeStats: true,
      },
      logging: {
        level: "info",
        progressReporting: true,
        statisticsReporting: true,
      },
      quality: {
        maxChunkSize: "50KB",
        maxOutputSize: "250MB",
        duplicateDetection: true,
        emptyChunkHandling: "skip",
      },
    };
  }

  /**
   * Deep merge configuration objects
   *
   * The defaults are cloned first, so neither argument is modified. A user
   * config that is not a plain object (empty document, scalar, array) is
   * ignored and a clone of the defaults is returned.
   *
   * @param {object} defaultConfig - Default configuration
   * @param {object} userConfig - User configuration
   * @returns {object} Merged configuration
   */
  mergeConfigs(defaultConfig, userConfig) {
    const merged = JSON.parse(JSON.stringify(defaultConfig)); // Deep clone

    const isPlainSection =
      userConfig !== null &&
      typeof userConfig === "object" &&
      !Array.isArray(userConfig);

    // A YAML scalar such as "foo" would otherwise be merged character by
    // character as keys "0", "1", "2".
    if (!isPlainSection) {
      return merged;
    }

    return this.deepMerge(merged, userConfig);
  }

  /**
   * Recursively merge objects
   *
   * Nested non-array objects are merged key by key; primitives and arrays
   * from `source` replace the value in `target`. `target` is modified in
   * place.
   *
   * @param {object} target - Target object
   * @param {object} source - Source object
   * @returns {object} Merged object
   */
  deepMerge(target, source) {
    if (source === null || typeof source !== "object") {
      return target;
    }

    for (const key of Object.keys(source)) {
      // Merging into these keys would reach Object.prototype or a
      // constructor and leak user-supplied values into every object.
      if (key === "__proto__" || key === "constructor" || key === "prototype") {
        continue;
      }

      const value = source[key];
      const isNestedObject =
        value && typeof value === "object" && !Array.isArray(value);

      if (isNestedObject) {
        // Recursive merge for objects
        if (!target[key] || typeof target[key] !== "object") {
          target[key] = {};
        }
        this.deepMerge(target[key], value);
      } else {
        // Direct assignment for primitives and arrays
        target[key] = value;
      }
    }

    return target;
  }

  /**
   * Validate configuration
   *
   * Every problem found is printed to stderr before returning.
   *
   * @param {object} config - Configuration to validate
   * @returns {boolean} True if valid
   */
  validateConfig(config) {
    const errors = [];

    if (config === null || typeof config !== "object") {
      errors.push("Configuration must be an object");
    } else {
      // Validate required sections
      const requiredSections = ["extensions", "chunking", "handlers"];
      for (const section of requiredSections) {
        if (!config[section]) {
          errors.push(`Missing required section: ${section}`);
        }
      }

      // Validate chunking settings
      if (config.chunking) {
        const { maxTokens, overlap } = config.chunking;
        // NaN has typeof "number" and fails every comparison, so it has to
        // be rejected explicitly.
        if (typeof maxTokens !== "number" || Number.isNaN(maxTokens) || maxTokens <= 0) {
          errors.push("chunking.maxTokens must be a positive number");
        }
        if (typeof overlap !== "number" || Number.isNaN(overlap) || overlap < 0) {
          errors.push("chunking.overlap must be a non-negative number");
        }
      }

      // Validate extensions
      if (config.extensions && config.extensions.include) {
        if (!Array.isArray(config.extensions.include)) {
          errors.push("extensions.include must be an array");
        } else {
          for (const ext of config.extensions.include) {
            if (typeof ext !== "string" || !ext.startsWith(".")) {
              errors.push(`Invalid extension: ${ext} (must start with dot)`);
            }
          }
        }
      }
    }

    if (errors.length > 0) {
      console.error("❌ RAG Configuration validation errors:");
      errors.forEach((error) => console.error(` • ${error}`));
      return false;
    }

    return true;
  }

  /**
   * Get configuration value with dot notation
   *
   * Reads from the loaded configuration when there is one, otherwise from
   * the defaults. As soon as a path segment is missing the default value is
   * returned; the lookup never continues inside the default value.
   *
   * @param {string} path - Configuration path (e.g., 'chunking.maxTokens')
   * @param {any} defaultValue - Default value if not found
   * @returns {any} Configuration value
   */
  get(path, defaultValue = null) {
    if (typeof path !== "string") {
      return defaultValue;
    }

    let current = this.loadedConfig || this.defaultConfig;

    for (const key of path.split(".")) {
      if (current === null || current === undefined) {
        return defaultValue;
      }
      current = current[key];
      if (current === undefined) {
        return defaultValue;
      }
    }

    return current;
  }

  /**
   * Display current configuration
   */
  displayConfig() {
    const config = this.loadedConfig || this.defaultConfig;

    console.log("\n📋 RAG Generator Configuration:");
    console.log(` Source: ${this.configPath ? this.configPath : "defaults"}`);
    console.log(` Extensions: ${config.extensions.include.length} types`);
    console.log(` Max tokens per chunk: ${config.chunking.maxTokens}`);
    console.log(` Token overlap: ${config.chunking.overlap}`);
    console.log(` Max workers: ${config.performance.maxWorkers}`);
    console.log(` Batch size: ${config.performance.batchSize}`);
    console.log();
  }

  /**
   * Parse file size string to bytes
   *
   * Numbers are returned unchanged. Strings are a decimal number followed by
   * B, KB, MB, GB or TB (case-insensitive, 1024-based). Anything else,
   * including non-string input, yields 0.
   *
   * @param {string} sizeStr - Size string (e.g., '100MB', '1GB')
   * @returns {number} Size in bytes
   */
  parseFileSize(sizeStr) {
    if (typeof sizeStr === "number") return sizeStr;
    if (typeof sizeStr !== "string") return 0;

    // Every unit the pattern below accepts must appear here; a missing entry
    // would make the value be read as plain bytes.
    const units = {
      B: 1,
      KB: 1024,
      MB: 1024 ** 2,
      GB: 1024 ** 3,
      TB: 1024 ** 4,
    };

    const match = sizeStr.trim().match(/^(\d+(?:\.\d+)?)\s*([KMGT]?B)$/i);
    if (!match) return 0;

    const value = parseFloat(match[1]);
    const unit = match[2].toUpperCase();

    return Math.floor(value * units[unit]);
  }

  /**
   * Check if file should be excluded by path
   *
   * An exclude entry matches when it equals one path segment, or a run of
   * consecutive segments, of `filePath`. Substrings inside a segment do not
   * match, so "out" excludes "out/a.js" but not "src/layout.js".
   *
   * @param {string} filePath - File path to check
   * @returns {boolean} True if should be excluded
   */
  shouldExcludePath(filePath) {
    if (typeof filePath !== "string" || filePath === "") return false;

    const config = this.loadedConfig || this.defaultConfig;
    const excludePaths = config.paths?.exclude || [];

    // Use "/" for both separator styles and drop edge separators so every
    // segment can be delimited uniformly.
    const normalize = (value) =>
      value.replace(/[\\/]+/g, "/").replace(/^\/|\/$/g, "");

    const haystack = `/${normalize(filePath)}/`;

    return excludePaths.some((pattern) => {
      if (typeof pattern !== "string") return false;
      const needle = normalize(pattern);
      // An empty entry must not exclude everything.
      return needle !== "" && haystack.includes(`/${needle}/`);
    });
  }

  /**
   * Check if file should be excluded by filename pattern
   * @param {string} fileName - File name to check
   * @returns {boolean} True if should be excluded
   */
  shouldExcludeFile(fileName) {
    const config = this.loadedConfig || this.defaultConfig;
    const excludeFiles = config.files?.exclude || [];

    return excludeFiles.some((pattern) => matchesGlobPattern(fileName, pattern));
  }
}
|
|
368
|
-
|
|
369
|
-
export default RagConfigManager;
|