codesummary 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/ragConfig.js CHANGED
@@ -1,373 +1,369 @@
1
- import fs from "fs-extra";
2
- import yaml from "js-yaml";
3
- import path from "path";
4
-
5
- /**
6
- * RAG Configuration Manager
7
- * Loads and validates configuration from raggen.config.yaml
8
- */
9
- export class RagConfigManager {
10
- constructor() {
11
- this.defaultConfig = this.getDefaultConfig();
12
- this.configPath = null;
13
- this.loadedConfig = null;
14
- }
15
-
16
- /**
17
- * Load configuration from YAML file
18
- * @param {string} configPath - Path to config file (optional)
19
- * @returns {object} Merged configuration
20
- */
21
- async loadConfig(configPath = null) {
22
- // Try to find config file
23
- this.configPath = configPath || (await this.findConfigFile());
24
-
25
- if (this.configPath && (await fs.pathExists(this.configPath))) {
26
- try {
27
- const yamlContent = await fs.readFile(this.configPath, "utf8");
28
- const userConfig = yaml.load(yamlContent);
29
-
30
- // Merge with defaults
31
- this.loadedConfig = this.mergeConfigs(this.defaultConfig, userConfig);
32
-
33
- console.log(`šŸ“‹ RAG config loaded from: ${this.configPath}`);
34
- return this.loadedConfig;
35
- } catch (error) {
36
- console.warn(`āš ļø Error loading RAG config: ${error.message}`);
37
- console.log(`šŸ“‹ Using default RAG configuration`);
38
- return this.defaultConfig;
39
- }
40
- } else {
41
- console.log(`šŸ“‹ No RAG config found, using defaults`);
42
- return this.defaultConfig;
43
- }
44
- }
45
-
46
- /**
47
- * Find configuration file in common locations
48
- * @returns {string|null} Path to config file or null
49
- */
50
- async findConfigFile() {
51
- const searchPaths = [
52
- "raggen.config.yaml",
53
- "raggen.config.yml",
54
- ".raggen.config.yaml",
55
- ".raggen.config.yml",
56
- "config/raggen.yaml",
57
- "config/raggen.yml",
58
- ];
59
-
60
- for (const searchPath of searchPaths) {
61
- if (await fs.pathExists(searchPath)) {
62
- return path.resolve(searchPath);
63
- }
64
- }
65
-
66
- return null;
67
- }
68
-
69
- /**
70
- * Get default configuration
71
- * @returns {object} Default config
72
- */
73
- getDefaultConfig() {
74
- return {
75
- extensions: {
76
- include: [
77
- ".json",
78
- ".ts",
79
- ".js",
80
- ".jsx",
81
- ".tsx",
82
- ".xml",
83
- ".html",
84
- ".css",
85
- ".scss",
86
- ".md",
87
- ".txt",
88
- ".py",
89
- ".java",
90
- ".cs",
91
- ".cpp",
92
- ".c",
93
- ".h",
94
- ".yaml",
95
- ".yml",
96
- ".sh",
97
- ".bat",
98
- ],
99
- },
100
- chunking: {
101
- maxTokens: 1000,
102
- overlap: 200,
103
- tokenEstimation: "ceil(length/4)",
104
- },
105
- handlers: {
106
- code: {
107
- splitByFunction: true,
108
- detectImports: true,
109
- detectCalls: true,
110
- complexityAnalysis: true,
111
- },
112
- markup: {
113
- splitByElement: true,
114
- preserveStructure: true,
115
- },
116
- styling: {
117
- splitByRule: true,
118
- detectImports: true,
119
- },
120
- config: {
121
- splitBySection: true,
122
- validateSyntax: false,
123
- },
124
- },
125
- paths: {
126
- exclude: [
127
- "node_modules",
128
- ".git",
129
- "dist",
130
- "build",
131
- "coverage",
132
- "out",
133
- "__pycache__",
134
- ".next",
135
- ".nuxt",
136
- ".cache",
137
- "tmp",
138
- "temp",
139
- "logs",
140
- "bower_components",
141
- "vendor",
142
- ],
143
- },
144
- files: {
145
- exclude: [
146
- "*-lock.json",
147
- "*.lock",
148
- "composer.lock",
149
- "Pipfile.lock",
150
- "*.min.js",
151
- "*.min.css",
152
- "*.map",
153
- ".DS_Store",
154
- "Thumbs.db",
155
- "*-lock.yaml",
156
- ],
157
- },
158
- performance: {
159
- maxWorkers: 1,
160
- batchSize: 50,
161
- maxFileSize: "100MB",
162
- streamingThreshold: "10MB",
163
- },
164
- output: {
165
- format: "json",
166
- compression: false,
167
- validation: true,
168
- indexing: true,
169
- },
170
- metadata: {
171
- calculateHashes: true,
172
- extractTags: true,
173
- trackRelationships: true,
174
- includeStats: true,
175
- },
176
- logging: {
177
- level: "info",
178
- progressReporting: true,
179
- statisticsReporting: true,
180
- },
181
- quality: {
182
- maxChunkSize: "50KB",
183
- maxOutputSize: "250MB",
184
- duplicateDetection: true,
185
- emptyChunkHandling: "skip",
186
- },
187
- };
188
- }
189
-
190
- /**
191
- * Deep merge configuration objects
192
- * @param {object} defaultConfig - Default configuration
193
- * @param {object} userConfig - User configuration
194
- * @returns {object} Merged configuration
195
- */
196
- mergeConfigs(defaultConfig, userConfig) {
197
- const merged = JSON.parse(JSON.stringify(defaultConfig)); // Deep clone
198
-
199
- return this.deepMerge(merged, userConfig);
200
- }
201
-
202
- /**
203
- * Recursively merge objects
204
- * @param {object} target - Target object
205
- * @param {object} source - Source object
206
- * @returns {object} Merged object
207
- */
208
- deepMerge(target, source) {
209
- for (const key in source) {
210
- if (source.hasOwnProperty(key)) {
211
- if (
212
- source[key] &&
213
- typeof source[key] === "object" &&
214
- !Array.isArray(source[key])
215
- ) {
216
- // Recursive merge for objects
217
- if (!target[key] || typeof target[key] !== "object") {
218
- target[key] = {};
219
- }
220
- this.deepMerge(target[key], source[key]);
221
- } else {
222
- // Direct assignment for primitives and arrays
223
- target[key] = source[key];
224
- }
225
- }
226
- }
227
- return target;
228
- }
229
-
230
- /**
231
- * Validate configuration
232
- * @param {object} config - Configuration to validate
233
- * @returns {boolean} True if valid
234
- */
235
- validateConfig(config) {
236
- const errors = [];
237
-
238
- // Validate required sections
239
- const requiredSections = ["extensions", "chunking", "handlers"];
240
- for (const section of requiredSections) {
241
- if (!config[section]) {
242
- errors.push(`Missing required section: ${section}`);
243
- }
244
- }
245
-
246
- // Validate chunking settings
247
- if (config.chunking) {
248
- if (
249
- typeof config.chunking.maxTokens !== "number" ||
250
- config.chunking.maxTokens <= 0
251
- ) {
252
- errors.push("chunking.maxTokens must be a positive number");
253
- }
254
- if (
255
- typeof config.chunking.overlap !== "number" ||
256
- config.chunking.overlap < 0
257
- ) {
258
- errors.push("chunking.overlap must be a non-negative number");
259
- }
260
- }
261
-
262
- // Validate extensions
263
- if (config.extensions && config.extensions.include) {
264
- if (!Array.isArray(config.extensions.include)) {
265
- errors.push("extensions.include must be an array");
266
- } else {
267
- for (const ext of config.extensions.include) {
268
- if (typeof ext !== "string" || !ext.startsWith(".")) {
269
- errors.push(`Invalid extension: ${ext} (must start with dot)`);
270
- }
271
- }
272
- }
273
- }
274
-
275
- if (errors.length > 0) {
276
- console.error("āŒ RAG Configuration validation errors:");
277
- errors.forEach((error) => console.error(` • ${error}`));
278
- return false;
279
- }
280
-
281
- return true;
282
- }
283
-
284
- /**
285
- * Get configuration value with dot notation
286
- * @param {string} path - Configuration path (e.g., 'chunking.maxTokens')
287
- * @param {any} defaultValue - Default value if not found
288
- * @returns {any} Configuration value
289
- */
290
- get(path, defaultValue = null) {
291
- const config = this.loadedConfig || this.defaultConfig;
292
-
293
- return path.split(".").reduce((obj, key) => {
294
- return obj && obj[key] !== undefined ? obj[key] : defaultValue;
295
- }, config);
296
- }
297
-
298
- /**
299
- * Display current configuration
300
- */
301
- displayConfig() {
302
- const config = this.loadedConfig || this.defaultConfig;
303
-
304
- console.log("\nšŸ“‹ RAG Generator Configuration:");
305
- console.log(` Source: ${this.configPath ? this.configPath : "defaults"}`);
306
- console.log(` Extensions: ${config.extensions.include.length} types`);
307
- console.log(` Max tokens per chunk: ${config.chunking.maxTokens}`);
308
- console.log(` Token overlap: ${config.chunking.overlap}`);
309
- console.log(` Max workers: ${config.performance.maxWorkers}`);
310
- console.log(` Batch size: ${config.performance.batchSize}`);
311
- console.log();
312
- }
313
-
314
- /**
315
- * Parse file size string to bytes
316
- * @param {string} sizeStr - Size string (e.g., '100MB', '1GB')
317
- * @returns {number} Size in bytes
318
- */
319
- parseFileSize(sizeStr) {
320
- if (typeof sizeStr === "number") return sizeStr;
321
-
322
- const units = {
323
- B: 1,
324
- KB: 1024,
325
- MB: 1024 * 1024,
326
- GB: 1024 * 1024 * 1024,
327
- };
328
-
329
- const match = sizeStr.match(/^(\d+(?:\.\d+)?)\s*([KMGT]?B)$/i);
330
- if (!match) return 0;
331
-
332
- const value = parseFloat(match[1]);
333
- const unit = match[2].toUpperCase();
334
-
335
- return Math.floor(value * (units[unit] || 1));
336
- }
337
-
338
- /**
339
- * Check if file should be excluded by path
340
- * @param {string} filePath - File path to check
341
- * @returns {boolean} True if should be excluded
342
- */
343
- shouldExcludePath(filePath) {
344
- const config = this.loadedConfig || this.defaultConfig;
345
- const excludePaths = config.paths?.exclude || [];
346
-
347
- return excludePaths.some((pattern) => {
348
- return (
349
- filePath.includes(pattern) ||
350
- filePath.includes(path.sep + pattern + path.sep)
351
- );
352
- });
353
- }
354
-
355
- /**
356
- * Check if file should be excluded by filename pattern
357
- * @param {string} fileName - File name to check
358
- * @returns {boolean} True if should be excluded
359
- */
360
- shouldExcludeFile(fileName) {
361
- const config = this.loadedConfig || this.defaultConfig;
362
- const excludeFiles = config.files?.exclude || [];
363
-
364
- return excludeFiles.some((pattern) => {
365
- // Simple glob pattern matching
366
- const regexPattern = pattern.replace(/\./g, "\\.").replace(/\*/g, ".*");
367
- const regex = new RegExp(`^${regexPattern}$`, "i");
368
- return regex.test(fileName);
369
- });
370
- }
371
- }
372
-
373
- export default new RagConfigManager();
1
+ import fs from "fs-extra";
2
+ import yaml from "js-yaml";
3
+ import path from "path";
4
+ import { matchesGlobPattern } from "./utils.js";
5
+
6
+ /**
7
+ * RAG Configuration Manager
8
+ * Loads and validates configuration from raggen.config.yaml
9
+ */
10
+ export class RagConfigManager {
11
+ constructor() {
12
+ this.defaultConfig = this.getDefaultConfig();
13
+ this.configPath = null;
14
+ this.loadedConfig = null;
15
+ }
16
+
17
+ /**
18
+ * Load configuration from YAML file
19
+ * @param {string} configPath - Path to config file (optional)
20
+ * @returns {object} Merged configuration
21
+ */
22
+ async loadConfig(configPath = null) {
23
+ // Try to find config file
24
+ this.configPath = configPath || (await this.findConfigFile());
25
+
26
+ if (this.configPath && (await fs.pathExists(this.configPath))) {
27
+ try {
28
+ const yamlContent = await fs.readFile(this.configPath, "utf8");
29
+ const userConfig = yaml.load(yamlContent);
30
+
31
+ // Merge with defaults
32
+ this.loadedConfig = this.mergeConfigs(this.defaultConfig, userConfig);
33
+
34
+ console.log(`šŸ“‹ RAG config loaded from: ${this.configPath}`);
35
+ return this.loadedConfig;
36
+ } catch (error) {
37
+ console.warn(`āš ļø Error loading RAG config: ${error.message}`);
38
+ console.log(`šŸ“‹ Using default RAG configuration`);
39
+ return this.defaultConfig;
40
+ }
41
+ } else {
42
+ console.log(`šŸ“‹ No RAG config found, using defaults`);
43
+ return this.defaultConfig;
44
+ }
45
+ }
46
+
47
+ /**
48
+ * Find configuration file in common locations
49
+ * @returns {string|null} Path to config file or null
50
+ */
51
+ async findConfigFile() {
52
+ const searchPaths = [
53
+ "raggen.config.yaml",
54
+ "raggen.config.yml",
55
+ ".raggen.config.yaml",
56
+ ".raggen.config.yml",
57
+ "config/raggen.yaml",
58
+ "config/raggen.yml",
59
+ ];
60
+
61
+ for (const searchPath of searchPaths) {
62
+ if (await fs.pathExists(searchPath)) {
63
+ return path.resolve(searchPath);
64
+ }
65
+ }
66
+
67
+ return null;
68
+ }
69
+
70
+ /**
71
+ * Get default configuration
72
+ * @returns {object} Default config
73
+ */
74
+ getDefaultConfig() {
75
+ return {
76
+ extensions: {
77
+ include: [
78
+ ".json",
79
+ ".ts",
80
+ ".js",
81
+ ".jsx",
82
+ ".tsx",
83
+ ".xml",
84
+ ".html",
85
+ ".css",
86
+ ".scss",
87
+ ".md",
88
+ ".txt",
89
+ ".py",
90
+ ".java",
91
+ ".cs",
92
+ ".cpp",
93
+ ".c",
94
+ ".h",
95
+ ".yaml",
96
+ ".yml",
97
+ ".sh",
98
+ ".bat",
99
+ ],
100
+ },
101
+ chunking: {
102
+ maxTokens: 1000,
103
+ overlap: 200,
104
+ tokenEstimation: "ceil(length/4)",
105
+ },
106
+ handlers: {
107
+ code: {
108
+ splitByFunction: true,
109
+ detectImports: true,
110
+ detectCalls: true,
111
+ complexityAnalysis: true,
112
+ },
113
+ markup: {
114
+ splitByElement: true,
115
+ preserveStructure: true,
116
+ },
117
+ styling: {
118
+ splitByRule: true,
119
+ detectImports: true,
120
+ },
121
+ config: {
122
+ splitBySection: true,
123
+ validateSyntax: false,
124
+ },
125
+ },
126
+ paths: {
127
+ exclude: [
128
+ "node_modules",
129
+ ".git",
130
+ "dist",
131
+ "build",
132
+ "coverage",
133
+ "out",
134
+ "__pycache__",
135
+ ".next",
136
+ ".nuxt",
137
+ ".cache",
138
+ "tmp",
139
+ "temp",
140
+ "logs",
141
+ "bower_components",
142
+ "vendor",
143
+ ],
144
+ },
145
+ files: {
146
+ exclude: [
147
+ "*-lock.json",
148
+ "*.lock",
149
+ "composer.lock",
150
+ "Pipfile.lock",
151
+ "*.min.js",
152
+ "*.min.css",
153
+ "*.map",
154
+ ".DS_Store",
155
+ "Thumbs.db",
156
+ "*-lock.yaml",
157
+ ],
158
+ },
159
+ performance: {
160
+ maxWorkers: 1,
161
+ batchSize: 50,
162
+ maxFileSize: "100MB",
163
+ streamingThreshold: "10MB",
164
+ },
165
+ output: {
166
+ format: "json",
167
+ compression: false,
168
+ validation: true,
169
+ indexing: true,
170
+ },
171
+ metadata: {
172
+ calculateHashes: true,
173
+ extractTags: true,
174
+ trackRelationships: true,
175
+ includeStats: true,
176
+ },
177
+ logging: {
178
+ level: "info",
179
+ progressReporting: true,
180
+ statisticsReporting: true,
181
+ },
182
+ quality: {
183
+ maxChunkSize: "50KB",
184
+ maxOutputSize: "250MB",
185
+ duplicateDetection: true,
186
+ emptyChunkHandling: "skip",
187
+ },
188
+ };
189
+ }
190
+
191
+ /**
192
+ * Deep merge configuration objects
193
+ * @param {object} defaultConfig - Default configuration
194
+ * @param {object} userConfig - User configuration
195
+ * @returns {object} Merged configuration
196
+ */
197
+ mergeConfigs(defaultConfig, userConfig) {
198
+ const merged = JSON.parse(JSON.stringify(defaultConfig)); // Deep clone
199
+
200
+ return this.deepMerge(merged, userConfig);
201
+ }
202
+
203
+ /**
204
+ * Recursively merge objects
205
+ * @param {object} target - Target object
206
+ * @param {object} source - Source object
207
+ * @returns {object} Merged object
208
+ */
209
+ deepMerge(target, source) {
210
+ for (const key in source) {
211
+ if (source.hasOwnProperty(key)) {
212
+ if (
213
+ source[key] &&
214
+ typeof source[key] === "object" &&
215
+ !Array.isArray(source[key])
216
+ ) {
217
+ // Recursive merge for objects
218
+ if (!target[key] || typeof target[key] !== "object") {
219
+ target[key] = {};
220
+ }
221
+ this.deepMerge(target[key], source[key]);
222
+ } else {
223
+ // Direct assignment for primitives and arrays
224
+ target[key] = source[key];
225
+ }
226
+ }
227
+ }
228
+ return target;
229
+ }
230
+
231
+ /**
232
+ * Validate configuration
233
+ * @param {object} config - Configuration to validate
234
+ * @returns {boolean} True if valid
235
+ */
236
+ validateConfig(config) {
237
+ const errors = [];
238
+
239
+ // Validate required sections
240
+ const requiredSections = ["extensions", "chunking", "handlers"];
241
+ for (const section of requiredSections) {
242
+ if (!config[section]) {
243
+ errors.push(`Missing required section: ${section}`);
244
+ }
245
+ }
246
+
247
+ // Validate chunking settings
248
+ if (config.chunking) {
249
+ if (
250
+ typeof config.chunking.maxTokens !== "number" ||
251
+ config.chunking.maxTokens <= 0
252
+ ) {
253
+ errors.push("chunking.maxTokens must be a positive number");
254
+ }
255
+ if (
256
+ typeof config.chunking.overlap !== "number" ||
257
+ config.chunking.overlap < 0
258
+ ) {
259
+ errors.push("chunking.overlap must be a non-negative number");
260
+ }
261
+ }
262
+
263
+ // Validate extensions
264
+ if (config.extensions && config.extensions.include) {
265
+ if (!Array.isArray(config.extensions.include)) {
266
+ errors.push("extensions.include must be an array");
267
+ } else {
268
+ for (const ext of config.extensions.include) {
269
+ if (typeof ext !== "string" || !ext.startsWith(".")) {
270
+ errors.push(`Invalid extension: ${ext} (must start with dot)`);
271
+ }
272
+ }
273
+ }
274
+ }
275
+
276
+ if (errors.length > 0) {
277
+ console.error("āŒ RAG Configuration validation errors:");
278
+ errors.forEach((error) => console.error(` • ${error}`));
279
+ return false;
280
+ }
281
+
282
+ return true;
283
+ }
284
+
285
+ /**
286
+ * Get configuration value with dot notation
287
+ * @param {string} path - Configuration path (e.g., 'chunking.maxTokens')
288
+ * @param {any} defaultValue - Default value if not found
289
+ * @returns {any} Configuration value
290
+ */
291
+ get(path, defaultValue = null) {
292
+ const config = this.loadedConfig || this.defaultConfig;
293
+
294
+ return path.split(".").reduce((obj, key) => {
295
+ return obj && obj[key] !== undefined ? obj[key] : defaultValue;
296
+ }, config);
297
+ }
298
+
299
+ /**
300
+ * Display current configuration
301
+ */
302
+ displayConfig() {
303
+ const config = this.loadedConfig || this.defaultConfig;
304
+
305
+ console.log("\nšŸ“‹ RAG Generator Configuration:");
306
+ console.log(` Source: ${this.configPath ? this.configPath : "defaults"}`);
307
+ console.log(` Extensions: ${config.extensions.include.length} types`);
308
+ console.log(` Max tokens per chunk: ${config.chunking.maxTokens}`);
309
+ console.log(` Token overlap: ${config.chunking.overlap}`);
310
+ console.log(` Max workers: ${config.performance.maxWorkers}`);
311
+ console.log(` Batch size: ${config.performance.batchSize}`);
312
+ console.log();
313
+ }
314
+
315
+ /**
316
+ * Parse file size string to bytes
317
+ * @param {string} sizeStr - Size string (e.g., '100MB', '1GB')
318
+ * @returns {number} Size in bytes
319
+ */
320
+ parseFileSize(sizeStr) {
321
+ if (typeof sizeStr === "number") return sizeStr;
322
+
323
+ const units = {
324
+ B: 1,
325
+ KB: 1024,
326
+ MB: 1024 * 1024,
327
+ GB: 1024 * 1024 * 1024,
328
+ };
329
+
330
+ const match = sizeStr.match(/^(\d+(?:\.\d+)?)\s*([KMGT]?B)$/i);
331
+ if (!match) return 0;
332
+
333
+ const value = parseFloat(match[1]);
334
+ const unit = match[2].toUpperCase();
335
+
336
+ return Math.floor(value * (units[unit] || 1));
337
+ }
338
+
339
+ /**
340
+ * Check if file should be excluded by path
341
+ * @param {string} filePath - File path to check
342
+ * @returns {boolean} True if should be excluded
343
+ */
344
+ shouldExcludePath(filePath) {
345
+ const config = this.loadedConfig || this.defaultConfig;
346
+ const excludePaths = config.paths?.exclude || [];
347
+
348
+ return excludePaths.some((pattern) => {
349
+ return (
350
+ filePath.includes(pattern) ||
351
+ filePath.includes(path.sep + pattern + path.sep)
352
+ );
353
+ });
354
+ }
355
+
356
+ /**
357
+ * Check if file should be excluded by filename pattern
358
+ * @param {string} fileName - File name to check
359
+ * @returns {boolean} True if should be excluded
360
+ */
361
+ shouldExcludeFile(fileName) {
362
+ const config = this.loadedConfig || this.defaultConfig;
363
+ const excludeFiles = config.files?.exclude || [];
364
+
365
+ return excludeFiles.some((pattern) => matchesGlobPattern(fileName, pattern));
366
+ }
367
+ }
368
+
369
+ export default RagConfigManager;