@grec0/memory-bank-mcp 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,407 @@
1
+ /**
2
+ * @fileoverview Intelligent code chunker for Memory Bank
3
+ * Fragments code intelligently using AST parsing when possible
4
+ */
5
+ import * as fs from "fs";
6
+ import { parse } from "@babel/parser";
7
+ import traverseLib from "@babel/traverse";
8
+ import * as crypto from "crypto";
// Resolve the callable traversal entry point: use the imported binding when it
// is itself a function, otherwise use its `default` property
// (presumably a CommonJS/ESM interop difference - confirm against the bundler in use).
const traverse = typeof traverseLib !== "function" ? traverseLib.default : traverseLib;
/**
 * Derives a short, deterministic identifier for a chunk.
 *
 * The file path, start line and chunk text are joined with ":" and hashed with
 * SHA-256; the first 16 hex characters of the digest are returned.
 *
 * @param {string} filePath - Path of the file the chunk belongs to.
 * @param {string} content - Text of the chunk.
 * @param {number} startLine - Line on which the chunk starts.
 * @returns {string} 16-character hexadecimal identifier.
 */
function generateChunkId(filePath, content, startLine) {
    const hasher = crypto.createHash("sha256");
    hasher.update(`${filePath}:${startLine}:${content}`);
    const digest = hasher.digest("hex");
    return digest.slice(0, 16);
}
/**
 * Collects the leading "context" lines of a source file (imports, exports and
 * comments for TypeScript/JavaScript; imports, comments and docstring
 * delimiters for Python).
 *
 * Scanning stops after 10 collected lines, or at the first non-blank line that
 * is neither collected nor starts with one of the language's import keywords.
 * Any other language yields an empty string.
 *
 * @param {string} content - Full source text.
 * @param {string} language - Language identifier ("typescript", "javascript", "python", ...).
 * @returns {string} The collected lines joined with "\n".
 */
function extractContext(content, language) {
    const sourceLines = content.split("\n");
    // Prefixes that make a line part of the context, and prefixes that let the
    // scan continue past a line without collecting it.
    let keepPrefixes;
    let passPrefixes;
    switch (language) {
        case "typescript":
        case "javascript":
            keepPrefixes = ["import ", "export ", "//", "/*", "*"];
            passPrefixes = ["import"];
            break;
        case "python":
            keepPrefixes = ["import ", "from ", "#", '"""', "'''"];
            passPrefixes = ["import", "from"];
            break;
        default:
            return "";
    }
    const startsWithAny = (text, prefixes) => prefixes.some((prefix) => text.startsWith(prefix));
    const collected = [];
    for (const rawLine of sourceLines) {
        const stripped = rawLine.trim();
        if (startsWithAny(stripped, keepPrefixes)) {
            collected.push(rawLine);
            if (collected.length >= 10) {
                break; // Limit context
            }
            continue;
        }
        // Blank lines are skipped; the first real code line ends the header.
        if (stripped && !startsWithAny(stripped, passPrefixes)) {
            break;
        }
    }
    return collected.join("\n");
}
/**
 * Chunks TypeScript/JavaScript source by walking its Babel AST.
 *
 * One chunk is produced for each function declaration, each arrow function
 * assigned to an identifier in a variable declarator, each class declaration
 * and each class method whose key is an identifier. Nested constructs are
 * reported independently, so a class chunk and its method chunks overlap.
 *
 * When nothing is extracted, or the source is no longer than
 * `options.maxChunkSize`, the result is exactly one "file" chunk covering the
 * whole source (extracted chunks are discarded so content is not duplicated).
 * If parsing or traversal throws, the error is logged and the result of
 * `chunkByFixedSize(options)` is returned instead.
 *
 * @param {object} options
 * @param {string} options.filePath - Path recorded on every chunk.
 * @param {string} options.content - Source text to chunk.
 * @param {string} options.language - Language recorded on every chunk.
 * @param {number} options.maxChunkSize - Size (in characters) up to which the file stays whole.
 * @param {number} options.chunkOverlap - Only used by the fixed-size fallback.
 * @returns {object[]} Chunk descriptors.
 */
function chunkTypeScriptJavaScript(options) {
    const chunks = [];
    const context = extractContext(options.content, options.language);
    // Split once up front; every visitor slices from this same array.
    const sourceLines = options.content.split("\n");

    // Builds the chunk for the 1-based, inclusive line range [startLine, endLine].
    const makeChunk = (startLine, endLine, chunkType, name) => {
        const content = sourceLines.slice(startLine - 1, endLine).join("\n");
        return {
            id: generateChunkId(options.filePath, content, startLine),
            filePath: options.filePath,
            content,
            startLine,
            endLine,
            chunkType,
            name,
            language: options.language,
            context,
        };
    };

    try {
        // Parse with Babel
        const ast = parse(options.content, {
            sourceType: "module",
            plugins: [
                "typescript",
                "jsx",
                "decorators-legacy",
                "classProperties",
                "classPrivateProperties",
                "classPrivateMethods",
                "exportDefaultFrom",
                "exportNamespaceFrom",
                "dynamicImport",
                "nullishCoalescingOperator",
                "optionalChaining",
                "objectRestSpread",
            ],
        });
        // Traverse AST to find functions, classes, and methods
        traverse(ast, {
            FunctionDeclaration({ node }) {
                if (node.loc) {
                    chunks.push(makeChunk(node.loc.start.line, node.loc.end.line, "function", node.id?.name));
                }
            },
            ArrowFunctionExpression({ node, parent }) {
                // Only capture named arrow functions (const foo = () => {})
                if (parent.type === "VariableDeclarator" &&
                    parent.id.type === "Identifier" &&
                    node.loc) {
                    // Start at the declarator so the chunk includes the variable name.
                    const startLine = parent.loc?.start.line || node.loc.start.line;
                    chunks.push(makeChunk(startLine, node.loc.end.line, "function", parent.id.name));
                }
            },
            ClassDeclaration({ node }) {
                if (node.loc) {
                    chunks.push(makeChunk(node.loc.start.line, node.loc.end.line, "class", node.id?.name));
                }
            },
            ClassMethod({ node }) {
                // Computed / string / private keys have no plain name and are skipped.
                if (node.loc && node.key.type === "Identifier") {
                    chunks.push(makeChunk(node.loc.start.line, node.loc.end.line, "method", node.key.name));
                }
            },
        });
        // If no chunks were extracted or the file is small, the file is the
        // single chunk. Returning it alone (rather than appending it to the
        // extracted chunks) keeps the same text from being emitted twice.
        if (chunks.length === 0 || options.content.length <= options.maxChunkSize) {
            return [
                {
                    id: generateChunkId(options.filePath, options.content, 1),
                    filePath: options.filePath,
                    content: options.content,
                    startLine: 1,
                    endLine: sourceLines.length,
                    chunkType: "file",
                    language: options.language,
                    context,
                },
            ];
        }
    }
    catch (error) {
        console.error(`AST parsing failed for ${options.filePath}, falling back to fixed chunking: ${error}`);
        // Fallback to fixed chunking if AST parsing fails
        return chunkByFixedSize(options);
    }
    return chunks;
}
/**
 * Chunks Python source with line-based pattern matching (no real parser).
 *
 * A new chunk is opened at every line that starts (after indentation) with
 * `def `, `async def ` or `class `. A function/class chunk ends at the next
 * such line, or at the first non-blank, non-comment line indented no deeper
 * than the line that opened it; the lines that follow form a "block" chunk
 * until the next definition. Lines before the first definition also form a
 * "block" chunk.
 *
 * Each chunk's type and name are fixed when the chunk is opened, so they
 * always describe the chunk's own content:
 *  - "class"    : opened by a `class` line, named after the class;
 *  - "method"   : opened by a `def` indented deeper than the enclosing class line;
 *  - "function" : any other `def`;
 *  - "block"    : everything else, with `name` undefined.
 *
 * @param {object} options
 * @param {string} options.filePath - Path recorded on every chunk.
 * @param {string} options.content - Python source text.
 * @param {string} options.language - Language recorded on every chunk.
 * @returns {object[]} Chunk descriptors in source order; never empty.
 */
function chunkPython(options) {
    const chunks = [];
    const lines = options.content.split("\n");
    const context = extractContext(options.content, options.language);

    // Chunk currently being accumulated.
    let current = { lines: [], startLine: 1, chunkType: "block", name: undefined, indent: 0 };
    // Indentation of the class whose body is being scanned, or null outside any class.
    let classIndent = null;

    // Emits the accumulated chunk (if any) ending at 1-based line `endLine`.
    const flush = (endLine) => {
        if (current.lines.length === 0) {
            return;
        }
        const content = current.lines.join("\n");
        chunks.push({
            id: generateChunkId(options.filePath, content, current.startLine),
            filePath: options.filePath,
            content,
            startLine: current.startLine,
            endLine,
            chunkType: current.chunkType,
            name: current.name,
            language: options.language,
            context,
        });
    };
    // Starts a new chunk whose first line is `line` (1-based number `startLine`).
    const open = (line, startLine, chunkType, name, indent) => {
        current = { lines: [line], startLine, chunkType, name, indent };
    };

    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const trimmed = line.trim();
        const indent = line.length - line.trimStart().length;
        const isCode = trimmed !== "" && !trimmed.startsWith("#");

        // A code line at or left of the class header's indentation ends that class body.
        if (classIndent !== null && isCode && indent <= classIndent) {
            classIndent = null;
        }

        if (/^(?:async\s+)?def\s/.test(trimmed)) {
            // `i` is the 1-based number of the previous line, i.e. the end of the old chunk.
            flush(i);
            const match = trimmed.match(/def\s+(\w+)/);
            const isMethod = classIndent !== null && indent > classIndent;
            open(line, i + 1, isMethod ? "method" : "function", match ? match[1] : undefined, indent);
        }
        else if (trimmed.startsWith("class ")) {
            flush(i);
            const match = trimmed.match(/class\s+(\w+)/);
            classIndent = indent;
            open(line, i + 1, "class", match ? match[1] : undefined, indent);
        }
        else if (current.chunkType !== "block" && isCode && indent <= current.indent) {
            // Dedent back to (or past) the definition's own level: the definition is over.
            flush(i);
            open(line, i + 1, "block", undefined, indent);
        }
        else {
            current.lines.push(line);
        }
    }
    // Save final chunk. `split` always yields at least one line, so something is always pending here.
    flush(lines.length);
    return chunks;
}
/**
 * Chunks text into consecutive line-aligned "block" chunks of roughly
 * `options.maxChunkSize` characters, with trailing lines of each chunk
 * repeated at the start of the next one.
 *
 * Lines are accumulated (each counted as its length plus one for the newline)
 * until the running size reaches `maxChunkSize`; the chunk is then emitted and
 * the last few lines are carried over. The overlap is configured in characters
 * and converted to a line count at 50 characters per line.
 *
 * The carry-over is clamped so that it
 *  - is empty when the computed overlap is zero or not a finite number,
 *  - never contains the whole chunk just emitted (each chunk adds new lines),
 *  - is itself smaller than `maxChunkSize`.
 * A trailing chunk is emitted only if it holds lines not already emitted.
 *
 * @param {object} options
 * @param {string} options.filePath - Path recorded on every chunk.
 * @param {string} options.content - Text to chunk.
 * @param {string} options.language - Language recorded on every chunk.
 * @param {number} options.maxChunkSize - Target chunk size in characters.
 * @param {number} options.chunkOverlap - Desired overlap in characters.
 * @returns {object[]} Chunk descriptors; each `content` equals source lines
 *   `startLine`..`endLine` (1-based, inclusive) joined with "\n".
 */
function chunkByFixedSize(options) {
    const chunks = [];
    const lines = options.content.split("\n");
    const context = extractContext(options.content, options.language);
    // Approximate number of lines to repeat between consecutive chunks.
    const requestedOverlap = Math.floor(options.chunkOverlap / 50);
    const overlapLines = Number.isFinite(requestedOverlap) ? Math.max(0, requestedOverlap) : 0;
    const sizeOf = (chunkLines) => chunkLines.reduce((sum, l) => sum + l.length + 1, 0);

    let currentLines = [];
    let currentSize = 0;
    let chunkStartLine = 1;
    // True while currentLines holds at least one line that no emitted chunk contains.
    let hasNewLines = false;

    const emit = (endLine) => {
        const content = currentLines.join("\n");
        chunks.push({
            id: generateChunkId(options.filePath, content, chunkStartLine),
            filePath: options.filePath,
            content,
            startLine: chunkStartLine,
            endLine,
            chunkType: "block",
            language: options.language,
            context,
        });
    };

    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        currentLines.push(line);
        currentSize += line.length + 1; // +1 for newline
        hasNewLines = true;
        // If we've reached max chunk size
        if (currentSize >= options.maxChunkSize) {
            emit(i + 1);
            // Keep at most all-but-one line. Note `slice(-0)` would return the
            // whole array, so a zero overlap must be handled explicitly.
            const keep = Math.min(overlapLines, currentLines.length - 1);
            let carried = keep > 0 ? currentLines.slice(-keep) : [];
            let carriedSize = sizeOf(carried);
            // Very long lines can make the overlap alone exceed the limit; trim it from the front.
            while (carried.length > 0 && carriedSize >= options.maxChunkSize) {
                carriedSize -= carried[0].length + 1;
                carried = carried.slice(1);
            }
            currentLines = carried;
            currentSize = carriedSize;
            // Line i + 1 was the last emitted; the carried lines end there.
            chunkStartLine = i + 2 - carried.length;
            hasNewLines = false;
        }
    }
    // Add remaining content as final chunk (skip a tail made only of overlap).
    if (hasNewLines) {
        emit(lines.length);
    }
    return chunks;
}
/**
 * Entry point for chunking: fills in default sizes and dispatches on language.
 *
 * "typescript" and "javascript" go to the AST-based chunker, "python" to the
 * pattern-based chunker, and every other language to fixed-size chunking.
 *
 * @param {object} options
 * @param {string} options.filePath - Path recorded on the chunks.
 * @param {string} options.content - Source text to chunk.
 * @param {string} options.language - Language identifier used for routing.
 * @param {number} [options.maxChunkSize] - Falls back to 1000 when falsy.
 * @param {number} [options.chunkOverlap] - Falls back to 200 when falsy.
 * @returns {object[]} Chunks produced by the selected strategy.
 */
export function chunkCode(options) {
    const { filePath, content, language } = options;
    const fullOptions = {
        filePath,
        content,
        language,
        maxChunkSize: options.maxChunkSize || 1000,
        chunkOverlap: options.chunkOverlap || 200,
    };
    // Route to appropriate chunking strategy
    switch (language) {
        case "typescript":
        case "javascript":
            return chunkTypeScriptJavaScript(fullOptions);
        case "python":
            return chunkPython(fullOptions);
        default:
            // For other languages, use fixed-size chunking
            return chunkByFixedSize(fullOptions);
    }
}
/**
 * Reads a file synchronously as UTF-8 text and chunks it with `chunkCode`.
 *
 * @param {string} filePath - File to read; also recorded on the chunks.
 * @param {string} language - Language identifier forwarded to `chunkCode`.
 * @param {number} [maxChunkSize] - Forwarded unchanged to `chunkCode`.
 * @param {number} [chunkOverlap] - Forwarded unchanged to `chunkCode`.
 * @returns {object[]} Whatever `chunkCode` returns for the file's content.
 */
export function chunkFile(filePath, language, maxChunkSize, chunkOverlap) {
    const request = {
        filePath,
        content: fs.readFileSync(filePath, "utf-8"),
        language,
        maxChunkSize,
        chunkOverlap,
    };
    return chunkCode(request);
}