@grec0/memory-bank-mcp 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,8 +6,69 @@ import * as fs from "fs";
  import { parse } from "@babel/parser";
  import traverseLib from "@babel/traverse";
  import * as crypto from "crypto";
+ import { getEncoding } from "js-tiktoken";
  // Handle traverse library export
  const traverse = typeof traverseLib === 'function' ? traverseLib : traverseLib.default;
+ // Initialize tokenizer
+ const enc = getEncoding("cl100k_base");
+ /**
+ * Enforces token limits on chunks, splitting them if necessary
+ */
+ function enforceTokenLimits(chunks, maxTokens = 8000) {
+ const result = [];
+ for (const chunk of chunks) {
+ const tokens = enc.encode(chunk.content);
+ if (tokens.length <= maxTokens) {
+ result.push(chunk);
+ }
+ else {
+ // Split into smaller chunks
+ const content = chunk.content;
+ const lines = content.split('\n');
+ let currentChunkLines = [];
+ let currentTokens = 0;
+ let startLine = chunk.startLine;
+ let partIndex = 1;
+ for (let i = 0; i < lines.length; i++) {
+ const line = lines[i];
+ const lineTokens = enc.encode(line + '\n').length;
+ if (currentTokens + lineTokens > maxTokens) {
+ // Push current chunk
+ if (currentChunkLines.length > 0) {
+ const subContent = currentChunkLines.join('\n');
+ result.push({
+ ...chunk,
+ id: `${chunk.id}-${partIndex}`,
+ content: subContent,
+ startLine: startLine,
+ endLine: startLine + currentChunkLines.length - 1,
+ name: chunk.name ? `${chunk.name} (Part ${partIndex})` : undefined
+ });
+ partIndex++;
+ startLine += currentChunkLines.length;
+ currentChunkLines = [];
+ currentTokens = 0;
+ }
+ }
+ currentChunkLines.push(line);
+ currentTokens += lineTokens;
+ }
+ // Remaining
+ if (currentChunkLines.length > 0) {
+ const subContent = currentChunkLines.join('\n');
+ result.push({
+ ...chunk,
+ id: `${chunk.id}-${partIndex}`,
+ content: subContent,
+ startLine: startLine,
+ endLine: chunk.endLine, // Best effort
+ name: chunk.name ? `${chunk.name} (Part ${partIndex})` : undefined
+ });
+ }
+ }
+ }
+ return result;
+ }
  /**
  * Generates unique ID for a chunk based on content and metadata
  */
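
The helper added above caps every chunk at roughly 8,000 cl100k_base tokens, re-splitting oversized chunks line by line and suffixing the part ids with -1, -2, and so on. A minimal sketch of the tokenizer side of that behavior, assuming js-tiktoken is installed; the chunk object below is made up for illustration and is not part of the package:

    import { getEncoding } from "js-tiktoken";

    const enc = getEncoding("cl100k_base");

    // Hypothetical oversized chunk, shaped like the ones in the diff above
    // ({ id, content, startLine, endLine, name? }).
    const bigChunk = {
      id: "src/huge.js-0",
      content: Array.from({ length: 30000 }, (_, i) => `const v${i} = ${i};`).join("\n"),
      startLine: 1,
      endLine: 30000,
    };

    // Far above the 8000-token default, so enforceTokenLimits([bigChunk]) would
    // return several parts with ids "src/huge.js-0-1", "src/huge.js-0-2", ...
    console.log(enc.encode(bigChunk.content).length);
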
@@ -319,6 +380,438 @@ function chunkPython(options) {
  }
  return chunks;
  }
+ /**
+ * Chunks HTML/Vue/Svelte code by extracting script/style blocks
+ */
+ function chunkHtml(options) {
+ const chunks = [];
+ const content = options.content;
+ const context = extractContext(content, options.language);
+ // Helper to add chunks from other languages
+ const addSubChunks = (subContent, subLang, offsetLine) => {
+ // If language is not supported for semantic chunking, it will fall back to fixed size
+ // We need to adjust line numbers relative to the file
+ const subOptions = {
+ ...options,
+ content: subContent,
+ language: subLang,
+ };
+ // We use the main chunkCode router to handle the sub-content
+ // This allows reusing JS/TS/CSS logic
+ let subChunks = [];
+ if (subLang === "typescript" || subLang === "javascript" || subLang === "ts" || subLang === "js") {
+ subChunks = chunkTypeScriptJavaScript(subOptions);
+ }
+ else if (subLang === "css" || subLang === "scss" || subLang === "sass") {
+ subChunks = chunkCss(subOptions);
+ }
+ else {
+ subChunks = chunkByFixedSize(subOptions);
+ }
+ subChunks.forEach(chunk => {
+ chunk.startLine += offsetLine;
+ chunk.endLine += offsetLine;
+ // Regenerate ID to ensure it includes the correct line numbers and file context
+ chunk.id = generateChunkId(options.filePath, chunk.content, chunk.startLine);
+ chunks.push(chunk);
+ });
+ };
+ // 1. Extract <script> blocks
+ const scriptRegex = /<script\s*(?:lang=["']([\w-]+)["'])?\s*(?:setup)?\s*>([\s\S]*?)<\/script>/gi;
+ let match;
+ while ((match = scriptRegex.exec(content)) !== null) {
+ const langIdx = match[1] || "javascript"; // Default to JS
+ const scriptContent = match[2];
+ // Normalize language
+ let subLang = langIdx.toLowerCase();
+ if (subLang === "ts")
+ subLang = "typescript";
+ if (subLang === "js")
+ subLang = "javascript";
+ // Calculate start line
+ const preMatch = content.substring(0, match.index);
+ const startLine = preMatch.split("\n").length - 1; // 0-indexed adjustment for calc
+ addSubChunks(scriptContent, subLang, startLine);
+ }
+ // 2. Extract <style> blocks
+ const styleRegex = /<style\s*(?:lang=["']([\w-]+)["'])?\s*(?:scoped)?\s*>([\s\S]*?)<\/style>/gi;
+ while ((match = styleRegex.exec(content)) !== null) {
+ const langIdx = match[1] || "css"; // Default to CSS
+ const styleContent = match[2];
+ // Normalize language
+ let subLang = langIdx.toLowerCase();
+ // Calculate start line
+ const preMatch = content.substring(0, match.index);
+ const startLine = preMatch.split("\n").length - 1;
+ addSubChunks(styleContent, subLang, startLine);
+ }
+ // 3. Process the template/HTML structure (rest of file or specific template block)
+ // For Vue, we might look for <template>, for pure HTML it's the whole file
+ // For simplicity, we'll try to find <template> first, if not, treat whole file (minus script/style) as HTML structure
+ // But removing script/style from content to chunk remainder is complex with line numbers.
+ // Instead, we will just chunk the whole file as "html" fixed chunks,
+ // but we can be smarter: split by top-level tags if possible?
+ // Given complexity, falling back to fixed-size chunking for the *entire* file content
+ // but labeled as "template" might be redundant with the script/style chunks.
+ // Better approach: Regex for <template> block in Vue/Svelte
+ const templateRegex = /<template>([\s\S]*?)<\/template>/i;
+ const templateMatch = templateRegex.exec(content);
+ if (templateMatch) {
+ const templateContent = templateMatch[1];
+ const preMatch = content.substring(0, templateMatch.index);
+ const startLine = preMatch.split("\n").length - 1;
+ // Chunk template as HTML (fixed size for now, strict AST for HTML is hard without lib)
+ addSubChunks(templateContent, "html", startLine);
+ }
+ else if (options.language === "html") {
+ // For pure HTML files, just use fixed size chunking but exclude script/style if possible?
+ // Actually, letting it chunk the whole file by fixed size is a safe fallback for the "structure"
+ // The script/style chunks will strictly point to logic/styles.
+ // Overlapping coverage is acceptable.
+ // Let's rely on fixed partitioning for HTML content
+ const htmlChunks = chunkByFixedSize({
+ ...options,
+ language: "html"
+ });
+ // We only add these if we are sure we aren't duplicating too much logic?
+ // Actually duplication is fine, vector search handles it.
+ // But better to separate concerns.
+ chunks.push(...htmlChunks);
+ }
+ return chunks;
+ }
+ /**
+ * Chunks CSS/SCSS code by parsing rule blocks
+ */
+ function chunkCss(options) {
+ const chunks = [];
+ const lines = options.content.split("\n");
+ const context = extractContext(options.content, options.language);
+ let currentChunk = [];
+ let chunkStartLine = 1;
+ let braceDepth = 0;
+ let inComment = false;
+ for (let i = 0; i < lines.length; i++) {
+ const line = lines[i];
+ const trimmed = line.trim();
+ if (trimmed.startsWith("/*") && !inComment)
+ inComment = true;
+ if (trimmed.endsWith("*/") && inComment)
+ inComment = false;
+ // Count braces to detect block boundaries
+ // Simple heuristic, might fail on complex strings containing braces
+ const openBraces = (line.match(/\{/g) || []).length;
+ const closeBraces = (line.match(/\}/g) || []).length;
+ braceDepth += openBraces - closeBraces;
+ currentChunk.push(line);
+ // If we are at root level (depth 0) and have content, and just closed a block or ended a property
+ if (braceDepth === 0 && !inComment && currentChunk.length > 0) {
+ const chunkContent = currentChunk.join("\n").trim();
+ // Don't chunk empty lines
+ if (chunkContent.length > 0 && chunkContent !== "}") {
+ // Only finalize chunk if it looks like a complete rule or directive
+ // i.e. ends with } or ;
+ if (chunkContent.endsWith("}") || chunkContent.endsWith(";")) {
+ chunks.push({
+ id: generateChunkId(options.filePath, chunkContent, chunkStartLine),
+ filePath: options.filePath,
+ content: chunkContent,
+ startLine: chunkStartLine,
+ endLine: i + 1,
+ chunkType: "block", // CSS rule
+ language: options.language,
+ context,
+ });
+ currentChunk = [];
+ chunkStartLine = i + 2; // Next line
+ }
+ }
+ }
+ // Safety break for very large chunks
+ if (currentChunk.join("\n").length > (options.maxChunkSize * 2)) {
+ // Force split if rule is too massive
+ const chunkContent = currentChunk.join("\n");
+ // Validate content before pushing
+ if (chunkContent.trim().length > 0 && chunkContent.trim() !== "}") {
+ chunks.push({
+ id: generateChunkId(options.filePath, chunkContent, chunkStartLine),
+ filePath: options.filePath,
+ content: chunkContent,
+ startLine: chunkStartLine,
+ endLine: i + 1,
+ chunkType: "block",
+ language: options.language,
+ context,
+ });
+ }
+ currentChunk = [];
+ chunkStartLine = i + 2;
+ braceDepth = 0; // Reset to avoid getting stuck
+ }
+ }
+ // Remaining
+ // Remaining
+ if (currentChunk.length > 0) {
+ const chunkContent = currentChunk.join("\n");
+ // Validate content before pushing
+ if (chunkContent.trim().length > 0 && chunkContent.trim() !== "}") {
+ chunks.push({
+ id: generateChunkId(options.filePath, chunkContent, chunkStartLine),
+ filePath: options.filePath,
+ content: chunkContent,
+ startLine: chunkStartLine,
+ endLine: lines.length,
+ chunkType: "block",
+ language: options.language,
+ context,
+ });
+ }
+ }
+ return chunks;
+ }
+ /**
+ * Chunks JSON files by parsing structure
+ */
+ function chunkJson(options) {
+ const chunks = [];
+ // Context for JSON is usually not useful (just start of file)
+ const context = "";
+ try {
+ const json = JSON.parse(options.content);
+ if (Array.isArray(json)) {
+ // Chunk array items
+ json.forEach((item, index) => {
+ const itemStr = JSON.stringify(item, null, 2);
+ // We can't easily get exact lines from JSON.parse
+ // So we approximate or just treat as logical chunks without strict line mapping
+ // For semantic search, the content is what matters.
+ // Line numbers will be approximate (0-0 or 1-1) unless we re-search the string in content
+ // Let's try to locate the item in string roughly? expensive.
+ // We will just create chunks with content.
+ chunks.push({
+ id: generateChunkId(options.filePath, itemStr, index), // index as salt
+ filePath: options.filePath,
+ content: itemStr,
+ startLine: 1, // Unknown
+ endLine: 1, // Unknown
+ chunkType: "block",
+ name: `[${index}]`,
+ language: "json",
+ context,
+ });
+ });
+ }
+ else if (typeof json === "object" && json !== null) {
+ // Chunk top-level keys
+ Object.keys(json).forEach((key) => {
+ const val = json[key];
+ const valStr = JSON.stringify(val, null, 2);
+ const chunkContent = `"${key}": ${valStr}`;
+ if (chunkContent.length > options.maxChunkSize) {
+ // If value is huge, maybe we should recurse or fixed-chunk it?
+ // For now, let's just push it.
+ }
+ chunks.push({
+ id: generateChunkId(options.filePath, chunkContent, 0),
+ filePath: options.filePath,
+ content: chunkContent,
+ startLine: 1,
+ endLine: 1,
+ chunkType: "block",
+ name: key,
+ language: "json",
+ context,
+ });
+ });
+ }
+ else {
+ // Primitive, single chunk
+ chunks.push({
+ id: generateChunkId(options.filePath, options.content, 1),
+ filePath: options.filePath,
+ content: options.content,
+ startLine: 1,
+ endLine: options.content.split("\n").length,
+ chunkType: "file",
+ language: "json",
+ });
+ }
+ }
+ catch (e) {
+ // Fallback to fixed size if invalid JSON
+ return chunkByFixedSize(options);
+ }
+ return chunks;
+ }
+ /**
+ * Chunks Java code (Spring Boot support) using brace tracking and regex
+ */
+ function chunkJava(options) {
+ const chunks = [];
+ const lines = options.content.split("\n");
+ const context = extractContext(options.content, options.language);
+ let currentChunk = [];
+ let chunkStartLine = 1;
+ let braceDepth = 0;
+ let inClass = false;
+ let inMethod = false;
+ let className;
+ let methodName;
+ let chunkBaseDepth = 0;
+ let annotations = [];
+ for (let i = 0; i < lines.length; i++) {
+ const line = lines[i];
+ const trimmed = line.trim();
+ // Skip comments for logic but include in chunk
+ const isComment = trimmed.startsWith("//") || trimmed.startsWith("/*") || trimmed.startsWith("*");
+ // Track strict brace depth
+ const openBraces = (line.match(/\{/g) || []).length;
+ const closeBraces = (line.match(/\}/g) || []).length;
+ // Check for annotations
+ if (trimmed.startsWith("@") && !isComment) {
+ if (currentChunk.length === 0 && annotations.length === 0) {
+ chunkStartLine = i + 1;
+ }
+ annotations.push(line);
+ // Annotations are part of the next chunk
+ currentChunk.push(line);
+ continue;
+ }
+ // Detect Class/Interface
+ const classMatch = trimmed.match(/(?:public|protected|private)?\s*(?:static)?\s*(?:class|interface|enum)\s+(\w+)/);
+ if (classMatch && !isComment) {
+ // If we are already in a chunk (e.g. previous class ended), push it
+ // But if we are just starting (annotations only), keep going
+ if (currentChunk.length > annotations.length && braceDepth === chunkBaseDepth) {
+ const content = currentChunk.join("\n");
+ chunks.push({
+ id: generateChunkId(options.filePath, content, chunkStartLine),
+ filePath: options.filePath,
+ content,
+ startLine: chunkStartLine,
+ endLine: i,
+ chunkType: inClass ? "class" : "file", // inner class
+ name: className,
+ language: options.language,
+ context
+ });
+ currentChunk = [...annotations]; // Start new chunk with potential accumulated annotations
+ chunkStartLine = i + 1 - annotations.length;
+ }
+ else if (currentChunk.length === 0) {
+ chunkStartLine = i + 1;
+ }
+ inClass = true;
+ inMethod = false;
+ className = classMatch[1];
+ chunkBaseDepth = braceDepth;
+ annotations = [];
+ }
+ // Detect Method (heuristic: access modifier + type + name + (args) + {)
+ // Avoid control structures like if/for/while/switch/catch
+ const methodMatch = trimmed.match(/(?:public|protected|private)\s+(?:[\w<>?\[\]]+\s+)(\w+)\s*\(/);
+ const isControlFlow = /^(if|for|while|switch|catch|try)\b/.test(trimmed);
+ if (methodMatch && !isControlFlow && !isComment) {
+ // if we are inside a class, this is a method chunk
+ if (braceDepth === chunkBaseDepth + 1) { // Direct member of class
+ // Previous logical block (fields, etc) ends here
+ if (currentChunk.length > annotations.length) {
+ const content = currentChunk.join("\n");
+ chunks.push({
+ id: generateChunkId(options.filePath, content, chunkStartLine),
+ filePath: options.filePath,
+ content,
+ startLine: chunkStartLine,
+ endLine: i,
+ chunkType: "block",
+ name: className, // Context of class
+ language: options.language,
+ context
+ });
+ }
+ currentChunk = [...annotations];
+ chunkStartLine = i + 1 - annotations.length;
+ methodName = methodMatch[1];
+ inMethod = true;
+ annotations = [];
+ }
+ }
+ currentChunk.push(line);
+ braceDepth += openBraces - closeBraces;
+ // Check if block ended (method or class)
+ // We close the chunk if we return to the depth where we started THIS chunk
+ // But we need to handle the case where we just closed the class itself
+ // Logic: If we are in a method, and brace depth returns to class level -> method closed
+ if (inMethod && braceDepth === chunkBaseDepth + 1 && closeBraces > 0) {
+ const content = currentChunk.join("\n");
+ chunks.push({
+ id: generateChunkId(options.filePath, content, chunkStartLine),
+ filePath: options.filePath,
+ content,
+ startLine: chunkStartLine,
+ endLine: i + 1,
+ chunkType: "method",
+ name: methodName,
+ language: options.language,
+ context
+ });
+ currentChunk = [];
+ inMethod = false;
+ methodName = undefined;
+ chunkStartLine = i + 2;
+ }
+ // If brace depth returns to chunkBaseDepth -> class closed
+ else if (inClass && braceDepth === chunkBaseDepth && closeBraces > 0) {
+ const content = currentChunk.join("\n");
+ chunks.push({
+ id: generateChunkId(options.filePath, content, chunkStartLine),
+ filePath: options.filePath,
+ content,
+ startLine: chunkStartLine,
+ endLine: i + 1,
+ chunkType: "class",
+ name: className,
+ language: options.language,
+ context
+ });
+ currentChunk = [];
+ inClass = false;
+ className = undefined;
+ chunkStartLine = i + 2;
+ }
+ // Safety break for very large chunks
+ if (currentChunk.join("\n").length > (options.maxChunkSize * 3)) {
+ // If a single method is massive, we have to split it.
+ // enforceTokenLimits will handle strict splitting, but we should probably
+ // force a commit here to avoid memory pressure if it's crazy huge
+ }
+ if (closeBraces > 0 && annotations.length > 0)
+ chunks.push(...[]); // no-op just to use variable
+ if (openBraces > 0)
+ annotations = []; // Clear annotations if we opened a brace (they were consumed)
+ }
+ // Remaining content
+ if (currentChunk.length > 0) {
+ const content = currentChunk.join("\n");
+ if (content.trim().length > 0) {
+ chunks.push({
+ id: generateChunkId(options.filePath, content, chunkStartLine),
+ filePath: options.filePath,
+ content,
+ startLine: chunkStartLine,
+ endLine: lines.length,
+ chunkType: "file",
+ language: options.language,
+ context
+ });
+ }
+ }
+ // Fallback if regex failed to find anything
+ if (chunks.length === 0) {
+ return chunkByFixedSize(options);
+ }
+ return chunks;
+ }
  /**
  * Chunks code by fixed size with overlap
  */
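
Both chunkCss and chunkJava above find block boundaries by counting braces per line instead of parsing. A stripped-down sketch of that heuristic on its own, with a made-up helper name that is not an export of this package:

    // Split a stylesheet into top-level blocks by tracking brace depth,
    // mirroring the heuristic chunkCss uses in the diff above.
    function splitTopLevelBlocks(css) {
      const blocks = [];
      let current = [];
      let depth = 0;
      for (const line of css.split("\n")) {
        depth += (line.match(/\{/g) || []).length;
        depth -= (line.match(/\}/g) || []).length;
        current.push(line);
        const text = current.join("\n").trim();
        // Back at the top level and the buffer ends a rule or directive: emit it.
        if (depth === 0 && text && (text.endsWith("}") || text.endsWith(";"))) {
          blocks.push(text);
          current = [];
        }
      }
      const rest = current.join("\n").trim();
      if (rest) blocks.push(rest);
      return blocks;
    }

    const sample = [
      '@import "reset.css";',
      ".card { color: red; }",
      "@media (min-width: 600px) {",
      "  .card { color: blue; }",
      "}",
    ].join("\n");
    console.log(splitTopLevelBlocks(sample).length); // 3 blocks

As the diff itself notes, a comment or string literal containing an unbalanced brace can throw the count off; the package guards against runaway blocks with the maxChunkSize safety split rather than with a real parser.
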
@@ -380,17 +873,36 @@ export function chunkCode(options) {
  maxChunkSize: options.maxChunkSize || 1000,
  chunkOverlap: options.chunkOverlap || 200,
  };
+ // Force fixed-size chunking for minified files to prevent context length errors
+ if (fullOptions.filePath.includes(".min.")) {
+ const rawChunks = chunkByFixedSize(fullOptions);
+ return enforceTokenLimits(rawChunks);
+ }
  // Route to appropriate chunking strategy
+ let chunks = [];
  if (fullOptions.language === "typescript" || fullOptions.language === "javascript") {
- return chunkTypeScriptJavaScript(fullOptions);
+ chunks = chunkTypeScriptJavaScript(fullOptions);
  }
  else if (fullOptions.language === "python") {
- return chunkPython(fullOptions);
+ chunks = chunkPython(fullOptions);
+ }
+ else if (["html", "vue", "svelte"].includes(fullOptions.language)) {
+ chunks = chunkHtml(fullOptions);
+ }
+ else if (["css", "scss", "sass", "less"].includes(fullOptions.language)) {
+ chunks = chunkCss(fullOptions);
+ }
+ else if (fullOptions.language === "json") {
+ chunks = chunkJson(fullOptions);
+ }
+ else if (fullOptions.language === "java") {
+ chunks = chunkJava(fullOptions);
  }
  else {
  // For other languages, use fixed-size chunking
- return chunkByFixedSize(fullOptions);
+ chunks = chunkByFixedSize(fullOptions);
  }
+ return enforceTokenLimits(chunks);
  }
  /**
  * Chunks a file by reading it from disk
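
With the routing changes above, chunkCode becomes a two-step pipeline: pick a per-language strategy (short-circuiting to fixed-size chunking for .min. files), then pass every result through enforceTokenLimits. A condensed sketch of that shape; the strategies object is hypothetical and only stands in for the concrete functions named in the diff:

    // Condensed sketch of the 0.0.3 router; the language lists mirror the diff,
    // the strategies map is a stand-in, not the package's API.
    function routeAndCap(options, strategies, enforceTokenLimits) {
      if (options.filePath.includes(".min.")) {
        return enforceTokenLimits(strategies.fixed(options));
      }
      let pick = strategies.fixed;
      if (["typescript", "javascript"].includes(options.language)) pick = strategies.tsjs;
      else if (options.language === "python") pick = strategies.python;
      else if (["html", "vue", "svelte"].includes(options.language)) pick = strategies.html;
      else if (["css", "scss", "sass", "less"].includes(options.language)) pick = strategies.css;
      else if (options.language === "json") pick = strategies.json;
      else if (options.language === "java") pick = strategies.java;
      return enforceTokenLimits(pick(options));
    }

Capping in one place means every strategy, including the new HTML/CSS/JSON/Java ones, gets the same 8,000-token limit without re-implementing the split.
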