@grec0/memory-bank-mcp 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +420 -425
- package/dist/common/chunker.js +515 -3
- package/dist/common/embeddingService.js +51 -39
- package/dist/common/fileScanner.js +48 -29
- package/dist/common/indexManager.js +85 -46
- package/dist/common/logger.js +54 -0
- package/dist/common/vectorStore.js +47 -4
- package/dist/index.js +1 -1
- package/dist/tools/analyzeCoverage.js +66 -46
- package/dist/tools/indexCode.js +1 -0
- package/package.json +2 -1
- package/dist/common/setup.js +0 -49
- package/dist/common/utils.js +0 -215
- package/dist/operations/boardMemberships.js +0 -186
- package/dist/operations/boards.js +0 -268
- package/dist/operations/cards.js +0 -426
- package/dist/operations/comments.js +0 -249
- package/dist/operations/labels.js +0 -258
- package/dist/operations/lists.js +0 -157
- package/dist/operations/projects.js +0 -102
- package/dist/operations/tasks.js +0 -238
- package/dist/tools/board-summary.js +0 -151
- package/dist/tools/card-details.js +0 -106
- package/dist/tools/create-card-with-tasks.js +0 -81
- package/dist/tools/workflow-actions.js +0 -145
package/dist/common/chunker.js
CHANGED
|
@@ -6,8 +6,69 @@ import * as fs from "fs";
|
|
|
6
6
|
import { parse } from "@babel/parser";
|
|
7
7
|
import traverseLib from "@babel/traverse";
|
|
8
8
|
import * as crypto from "crypto";
|
|
9
|
+
import { getEncoding } from "js-tiktoken";
|
|
9
10
|
// Handle traverse library export
|
|
10
11
|
const traverse = typeof traverseLib === 'function' ? traverseLib : traverseLib.default;
|
|
12
|
+
// Initialize tokenizer (cl100k_base matches OpenAI embedding models)
const enc = getEncoding("cl100k_base");
/**
 * Enforces token limits on chunks, splitting them if necessary.
 *
 * Chunks whose encoded length exceeds `maxTokens` are split line by line.
 * A single line that is itself over the limit (common in minified files,
 * which are routed here by `chunkCode`) is further split into fixed-size
 * token windows, so no emitted chunk can exceed the limit — the original
 * line-only splitting could still emit over-limit chunks in that case.
 *
 * @param {Array<Object>} chunks - Chunks with `id`, `content`, `startLine`,
 *   `endLine`, and optional `name`.
 * @param {number} [maxTokens=8000] - Maximum tokens allowed per chunk.
 * @returns {Array<Object>} Chunks, each at or below the token limit.
 */
function enforceTokenLimits(chunks, maxTokens = 8000) {
    const result = [];
    for (const chunk of chunks) {
        const tokens = enc.encode(chunk.content);
        if (tokens.length <= maxTokens) {
            result.push(chunk);
            continue;
        }
        // Split into smaller chunks, line by line.
        const lines = chunk.content.split('\n');
        let currentChunkLines = [];
        let currentTokens = 0;
        let startLine = chunk.startLine;
        let partIndex = 1;
        // Emit one sub-chunk preserving the parent chunk's metadata.
        const pushPart = (content, partStartLine, partEndLine) => {
            result.push({
                ...chunk,
                id: `${chunk.id}-${partIndex}`,
                content,
                startLine: partStartLine,
                endLine: partEndLine,
                name: chunk.name ? `${chunk.name} (Part ${partIndex})` : undefined
            });
            partIndex++;
        };
        // Flush the accumulated lines (if any) as one sub-chunk.
        const flush = () => {
            if (currentChunkLines.length > 0) {
                pushPart(currentChunkLines.join('\n'), startLine, startLine + currentChunkLines.length - 1);
                startLine += currentChunkLines.length;
                currentChunkLines = [];
                currentTokens = 0;
            }
        };
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i];
            const lineTokenArr = enc.encode(line + '\n');
            const lineTokens = lineTokenArr.length;
            if (lineTokens > maxTokens) {
                // A single line over the limit (e.g. minified code): split the
                // line itself into token windows so the limit always holds.
                flush();
                for (let t = 0; t < lineTokenArr.length; t += maxTokens) {
                    pushPart(enc.decode(lineTokenArr.slice(t, t + maxTokens)), startLine, startLine);
                }
                startLine += 1;
                continue;
            }
            if (currentTokens + lineTokens > maxTokens) {
                // Current accumulation would overflow: push it first.
                flush();
            }
            currentChunkLines.push(line);
            currentTokens += lineTokens;
        }
        // Remaining lines form the last part; endLine is best effort.
        if (currentChunkLines.length > 0) {
            pushPart(currentChunkLines.join('\n'), startLine, chunk.endLine);
        }
    }
    return result;
}
|
11
72
|
/**
|
|
12
73
|
* Generates unique ID for a chunk based on content and metadata
|
|
13
74
|
*/
|
|
@@ -319,6 +380,438 @@ function chunkPython(options) {
|
|
|
319
380
|
}
|
|
320
381
|
return chunks;
|
|
321
382
|
}
|
|
383
|
+
/**
 * Chunks HTML/Vue/Svelte code by extracting script/style blocks.
 *
 * Script and style blocks are delegated to the matching language chunkers
 * (JS/TS, CSS) with line numbers re-based to the containing file. A
 * `<template>` block (Vue/Svelte) is chunked as HTML; pure HTML files with
 * no `<template>` fall back to fixed-size chunking of the whole file, so
 * some overlap with the script/style chunks is possible — acceptable for
 * vector search.
 *
 * @param {Object} options - Chunking options (`content`, `language`,
 *   `filePath`, `maxChunkSize`, `chunkOverlap`, ...).
 * @returns {Array<Object>} Extracted chunks.
 */
function chunkHtml(options) {
    const chunks = [];
    const content = options.content;
    // NOTE(review): context is computed but not used directly here; the
    // delegated sub-chunkers compute their own context.
    const context = extractContext(content, options.language);
    // Helper to add chunks produced by another language's chunker,
    // offsetting line numbers back to this file's coordinates.
    const addSubChunks = (subContent, subLang, offsetLine) => {
        // Unsupported languages fall back to fixed-size chunking below.
        const subOptions = {
            ...options,
            content: subContent,
            language: subLang,
        };
        let subChunks = [];
        if (subLang === "typescript" || subLang === "javascript" || subLang === "ts" || subLang === "js") {
            subChunks = chunkTypeScriptJavaScript(subOptions);
        }
        else if (subLang === "css" || subLang === "scss" || subLang === "sass") {
            subChunks = chunkCss(subOptions);
        }
        else {
            subChunks = chunkByFixedSize(subOptions);
        }
        subChunks.forEach(chunk => {
            chunk.startLine += offsetLine;
            chunk.endLine += offsetLine;
            // Regenerate ID so it reflects the corrected line numbers and file context
            chunk.id = generateChunkId(options.filePath, chunk.content, chunk.startLine);
            chunks.push(chunk);
        });
    };
    // Extract the lang="..." attribute value from a tag's attribute string.
    const langFromAttrs = (attrs, fallback) => {
        const m = /lang=["']([\w-]+)["']/.exec(attrs || "");
        return (m ? m[1] : fallback).toLowerCase();
    };
    // 1. Extract <script> blocks. Attributes may appear in any order
    // (e.g. Vue's `<script setup lang="ts">`), so match them generically
    // instead of requiring `lang` before `setup`.
    const scriptRegex = /<script\b([^>]*)>([\s\S]*?)<\/script>/gi;
    let match;
    while ((match = scriptRegex.exec(content)) !== null) {
        const scriptContent = match[2];
        // Normalize language; default to JS when no lang attribute.
        let subLang = langFromAttrs(match[1], "javascript");
        if (subLang === "ts")
            subLang = "typescript";
        if (subLang === "js")
            subLang = "javascript";
        // Calculate start line (0-indexed adjustment for the offset calc)
        const preMatch = content.substring(0, match.index);
        const startLine = preMatch.split("\n").length - 1;
        addSubChunks(scriptContent, subLang, startLine);
    }
    // 2. Extract <style> blocks (`scoped`, `lang` etc. in any order).
    const styleRegex = /<style\b([^>]*)>([\s\S]*?)<\/style>/gi;
    while ((match = styleRegex.exec(content)) !== null) {
        const styleContent = match[2];
        const subLang = langFromAttrs(match[1], "css"); // Default to CSS
        const preMatch = content.substring(0, match.index);
        const startLine = preMatch.split("\n").length - 1;
        addSubChunks(styleContent, subLang, startLine);
    }
    // 3. Template/HTML structure: prefer an explicit <template> block
    // (Vue/Svelte). Strict HTML AST chunking would need a parser library,
    // so the template body is chunked with the fixed-size fallback.
    const templateRegex = /<template\b[^>]*>([\s\S]*?)<\/template>/i;
    const templateMatch = templateRegex.exec(content);
    if (templateMatch) {
        const templateContent = templateMatch[1];
        const preMatch = content.substring(0, templateMatch.index);
        const startLine = preMatch.split("\n").length - 1;
        addSubChunks(templateContent, "html", startLine);
    }
    else if (options.language === "html") {
        // Pure HTML file: fixed-size chunk the whole file as the structural
        // view. This may overlap the script/style chunks above; duplication
        // is acceptable for vector search.
        const htmlChunks = chunkByFixedSize({
            ...options,
            language: "html"
        });
        chunks.push(...htmlChunks);
    }
    return chunks;
}
|
|
483
|
+
/**
 * Chunks CSS/SCSS code by parsing rule blocks.
 *
 * Tracks brace depth line by line and finalizes a chunk whenever the text
 * returns to root level and ends in `}` or `;` (a complete rule or
 * directive). Accumulations larger than `maxChunkSize * 2` are force-split
 * as a safety valve. Brace counting is a heuristic and may miscount braces
 * that appear inside string values.
 *
 * @param {Object} options - Chunking options (`content`, `language`,
 *   `filePath`, `maxChunkSize`, ...).
 * @returns {Array<Object>} Chunks, one per top-level rule/directive group.
 */
function chunkCss(options) {
    const chunks = [];
    const lines = options.content.split("\n");
    const context = extractContext(options.content, options.language);
    let currentChunk = [];
    let chunkStartLine = 1;
    let braceDepth = 0;
    let inComment = false;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const trimmed = line.trim();
        // Track block comments so a chunk is not finalized mid-comment.
        if (trimmed.startsWith("/*") && !inComment)
            inComment = true;
        if (trimmed.endsWith("*/") && inComment)
            inComment = false;
        // Count braces to detect block boundaries
        // Simple heuristic, might fail on complex strings containing braces
        const openBraces = (line.match(/\{/g) || []).length;
        const closeBraces = (line.match(/\}/g) || []).length;
        braceDepth += openBraces - closeBraces;
        currentChunk.push(line);
        // If we are at root level (depth 0) and have content, and just closed a block or ended a property
        if (braceDepth === 0 && !inComment && currentChunk.length > 0) {
            const chunkContent = currentChunk.join("\n").trim();
            // Don't chunk empty lines
            if (chunkContent.length > 0 && chunkContent !== "}") {
                // Only finalize chunk if it looks like a complete rule or directive
                // i.e. ends with } or ;
                if (chunkContent.endsWith("}") || chunkContent.endsWith(";")) {
                    chunks.push({
                        id: generateChunkId(options.filePath, chunkContent, chunkStartLine),
                        filePath: options.filePath,
                        content: chunkContent,
                        startLine: chunkStartLine,
                        endLine: i + 1,
                        chunkType: "block", // CSS rule
                        language: options.language,
                        context,
                    });
                    currentChunk = [];
                    chunkStartLine = i + 2; // Next line
                }
            }
        }
        // Safety break for very large chunks: force split if a rule is massive.
        if (currentChunk.join("\n").length > (options.maxChunkSize * 2)) {
            const chunkContent = currentChunk.join("\n");
            // Validate content before pushing
            if (chunkContent.trim().length > 0 && chunkContent.trim() !== "}") {
                chunks.push({
                    id: generateChunkId(options.filePath, chunkContent, chunkStartLine),
                    filePath: options.filePath,
                    content: chunkContent,
                    startLine: chunkStartLine,
                    endLine: i + 1,
                    chunkType: "block",
                    language: options.language,
                    context,
                });
            }
            currentChunk = [];
            chunkStartLine = i + 2;
            braceDepth = 0; // Reset to avoid getting stuck
        }
    }
    // Remaining lines form a final chunk.
    if (currentChunk.length > 0) {
        const chunkContent = currentChunk.join("\n");
        // Validate content before pushing
        if (chunkContent.trim().length > 0 && chunkContent.trim() !== "}") {
            chunks.push({
                id: generateChunkId(options.filePath, chunkContent, chunkStartLine),
                filePath: options.filePath,
                content: chunkContent,
                startLine: chunkStartLine,
                endLine: lines.length,
                chunkType: "block",
                language: options.language,
                context,
            });
        }
    }
    return chunks;
}
|
|
572
|
+
/**
 * Chunks JSON files by parsing structure.
 *
 * Arrays produce one chunk per item; objects produce one chunk per
 * top-level key; primitives produce a single whole-file chunk. Exact line
 * numbers cannot be recovered from `JSON.parse`, so `startLine`/`endLine`
 * are approximate — for semantic search the content is what matters.
 * Invalid JSON falls back to fixed-size chunking.
 *
 * @param {Object} options - Chunking options (`content`, `filePath`, ...).
 * @returns {Array<Object>} Structural chunks of the JSON document.
 */
function chunkJson(options) {
    const chunks = [];
    // Context for JSON is usually not useful (just start of file)
    const context = "";
    try {
        const json = JSON.parse(options.content);
        if (Array.isArray(json)) {
            // Chunk array items
            json.forEach((item, index) => {
                const itemStr = JSON.stringify(item, null, 2);
                chunks.push({
                    id: generateChunkId(options.filePath, itemStr, index), // index as salt
                    filePath: options.filePath,
                    content: itemStr,
                    startLine: 1, // Unknown — line mapping is lost after JSON.parse
                    endLine: 1, // Unknown
                    chunkType: "block",
                    name: `[${index}]`,
                    language: "json",
                    context,
                });
            });
        }
        else if (typeof json === "object" && json !== null) {
            // Chunk top-level keys
            Object.keys(json).forEach((key) => {
                const valStr = JSON.stringify(json[key], null, 2);
                const chunkContent = `"${key}": ${valStr}`;
                // Oversized values are accepted as-is here; the token-level
                // split happens downstream in enforceTokenLimits.
                chunks.push({
                    id: generateChunkId(options.filePath, chunkContent, 0),
                    filePath: options.filePath,
                    content: chunkContent,
                    startLine: 1,
                    endLine: 1,
                    chunkType: "block",
                    name: key,
                    language: "json",
                    context,
                });
            });
        }
        else {
            // Primitive value: emit the whole file as a single chunk.
            chunks.push({
                id: generateChunkId(options.filePath, options.content, 1),
                filePath: options.filePath,
                content: options.content,
                startLine: 1,
                endLine: options.content.split("\n").length,
                chunkType: "file",
                language: "json",
                context, // kept for shape consistency with the other branches
            });
        }
    }
    catch (e) {
        // Fallback to fixed size if invalid JSON
        return chunkByFixedSize(options);
    }
    return chunks;
}
|
|
646
|
+
/**
 * Chunks Java code (Spring Boot support) using brace tracking and regex.
 *
 * Heuristic state machine: detects class/interface/enum declarations and
 * method declarations (access modifier + return type + name + `(`), keeps
 * annotations attached to the declaration that follows them, and closes a
 * chunk when brace depth returns to the level where the chunk began.
 * Oversized chunks are not force-split here; `enforceTokenLimits` performs
 * the strict token-level splitting downstream.
 *
 * @param {Object} options - Chunking options (`content`, `language`,
 *   `filePath`, ...).
 * @returns {Array<Object>} Class/method/block chunks, or a fixed-size
 *   fallback when no structure was recognized.
 */
function chunkJava(options) {
    const chunks = [];
    const lines = options.content.split("\n");
    const context = extractContext(options.content, options.language);
    let currentChunk = [];
    let chunkStartLine = 1;
    let braceDepth = 0;
    let inClass = false;
    let inMethod = false;
    let className;
    let methodName;
    let chunkBaseDepth = 0;
    let annotations = [];
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const trimmed = line.trim();
        // Comments are skipped for detection logic but kept in the chunk text.
        const isComment = trimmed.startsWith("//") || trimmed.startsWith("/*") || trimmed.startsWith("*");
        // Track strict brace depth
        const openBraces = (line.match(/\{/g) || []).length;
        const closeBraces = (line.match(/\}/g) || []).length;
        // Annotations belong to the next declaration's chunk.
        if (trimmed.startsWith("@") && !isComment) {
            if (currentChunk.length === 0 && annotations.length === 0) {
                chunkStartLine = i + 1;
            }
            annotations.push(line);
            currentChunk.push(line);
            continue;
        }
        // Detect Class/Interface/Enum declarations.
        const classMatch = trimmed.match(/(?:public|protected|private)?\s*(?:static)?\s*(?:class|interface|enum)\s+(\w+)/);
        if (classMatch && !isComment) {
            // If a previous chunk (beyond pending annotations) is open at this
            // depth, close it before starting the new class chunk.
            if (currentChunk.length > annotations.length && braceDepth === chunkBaseDepth) {
                const content = currentChunk.join("\n");
                chunks.push({
                    id: generateChunkId(options.filePath, content, chunkStartLine),
                    filePath: options.filePath,
                    content,
                    startLine: chunkStartLine,
                    endLine: i,
                    chunkType: inClass ? "class" : "file", // inner class
                    name: className,
                    language: options.language,
                    context
                });
                currentChunk = [...annotations]; // Start new chunk with accumulated annotations
                chunkStartLine = i + 1 - annotations.length;
            }
            else if (currentChunk.length === 0) {
                chunkStartLine = i + 1;
            }
            inClass = true;
            inMethod = false;
            className = classMatch[1];
            chunkBaseDepth = braceDepth;
            annotations = [];
        }
        // Detect Method (heuristic: access modifier + type + name + (args)),
        // avoiding control structures like if/for/while/switch/catch.
        const methodMatch = trimmed.match(/(?:public|protected|private)\s+(?:[\w<>?\[\]]+\s+)(\w+)\s*\(/);
        const isControlFlow = /^(if|for|while|switch|catch|try)\b/.test(trimmed);
        if (methodMatch && !isControlFlow && !isComment) {
            // Only direct members of the class count as methods here.
            if (braceDepth === chunkBaseDepth + 1) {
                // Previous logical block (fields, etc) ends here
                if (currentChunk.length > annotations.length) {
                    const content = currentChunk.join("\n");
                    chunks.push({
                        id: generateChunkId(options.filePath, content, chunkStartLine),
                        filePath: options.filePath,
                        content,
                        startLine: chunkStartLine,
                        endLine: i,
                        chunkType: "block",
                        name: className, // Context of class
                        language: options.language,
                        context
                    });
                }
                currentChunk = [...annotations];
                chunkStartLine = i + 1 - annotations.length;
                methodName = methodMatch[1];
                inMethod = true;
                annotations = [];
            }
        }
        currentChunk.push(line);
        braceDepth += openBraces - closeBraces;
        // Close the chunk when depth returns to where it started:
        // inside a method, returning to class level means the method closed.
        if (inMethod && braceDepth === chunkBaseDepth + 1 && closeBraces > 0) {
            const content = currentChunk.join("\n");
            chunks.push({
                id: generateChunkId(options.filePath, content, chunkStartLine),
                filePath: options.filePath,
                content,
                startLine: chunkStartLine,
                endLine: i + 1,
                chunkType: "method",
                name: methodName,
                language: options.language,
                context
            });
            currentChunk = [];
            inMethod = false;
            methodName = undefined;
            chunkStartLine = i + 2;
        }
        // If brace depth returns to chunkBaseDepth -> class closed
        else if (inClass && braceDepth === chunkBaseDepth && closeBraces > 0) {
            const content = currentChunk.join("\n");
            chunks.push({
                id: generateChunkId(options.filePath, content, chunkStartLine),
                filePath: options.filePath,
                content,
                startLine: chunkStartLine,
                endLine: i + 1,
                chunkType: "class",
                name: className,
                language: options.language,
                context
            });
            currentChunk = [];
            inClass = false;
            className = undefined;
            chunkStartLine = i + 2;
        }
        if (openBraces > 0)
            annotations = []; // Clear annotations once consumed by an opened block
    }
    // Remaining content
    if (currentChunk.length > 0) {
        const content = currentChunk.join("\n");
        if (content.trim().length > 0) {
            chunks.push({
                id: generateChunkId(options.filePath, content, chunkStartLine),
                filePath: options.filePath,
                content,
                startLine: chunkStartLine,
                endLine: lines.length,
                chunkType: "file",
                language: options.language,
                context
            });
        }
    }
    // Fallback if regex failed to find anything
    if (chunks.length === 0) {
        return chunkByFixedSize(options);
    }
    return chunks;
}
|
|
322
815
|
/**
|
|
323
816
|
* Chunks code by fixed size with overlap
|
|
324
817
|
*/
|
|
@@ -380,17 +873,36 @@ export function chunkCode(options) {
|
|
|
380
873
|
maxChunkSize: options.maxChunkSize || 1000,
|
|
381
874
|
chunkOverlap: options.chunkOverlap || 200,
|
|
382
875
|
};
|
|
876
|
+
// Force fixed-size chunking for minified files to prevent context length errors
|
|
877
|
+
if (fullOptions.filePath.includes(".min.")) {
|
|
878
|
+
const rawChunks = chunkByFixedSize(fullOptions);
|
|
879
|
+
return enforceTokenLimits(rawChunks);
|
|
880
|
+
}
|
|
383
881
|
// Route to appropriate chunking strategy
|
|
882
|
+
let chunks = [];
|
|
384
883
|
if (fullOptions.language === "typescript" || fullOptions.language === "javascript") {
|
|
385
|
-
|
|
884
|
+
chunks = chunkTypeScriptJavaScript(fullOptions);
|
|
386
885
|
}
|
|
387
886
|
else if (fullOptions.language === "python") {
|
|
388
|
-
|
|
887
|
+
chunks = chunkPython(fullOptions);
|
|
888
|
+
}
|
|
889
|
+
else if (["html", "vue", "svelte"].includes(fullOptions.language)) {
|
|
890
|
+
chunks = chunkHtml(fullOptions);
|
|
891
|
+
}
|
|
892
|
+
else if (["css", "scss", "sass", "less"].includes(fullOptions.language)) {
|
|
893
|
+
chunks = chunkCss(fullOptions);
|
|
894
|
+
}
|
|
895
|
+
else if (fullOptions.language === "json") {
|
|
896
|
+
chunks = chunkJson(fullOptions);
|
|
897
|
+
}
|
|
898
|
+
else if (fullOptions.language === "java") {
|
|
899
|
+
chunks = chunkJava(fullOptions);
|
|
389
900
|
}
|
|
390
901
|
else {
|
|
391
902
|
// For other languages, use fixed-size chunking
|
|
392
|
-
|
|
903
|
+
chunks = chunkByFixedSize(fullOptions);
|
|
393
904
|
}
|
|
905
|
+
return enforceTokenLimits(chunks);
|
|
394
906
|
}
|
|
395
907
|
/**
|
|
396
908
|
* Chunks a file by reading it from disk
|