@mhalder/qdrant-mcp-server 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codecov.yml +16 -0
- package/CHANGELOG.md +18 -0
- package/README.md +236 -9
- package/build/code/chunker/base.d.ts +19 -0
- package/build/code/chunker/base.d.ts.map +1 -0
- package/build/code/chunker/base.js +5 -0
- package/build/code/chunker/base.js.map +1 -0
- package/build/code/chunker/character-chunker.d.ts +22 -0
- package/build/code/chunker/character-chunker.d.ts.map +1 -0
- package/build/code/chunker/character-chunker.js +111 -0
- package/build/code/chunker/character-chunker.js.map +1 -0
- package/build/code/chunker/tree-sitter-chunker.d.ts +29 -0
- package/build/code/chunker/tree-sitter-chunker.d.ts.map +1 -0
- package/build/code/chunker/tree-sitter-chunker.js +213 -0
- package/build/code/chunker/tree-sitter-chunker.js.map +1 -0
- package/build/code/config.d.ts +11 -0
- package/build/code/config.d.ts.map +1 -0
- package/build/code/config.js +145 -0
- package/build/code/config.js.map +1 -0
- package/build/code/indexer.d.ts +42 -0
- package/build/code/indexer.d.ts.map +1 -0
- package/build/code/indexer.js +508 -0
- package/build/code/indexer.js.map +1 -0
- package/build/code/metadata.d.ts +32 -0
- package/build/code/metadata.d.ts.map +1 -0
- package/build/code/metadata.js +128 -0
- package/build/code/metadata.js.map +1 -0
- package/build/code/scanner.d.ts +35 -0
- package/build/code/scanner.d.ts.map +1 -0
- package/build/code/scanner.js +108 -0
- package/build/code/scanner.js.map +1 -0
- package/build/code/sync/merkle.d.ts +45 -0
- package/build/code/sync/merkle.d.ts.map +1 -0
- package/build/code/sync/merkle.js +116 -0
- package/build/code/sync/merkle.js.map +1 -0
- package/build/code/sync/snapshot.d.ts +41 -0
- package/build/code/sync/snapshot.d.ts.map +1 -0
- package/build/code/sync/snapshot.js +91 -0
- package/build/code/sync/snapshot.js.map +1 -0
- package/build/code/sync/synchronizer.d.ts +53 -0
- package/build/code/sync/synchronizer.d.ts.map +1 -0
- package/build/code/sync/synchronizer.js +132 -0
- package/build/code/sync/synchronizer.js.map +1 -0
- package/build/code/types.d.ts +98 -0
- package/build/code/types.d.ts.map +1 -0
- package/build/code/types.js +5 -0
- package/build/code/types.js.map +1 -0
- package/build/index.js +250 -0
- package/build/index.js.map +1 -1
- package/examples/code-search/README.md +271 -0
- package/package.json +13 -1
- package/src/code/chunker/base.ts +22 -0
- package/src/code/chunker/character-chunker.ts +131 -0
- package/src/code/chunker/tree-sitter-chunker.ts +250 -0
- package/src/code/config.ts +156 -0
- package/src/code/indexer.ts +613 -0
- package/src/code/metadata.ts +153 -0
- package/src/code/scanner.ts +124 -0
- package/src/code/sync/merkle.ts +136 -0
- package/src/code/sync/snapshot.ts +110 -0
- package/src/code/sync/synchronizer.ts +154 -0
- package/src/code/types.ts +117 -0
- package/src/index.ts +296 -0
- package/tests/code/chunker/character-chunker.test.ts +141 -0
- package/tests/code/chunker/tree-sitter-chunker.test.ts +275 -0
- package/tests/code/fixtures/sample-py/calculator.py +32 -0
- package/tests/code/fixtures/sample-ts/async-operations.ts +120 -0
- package/tests/code/fixtures/sample-ts/auth.ts +31 -0
- package/tests/code/fixtures/sample-ts/config.ts +52 -0
- package/tests/code/fixtures/sample-ts/database.ts +50 -0
- package/tests/code/fixtures/sample-ts/index.ts +39 -0
- package/tests/code/fixtures/sample-ts/types-advanced.ts +132 -0
- package/tests/code/fixtures/sample-ts/utils.ts +105 -0
- package/tests/code/fixtures/sample-ts/validator.ts +169 -0
- package/tests/code/indexer.test.ts +828 -0
- package/tests/code/integration.test.ts +708 -0
- package/tests/code/metadata.test.ts +457 -0
- package/tests/code/scanner.test.ts +131 -0
- package/tests/code/sync/merkle.test.ts +406 -0
- package/tests/code/sync/snapshot.test.ts +360 -0
- package/tests/code/sync/synchronizer.test.ts +501 -0
- package/vitest.config.ts +1 -0

package/src/code/types.ts
ADDED
@@ -0,0 +1,117 @@
+/**
+ * Type definitions for code vectorization module
+ */
+
+export interface CodeConfig {
+  // Chunking
+  chunkSize: number;
+  chunkOverlap: number;
+  enableASTChunking: boolean;
+
+  // File discovery
+  supportedExtensions: string[];
+  ignorePatterns: string[];
+  customExtensions?: string[];
+  customIgnorePatterns?: string[];
+
+  // Indexing
+  batchSize: number; // Embeddings per batch
+  maxChunksPerFile?: number;
+  maxTotalChunks?: number;
+
+  // Search
+  defaultSearchLimit: number;
+  enableHybridSearch: boolean;
+}
+
+export interface ScannerConfig {
+  supportedExtensions: string[];
+  ignorePatterns: string[];
+  customIgnorePatterns?: string[];
+}
+
+export interface ChunkerConfig {
+  chunkSize: number;
+  chunkOverlap: number;
+  maxChunkSize: number;
+}
+
+export interface IndexOptions {
+  forceReindex?: boolean;
+  extensions?: string[];
+  ignorePatterns?: string[];
+}
+
+export interface IndexStats {
+  filesScanned: number;
+  filesIndexed: number;
+  chunksCreated: number;
+  durationMs: number;
+  status: "completed" | "partial" | "failed";
+  errors?: string[];
+}
+
+export interface ChangeStats {
+  filesAdded: number;
+  filesModified: number;
+  filesDeleted: number;
+  chunksAdded: number;
+  chunksDeleted: number;
+  durationMs: number;
+}
+
+export interface CodeSearchResult {
+  content: string;
+  filePath: string;
+  startLine: number;
+  endLine: number;
+  language: string;
+  score: number;
+  fileExtension: string;
+}
+
+export interface SearchOptions {
+  limit?: number;
+  useHybrid?: boolean;
+  fileTypes?: string[];
+  pathPattern?: string;
+  scoreThreshold?: number;
+}
+
+export interface IndexStatus {
+  isIndexed: boolean;
+  collectionName?: string;
+  filesCount?: number;
+  chunksCount?: number;
+  lastUpdated?: Date;
+  languages?: string[];
+}
+
+export type ProgressCallback = (progress: ProgressUpdate) => void;
+
+export interface ProgressUpdate {
+  phase: "scanning" | "chunking" | "embedding" | "storing";
+  current: number;
+  total: number;
+  percentage: number;
+  message: string;
+}
+
+export interface CodeChunk {
+  content: string;
+  startLine: number;
+  endLine: number;
+  metadata: {
+    filePath: string;
+    language: string;
+    chunkIndex: number;
+    chunkType?: "function" | "class" | "interface" | "block";
+    name?: string; // Function/class name if applicable
+  };
+}
+
+export interface FileChanges {
+  added: string[];
+  modified: string[];
+  deleted: string[];
+}
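For orientation, here is a minimal sketch (not part of the package diff) showing how two of these types fit together: a ProgressCallback handling ProgressUpdate values, mirroring the stderr logging that src/index.ts passes to the indexer below. The type is restated locally only to keep the snippet self-contained.

```typescript
// Minimal sketch, assuming the ProgressUpdate shape defined in types.ts above.
// Not part of the published package; the type is inlined to keep this runnable on its own.
type ProgressUpdate = {
  phase: "scanning" | "chunking" | "embedding" | "storing";
  current: number;
  total: number;
  percentage: number;
  message: string;
};

type ProgressCallback = (progress: ProgressUpdate) => void;

// Mirrors how src/index.ts reports indexing progress to stderr.
const onProgress: ProgressCallback = (progress) => {
  console.error(`[${progress.phase}] ${progress.percentage}% - ${progress.message}`);
};
```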
package/src/index.ts
CHANGED
@@ -17,6 +17,16 @@ import {
 import Bottleneck from "bottleneck";
 import express from "express";
 import { z } from "zod";
+import {
+  DEFAULT_BATCH_SIZE,
+  DEFAULT_CHUNK_OVERLAP,
+  DEFAULT_CHUNK_SIZE,
+  DEFAULT_CODE_EXTENSIONS,
+  DEFAULT_IGNORE_PATTERNS,
+  DEFAULT_SEARCH_LIMIT,
+} from "./code/config.js";
+import { CodeIndexer } from "./code/indexer.js";
+import type { CodeConfig } from "./code/types.js";
 import { EmbeddingProviderFactory } from "./embeddings/factory.js";
 import { BM25SparseVectorGenerator } from "./embeddings/sparse.js";
 import { getPrompt, listPrompts, loadPromptsConfig, type PromptsConfig } from "./prompts/index.js";

@@ -144,6 +154,20 @@ async function checkOllamaAvailability() {
 const qdrant = new QdrantManager(QDRANT_URL);
 const embeddings = EmbeddingProviderFactory.createFromEnv();

+// Initialize code indexer
+const codeConfig: CodeConfig = {
+  chunkSize: parseInt(process.env.CODE_CHUNK_SIZE || String(DEFAULT_CHUNK_SIZE), 10),
+  chunkOverlap: parseInt(process.env.CODE_CHUNK_OVERLAP || String(DEFAULT_CHUNK_OVERLAP), 10),
+  enableASTChunking: process.env.CODE_ENABLE_AST !== "false",
+  supportedExtensions: DEFAULT_CODE_EXTENSIONS,
+  ignorePatterns: DEFAULT_IGNORE_PATTERNS,
+  batchSize: parseInt(process.env.CODE_BATCH_SIZE || String(DEFAULT_BATCH_SIZE), 10),
+  defaultSearchLimit: parseInt(process.env.CODE_SEARCH_LIMIT || String(DEFAULT_SEARCH_LIMIT), 10),
+  enableHybridSearch: process.env.CODE_ENABLE_HYBRID === "true",
+};
+
+const codeIndexer = new CodeIndexer(qdrant, embeddings, codeConfig);
+
 // Load prompts configuration if file exists
 let promptsConfig: PromptsConfig | null = null;
 if (existsSync(PROMPTS_CONFIG_FILE)) {

@@ -366,6 +390,111 @@ function registerHandlers(server: Server) {
           required: ["collection", "query"],
         },
       },
+      {
+        name: "index_codebase",
+        description:
+          "Index a codebase for semantic code search. Automatically discovers files, chunks code intelligently using AST-aware parsing, and stores in vector database. Respects .gitignore and other ignore files.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            path: {
+              type: "string",
+              description: "Absolute or relative path to codebase root directory",
+            },
+            forceReindex: {
+              type: "boolean",
+              description: "Force full re-index even if already indexed (default: false)",
+            },
+            extensions: {
+              type: "array",
+              items: { type: "string" },
+              description: "Custom file extensions to index (e.g., ['.proto', '.graphql'])",
+            },
+            ignorePatterns: {
+              type: "array",
+              items: { type: "string" },
+              description: "Additional patterns to ignore (e.g., ['**/test/**', '**/*.test.ts'])",
+            },
+          },
+          required: ["path"],
+        },
+      },
+      {
+        name: "search_code",
+        description:
+          "Search indexed codebase using natural language queries. Returns semantically relevant code chunks with file paths and line numbers.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            path: {
+              type: "string",
+              description: "Path to codebase (must be indexed first)",
+            },
+            query: {
+              type: "string",
+              description: "Natural language search query (e.g., 'authentication logic')",
+            },
+            limit: {
+              type: "number",
+              description: "Maximum number of results (default: 5, max: 100)",
+            },
+            fileTypes: {
+              type: "array",
+              items: { type: "string" },
+              description: "Filter by file extensions (e.g., ['.ts', '.py'])",
+            },
+            pathPattern: {
+              type: "string",
+              description: "Filter by path glob pattern (e.g., 'src/services/**')",
+            },
+          },
+          required: ["path", "query"],
+        },
+      },
+      {
+        name: "reindex_changes",
+        description:
+          "Incrementally re-index only changed files. Detects added, modified, and deleted files since last index. Requires previous indexing with index_codebase.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            path: {
+              type: "string",
+              description: "Path to codebase",
+            },
+          },
+          required: ["path"],
+        },
+      },
+      {
+        name: "get_index_status",
+        description: "Get indexing status and statistics for a codebase.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            path: {
+              type: "string",
+              description: "Path to codebase",
+            },
+          },
+          required: ["path"],
+        },
+      },
+      {
+        name: "clear_index",
+        description:
+          "Delete all indexed data for a codebase. This is irreversible and will remove the entire collection.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            path: {
+              type: "string",
+              description: "Path to codebase",
+            },
+          },
+          required: ["path"],
+        },
+      },
     ],
   };
 });

@@ -600,6 +729,173 @@ function registerHandlers(server: Server) {
         };
       }

+      case "index_codebase": {
+        const IndexCodebaseSchema = z.object({
+          path: z.string(),
+          forceReindex: z.boolean().optional(),
+          extensions: z.array(z.string()).optional(),
+          ignorePatterns: z.array(z.string()).optional(),
+        });
+
+        const { path, forceReindex, extensions, ignorePatterns } =
+          IndexCodebaseSchema.parse(args);
+
+        const stats = await codeIndexer.indexCodebase(
+          path,
+          { forceReindex, extensions, ignorePatterns },
+          (progress) => {
+            // Progress callback - could send progress updates via SSE in future
+            console.error(`[${progress.phase}] ${progress.percentage}% - ${progress.message}`);
+          }
+        );
+
+        let statusMessage = `Indexed ${stats.filesIndexed}/${stats.filesScanned} files (${stats.chunksCreated} chunks) in ${(stats.durationMs / 1000).toFixed(1)}s`;
+
+        if (stats.status === "partial") {
+          statusMessage += `\n\nWarnings:\n${stats.errors?.join("\n")}`;
+        } else if (stats.status === "failed") {
+          statusMessage = `Indexing failed:\n${stats.errors?.join("\n")}`;
+        }
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: statusMessage,
+            },
+          ],
+          isError: stats.status === "failed",
+        };
+      }
+
+      case "search_code": {
+        const SearchCodeSchema = z.object({
+          path: z.string(),
+          query: z.string(),
+          limit: z.number().optional(),
+          fileTypes: z.array(z.string()).optional(),
+          pathPattern: z.string().optional(),
+        });
+
+        const { path, query, limit, fileTypes, pathPattern } = SearchCodeSchema.parse(args);
+
+        const results = await codeIndexer.searchCode(path, query, {
+          limit,
+          fileTypes,
+          pathPattern,
+        });
+
+        if (results.length === 0) {
+          return {
+            content: [
+              {
+                type: "text",
+                text: `No results found for query: "${query}"`,
+              },
+            ],
+          };
+        }
+
+        // Format results with file references
+        const formattedResults = results
+          .map(
+            (r, idx) =>
+              `\n--- Result ${idx + 1} (score: ${r.score.toFixed(3)}) ---\n` +
+              `File: ${r.filePath}:${r.startLine}-${r.endLine}\n` +
+              `Language: ${r.language}\n\n` +
+              `${r.content}\n`
+          )
+          .join("\n");
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Found ${results.length} result(s):\n${formattedResults}`,
+            },
+          ],
+        };
+      }
+
+      case "get_index_status": {
+        const GetIndexStatusSchema = z.object({
+          path: z.string(),
+        });
+
+        const { path } = GetIndexStatusSchema.parse(args);
+        const status = await codeIndexer.getIndexStatus(path);
+
+        if (!status.isIndexed) {
+          return {
+            content: [
+              {
+                type: "text",
+                text: `Codebase at "${path}" is not indexed. Use index_codebase to index it first.`,
+              },
+            ],
+          };
+        }
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: JSON.stringify(status, null, 2),
+            },
+          ],
+        };
+      }
+
+      case "reindex_changes": {
+        const ReindexChangesSchema = z.object({
+          path: z.string(),
+        });
+
+        const { path } = ReindexChangesSchema.parse(args);
+
+        const stats = await codeIndexer.reindexChanges(path, (progress) => {
+          console.error(`[${progress.phase}] ${progress.percentage}% - ${progress.message}`);
+        });
+
+        let message = `Incremental re-index complete:\n`;
+        message += `- Files added: ${stats.filesAdded}\n`;
+        message += `- Files modified: ${stats.filesModified}\n`;
+        message += `- Files deleted: ${stats.filesDeleted}\n`;
+        message += `- Chunks added: ${stats.chunksAdded}\n`;
+        message += `- Duration: ${(stats.durationMs / 1000).toFixed(1)}s`;
+
+        if (stats.filesAdded === 0 && stats.filesModified === 0 && stats.filesDeleted === 0) {
+          message = `No changes detected. Codebase is up to date.`;
+        }
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: message,
+            },
+          ],
+        };
+      }
+
+      case "clear_index": {
+        const ClearIndexSchema = z.object({
+          path: z.string(),
+        });
+
+        const { path } = ClearIndexSchema.parse(args);
+        await codeIndexer.clearIndex(path);
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Index cleared for codebase at "${path}".`,
+            },
+          ],
+        };
+      }
+
       default:
         return {
           content: [
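The tool schemas above define the arguments clients send to the new code-search tools. As a rough illustration (not taken from the package), these are the kinds of argument objects an MCP client might pass to index_codebase and search_code; the paths, extensions, and patterns are hypothetical examples drawn from the schema descriptions.

```typescript
// Illustrative only: argument objects matching the inputSchema declarations above.
const indexCodebaseArgs = {
  path: "/home/user/projects/my-app", // required: codebase root directory
  forceReindex: false,                // optional: force full re-index
  extensions: [".proto", ".graphql"], // optional: custom file extensions to index
  ignorePatterns: ["**/test/**"],     // optional: additional ignore patterns
};

const searchCodeArgs = {
  path: "/home/user/projects/my-app", // must be indexed first
  query: "authentication logic",      // natural language query
  limit: 5,                           // default 5, max 100
  fileTypes: [".ts", ".py"],          // optional: filter by file extensions
  pathPattern: "src/services/**",     // optional: filter by path glob pattern
};
```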
package/tests/code/chunker/character-chunker.test.ts
ADDED
@@ -0,0 +1,141 @@
+import { beforeEach, describe, expect, it } from "vitest";
+import { CharacterChunker } from "../../../src/code/chunker/character-chunker.js";
+import type { ChunkerConfig } from "../../../src/code/types.js";
+
+describe("CharacterChunker", () => {
+  let chunker: CharacterChunker;
+  let config: ChunkerConfig;
+
+  beforeEach(() => {
+    config = {
+      chunkSize: 100,
+      chunkOverlap: 20,
+      maxChunkSize: 200,
+    };
+    chunker = new CharacterChunker(config);
+  });
+
+  describe("chunk", () => {
+    it("should chunk small code into single chunk", async () => {
+      const code =
+        "function hello() {\n console.log('Starting hello function');\n return 'world';\n}";
+      const chunks = await chunker.chunk(code, "test.ts", "typescript");
+
+      expect(chunks).toHaveLength(1);
+      expect(chunks[0].content).toContain("hello");
+      expect(chunks[0].startLine).toBe(1);
+      expect(chunks[0].metadata.language).toBe("typescript");
+    });
+
+    it("should chunk large code into multiple chunks", async () => {
+      const code = Array(20)
+        .fill("function testFunction() { console.log('This is a test function'); return true; }\n")
+        .join("");
+      const chunks = await chunker.chunk(code, "test.js", "javascript");
+
+      expect(chunks.length).toBeGreaterThan(1);
+      chunks.forEach((chunk) => {
+        expect(chunk.content.length).toBeLessThanOrEqual(config.maxChunkSize);
+      });
+    });
+
+    it("should preserve line numbers", async () => {
+      const code =
+        "This is line 1 with enough content to not be filtered\n" +
+        "This is line 2 with enough content to not be filtered\n" +
+        "This is line 3 with enough content to not be filtered";
+      const chunks = await chunker.chunk(code, "test.txt", "text");
+
+      expect(chunks.length).toBeGreaterThan(0);
+      expect(chunks[0].startLine).toBe(1);
+      expect(chunks[0].endLine).toBeGreaterThan(chunks[0].startLine);
+    });
+
+    it("should apply overlap between chunks", async () => {
+      const code = Array(20).fill("const x = 1;\n").join("");
+      const chunks = await chunker.chunk(code, "test.js", "javascript");
+
+      if (chunks.length > 1) {
+        // Check that there's overlap in content
+        expect(chunks.length).toBeGreaterThan(1);
+      }
+    });
+
+    it("should find good break points", async () => {
+      const code = `function foo() {
+  return 1;
+}
+
+function bar() {
+  return 2;
+}
+
+function baz() {
+  return 3;
+}`;
+
+      const chunks = await chunker.chunk(code, "test.js", "javascript");
+      // Should try to break at function boundaries
+      chunks.forEach((chunk) => {
+        expect(chunk.content.length).toBeGreaterThan(0);
+      });
+    });
+
+    it("should handle empty code", async () => {
+      const code = "";
+      const chunks = await chunker.chunk(code, "test.ts", "typescript");
+      expect(chunks).toHaveLength(0);
+    });
+
+    it("should handle code with only whitespace", async () => {
+      const code = " \n\n\n ";
+      const chunks = await chunker.chunk(code, "test.ts", "typescript");
+      expect(chunks).toHaveLength(0);
+    });
+
+    it("should skip very small chunks", async () => {
+      const code = "x";
+      const chunks = await chunker.chunk(code, "test.ts", "typescript");
+      expect(chunks).toHaveLength(0);
+    });
+  });
+
+  describe("supportsLanguage", () => {
+    it("should support all languages", () => {
+      expect(chunker.supportsLanguage("typescript")).toBe(true);
+      expect(chunker.supportsLanguage("python")).toBe(true);
+      expect(chunker.supportsLanguage("unknown")).toBe(true);
+    });
+  });
+
+  describe("getStrategyName", () => {
+    it("should return correct strategy name", () => {
+      expect(chunker.getStrategyName()).toBe("character-based");
+    });
+  });
+
+  describe("metadata", () => {
+    it("should include correct chunk metadata", async () => {
+      const code = "function test() {\n console.log('test function');\n return 1;\n}";
+      const chunks = await chunker.chunk(code, "/path/to/file.ts", "typescript");
+
+      expect(chunks.length).toBeGreaterThan(0);
+      expect(chunks[0].metadata).toEqual({
+        filePath: "/path/to/file.ts",
+        language: "typescript",
+        chunkIndex: 0,
+        chunkType: "block",
+      });
+    });
+
+    it("should increment chunk index", async () => {
+      const code = Array(20).fill("function test() {}\n").join("");
+      const chunks = await chunker.chunk(code, "test.ts", "typescript");
+
+      if (chunks.length > 1) {
+        expect(chunks[0].metadata.chunkIndex).toBe(0);
+        expect(chunks[1].metadata.chunkIndex).toBe(1);
+      }
+    });
+  });
+});
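As a quick recap of the API these tests exercise, here is a minimal standalone sketch of using CharacterChunker directly; the relative import paths assume a source checkout and are illustrative only, not a published entry point.

```typescript
// Illustrative sketch of the chunker API exercised by the tests above.
import { CharacterChunker } from "./src/code/chunker/character-chunker.js";
import type { ChunkerConfig } from "./src/code/types.js";

const config: ChunkerConfig = { chunkSize: 100, chunkOverlap: 20, maxChunkSize: 200 };
const chunker = new CharacterChunker(config);

const code = "function hello() {\n  return 'world';\n}";
const chunks = await chunker.chunk(code, "hello.ts", "typescript");

// Each chunk carries content, startLine/endLine, and metadata (filePath, language, chunkIndex).
console.log(chunks.map((c) => `${c.metadata.filePath}:${c.startLine}-${c.endLine}`));
```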