@mhalder/qdrant-mcp-server 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codecov.yml +16 -0
- package/CHANGELOG.md +18 -0
- package/README.md +236 -9
- package/build/code/chunker/base.d.ts +19 -0
- package/build/code/chunker/base.d.ts.map +1 -0
- package/build/code/chunker/base.js +5 -0
- package/build/code/chunker/base.js.map +1 -0
- package/build/code/chunker/character-chunker.d.ts +22 -0
- package/build/code/chunker/character-chunker.d.ts.map +1 -0
- package/build/code/chunker/character-chunker.js +111 -0
- package/build/code/chunker/character-chunker.js.map +1 -0
- package/build/code/chunker/tree-sitter-chunker.d.ts +29 -0
- package/build/code/chunker/tree-sitter-chunker.d.ts.map +1 -0
- package/build/code/chunker/tree-sitter-chunker.js +213 -0
- package/build/code/chunker/tree-sitter-chunker.js.map +1 -0
- package/build/code/config.d.ts +11 -0
- package/build/code/config.d.ts.map +1 -0
- package/build/code/config.js +145 -0
- package/build/code/config.js.map +1 -0
- package/build/code/indexer.d.ts +42 -0
- package/build/code/indexer.d.ts.map +1 -0
- package/build/code/indexer.js +508 -0
- package/build/code/indexer.js.map +1 -0
- package/build/code/metadata.d.ts +32 -0
- package/build/code/metadata.d.ts.map +1 -0
- package/build/code/metadata.js +128 -0
- package/build/code/metadata.js.map +1 -0
- package/build/code/scanner.d.ts +35 -0
- package/build/code/scanner.d.ts.map +1 -0
- package/build/code/scanner.js +108 -0
- package/build/code/scanner.js.map +1 -0
- package/build/code/sync/merkle.d.ts +45 -0
- package/build/code/sync/merkle.d.ts.map +1 -0
- package/build/code/sync/merkle.js +116 -0
- package/build/code/sync/merkle.js.map +1 -0
- package/build/code/sync/snapshot.d.ts +41 -0
- package/build/code/sync/snapshot.d.ts.map +1 -0
- package/build/code/sync/snapshot.js +91 -0
- package/build/code/sync/snapshot.js.map +1 -0
- package/build/code/sync/synchronizer.d.ts +53 -0
- package/build/code/sync/synchronizer.d.ts.map +1 -0
- package/build/code/sync/synchronizer.js +132 -0
- package/build/code/sync/synchronizer.js.map +1 -0
- package/build/code/types.d.ts +98 -0
- package/build/code/types.d.ts.map +1 -0
- package/build/code/types.js +5 -0
- package/build/code/types.js.map +1 -0
- package/build/index.js +250 -0
- package/build/index.js.map +1 -1
- package/examples/code-search/README.md +271 -0
- package/package.json +13 -1
- package/src/code/chunker/base.ts +22 -0
- package/src/code/chunker/character-chunker.ts +131 -0
- package/src/code/chunker/tree-sitter-chunker.ts +250 -0
- package/src/code/config.ts +156 -0
- package/src/code/indexer.ts +613 -0
- package/src/code/metadata.ts +153 -0
- package/src/code/scanner.ts +124 -0
- package/src/code/sync/merkle.ts +136 -0
- package/src/code/sync/snapshot.ts +110 -0
- package/src/code/sync/synchronizer.ts +154 -0
- package/src/code/types.ts +117 -0
- package/src/index.ts +296 -0
- package/tests/code/chunker/character-chunker.test.ts +141 -0
- package/tests/code/chunker/tree-sitter-chunker.test.ts +275 -0
- package/tests/code/fixtures/sample-py/calculator.py +32 -0
- package/tests/code/fixtures/sample-ts/async-operations.ts +120 -0
- package/tests/code/fixtures/sample-ts/auth.ts +31 -0
- package/tests/code/fixtures/sample-ts/config.ts +52 -0
- package/tests/code/fixtures/sample-ts/database.ts +50 -0
- package/tests/code/fixtures/sample-ts/index.ts +39 -0
- package/tests/code/fixtures/sample-ts/types-advanced.ts +132 -0
- package/tests/code/fixtures/sample-ts/utils.ts +105 -0
- package/tests/code/fixtures/sample-ts/validator.ts +169 -0
- package/tests/code/indexer.test.ts +828 -0
- package/tests/code/integration.test.ts +708 -0
- package/tests/code/metadata.test.ts +457 -0
- package/tests/code/scanner.test.ts +131 -0
- package/tests/code/sync/merkle.test.ts +406 -0
- package/tests/code/sync/snapshot.test.ts +360 -0
- package/tests/code/sync/synchronizer.test.ts +501 -0
- package/vitest.config.ts +1 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TreeSitterChunker - AST-aware code chunking using tree-sitter
|
|
3
|
+
* Primary chunking strategy for supported languages
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import Parser from "tree-sitter";
|
|
7
|
+
// tree-sitter language modules don't have proper types
|
|
8
|
+
import Bash from "tree-sitter-bash";
|
|
9
|
+
import Go from "tree-sitter-go";
|
|
10
|
+
import Java from "tree-sitter-java";
|
|
11
|
+
import JavaScript from "tree-sitter-javascript";
|
|
12
|
+
import Python from "tree-sitter-python";
|
|
13
|
+
import Rust from "tree-sitter-rust";
|
|
14
|
+
import TypeScript from "tree-sitter-typescript";
|
|
15
|
+
|
|
16
|
+
import type { ChunkerConfig, CodeChunk } from "../types.js";
|
|
17
|
+
import type { CodeChunker } from "./base.js";
|
|
18
|
+
import { CharacterChunker } from "./character-chunker.js";
|
|
19
|
+
|
|
20
|
+
/**
 * Per-language chunking setup: a parser that already has its grammar loaded,
 * together with the AST node types that are emitted as chunks.
 */
interface LanguageConfig {
  /** Parser instance dedicated to this language. */
  parser: Parser;
  /** Names of tree-sitter node types that are treated as chunk boundaries. */
  chunkableTypes: Array<string>;
}
|
|
24
|
+
|
|
25
|
+
/**
 * AST-aware chunker built on tree-sitter.
 *
 * Source code is parsed with the grammar registered for its language and each
 * top-most "chunkable" node (function, class, interface, ...) becomes one chunk.
 * Languages without a registered grammar, nodes that are too large, files that
 * yield no chunks, and parse failures are all delegated to a
 * {@link CharacterChunker}.
 */
export class TreeSitterChunker implements CodeChunker {
  /** Nodes whose source text is shorter than this (in UTF-16 code units) are skipped. */
  private static readonly MIN_CHUNK_LENGTH = 50;

  /** A node is handed to the fallback chunker once it exceeds `maxChunkSize` times this factor. */
  private static readonly OVERSIZE_FACTOR = 2;

  /** Files longer than this that produced no AST chunks are re-chunked by the fallback. */
  private static readonly MIN_FALLBACK_FILE_LENGTH = 100;

  /** Grammar key used internally for `.tsx` files, which need the JSX-aware grammar. */
  private static readonly TSX_GRAMMAR_KEY = "tsx";

  /**
   * Node types that merely wrap the real definition (decorators, `export`).
   * Their name and kind are taken from the wrapped node.
   */
  private static readonly WRAPPER_NODE_TYPES: ReadonlySet<string> = new Set([
    "decorated_definition",
    "export_statement",
  ]);

  private readonly languages = new Map<string, LanguageConfig>();
  private readonly fallbackChunker: CharacterChunker;

  /**
   * @param config Chunker settings; `maxChunkSize` decides when a single AST
   *   node is considered too large to be emitted as one chunk.
   */
  constructor(private readonly config: ChunkerConfig) {
    this.fallbackChunker = new CharacterChunker(config);
    this.initializeParsers();
  }

  /** Registers one parser per supported grammar together with its chunkable node types. */
  private initializeParsers(): void {
    const typeScriptTypes = [
      "function_declaration",
      "method_definition",
      "class_declaration",
      "interface_declaration",
      "type_alias_declaration",
      "enum_declaration",
    ];

    this.registerLanguage("typescript", TypeScript.typescript, typeScriptTypes);
    // tree-sitter-typescript ships TSX as a separate grammar; the plain
    // TypeScript grammar cannot parse JSX syntax.
    this.registerLanguage(TreeSitterChunker.TSX_GRAMMAR_KEY, TypeScript.tsx, [...typeScriptTypes]);

    this.registerLanguage("javascript", JavaScript, [
      "function_declaration",
      "method_definition",
      "class_declaration",
      "export_statement",
    ]);

    this.registerLanguage("python", Python, [
      "function_definition",
      "class_definition",
      "decorated_definition",
    ]);

    this.registerLanguage("go", Go, [
      "function_declaration",
      "method_declaration",
      "type_declaration",
      "interface_declaration",
    ]);

    this.registerLanguage("rust", Rust, [
      "function_item",
      "impl_item",
      "trait_item",
      "struct_item",
      "enum_item",
    ]);

    this.registerLanguage("java", Java, [
      "method_declaration",
      "class_declaration",
      "interface_declaration",
      "enum_declaration",
    ]);

    this.registerLanguage("bash", Bash, ["function_definition", "command"]);
  }

  /**
   * Creates a parser for `grammar` and stores it under `name`.
   *
   * @param name Language key used for lookups in {@link chunk}.
   * @param grammar Language object exported by a tree-sitter grammar module.
   * @param chunkableTypes Node types that become chunks for this language.
   */
  private registerLanguage(name: string, grammar: unknown, chunkableTypes: string[]): void {
    const parser = new Parser();
    // The grammar modules do not ship usable typings, hence the cast.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    parser.setLanguage(grammar as any);
    this.languages.set(name, { parser, chunkableTypes });
  }

  /**
   * Splits `code` into chunks along AST boundaries.
   *
   * @param code Full source text of the file.
   * @param filePath Path recorded in chunk metadata; also used to detect `.tsx`.
   * @param language Language key (for example `"typescript"`, `"python"`).
   * @returns Chunks in document order. `metadata.chunkIndex` is the chunk's
   *   position in the returned array. The result is empty when nothing
   *   chunkable was found and the file is at most
   *   `MIN_FALLBACK_FILE_LENGTH` characters long.
   */
  async chunk(code: string, filePath: string, language: string): Promise<CodeChunk[]> {
    const langConfig = this.languages.get(this.resolveGrammarKey(language, filePath));

    if (!langConfig) {
      // No grammar for this language: character-based chunking only.
      return this.fallbackChunker.chunk(code, filePath, language);
    }

    try {
      const tree = langConfig.parser.parse(code);
      const nodes = this.findChunkableNodes(tree.rootNode, langConfig.chunkableTypes);
      const oversizeLimit = this.config.maxChunkSize * TreeSitterChunker.OVERSIZE_FACTOR;
      const chunks: CodeChunk[] = [];

      for (const node of nodes) {
        const content = code.substring(node.startIndex, node.endIndex);

        // Skip chunks that are too small to be useful on their own.
        if (content.length < TreeSitterChunker.MIN_CHUNK_LENGTH) {
          continue;
        }

        // Oversized node: let the character chunker split just this node.
        if (content.length > oversizeLimit) {
          const subChunks = await this.fallbackChunker.chunk(content, filePath, language);
          for (const subChunk of subChunks) {
            chunks.push({
              ...subChunk,
              // Sub-chunk lines are relative to the node's text; shift them to
              // file coordinates. Assumes the fallback reports 1-based lines
              // (TODO confirm against CharacterChunker).
              startLine: node.startPosition.row + subChunk.startLine,
              endLine: node.startPosition.row + subChunk.endLine,
              metadata: {
                ...subChunk.metadata,
                chunkIndex: chunks.length,
              },
            });
          }
          continue;
        }

        // Decorated/exported definitions are described by the node they wrap.
        const definition = this.unwrapDefinition(node);

        chunks.push({
          content: content.trim(),
          startLine: node.startPosition.row + 1,
          endLine: node.endPosition.row + 1,
          metadata: {
            filePath,
            language,
            // Position in the output array. Using the node index here would
            // collide with the indices handed out to sub-chunks above and
            // leave gaps for skipped nodes.
            chunkIndex: chunks.length,
            chunkType: this.getChunkType(definition.type),
            name: this.extractName(definition, code),
          },
        });
      }

      // Nothing chunkable in a file that is not trivially small: use the fallback.
      if (chunks.length === 0 && code.length > TreeSitterChunker.MIN_FALLBACK_FILE_LENGTH) {
        return this.fallbackChunker.chunk(code, filePath, language);
      }

      return chunks;
    } catch (error) {
      // On parsing error, fall back to character-based chunking.
      console.error(`Tree-sitter parsing failed for ${filePath}:`, error);
      return this.fallbackChunker.chunk(code, filePath, language);
    }
  }

  /** Returns whether a tree-sitter grammar is registered for `language`. */
  supportsLanguage(language: string): boolean {
    return this.languages.has(language);
  }

  /** Identifier of this chunking strategy. */
  getStrategyName(): string {
    return "tree-sitter";
  }

  /**
   * Picks the grammar key for a file. `.tsx` files arrive labelled as
   * `"typescript"` but must be parsed with the TSX grammar.
   */
  private resolveGrammarKey(language: string, filePath: string): string {
    if (language === "typescript" && filePath.toLowerCase().endsWith(".tsx")) {
      return TreeSitterChunker.TSX_GRAMMAR_KEY;
    }
    return language;
  }

  /**
   * Collects the top-most chunkable nodes below `node` in document order.
   * Children of a chunkable node are not visited, so chunks never nest.
   * Uses an explicit stack so deeply nested trees cannot overflow the call stack.
   */
  private findChunkableNodes(
    node: Parser.SyntaxNode,
    chunkableTypes: string[]
  ): Parser.SyntaxNode[] {
    const wanted = new Set(chunkableTypes);
    const found: Parser.SyntaxNode[] = [];
    const pending: Parser.SyntaxNode[] = [node];

    for (let current = pending.pop(); current !== undefined; current = pending.pop()) {
      if (wanted.has(current.type)) {
        found.push(current);
        continue;
      }

      // Push children right-to-left so they are popped left-to-right.
      for (const child of [...current.children].reverse()) {
        pending.push(child);
      }
    }

    return found;
  }

  /**
   * Returns the definition wrapped by a decorator/export node, or `node`
   * itself when it is not a wrapper or wraps nothing recognisable.
   */
  private unwrapDefinition(node: Parser.SyntaxNode): Parser.SyntaxNode {
    if (!TreeSitterChunker.WRAPPER_NODE_TYPES.has(node.type)) {
      return node;
    }
    return node.childForFieldName("definition") ?? node.childForFieldName("declaration") ?? node;
  }

  /**
   * Extracts the declared name of a definition node.
   *
   * @returns Text of the node's `name` field, else of its first direct
   *   `identifier`/`type_identifier` child, else `undefined`.
   */
  private extractName(node: Parser.SyntaxNode, code: string): string | undefined {
    const nameNode =
      node.childForFieldName("name") ??
      node.children.find(
        (child) => child.type === "identifier" || child.type === "type_identifier"
      );

    return nameNode ? code.substring(nameNode.startIndex, nameNode.endIndex) : undefined;
  }

  /**
   * Maps an AST node type to a coarse chunk kind by substring match.
   * Checks run in order, so a type containing "function" or "method" wins
   * over "class"/"struct", which in turn wins over "interface"/"trait".
   */
  private getChunkType(nodeType: string): "function" | "class" | "interface" | "block" {
    if (nodeType.includes("function") || nodeType.includes("method")) {
      return "function";
    }
    if (nodeType.includes("class") || nodeType.includes("struct")) {
      return "class";
    }
    if (nodeType.includes("interface") || nodeType.includes("trait")) {
      return "interface";
    }
    return "block";
  }
}
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Configuration and constants for code vectorization
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * File extensions (with leading dot) grouped by language family.
 */
export const DEFAULT_CODE_EXTENSIONS = [
  ".ts", ".tsx", ".js", ".jsx", // TypeScript / JavaScript
  ".py", // Python
  ".go", // Go
  ".rs", // Rust
  ".java", ".kt", // Java / Kotlin
  ".c", ".cpp", ".h", ".hpp", ".cc", ".cxx", // C / C++
  ".cs", // C#
  ".rb", // Ruby
  ".php", // PHP
  ".swift", // Swift
  ".dart", // Dart
  ".scala", // Scala
  ".clj", ".cljs", // Clojure
  ".hs", // Haskell
  ".ml", // OCaml
  ".sh", ".bash", ".zsh", ".fish", // Shell
  ".sql", ".proto", ".graphql", // SQL / data definitions
  ".vue", ".svelte", // Web components
  ".md", ".markdown", ".json", ".yaml", ".yml", ".toml", ".xml", // Config / markup
];
|
|
67
|
+
|
|
68
|
+
/**
 * Glob-style patterns for paths to ignore: dependency and build output
 * directories, caches, VCS/editor metadata, generated assets, logs and
 * environment files.
 */
export const DEFAULT_IGNORE_PATTERNS = [
  // Dependencies and build output
  "node_modules/**", "dist/**", "build/**", "out/**", "target/**",
  // Coverage reports and caches
  "coverage/**", ".nyc_output/**", ".cache/**", "__pycache__/**",
  // Version control metadata
  ".git/**", ".svn/**", ".hg/**",
  // Editor / IDE settings
  ".vscode/**", ".idea/**",
  // Minified, bundled and generated files
  "*.min.js", "*.min.css", "*.bundle.js", "*.map",
  // Logs
  "*.log",
  // Environment files
  ".env", ".env.*",
];
|
|
91
|
+
|
|
92
|
+
/**
 * Lookup table from file extension (with leading dot) to language identifier.
 */
export const LANGUAGE_MAP: Record<string, string> = {
  // TypeScript / JavaScript
  ".ts": "typescript", ".tsx": "typescript",
  ".js": "javascript", ".jsx": "javascript",

  // Backend languages
  ".py": "python", ".java": "java", ".go": "go",
  ".rs": "rust", ".rb": "ruby", ".php": "php",

  // Systems languages
  ".c": "c", ".cpp": "cpp", ".cc": "cpp", ".cxx": "cpp",
  ".h": "c", ".hpp": "cpp", ".cs": "c_sharp",

  // Mobile
  ".swift": "swift", ".kt": "kotlin", ".dart": "dart",

  // Functional
  ".scala": "scala", ".clj": "clojure", ".cljs": "clojure",
  ".hs": "haskell", ".ml": "ocaml",

  // Scripting
  ".sh": "bash", ".bash": "bash", ".zsh": "bash", ".fish": "fish",

  // Data / query
  ".sql": "sql", ".proto": "proto", ".graphql": "graphql",

  // Markup / config
  ".md": "markdown", ".markdown": "markdown", ".json": "json",
  ".yaml": "yaml", ".yml": "yaml", ".toml": "toml", ".xml": "xml",

  // Web
  ".vue": "vue", ".svelte": "svelte",
};
|
|
152
|
+
|
|
153
|
+
/** Default chunk size. NOTE(review): presumably measured in characters — confirm against the chunkers. */
export const DEFAULT_CHUNK_SIZE = 2500;

/** Default overlap between consecutive chunks, in the same unit as the chunk size. */
export const DEFAULT_CHUNK_OVERLAP = 300;

/** Default batch size. NOTE(review): presumably items per indexing batch — confirm against the indexer. */
export const DEFAULT_BATCH_SIZE = 100;

/** Default limit applied to searches. */
export const DEFAULT_SEARCH_LIMIT = 5;
|