semantic-code-mcp 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +259 -0
- package/config.json +85 -0
- package/features/check-last-version.js +504 -0
- package/features/clear-cache.js +75 -0
- package/features/get-status.js +210 -0
- package/features/hybrid-search.js +189 -0
- package/features/index-codebase.js +999 -0
- package/features/set-workspace.js +183 -0
- package/index.js +297 -0
- package/lib/ast-chunker.js +273 -0
- package/lib/cache-factory.js +13 -0
- package/lib/cache.js +157 -0
- package/lib/config.js +1296 -0
- package/lib/embedding-worker.js +155 -0
- package/lib/gemini-embedder.js +351 -0
- package/lib/ignore-patterns.js +896 -0
- package/lib/milvus-cache.js +478 -0
- package/lib/mrl-embedder.js +235 -0
- package/lib/project-detector.js +75 -0
- package/lib/resource-throttle.js +85 -0
- package/lib/sqlite-cache.js +468 -0
- package/lib/tokenizer.js +149 -0
- package/lib/utils.js +214 -0
- package/package.json +70 -0
- package/reindex.js +109 -0
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Get Status Feature
|
|
3
|
+
*
|
|
4
|
+
* MCP tool to return comprehensive status information about the server.
|
|
5
|
+
* Useful for agents to understand current state and configuration.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import fs from 'fs/promises';
|
|
9
|
+
import path from 'path';
|
|
10
|
+
import { createRequire } from 'module';
|
|
11
|
+
|
|
12
|
+
const require = createRequire(import.meta.url);
|
|
13
|
+
const packageJson = require('../package.json');
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Get tool definition for MCP registration
|
|
17
|
+
*/
|
|
18
|
+
/**
 * Build the MCP tool definition for the status tool.
 *
 * The tool takes no arguments, so the input schema is an empty object.
 *
 * @param {object} config - Server configuration (unused here; kept for a
 *   uniform feature-module signature).
 * @returns {{name: string, description: string, inputSchema: object}}
 */
export function getToolDefinition(config) {
  const inputSchema = {
    type: "object",
    properties: {},
    required: []
  };

  return {
    name: "f_get_status",
    description: "Get comprehensive status information about the Smart Coding MCP server. Returns version, workspace path, model configuration, indexing status, and cache information. Useful for understanding the current state of the semantic search system.",
    inputSchema
  };
}
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Status Reporter class
|
|
32
|
+
*/
|
|
33
|
+
/**
 * Collects a point-in-time status snapshot of the MCP server: version,
 * workspace, embedding model, index contents, on-disk cache, and throttling
 * settings. Consumed by the f_get_status tool handler below.
 */
export class StatusReporter {
  /**
   * @param {object} config  - Resolved server configuration.
   * @param {object} cache   - Vector cache; may expose getStats() and/or
   *                           getVectorStore() (both accessed optionally).
   * @param {object} indexer - Indexer; read for isIndexing / indexingStatus.
   * @param {object} embedder - Embedder; read for modelName / dimension / device.
   */
  constructor(config, cache, indexer, embedder) {
    this.config = config;
    this.cache = cache;
    this.indexer = indexer;
    this.embedder = embedder;
    // Used to report server uptime in seconds.
    this.startTime = Date.now();
  }

  /**
   * Build the full status object.
   *
   * Chunk/file counts come from cache.getStats() when available, falling back
   * to counting the legacy in-memory vector store. Cache size/type is probed
   * on disk (SQLite first, then the old JSON format) unless the provider is
   * Milvus, which has no local file.
   *
   * @returns {Promise<object>} Plain serializable status snapshot.
   */
  async getStatus() {
    const vectorStoreProvider = (this.config.vectorStoreProvider || 'sqlite').toLowerCase();
    let totalChunks = 0;
    let totalFiles = 0;

    // Preferred path: ask the cache implementation for its own stats.
    if (typeof this.cache?.getStats === 'function') {
      try {
        const stats = await this.cache.getStats();
        totalChunks = Number(stats?.totalChunks || 0);
        totalFiles = Number(stats?.totalFiles || 0);
      } catch (err) {
        // Fallback to legacy vectorStore contract if cache-specific stats fail.
      }
    }

    // Legacy path: count chunks directly from the in-memory vector store.
    // Distinct files are derived from the per-chunk `file` property.
    if (totalChunks === 0 && totalFiles === 0) {
      const vectorStore = this.cache?.getVectorStore?.() || [];
      totalChunks = vectorStore.length;
      totalFiles = new Set(vectorStore.map((v) => v.file)).size;
    }

    // Get cache size (check for SQLite database)
    let cacheSizeBytes = 0;
    let cacheType = 'none';
    if (vectorStoreProvider === 'milvus') {
      // Milvus is remote; no local cache file to stat.
      cacheType = 'milvus';
    } else {
      try {
        // Check for SQLite cache first
        const sqlitePath = path.join(this.config.cacheDirectory, 'embeddings.db');
        const stats = await fs.stat(sqlitePath);
        cacheSizeBytes = stats.size;
        cacheType = 'sqlite';
      } catch {
        // Try old JSON cache as fallback
        try {
          const jsonPath = path.join(this.config.cacheDirectory, 'embeddings.json');
          const stats = await fs.stat(jsonPath);
          cacheSizeBytes = stats.size;
          cacheType = 'json';
        } catch {
          // No cache file exists
          cacheType = 'none';
        }
      }
    }

    // Determine index status and progressive indexing info
    let indexStatus = 'empty';
    let progressiveIndexing = null;

    if (this.indexer?.isIndexing) {
      indexStatus = 'indexing';
      // Include progressive indexing status
      if (this.indexer.indexingStatus) {
        progressiveIndexing = {
          inProgress: this.indexer.indexingStatus.inProgress,
          totalFiles: this.indexer.indexingStatus.totalFiles,
          processedFiles: this.indexer.indexingStatus.processedFiles,
          percentage: this.indexer.indexingStatus.percentage
        };
      }
    } else if (totalChunks > 0) {
      indexStatus = 'ready';
    }

    // Resolve the effective embedding model name for API-backed providers.
    // Vertex and OpenAI-style providers get provider-specific defaults; a
    // configured model wins unless it looks like a local ("nomic") model.
    const provider = (this.config.embeddingProvider || "local").toLowerCase();
    const isApiProvider = ["gemini", "openai", "openai-compatible", "vertex"].includes(provider);
    const defaultApiModel =
      provider === "vertex"
        ? "gemini-embedding-001"
        : (provider === "openai" || provider === "openai-compatible")
          ? "text-embedding-3-small"
          : (this.config.geminiModel || "gemini-embedding-001");
    const configuredApiModel =
      typeof this.config.embeddingModel === "string" && this.config.embeddingModel.trim().length > 0
        ? this.config.embeddingModel.trim()
        : "";

    return {
      version: packageJson.version,
      // Uptime in whole seconds since this reporter was constructed.
      uptime: Math.floor((Date.now() - this.startTime) / 1000),

      workspace: {
        path: this.config.searchDirectory,
        cacheDirectory: this.config.cacheDirectory
      },

      model: {
        provider: this.config.embeddingProvider,
        // Live embedder metadata wins; otherwise fall back to config-derived
        // values (API default model vs. the locally configured model name).
        name: this.embedder?.modelName || (
          isApiProvider
            ? (
              configuredApiModel && !configuredApiModel.includes("nomic")
                ? configuredApiModel
                : defaultApiModel
            )
            : this.config.embeddingModel
        ),
        dimension: this.embedder?.dimension || (
          isApiProvider
            ? this.config.geminiDimensions
            : this.config.embeddingDimension
        ),
        device: this.embedder?.device || this.config.device
      },

      index: {
        status: indexStatus,
        filesIndexed: totalFiles,
        chunksCount: totalChunks,
        chunkingMode: this.config.chunkingMode,
        // Only present while a progressive index run is active.
        ...(progressiveIndexing && { progressiveIndexing })
      },

      cache: {
        enabled: this.config.enableCache,
        type: cacheType,
        // For Milvus report the server address; otherwise the local directory.
        path: cacheType === 'milvus' ? this.config.milvusAddress : this.config.cacheDirectory,
        sizeBytes: cacheSizeBytes,
        sizeFormatted: formatBytes(cacheSizeBytes)
      },

      config: {
        maxResults: this.config.maxResults,
        chunkSize: this.config.chunkSize,
        semanticWeight: this.config.semanticWeight,
        exactMatchBoost: this.config.exactMatchBoost,
        workerThreads: this.config.workerThreads,
        embeddingProvider: this.config.embeddingProvider,
        vectorStoreProvider
      },

      resourceThrottling: {
        maxCpuPercent: this.config.maxCpuPercent,
        batchDelay: this.config.batchDelay,
        maxWorkers: this.config.maxWorkers
      }
    };
  }
}
|
|
186
|
+
|
|
187
|
+
/**
|
|
188
|
+
* Format bytes to human readable
|
|
189
|
+
*/
|
|
190
|
+
/**
 * Format a byte count as a human-readable string (e.g. 1536 -> "1.5 KB").
 *
 * Fixes over the previous version:
 * - sizes stopped at 'GB', so values >= 1 TB produced "N undefined";
 *   the unit index is now clamped into range and TB/PB were added.
 * - negative, NaN, or non-finite input produced NaN/undefined output
 *   (Math.log of a non-positive number); such input now yields '0 B'.
 *
 * @param {number} bytes - Byte count (expected non-negative).
 * @returns {string} Value with unit, trimmed of trailing zeros via parseFloat.
 */
function formatBytes(bytes) {
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];
  // Clamp so fractional (<1) and huge inputs stay within the unit table.
  const i = Math.min(Math.max(Math.floor(Math.log(bytes) / Math.log(k)), 0), sizes.length - 1);
  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
}
|
|
197
|
+
|
|
198
|
+
/**
|
|
199
|
+
* Handle MCP tool call
|
|
200
|
+
*/
|
|
201
|
+
/**
 * MCP handler for the f_get_status tool.
 *
 * @param {object} request  - Incoming MCP request (unused; no arguments).
 * @param {StatusReporter} instance - Reporter whose getStatus() is invoked.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>}
 *   The status snapshot serialized as pretty-printed JSON text content.
 */
export async function handleToolCall(request, instance) {
  const snapshot = await instance.getStatus();
  const payload = JSON.stringify(snapshot, null, 2);
  return { content: [{ type: "text", text: payload }] };
}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import { cosineSimilarity } from "../lib/utils.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Hybrid code search: semantic similarity from embeddings combined with a
 * lexical boost (exact substring match or per-word overlap). Prefers the
 * cache's ANN search (searchByVector) when available; otherwise falls back
 * to scoring the full in-memory vector store with cosine similarity.
 */
export class HybridSearch {
  /**
   * @param {Function} embedder - Async embedding pipeline; called as
   *   embedder(text, { pooling, normalize }) and read via `.data`.
   * @param {object} cache   - Vector cache; may expose searchByVector(),
   *   getStats(), and/or getVectorStore().
   * @param {object} config  - Provides semanticWeight, exactMatchBoost, etc.
   * @param {object} [indexer] - Optional indexer, read for progress status.
   */
  constructor(embedder, cache, config, indexer = null) {
    this.embedder = embedder;
    this.cache = cache;
    this.config = config;
    this.indexer = indexer; // Reference to indexer for status checking
  }

  /**
   * Run a hybrid search for `query`.
   *
   * @param {string} query - Natural-language or keyword query.
   * @param {number} maxResults - Maximum number of chunks to return.
   * @returns {Promise<{results: Array, message: (string|null), indexingWarning?: (string|null)}>}
   *   `message` is non-null only for the "nothing indexed" cases, in which
   *   case `results` is empty (and `indexingWarning` is omitted).
   */
  async search(query, maxResults) {
    const hasAnnSearch = typeof this.cache?.searchByVector === "function";

    // Show warning if indexing is still in progress but we have some results
    let indexingWarning = null;
    if (this.indexer?.indexingStatus?.inProgress) {
      indexingWarning = `⚠️ Indexing in progress (${this.indexer.indexingStatus.percentage}% complete). Results shown are from partially indexed codebase.\n\n`;
    }

    // Generate query embedding
    const queryEmbed = await this.embedder(query, { pooling: "mean", normalize: true });
    const queryVector = Array.from(queryEmbed.data);

    if (hasAnnSearch) {
      // Over-fetch ANN candidates (5x, min 20) so the lexical re-ranking
      // below has room to reorder before truncating to maxResults.
      const annTopK = Math.max(maxResults * 5, 20);
      const candidates = await this.cache.searchByVector(queryVector, annTopK);

      const scoredChunks = candidates.map((chunk) => {
        // Base semantic score from provider (Milvus or fallback cache) plus lexical boost.
        let score = Number(chunk.score || 0) * this.config.semanticWeight;

        const lowerQuery = query.toLowerCase();
        const lowerContent = String(chunk.content || "").toLowerCase();

        if (lowerContent.includes(lowerQuery)) {
          // Whole-query substring hit gets the full exact-match boost.
          score += this.config.exactMatchBoost;
        } else {
          // Otherwise boost by the fraction of query words (>2 chars) present,
          // scaled to a maximum of 0.3.
          const queryWords = lowerQuery.split(/\s+/).filter(Boolean);
          const matchedWords = queryWords.filter(
            (word) => word.length > 2 && lowerContent.includes(word)
          ).length;
          const lexicalBoost = queryWords.length > 0 ? (matchedWords / queryWords.length) * 0.3 : 0;
          score += lexicalBoost;
        }

        return { ...chunk, score };
      });

      const results = scoredChunks
        .sort((a, b) => b.score - a.score)
        .slice(0, maxResults);

      // Empty result set: distinguish "index is empty" from "no match".
      if (results.length === 0) {
        const stats = typeof this.cache?.getStats === "function"
          ? await this.cache.getStats().catch(() => null)
          : null;
        const totalChunks = Number(stats?.totalChunks || 0);

        if (totalChunks === 0) {
          if (this.indexer?.indexingStatus?.inProgress) {
            return {
              results: [],
              message: `Indexing in progress (${this.indexer.indexingStatus.percentage}% complete). Search available but results may be incomplete. Please wait for indexing to finish for full coverage.`
            };
          }
          return {
            results: [],
            message: "No code has been indexed yet. Please wait for initial indexing to complete."
          };
        }
      }

      return { results, message: null, indexingWarning };
    }

    // Legacy fallback: in-memory vector scoring.
    const vectorStore = this.cache.getVectorStore();

    if (vectorStore.length === 0) {
      if (this.indexer?.indexingStatus?.inProgress) {
        return {
          results: [],
          message: `Indexing in progress (${this.indexer.indexingStatus.percentage}% complete). Search available but results may be incomplete. Please wait for indexing to finish for full coverage.`
        };
      }
      return {
        results: [],
        message: "No code has been indexed yet. Please wait for initial indexing to complete."
      };
    }

    // Exhaustive scoring: cosine similarity plus the same lexical boosts
    // as the ANN path (NOTE: this path does not filter empty split tokens
    // or guard the division — query is assumed non-empty here).
    const scoredChunks = vectorStore.map((chunk) => {
      let score = cosineSimilarity(queryVector, chunk.vector) * this.config.semanticWeight;

      const lowerQuery = query.toLowerCase();
      const lowerContent = chunk.content.toLowerCase();

      if (lowerContent.includes(lowerQuery)) {
        score += this.config.exactMatchBoost;
      } else {
        const queryWords = lowerQuery.split(/\s+/);
        const matchedWords = queryWords.filter((word) =>
          word.length > 2 && lowerContent.includes(word)
        ).length;
        score += (matchedWords / queryWords.length) * 0.3;
      }

      return { ...chunk, score };
    });

    const results = scoredChunks
      .sort((a, b) => b.score - a.score)
      .slice(0, maxResults);

    return { results, message: null, indexingWarning };
  }

  /**
   * Render search results as markdown: one section per result with relative
   * file path, line range, and a fenced code block whose language tag is the
   * file extension.
   *
   * @param {Array} results - Scored chunks from search().
   * @returns {string} Markdown text, or a fixed notice when empty.
   */
  formatResults(results) {
    if (results.length === 0) {
      return "No matching code found for your query.";
    }

    return results.map((r, idx) => {
      // Paths are shown relative to the configured search root.
      const relPath = path.relative(this.config.searchDirectory, r.file);
      return `## Result ${idx + 1} (Relevance: ${(r.score * 100).toFixed(1)}%)\n` +
        `**File:** \`${relPath}\`\n` +
        `**Lines:** ${r.startLine}-${r.endLine}\n\n` +
        "```" + path.extname(r.file).slice(1) + "\n" +
        r.content + "\n" +
        "```\n";
    }).join("\n");
  }
}
|
|
135
|
+
|
|
136
|
+
// MCP Tool definition for this feature
|
|
137
|
+
// MCP Tool definition for this feature
/**
 * Build the MCP tool definition for the hybrid semantic search tool.
 *
 * @param {object} config - Server configuration; config.maxResults seeds the
 *   default for the optional maxResults argument.
 * @returns {object} Tool definition (name, description, inputSchema, annotations).
 */
export function getToolDefinition(config) {
  const properties = {
    query: {
      type: "string",
      description: "Search query - can be natural language (e.g., 'where do we handle user login') or specific terms"
    },
    maxResults: {
      type: "number",
      description: "Maximum number of results to return (default: from config)",
      default: config.maxResults
    }
  };

  // Read-only, idempotent, closed-world: searching never mutates the index.
  const annotations = {
    title: "Semantic Code Search",
    readOnlyHint: true,
    destructiveHint: false,
    idempotentHint: true,
    openWorldHint: false
  };

  return {
    name: "a_semantic_search",
    description: "Performs intelligent hybrid code search combining semantic understanding with exact text matching. Ideal for finding code by meaning (e.g., 'authentication logic', 'database queries') even with typos or variations. Returns the most relevant code snippets with file locations and line numbers.",
    inputSchema: {
      type: "object",
      properties,
      required: ["query"]
    },
    annotations
  };
}
|
|
165
|
+
|
|
166
|
+
// Tool handler
|
|
167
|
+
// Tool handler
/**
 * MCP handler for the a_semantic_search tool.
 *
 * Runs the search, and either returns the short status message (nothing
 * indexed yet) or the formatted results, prefixed with the partial-index
 * warning when indexing is still running.
 *
 * @param {object} request - MCP request carrying { query, maxResults? }.
 * @param {HybridSearch} hybridSearch - Search instance to delegate to.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>}
 */
export async function handleToolCall(request, hybridSearch) {
  const { query, maxResults: requestedMax } = request.params.arguments;
  const maxResults = requestedMax || hybridSearch.config.maxResults;

  const { results, message, indexingWarning } = await hybridSearch.search(query, maxResults);

  // A status message means there was nothing to format.
  if (message) {
    return { content: [{ type: "text", text: message }] };
  }

  // Prepend indexing warning if present
  const body = hybridSearch.formatResults(results);
  const text = indexingWarning ? indexingWarning + body : body;

  return { content: [{ type: "text", text }] };
}
|