smart-coding-mcp 1.4.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +131 -31
- package/config.json +7 -2
- package/features/get-status.js +163 -0
- package/features/hybrid-search.js +23 -4
- package/features/index-codebase.js +145 -60
- package/features/set-workspace.js +155 -0
- package/index.js +152 -64
- package/lib/ast-chunker.js +273 -0
- package/lib/config.js +91 -2
- package/lib/embedding-worker.js +29 -2
- package/lib/mrl-embedder.js +133 -0
- package/lib/resource-throttle.js +85 -0
- package/lib/sqlite-cache.js +408 -0
- package/lib/tokenizer.js +4 -0
- package/package.json +6 -3
- package/test/ast-chunker.test.js +105 -0
- package/test/device-detection.test.js +110 -0
- package/test/embedding-model.test.js +14 -11
- package/test/helpers.js +3 -3
- package/test/mrl-embedder.test.js +108 -0
package/README.md
CHANGED
@@ -5,16 +5,18 @@
 [](https://opensource.org/licenses/MIT)
 [](https://nodejs.org/)
 
-An extensible Model Context Protocol (MCP) server that provides intelligent semantic code search for AI assistants. Built with local AI models (
+An extensible Model Context Protocol (MCP) server that provides intelligent semantic code search for AI assistants. Built with local AI models using Matryoshka Representation Learning (MRL) for flexible embedding dimensions (64-768d), with runtime workspace switching and comprehensive status reporting.
 
 ### Available Tools
 
-| Tool | Description | Example
-| ---------------------- | ------------------------------------------------- |
-| `semantic_search` | Find code by meaning, not just keywords | `"Where do we validate user input?"`
-| `index_codebase` | Manually trigger reindexing | Use after major refactoring or branch switches
-| `clear_cache` | Reset the embeddings cache | Useful when cache becomes corrupted
-| `d_check_last_version` | Get latest version of any package (20 ecosystems) | `"express"`, `"npm:react"`, `"pip:requests"`
+| Tool                   | Description                                        | Example                                           |
+| ---------------------- | ------------------------------------------------- | ----------------------------------------------- |
+| `semantic_search`      | Find code by meaning, not just keywords            | `"Where do we validate user input?"`              |
+| `index_codebase`       | Manually trigger reindexing                        | Use after major refactoring or branch switches    |
+| `clear_cache`          | Reset the embeddings cache                         | Useful when cache becomes corrupted               |
+| `d_check_last_version` | Get latest version of any package (20 ecosystems)  | `"express"`, `"npm:react"`, `"pip:requests"`      |
+| `e_set_workspace`      | Change project path at runtime                     | Switch to different project without restart       |
+| `f_get_status`         | Get server info: version, index status, config     | Check indexing progress, model info, cache size   |
 
 ## What This Does
 
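Clients invoke these tools through standard MCP `tools/call` requests. A minimal sketch of such a request for `semantic_search`; the `query` and optional `maxResults` arguments come from the `handleToolCall` implementation later in this diff, while the `id` value is arbitrary:

```json
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "tools/call",
  "params": {
    "name": "semantic_search",
    "arguments": {
      "query": "Where do we validate user input?",
      "maxResults": 5
    }
  }
}
```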
@@ -44,6 +46,34 @@ This MCP server solves that by indexing your codebase with AI embeddings. Your A
 - Your code never leaves your system
 - No API calls to external services
 
+## Performance & Resource Management
+
+**Progressive Indexing**
+
+- Search works immediately, even while indexing continues (like video buffering)
+- Incremental saves every 5 batches - no data loss if interrupted
+- Real-time indexing status shown when searching during indexing
+
+**Resource Throttling**
+
+- CPU usage limited to 50% by default (configurable)
+- Your laptop stays responsive during indexing
+- Configurable delays between batches
+- Worker thread limits respect system resources
+
+**SQLite Cache**
+
+- 5-10x faster than JSON for large codebases
+- Write-Ahead Logging (WAL) for better concurrency
+- Binary blob storage for smaller cache size
+- Automatic migration from JSON
+
+**Optimized Defaults**
+
+- 128d embeddings by default (2x faster than 256d, minimal quality loss)
+- Smart batch sizing based on project size
+- Parallel processing with auto-tuned worker threads
+
 ## Installation
 
 Install globally via npm:
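The throttling behaviour described above lives in package/lib/resource-throttle.js (+85 lines, not shown in this diff). A minimal sketch of one way the `maxCpuPercent`/`batchDelay` semantics can be realized with a duty cycle between batches; `processBatch` and `indexWithThrottle` are illustrative names, not the package's actual API:

```js
// Sketch: duty-cycle throttling between embedding batches (assumed behaviour).
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function processBatch(batch) {
  // Stand-in for the CPU-heavy embedding work done per batch.
  return batch.length;
}

async function indexWithThrottle(batches, { maxCpuPercent = 50, batchDelay = 100 } = {}) {
  for (const batch of batches) {
    const started = Date.now();
    await processBatch(batch);
    const busyMs = Date.now() - started;
    // Idle long enough that busy / (busy + idle) <= maxCpuPercent / 100,
    // then add the fixed batchDelay on top. At 50% this yields a 50% duty cycle.
    const idleMs = busyMs * (100 / maxCpuPercent - 1);
    await sleep(idleMs + batchDelay);
  }
}

await indexWithThrottle([[1, 2], [3]], { maxCpuPercent: 50, batchDelay: 100 });
```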
@@ -130,19 +160,25 @@ For clients that support dynamic variables (VS Code, Cursor):
 
 Override configuration settings via environment variables in your MCP config:
 
-| Variable
-|
-| `SMART_CODING_VERBOSE`
-| `SMART_CODING_BATCH_SIZE`
-| `SMART_CODING_MAX_FILE_SIZE`
-| `SMART_CODING_CHUNK_SIZE`
-| `SMART_CODING_MAX_RESULTS`
-| `SMART_CODING_SMART_INDEXING`
-| `SMART_CODING_WATCH_FILES`
-| `SMART_CODING_SEMANTIC_WEIGHT`
-| `SMART_CODING_EXACT_MATCH_BOOST`
-| `SMART_CODING_EMBEDDING_MODEL`
-| `
+| Variable                           | Type    | Default                          | Description                                |
+| ---------------------------------- | ------- | -------------------------------- | ------------------------------------------ |
+| `SMART_CODING_VERBOSE`             | boolean | `false`                          | Enable detailed logging                     |
+| `SMART_CODING_BATCH_SIZE`          | number  | `100`                            | Files to process in parallel                |
+| `SMART_CODING_MAX_FILE_SIZE`       | number  | `1048576`                        | Max file size in bytes (1MB)                |
+| `SMART_CODING_CHUNK_SIZE`          | number  | `25`                             | Lines of code per chunk                     |
+| `SMART_CODING_MAX_RESULTS`         | number  | `5`                              | Max search results                          |
+| `SMART_CODING_SMART_INDEXING`      | boolean | `true`                           | Enable smart project detection              |
+| `SMART_CODING_WATCH_FILES`         | boolean | `false`                          | Enable file watching for auto-reindex       |
+| `SMART_CODING_SEMANTIC_WEIGHT`     | number  | `0.7`                            | Weight for semantic similarity (0-1)        |
+| `SMART_CODING_EXACT_MATCH_BOOST`   | number  | `1.5`                            | Boost for exact text matches                |
+| `SMART_CODING_EMBEDDING_MODEL`     | string  | `nomic-ai/nomic-embed-text-v1.5` | AI embedding model to use                   |
+| `SMART_CODING_EMBEDDING_DIMENSION` | number  | `128`                            | MRL dimension (64, 128, 256, 512, 768)      |
+| `SMART_CODING_DEVICE`              | string  | `cpu`                            | Inference device (`cpu`, `webgpu`, `auto`)  |
+| `SMART_CODING_CHUNKING_MODE`       | string  | `smart`                          | Code chunking (`smart`, `ast`, `line`)      |
+| `SMART_CODING_WORKER_THREADS`      | string  | `auto`                           | Worker threads (`auto` or 1-32)             |
+| `SMART_CODING_MAX_CPU_PERCENT`     | number  | `50`                             | Max CPU usage during indexing (10-100%)     |
+| `SMART_CODING_BATCH_DELAY`         | number  | `100`                            | Delay between batches in ms (0-5000)        |
+| `SMART_CODING_MAX_WORKERS`         | string  | `auto`                           | Override max worker threads limit           |
 
 **Example with environment variables:**
 
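These variables map onto the config.json keys shown later in this diff; the actual override logic lives in package/lib/config.js (+91 lines, not shown here). A minimal sketch of how such overrides can be layered on top of defaults, with the merge helpers being illustrative only:

```js
// Sketch: apply SMART_CODING_* environment overrides over defaults from the table above.
const defaults = {
  verbose: false,
  batchSize: 100,
  maxResults: 5,
  semanticWeight: 0.7,
  exactMatchBoost: 1.5,
  embeddingDimension: 128,
  device: "cpu",
};

function loadConfig(env = process.env) {
  // Env vars arrive as strings, so coerce to the documented types.
  const bool = (v, d) => (v === undefined ? d : v === "true");
  const num = (v, d) => (v === undefined ? d : Number(v));
  return {
    ...defaults,
    verbose: bool(env.SMART_CODING_VERBOSE, defaults.verbose),
    batchSize: num(env.SMART_CODING_BATCH_SIZE, defaults.batchSize),
    maxResults: num(env.SMART_CODING_MAX_RESULTS, defaults.maxResults),
    semanticWeight: num(env.SMART_CODING_SEMANTIC_WEIGHT, defaults.semanticWeight),
    exactMatchBoost: num(env.SMART_CODING_EXACT_MATCH_BOOST, defaults.exactMatchBoost),
    embeddingDimension: num(env.SMART_CODING_EMBEDDING_DIMENSION, defaults.embeddingDimension),
    device: env.SMART_CODING_DEVICE ?? defaults.device,
  };
}

console.log(loadConfig({ SMART_CODING_EMBEDDING_DIMENSION: "256" }).embeddingDimension); // 256
```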
@@ -166,16 +202,73 @@ Override configuration settings via environment variables in your MCP config:
 
 ## How It Works
 
-
+```mermaid
+flowchart TB
+    subgraph IDE["IDE / AI Assistant"]
+        Agent["AI Agent<br/>(Claude, GPT, Gemini)"]
+    end
+
+    subgraph MCP["Smart Coding MCP Server"]
+        direction TB
+        Protocol["Model Context Protocol<br/>JSON-RPC over stdio"]
+        Tools["MCP Tools<br/>semantic_search | index_codebase | set_workspace | get_status"]
+
+        subgraph Indexing["Indexing Pipeline"]
+            Discovery["File Discovery<br/>glob patterns + smart ignore"]
+            Chunking["Code Chunking<br/>Smart (regex) / AST (Tree-sitter)"]
+            Embedding["AI Embedding<br/>transformers.js + ONNX Runtime"]
+        end
+
+        subgraph AI["AI Model"]
+            Model["nomic-embed-text-v1.5<br/>Matryoshka Representation Learning"]
+            Dimensions["Flexible Dimensions<br/>64 | 128 | 256 | 512 | 768"]
+            Normalize["Layer Norm → Slice → L2 Normalize"]
+        end
+
+        subgraph Search["Search"]
+            QueryEmbed["Query → Vector"]
+            Cosine["Cosine Similarity"]
+            Hybrid["Hybrid Search<br/>Semantic + Exact Match Boost"]
+        end
+    end
+
+    subgraph Storage["Cache"]
+        Vectors["SQLite Database<br/>embeddings.db (WAL mode)"]
+        Hashes["File Hashes<br/>Incremental updates"]
+        Progressive["Progressive Indexing<br/>Search works during indexing"]
+    end
+
+    Agent <-->|"MCP Protocol"| Protocol
+    Protocol --> Tools
+
+    Tools --> Discovery
+    Discovery --> Chunking
+    Chunking --> Embedding
+    Embedding --> Model
+    Model --> Dimensions
+    Dimensions --> Normalize
+    Normalize --> Vectors
+
+    Tools --> QueryEmbed
+    QueryEmbed --> Model
+    Cosine --> Hybrid
+    Vectors --> Cosine
+    Hybrid --> Agent
+```
+
+### Tech Stack
 
-
-
-
-
+| Component     | Technology                            |
+| ------------- | ------------------------------------- |
+| **Protocol**  | Model Context Protocol (JSON-RPC)     |
+| **AI Model**  | nomic-embed-text-v1.5 (MRL)           |
+| **Inference** | transformers.js + ONNX Runtime        |
+| **Chunking**  | Smart regex / Tree-sitter AST         |
+| **Search**    | Cosine similarity + exact match boost |
 
-
+### Search Flow
 
-
+Query → Vector embedding → Cosine similarity → Ranked results
 
 ## Examples
 
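The `Query → Vector embedding → Cosine similarity → Ranked results` flow reduces to a few lines. A minimal sketch of cosine ranking over an in-memory vector store; the real server imports `cosineSimilarity` from package/lib/utils.js (seen in the hybrid-search diff below), and the `embedding`/`text` field names on store entries are assumptions here:

```js
// Sketch: rank stored chunks by cosine similarity to a query vector.
function cosineSimilarity(a, b) {
  let dot = 0, na = 0, nb = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    na += a[i] * a[i];
    nb += b[i] * b[i];
  }
  return dot / (Math.sqrt(na) * Math.sqrt(nb));
}

function rank(queryVector, vectorStore, maxResults = 5) {
  return vectorStore
    .map((entry) => ({ ...entry, score: cosineSimilarity(queryVector, entry.embedding) }))
    .sort((a, b) => b.score - a.score)   // same sort/slice shape as the real search()
    .slice(0, maxResults);
}

const store = [
  { file: "src/auth.js", text: "validateUser(...)", embedding: [0.1, 0.9] },
  { file: "src/db.js", text: "connect(...)", embedding: [0.8, 0.2] },
];
console.log(rank([0.2, 0.8], store, 1)[0].file); // "src/auth.js"
```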
@@ -214,11 +307,18 @@ Finds all try/catch blocks and error handling patterns.
 
 ## Technical Details
 
-**Embedding Model**:
+**Embedding Model**: nomic-embed-text-v1.5 via transformers.js v3
+
+- Matryoshka Representation Learning (MRL) for flexible dimensions
+- Configurable output: 64, 128, 256, 512, or 768 dimensions
+- Longer context (8192 tokens vs 256 for MiniLM)
+- Better code understanding through specialized training
+- WebGPU support for up to 100x faster inference (when available)
+
+**Legacy Model**: all-MiniLM-L6-v2 (fallback)
 
-- Fast inference (
--
-- Good accuracy for code search
+- Fast inference, small footprint (~100MB)
+- Fixed 384-dimensional output
 
 **Vector Similarity**: Cosine similarity
 
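The MRL trick behind the flexible dimensions (and the diagram's `Layer Norm → Slice → L2 Normalize` node) is: take the first k components of the full 768-d embedding, then re-normalize so cosine similarity remains meaningful. A minimal sketch that omits the layer-norm step for brevity; the package's actual implementation is in package/lib/mrl-embedder.js (+133 lines, not shown in this diff):

```js
// Sketch: Matryoshka (MRL) truncation — slice the first `dim` components
// of a full embedding, then L2-normalize the slice.
function truncateMrl(fullEmbedding, dim = 128) {
  const sliced = fullEmbedding.slice(0, dim);
  let norm = 0;
  for (const x of sliced) norm += x * x;
  norm = Math.sqrt(norm) || 1; // guard against the zero vector
  return sliced.map((x) => x / norm);
}

// Example: a 768-d vector reduced to the default 128 dimensions.
const full = Array.from({ length: 768 }, () => Math.random() - 0.5);
const compact = truncateMrl(full, 128);
console.log(compact.length); // 128
```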
package/config.json
CHANGED
@@ -60,8 +60,13 @@
   "cacheDirectory": "./.smart-coding-cache",
   "watchFiles": false,
   "verbose": false,
-  "embeddingModel": "
+  "embeddingModel": "nomic-ai/nomic-embed-text-v1.5",
+  "embeddingDimension": 128,
+  "device": "auto",
+  "chunkingMode": "smart",
   "semanticWeight": 0.7,
   "exactMatchBoost": 1.5,
-  "workerThreads": "auto"
+  "workerThreads": "auto",
+  "maxCpuPercent": 50,
+  "batchDelay": 100
 }
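`cacheDirectory` is where the SQLite store from package/lib/sqlite-cache.js (+408 lines, not shown in this diff) lives as embeddings.db, per the path probed in get-status.js below. A minimal sketch of a WAL-mode blob store, assuming the better-sqlite3 driver; the package's actual driver and schema are not shown, so the table layout here is illustrative:

```js
// Sketch: WAL-mode SQLite store keeping embeddings as binary blobs.
import Database from "better-sqlite3"; // assumed driver, not confirmed by this diff

const db = new Database("./.smart-coding-cache/embeddings.db");
db.pragma("journal_mode = WAL"); // Write-Ahead Logging for better concurrency

db.exec(`CREATE TABLE IF NOT EXISTS embeddings (
  file TEXT NOT NULL,
  chunk INTEGER NOT NULL,
  vector BLOB NOT NULL,
  PRIMARY KEY (file, chunk)
)`);

// Float32Array → Buffer stores ~4 bytes per dimension instead of JSON text.
function putEmbedding(file, chunk, floats) {
  const blob = Buffer.from(new Float32Array(floats).buffer);
  db.prepare("INSERT OR REPLACE INTO embeddings VALUES (?, ?, ?)").run(file, chunk, blob);
}

function getEmbedding(file, chunk) {
  const row = db.prepare("SELECT vector FROM embeddings WHERE file = ? AND chunk = ?").get(file, chunk);
  if (!row) return null;
  return new Float32Array(row.vector.buffer, row.vector.byteOffset, row.vector.byteLength / 4);
}

putEmbedding("src/auth.js", 0, [0.1, 0.9]);
console.log(getEmbedding("src/auth.js", 0)); // Float32Array [ 0.1, 0.9 ]
```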
package/features/get-status.js
ADDED
@@ -0,0 +1,163 @@
+/**
+ * Get Status Feature
+ *
+ * MCP tool to return comprehensive status information about the server.
+ * Useful for agents to understand current state and configuration.
+ */
+
+import fs from 'fs/promises';
+import path from 'path';
+import { createRequire } from 'module';
+
+const require = createRequire(import.meta.url);
+const packageJson = require('../package.json');
+
+/**
+ * Get tool definition for MCP registration
+ */
+export function getToolDefinition(config) {
+  return {
+    name: "f_get_status",
+    description: "Get comprehensive status information about the Smart Coding MCP server. Returns version, workspace path, model configuration, indexing status, and cache information. Useful for understanding the current state of the semantic search system.",
+    inputSchema: {
+      type: "object",
+      properties: {},
+      required: []
+    }
+  };
+}
+
+/**
+ * Status Reporter class
+ */
+export class StatusReporter {
+  constructor(config, cache, indexer, embedder) {
+    this.config = config;
+    this.cache = cache;
+    this.indexer = indexer;
+    this.embedder = embedder;
+    this.startTime = Date.now();
+  }
+
+  /**
+   * Get comprehensive status
+   */
+  async getStatus() {
+    const vectorStore = this.cache?.getVectorStore() || [];
+
+    // Get unique files from vector store
+    const uniqueFiles = new Set(vectorStore.map(v => v.file));
+
+    // Get cache size (check for SQLite database)
+    let cacheSizeBytes = 0;
+    let cacheType = 'none';
+    try {
+      // Check for SQLite cache first
+      const sqlitePath = path.join(this.config.cacheDirectory, 'embeddings.db');
+      const stats = await fs.stat(sqlitePath);
+      cacheSizeBytes = stats.size;
+      cacheType = 'sqlite';
+    } catch {
+      // Try old JSON cache as fallback
+      try {
+        const jsonPath = path.join(this.config.cacheDirectory, 'embeddings.json');
+        const stats = await fs.stat(jsonPath);
+        cacheSizeBytes = stats.size;
+        cacheType = 'json';
+      } catch {
+        // No cache file exists
+        cacheType = 'none';
+      }
+    }
+
+    // Determine index status and progressive indexing info
+    let indexStatus = 'empty';
+    let progressiveIndexing = null;
+
+    if (this.indexer?.isIndexing) {
+      indexStatus = 'indexing';
+      // Include progressive indexing status
+      if (this.indexer.indexingStatus) {
+        progressiveIndexing = {
+          inProgress: this.indexer.indexingStatus.inProgress,
+          totalFiles: this.indexer.indexingStatus.totalFiles,
+          processedFiles: this.indexer.indexingStatus.processedFiles,
+          percentage: this.indexer.indexingStatus.percentage
+        };
+      }
+    } else if (vectorStore.length > 0) {
+      indexStatus = 'ready';
+    }
+
+    return {
+      version: packageJson.version,
+      uptime: Math.floor((Date.now() - this.startTime) / 1000),
+
+      workspace: {
+        path: this.config.searchDirectory,
+        cacheDirectory: this.config.cacheDirectory
+      },
+
+      model: {
+        name: this.embedder?.modelName || this.config.embeddingModel,
+        dimension: this.embedder?.dimension || this.config.embeddingDimension,
+        device: this.embedder?.device || this.config.device
+      },
+
+      index: {
+        status: indexStatus,
+        filesIndexed: uniqueFiles.size,
+        chunksCount: vectorStore.length,
+        chunkingMode: this.config.chunkingMode,
+        ...(progressiveIndexing && { progressiveIndexing })
+      },
+
+      cache: {
+        enabled: this.config.enableCache,
+        type: cacheType,
+        path: this.config.cacheDirectory,
+        sizeBytes: cacheSizeBytes,
+        sizeFormatted: formatBytes(cacheSizeBytes)
+      },
+
+      config: {
+        maxResults: this.config.maxResults,
+        chunkSize: this.config.chunkSize,
+        semanticWeight: this.config.semanticWeight,
+        exactMatchBoost: this.config.exactMatchBoost,
+        workerThreads: this.config.workerThreads
+      },
+
+      resourceThrottling: {
+        maxCpuPercent: this.config.maxCpuPercent,
+        batchDelay: this.config.batchDelay,
+        maxWorkers: this.config.maxWorkers
+      }
+    };
+  }
+}
+
+/**
+ * Format bytes to human readable
+ */
+function formatBytes(bytes) {
+  if (bytes === 0) return '0 B';
+  const k = 1024;
+  const sizes = ['B', 'KB', 'MB', 'GB'];
+  const i = Math.floor(Math.log(bytes) / Math.log(k));
+  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
+}
+
+/**
+ * Handle MCP tool call
+ */
+export async function handleToolCall(request, instance) {
+  const status = await instance.getStatus();
+
+  return {
+    content: [{
+      type: "text",
+      text: JSON.stringify(status, null, 2)
+    }]
+  };
+}
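Given the object built in `getStatus()` above, the tool's text content is a JSON document along these lines (the counts and paths here are illustrative; the defaults match config.json):

```json
{
  "version": "2.1.0",
  "uptime": 42,
  "workspace": {
    "path": "/path/to/project",
    "cacheDirectory": "./.smart-coding-cache"
  },
  "model": {
    "name": "nomic-ai/nomic-embed-text-v1.5",
    "dimension": 128,
    "device": "auto"
  },
  "index": {
    "status": "ready",
    "filesIndexed": 312,
    "chunksCount": 4871,
    "chunkingMode": "smart"
  },
  "cache": {
    "enabled": true,
    "type": "sqlite",
    "path": "./.smart-coding-cache",
    "sizeBytes": 5242880,
    "sizeFormatted": "5 MB"
  },
  "config": {
    "maxResults": 5,
    "chunkSize": 25,
    "semanticWeight": 0.7,
    "exactMatchBoost": 1.5,
    "workerThreads": "auto"
  },
  "resourceThrottling": {
    "maxCpuPercent": 50,
    "batchDelay": 100,
    "maxWorkers": "auto"
  }
}
```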
package/features/hybrid-search.js
CHANGED
@@ -2,21 +2,35 @@ import path from "path";
 import { cosineSimilarity } from "../lib/utils.js";
 
 export class HybridSearch {
-  constructor(embedder, cache, config) {
+  constructor(embedder, cache, config, indexer = null) {
     this.embedder = embedder;
     this.cache = cache;
     this.config = config;
+    this.indexer = indexer; // Reference to indexer for status checking
   }
 
   async search(query, maxResults) {
     const vectorStore = this.cache.getVectorStore();
 
     if (vectorStore.length === 0) {
+      // Check if indexing is in progress
+      if (this.indexer?.indexingStatus?.inProgress) {
+        return {
+          results: [],
+          message: `Indexing in progress (${this.indexer.indexingStatus.percentage}% complete). Search available but results may be incomplete. Please wait for indexing to finish for full coverage.`
+        };
+      }
       return {
         results: [],
         message: "No code has been indexed yet. Please wait for initial indexing to complete."
       };
     }
+
+    // Show warning if indexing is still in progress but we have some results
+    let indexingWarning = null;
+    if (this.indexer?.indexingStatus?.inProgress) {
+      indexingWarning = `⚠️ Indexing in progress (${this.indexer.indexingStatus.percentage}% complete). Results shown are from partially indexed codebase.\n\n`;
+    }
 
     // Generate query embedding
     const queryEmbed = await this.embedder(query, { pooling: "mean", normalize: true });
@@ -50,7 +64,7 @@ export class HybridSearch {
       .sort((a, b) => b.score - a.score)
       .slice(0, maxResults);
 
-    return { results, message: null };
+    return { results, message: null, indexingWarning };
   }
 
   formatResults(results) {
@@ -105,7 +119,7 @@ export async function handleToolCall(request, hybridSearch) {
   const query = request.params.arguments.query;
   const maxResults = request.params.arguments.maxResults || hybridSearch.config.maxResults;
 
-  const { results, message } = await hybridSearch.search(query, maxResults);
+  const { results, message, indexingWarning } = await hybridSearch.search(query, maxResults);
 
   if (message) {
     return {
@@ -113,7 +127,12 @@ export async function handleToolCall(request, hybridSearch) {
     };
   }
 
-
+  let formattedText = hybridSearch.formatResults(results);
+
+  // Prepend indexing warning if present
+  if (indexingWarning) {
+    formattedText = indexingWarning + formattedText;
+  }
 
   return {
     content: [{ type: "text", text: formattedText }]