semantic-code-mcp 2.0.1 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +518 -111
- package/features/index-codebase.js +18 -24
- package/lib/config.js +31 -28
- package/package.json +1 -1
package/README.md
CHANGED

@@ -4,11 +4,44 @@
  [](https://www.npmjs.com/package/semantic-code-mcp)
  [](https://opensource.org/licenses/MIT)
  [](https://nodejs.org/)
+ []()
+ []()
+ [](https://milvus.io)

- AI-powered semantic code search for coding agents. An MCP server
+ AI-powered semantic code search for coding agents. An MCP server with **non-blocking background indexing**, **multi-provider embeddings** (Gemini, Vertex AI, OpenAI, local), and **Milvus / Zilliz Cloud** vector storage — designed for **multi-agent concurrent access**.
+
+ Run Claude Code, Codex, Copilot, and Antigravity against the same code index simultaneously. Indexing runs in the background; search works immediately while indexing continues.

  > Ask *"where do we handle authentication?"* and find code that uses `login`, `session`, `verifyCredentials` — even when no file contains the word "authentication."

+ ## Quick Start
+
+ ```bash
+ npx -y semantic-code-mcp@latest --workspace /path/to/your/project
+ ```
+
+ MCP config:
+
+ ```json
+ {
+   "mcpServers": {
+     "semantic-code-mcp": {
+       "command": "npx",
+       "args": ["-y", "semantic-code-mcp@latest", "--workspace", "/path/to/project"]
+     }
+   }
+ }
+ ```
+
+ ```mermaid
+ graph LR
+   A["Claude Code"] --> M["Milvus Standalone<br/>(Docker)"]
+   B["Codex"] --> M
+   C["Copilot"] --> M
+   D["Antigravity"] --> M
+   M --> V["Shared Vector Index"]
+ ```
+
  ## Why

  Traditional `grep` and keyword search break down when you don't know the exact terms used in the codebase. Semantic search bridges that gap:
@@ -20,40 +53,135 @@ Traditional `grep` and keyword search break down when you don't know the exact t

  Based on [Cursor's research](https://cursor.com/blog/semsearch) showing semantic search improves AI agent performance by 12.5%.

- ##
+ ## Setup

-
-
+ <details>
+ <summary><strong>Claude Code / Claude Desktop</strong></summary>
+
+ ```json
+ {
+   "mcpServers": {
+     "semantic-code-mcp": {
+       "command": "npx",
+       "args": ["-y", "semantic-code-mcp@latest", "--workspace", "/path/to/project"]
+     }
+   }
+ }
  ```

-
+ Claude Code: `~/.claude/settings.local.json` → `mcpServers`
+ Claude Desktop: `~/Library/Application Support/Claude/claude_desktop_config.json`
+
+ </details>
+
+ <details>
+ <summary><strong>VS Code / Cursor / Windsurf (Copilot)</strong></summary>
+
+ Create `.vscode/mcp.json` in your project root:
+
+ ```json
+ {
+   "servers": {
+     "semantic-code-mcp": {
+       "command": "npx",
+       "args": ["-y", "semantic-code-mcp@latest", "--workspace", "${workspaceFolder}"]
+     }
+   }
+ }
+ ```
+
+ > VS Code and Cursor support `${workspaceFolder}`. Windsurf requires absolute paths.
+
+ </details>
+
+ <details>
+ <summary><strong>Codex (OpenAI)</strong></summary>
+
+ `~/.codex/config.toml`:
+
+ ```toml
+ [mcp_servers.semantic-code-mcp]
+ command = "npx"
+ args = ["-y", "semantic-code-mcp@latest", "--workspace", "/path/to/project"]
+ ```
+
+ </details>
+
+ <details>
+ <summary><strong>Antigravity (Google)</strong></summary>
+
+ `~/.gemini/antigravity/mcp_config.json`:

  ```json
  {
    "mcpServers": {
      "semantic-code-mcp": {
        "command": "npx",
-       "args": ["-y", "semantic-code-mcp@latest", "--workspace", "/path/to/
+       "args": ["-y", "semantic-code-mcp@latest", "--workspace", "/path/to/project"]
      }
    }
  }
  ```

-
+ </details>

-
+ <details>
+ <summary><strong>🐚 Shell Script (Monorepo / Large Codebases)</strong></summary>
+
+ For monorepos or workspaces with 1000+ files, a shell wrapper script gives you:
+ - **Real-time logs** — see indexing progress, error details, 429 retry status
+ - **No MCP timeout** — long-running index operations won't be killed
+ - **Environment isolation** — pin provider credentials per project
+
+ Create `start-semantic-code-mcp.sh`:
+
+ ```bash
+ #!/bin/bash
+ export SMART_CODING_WORKSPACE="/path/to/monorepo"
+ export SMART_CODING_EMBEDDING_PROVIDER="vertex"
+ export SMART_CODING_VECTOR_STORE_PROVIDER="milvus"
+ export SMART_CODING_MILVUS_ADDRESS="http://localhost:19530"
+ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account.json"
+ export SMART_CODING_VERTEX_PROJECT="your-gcp-project-id"
+
+ cd /path/to/semantic-code-mcp
+ exec node index.js
+ ```
+
+ ```bash
+ chmod +x start-semantic-code-mcp.sh
+ ```
+
+ Then reference in your MCP config:
+
+ ```json
+ {
+   "semantic-code-mcp": {
+     "command": "/absolute/path/to/start-semantic-code-mcp.sh",
+     "args": []
+   }
+ }
+ ```
+
+ > **When to use shell scripts over npx:**
+ > - Monorepo with multiple sub-projects sharing one index
+ > - 1000+ files requiring long initial indexing
+ > - Debugging 429 rate-limit or gRPC errors (need real-time stderr)
+ > - Pinning specific provider credentials per workspace
+
+ </details>

  ## Features

  ### Multi-Provider Embeddings

- | Provider
-
- | **Local** (default)
- | **Gemini**
- | **OpenAI**
- | **OpenAI-compatible** | Any compatible endpoint | Varies
- | **Vertex AI**
+ | Provider | Model | Privacy | Speed |
+ | --- | --- | --- | --- |
+ | **Local** (default) | nomic-embed-text-v1.5 | 100% local | ~50ms/chunk |
+ | **Gemini** | gemini-embedding-001 | API call | Fast, batched |
+ | **OpenAI** | text-embedding-3-small | API call | Fast |
+ | **OpenAI-compatible** | Any compatible endpoint | Varies | Varies |
+ | **Vertex AI** | Google Cloud models | GCP | Fast |

  ### Flexible Vector Storage

@@ -72,28 +200,180 @@ Three modes to match your codebase:

  CPU capped at 50% during indexing. Your machine stays responsive.

+ ### Multi-Agent Concurrent Access
+
+ Multiple AI agents (Claude Code, Codex, Copilot, Antigravity) can query the same vector index simultaneously via **Milvus Standalone** (Docker). No file locking, no index corruption.
+
+ <details>
+ <summary><strong>Docker Setup (Milvus Standalone)</strong></summary>
+
+ Milvus Standalone runs **3 containers** working together:
+
+ ```mermaid
+ graph LR
+   A["semantic-code-mcp"] -->|"gRPC :19530"| M["milvus standalone"]
+   M -->|"object storage"| S["minio :9000"]
+   M -->|"metadata"| E["etcd :2379"]
+ ```
+
+ | Container | Role | Image |
+ | --- | --- | --- |
+ | **standalone** | Vector engine (gRPC :19530) | `milvusdb/milvus` |
+ | **etcd** | Metadata store (cluster coordination) | `coreos/etcd` |
+ | **minio** | Object storage (index files, logs) | `minio/minio` |
+
+ #### Performance Guidelines
+
+ | Resource | Minimum | Recommended |
+ | --- | --- | --- |
+ | RAM | **4 GB** | 8 GB+ |
+ | Disk | 10 GB | 50 GB+ (scales with codebase) |
+ | CPU | 2 cores | 4+ cores |
+ | Docker | v20+ | Latest |
+
+ > ⚠️ **RAM is the critical bottleneck.** Milvus Standalone idles at ~2.5 GB RAM across the 3 containers. Machines with < 4 GB will experience swap thrashing and gRPC timeouts. Check with `docker stats`.
+
+ #### 1. Install with Docker Compose
+
+ ```yaml
+ # docker-compose.yml
+ version: '3.5'
+ services:
+   etcd:
+     image: coreos/etcd:v3.5.18
+     environment:
+       ETCD_AUTO_COMPACTION_MODE: revision
+       ETCD_AUTO_COMPACTION_RETENTION: "1000"
+       ETCD_QUOTA_BACKEND_BYTES: "4294967296"
+     command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
+     volumes:
+       - etcd-data:/etcd
+
+   minio:
+     image: minio/minio:RELEASE.2023-03-20T20-16-18Z
+     environment:
+       MINIO_ACCESS_KEY: minioadmin
+       MINIO_SECRET_KEY: minioadmin
+     command: minio server /minio_data --console-address ":9001"
+     ports:
+       - "9000:9000"
+       - "9001:9001"
+     volumes:
+       - minio-data:/minio_data
+
+   standalone:
+     image: milvusdb/milvus:v2.5.1
+     command: ["milvus", "run", "standalone"]
+     environment:
+       ETCD_ENDPOINTS: etcd:2379
+       MINIO_ADDRESS: minio:9000
+     ports:
+       - "19530:19530"
+       - "9091:9091"
+     volumes:
+       - milvus-data:/var/lib/milvus
+     depends_on:
+       - etcd
+       - minio
+
+ volumes:
+   etcd-data:
+   minio-data:
+   milvus-data:
+ ```
+
+ #### 2. Start & Verify
+
+ ```bash
+ # Start all 3 containers
+ docker compose up -d
+
+ # Verify all 3 containers are running
+ docker compose ps
+ # NAME         STATUS
+ # etcd         running
+ # minio        running
+ # standalone   running (healthy)
+
+ # Check RAM usage (expect ~2.5 GB total idle)
+ docker stats --no-stream
+ ```
+
+ #### 3. Configure MCP to use Milvus
+
+ ```json
+ {
+   "env": {
+     "SMART_CODING_VECTOR_STORE_PROVIDER": "milvus",
+     "SMART_CODING_MILVUS_ADDRESS": "http://localhost:19530"
+   }
+ }
+ ```
+
+ #### 4. Verify connection
+
+ ```bash
+ # Should return collection list (may be empty initially)
+ curl http://localhost:19530/v1/vector/collections
+ ```
+
+ #### 5. Lifecycle Management
+
+ ```bash
+ # Stop all containers (preserves data)
+ docker compose stop
+
+ # Restart after reboot
+ docker compose start
+
+ # Full reset (removes all indexed vectors)
+ docker compose down -v
+
+ # View logs for debugging
+ docker compose logs -f standalone
+ ```
+
+ #### 6. Monitoring
+
+ - **MinIO Console**: http://localhost:9001 (minioadmin / minioadmin)
+ - **Milvus Health**: http://localhost:9091/healthz
+ - **Container RAM**: `docker stats --no-stream`
+
+ #### Troubleshooting
+
+ | Symptom | Cause | Fix |
+ | --- | --- | --- |
+ | gRPC timeout / connection refused | Milvus not fully started | Wait 30–60s after `docker compose up -d`, check `docker compose logs standalone` |
+ | Swap thrashing, slow queries | < 4 GB RAM | Upgrade RAM or use SQLite for single-agent setups |
+ | `etcd: mvcc: database space exceeded` | etcd compaction backlog | `docker compose restart etcd` |
+ | Milvus OOM killed | RAM pressure from other apps | Close heavy apps or increase Docker memory limit |
+
+ > **SQLite vs Milvus:** SQLite is single-process — only one agent can write at a time. Milvus handles concurrent reads/writes from multiple agents without conflicts. Use Milvus when running 2+ agents on the same codebase.
+
+ </details>
+
  ## Tools

- | Tool
-
- | `a_semantic_search`
- | `b_index_codebase`
- | `c_clear_cache`
- | `d_check_last_version` | Look up latest package version from 20+ registries.
- | `e_set_workspace`
- | `f_get_status`
+ | Tool | Description |
+ | --- | --- |
+ | `a_semantic_search` | Find code by meaning. Hybrid semantic + exact match scoring. |
+ | `b_index_codebase` | Trigger manual reindex (normally automatic & incremental). |
+ | `c_clear_cache` | Reset embeddings cache entirely. |
+ | `d_check_last_version` | Look up latest package version from 20+ registries. |
+ | `e_set_workspace` | Switch project at runtime without restart. |
+ | `f_get_status` | Server health: version, index progress, config. |

  ## IDE Setup

- | IDE / App
-
- | **VS Code**
- | **Cursor**
- | **Windsurf**
- | **Claude Desktop** | [Setup](docs/ide-setup/claude-desktop.md) | ❌
- | **OpenCode**
- | **Raycast**
- | **Antigravity**
+ | IDE / App | Guide | `${workspaceFolder}` |
+ | --- | --- | --- |
+ | **VS Code** | [Setup](docs/ide-setup/vscode.md) | ✅ |
+ | **Cursor** | [Setup](docs/ide-setup/cursor.md) | ✅ |
+ | **Windsurf** | [Setup](docs/ide-setup/windsurf.md) | ❌ |
+ | **Claude Desktop** | [Setup](docs/ide-setup/claude-desktop.md) | ❌ |
+ | **OpenCode** | [Setup](docs/ide-setup/opencode.md) | ❌ |
+ | **Raycast** | [Setup](docs/ide-setup/raycast.md) | ❌ |
+ | **Antigravity** | [Setup](docs/ide-setup/antigravity.md) | ❌ |

  ### Multi-Project

@@ -118,67 +398,92 @@ All settings via environment variables. Prefix: `SMART_CODING_`.

  ### Core

- | Variable
-
- | `SMART_CODING_VERBOSE`
- | `SMART_CODING_MAX_RESULTS`
- | `SMART_CODING_BATCH_SIZE`
- | `SMART_CODING_MAX_FILE_SIZE`
- | `SMART_CODING_CHUNK_SIZE`
- | `SMART_CODING_CHUNKING_MODE`
- | `SMART_CODING_WATCH_FILES`
- | `SMART_CODING_AUTO_INDEX_DELAY` | `
- | `SMART_CODING_MAX_CPU_PERCENT`
+ | Variable | Default | Description |
+ | --- | --- | --- |
+ | `SMART_CODING_VERBOSE` | `false` | Detailed logging |
+ | `SMART_CODING_MAX_RESULTS` | `5` | Search results returned |
+ | `SMART_CODING_BATCH_SIZE` | `100` | Files per parallel batch |
+ | `SMART_CODING_MAX_FILE_SIZE` | `1048576` | Max file size (1MB) |
+ | `SMART_CODING_CHUNK_SIZE` | `25` | Lines per chunk |
+ | `SMART_CODING_CHUNKING_MODE` | `smart` | `smart` / `ast` / `line` |
+ | `SMART_CODING_WATCH_FILES` | `false` | Auto-reindex on changes |
+ | `SMART_CODING_AUTO_INDEX_DELAY` | `false` | Background index on startup. `false`=off (multi-agent safe), `true`=5s, or ms value. Single-agent only. |
+ | `SMART_CODING_MAX_CPU_PERCENT` | `50` | CPU cap during indexing |

  ### Embedding Provider

- | Variable
-
- | `SMART_CODING_EMBEDDING_PROVIDER`
- | `SMART_CODING_EMBEDDING_MODEL`
- | `SMART_CODING_EMBEDDING_DIMENSION` | `128`
- | `SMART_CODING_DEVICE`
+ | Variable | Default | Description |
+ | --- | --- | --- |
+ | `SMART_CODING_EMBEDDING_PROVIDER` | `local` | `local` / `gemini` / `openai` / `openai-compatible` / `vertex` |
+ | `SMART_CODING_EMBEDDING_MODEL` | `nomic-ai/nomic-embed-text-v1.5` | Model name |
+ | `SMART_CODING_EMBEDDING_DIMENSION` | `128` | MRL dimension (64–768) |
+ | `SMART_CODING_DEVICE` | `auto` | `cpu` / `webgpu` / `auto` |

  ### Gemini

- | Variable
-
- | `SMART_CODING_GEMINI_API_KEY`
- | `SMART_CODING_GEMINI_MODEL`
- | `SMART_CODING_GEMINI_DIMENSIONS`
- | `SMART_CODING_GEMINI_BATCH_SIZE`
- | `SMART_CODING_GEMINI_MAX_RETRIES` | `3`
+ | Variable | Default | Description |
+ | --- | --- | --- |
+ | `SMART_CODING_GEMINI_API_KEY` | — | API key |
+ | `SMART_CODING_GEMINI_MODEL` | `gemini-embedding-001` | Model |
+ | `SMART_CODING_GEMINI_DIMENSIONS` | `768` | Output dimensions |
+ | `SMART_CODING_GEMINI_BATCH_SIZE` | `24` | Micro-batch size |
+ | `SMART_CODING_GEMINI_MAX_RETRIES` | `3` | Retry count |

  ### OpenAI / Compatible

- | Variable
-
- | `SMART_CODING_EMBEDDING_API_KEY`
- | `SMART_CODING_EMBEDDING_BASE_URL` | —
+ | Variable | Default | Description |
+ | --- | --- | --- |
+ | `SMART_CODING_EMBEDDING_API_KEY` | — | API key |
+ | `SMART_CODING_EMBEDDING_BASE_URL` | — | Base URL (compatible only) |

  ### Vertex AI

- | Variable
-
- | `SMART_CODING_VERTEX_PROJECT`
- | `SMART_CODING_VERTEX_LOCATION` | `us-central1` | Region
+ | Variable | Default | Description |
+ | --- | --- | --- |
+ | `SMART_CODING_VERTEX_PROJECT` | — | GCP project ID |
+ | `SMART_CODING_VERTEX_LOCATION` | `us-central1` | Region |

  ### Vector Store

- | Variable
-
- | `SMART_CODING_VECTOR_STORE_PROVIDER` | `sqlite`
- | `SMART_CODING_MILVUS_ADDRESS`
- | `SMART_CODING_MILVUS_TOKEN`
- | `SMART_CODING_MILVUS_DATABASE`
- | `SMART_CODING_MILVUS_COLLECTION`
+ | Variable | Default | Description |
+ | --- | --- | --- |
+ | `SMART_CODING_VECTOR_STORE_PROVIDER` | `sqlite` | `sqlite` / `milvus` |
+ | `SMART_CODING_MILVUS_ADDRESS` | — | Milvus endpoint or Zilliz Cloud URI |
+ | `SMART_CODING_MILVUS_TOKEN` | — | Auth token (required for Zilliz Cloud) |
+ | `SMART_CODING_MILVUS_DATABASE` | `default` | Database name |
+ | `SMART_CODING_MILVUS_COLLECTION` | `smart_coding_embeddings` | Collection |
+
+ ### Zilliz Cloud (Managed Milvus)
+
+ For teams or serverless deployments, use [Zilliz Cloud](https://zilliz.com) instead of self-hosted Docker:
+
+ ```json
+ {
+   "env": {
+     "SMART_CODING_VECTOR_STORE_PROVIDER": "milvus",
+     "SMART_CODING_MILVUS_ADDRESS": "https://in03-xxxx.api.gcp-us-west1.zillizcloud.com",
+     "SMART_CODING_MILVUS_TOKEN": "your-zilliz-api-key"
+   }
+ }
+ ```
+
+ | Feature | Milvus Standalone (Docker) | Zilliz Cloud |
+ | --- | --- | --- |
+ | Setup | Self-hosted, 3 containers | Managed SaaS |
+ | RAM | ~2.5 GB idle | None (serverless) |
+ | Multi-agent | ✅ via shared Docker | ✅ via shared endpoint |
+ | Scaling | Manual | Auto-scaling |
+ | Free tier | — | 2 collections, 1M vectors |
+ | Best for | Local dev, single machine | Team use, CI/CD, production |
+
+ > Get your Zilliz Cloud URI and API key from the [Zilliz Console](https://cloud.zilliz.com) → Cluster → Connect.

  ### Search Tuning

- | Variable
-
- | `SMART_CODING_SEMANTIC_WEIGHT`
- | `SMART_CODING_EXACT_MATCH_BOOST` | `1.5`
+ | Variable | Default | Description |
+ | --- | --- | --- |
+ | `SMART_CODING_SEMANTIC_WEIGHT` | `0.7` | Semantic vs exact weight |
+ | `SMART_CODING_EXACT_MATCH_BOOST` | `1.5` | Exact match multiplier |

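To make these two knobs concrete, here is a minimal JavaScript sketch of how a semantic weight and an exact-match boost can combine into a single score. The function name, the exact-match heuristic, and the formula are illustrative assumptions and are not taken from the package's hybrid-search implementation.

```javascript
// Illustrative only: assumed combination of SEMANTIC_WEIGHT and EXACT_MATCH_BOOST.
function hybridScore(chunkText, query, cosineSimilarity, config) {
  const semanticWeight = config.semanticWeight; // e.g. 0.7 (default)
  const keywordWeight = 1 - semanticWeight;     // remainder goes to exact matching

  // Naive exact-match signal: fraction of query terms found verbatim in the chunk.
  const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
  const text = chunkText.toLowerCase();
  const hits = terms.filter((t) => text.includes(t)).length;
  const exactScore = terms.length ? hits / terms.length : 0;

  let score = semanticWeight * cosineSimilarity + keywordWeight * exactScore;
  if (terms.length > 0 && hits === terms.length) {
    score *= config.exactMatchBoost;            // e.g. 1.5 (default) when every term matches
  }
  return score;
}
```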
  ### Example with Gemini + Milvus

@@ -201,48 +506,141 @@

  ## Architecture

- ```
-
-
-
-
-
-
-
-
-
-
-
- ├── features/
- │   ├── hybrid-search.js        # Semantic + exact match search
- │   ├── index-codebase.js       # File discovery & incremental indexing
- │   ├── clear-cache.js          # Cache reset
- │   ├── check-last-version.js   # Package version lookup
- │   ├── set-workspace.js        # Runtime workspace switching
- │   └── get-status.js           # Server status
- └── test/                       # Vitest test suite
+ ```mermaid
+ graph TD
+   A["MCP Server — index.js"] --> B["Features"]
+   B --> B1["hybrid-search"]
+   B --> B2["index-codebase"]
+   B --> B3["set-workspace / get-status / clear-cache"]
+
+   B2 --> C["Code Chunking — AST or Smart Regex"]
+   C --> D["Embedding — Local / Gemini / Vertex / OpenAI"]
+   D --> E["Vector Store — SQLite or Milvus"]
+
+   B1 --> D
+   B1 --> E
  ```

  ## How It Works

- ```
-
-
-
-
-
-
-
-
-
-
-
+ ```mermaid
+ flowchart LR
+   A["📁 Source Files"] -->|glob + .gitignore| B["✂️ Smart/AST<br/>Chunking"]
+   B -->|language-aware| C["🧠 AI Embedding<br/>(Local or API)"]
+   C -->|vectors| D["💾 SQLite / Milvus<br/>Storage"]
+   D -->|incremental hash| D
+
+   E["🔍 Search Query"] -->|embed| C
+   C -->|cosine similarity| F["📊 Hybrid Scoring<br/>semantic + exact match"]
+   F --> G["🎯 Top N Results<br/>with relevance scores"]
+
+   style A fill:#2d3748,color:#e2e8f0
+   style C fill:#553c9a,color:#e9d8fd
+   style D fill:#2a4365,color:#bee3f8
+   style G fill:#22543d,color:#c6f6d5
  ```

  **Progressive indexing** — search works immediately while indexing continues in the background. Only changed files are re-indexed on subsequent runs.

+ ## Incremental Indexing & Optimization
+
+ Semantic Code MCP uses a **hash-based incremental indexing** strategy to minimize redundant work:
+
+ ```mermaid
+ flowchart TD
+   A["File discovered"] --> B{"Hash changed?"}
+   B -->|No| C["Skip — use cached vectors"]
+   B -->|Yes| D["Re-chunk & re-embed"]
+   D --> E["Update vector store"]
+   F["Deleted file detected"] --> G["Prune stale vectors"]
+
+   style C fill:#22543d,color:#c6f6d5
+   style D fill:#744210,color:#fefcbf
+   style G fill:#742a2a,color:#fed7d7
+ ```
+
+ **How it works:**
+
+ 1. **File discovery** — glob patterns with `.gitignore`-aware filtering
+ 2. **Hash comparison** — each file's `mtime + size` is compared against the cached index
+ 3. **Delta processing** — only changed/new files are chunked and embedded
+ 4. **Stale pruning** — deleted files are removed from the vector store automatically
+ 5. **Progressive search** — queries work immediately, even mid-indexing
+
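The hash comparison in step 2 is cheap because it never reads file contents. Below is a minimal sketch of that check, assuming a simple `Map` cache keyed by file path; the names are illustrative, not the package's actual code.

```javascript
// Illustrative sketch of an mtime+size change check for incremental indexing.
import { stat } from "node:fs/promises";

async function needsReindex(filePath, hashCache) {
  const s = await stat(filePath);
  const hash = `${s.mtimeMs}:${s.size}`; // cheap fingerprint, no file read required
  if (hashCache.get(filePath) === hash) {
    return false;                        // unchanged: reuse cached vectors
  }
  hashCache.set(filePath, hash);         // new or changed: re-chunk and re-embed
  return true;
}
```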
+ **Performance characteristics:**
+
+ | Scenario | Behavior | Typical Time |
+ | --- | --- | --- |
+ | First run (500 files) | Full index | ~30–60s (API), ~2–5min (local) |
+ | Subsequent run (no changes) | Hash check only | < 1s |
+ | 10 files changed | Incremental delta | ~2–5s |
+ | Branch switch | Partial re-index | ~5–15s |
+ | `force=true` | Full rebuild | Same as first run |
+
+ > ⚠️ **Multi-agent warning:** Auto-index is **disabled by default** to prevent concurrent Milvus writes when multiple agents share the same server. Set `SMART_CODING_AUTO_INDEX_DELAY=true` (5s) only if a **single agent** connects to this MCP server. Use `b_index_codebase` for explicit on-demand indexing in multi-agent setups.
+
+ <details>
+ <summary><strong>🐚 Shell Reindex for Bulk Operations</strong></summary>
+
+ MCP tool calls have timeout limits and don't expose real-time logs. For bulk operations (initial setup, full rebuild, migration), use the CLI reindex script directly:
+
+ ```bash
+ cd /path/to/semantic-code-mcp
+ node reindex.js /path/to/workspace --force
+ ```
+
+ **When to use CLI over MCP tools:**
+
+ | Scenario | Use |
+ | --- | --- |
+ | Daily incremental updates | MCP `b_index_codebase(force=false)` |
+ | Initial workspace setup | CLI `node reindex.js /path --force` |
+ | Full rebuild after migration | CLI `node reindex.js /path --force` |
+ | 1000+ file bulk update | CLI (timeout-safe, real-time logs) |
+ | Debugging 429 / gRPC errors | CLI (stderr visible) |
+
+ > The CLI reindex script uses the same incremental engine under the hood. `--force` only forces re-embedding; it still uses the same hash-based delta for efficiency.
+
+ </details>
+
+ ## Non-Blocking Indexing Workflow
+
+ All indexing operations run in the **background** and return immediately. The agent can search while indexing continues.
+
+ ```mermaid
+ sequenceDiagram
+   participant Agent
+   participant MCP as semantic-code-mcp
+   participant BG as Background Thread
+   participant Store as Milvus / SQLite
+
+   Agent->>MCP: b_index_codebase(force=false)
+   MCP->>BG: startBackgroundIndexing()
+   MCP-->>Agent: {status: "started", message: "..."}
+   Note over Agent: ⚡ Returns instantly
+
+   loop Poll every 2-3s
+     Agent->>MCP: f_get_status()
+     MCP-->>Agent: {index.status: "indexing", progress: "150/500 files"}
+   end
+
+   BG->>Store: upsert vectors
+   BG-->>MCP: done
+
+   Agent->>MCP: f_get_status()
+   MCP-->>Agent: {index.status: "ready"}
+
+   Agent->>MCP: a_semantic_search(query)
+   MCP-->>Agent: [results]
+ ```
+
+ **Rules for agents:**
+ 1. **Always call `f_get_status` first** — check workspace and indexing status
+ 2. **Use `e_set_workspace` if workspace is wrong** — before any indexing
+ 3. **Poll `f_get_status` until `index.status: "ready"`** before relying on search results
+ 4. **Progressive search is supported** — `a_semantic_search` works during indexing with partial results
+ 5. **`SMART_CODING_AUTO_INDEX_DELAY=false`** by default — use `b_index_codebase` for explicit on-demand indexing in multi-agent setups
+
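Taken together, these rules amount to a small polling loop on the agent side. Here is a sketch in JavaScript, where `callTool(name, args)` stands in for whatever MCP client you use; the response shapes are assumed from the README rather than verified against the server.

```javascript
// Illustrative agent-side workflow: status check, background index, poll, then search.
async function indexThenSearch(callTool, query) {
  let status = await callTool("f_get_status", {});               // rule 1: check status first
  if (status.index?.status !== "ready") {
    await callTool("b_index_codebase", { force: false });        // returns immediately
    do {
      await new Promise((resolve) => setTimeout(resolve, 2500)); // poll every 2-3 s
      status = await callTool("f_get_status", {});
    } while (status.index?.status !== "ready");                  // rule 3: wait for "ready"
  }
  return callTool("a_semantic_search", { query });
}
```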
  ## Privacy

  - **Local mode**: everything runs on your machine. Code never leaves your system.

@@ -256,6 +654,15 @@ Copyright (c) 2025 Omar Haris (original), bitkyc08 (modifications, 2026)

  See [LICENSE](LICENSE) for full text.

-
+ ### About
+
+ This project is a fork of [smart-coding-mcp](https://github.com/omarHaris/smart-coding-mcp) by Omar Haris, heavily extended for production use.

-
+ **Key additions over upstream**:
+ - Multi-provider embeddings (Gemini, Vertex AI, OpenAI, OpenAI-compatible)
+ - Milvus vector store with ANN search for large codebases
+ - AST-based code chunking via Tree-sitter
+ - Resource throttling (CPU cap at 50%)
+ - Runtime workspace switching (`e_set_workspace`)
+ - Package version checker across 20+ registries (`d_check_last_version`)
+ - Comprehensive IDE setup guides (VS Code, Cursor, Windsurf, Claude Desktop, Antigravity)
package/features/index-codebase.js
CHANGED

@@ -935,7 +935,7 @@ export class CodebaseIndexer {
  export function getToolDefinition() {
    return {
      name: "b_index_codebase",
-     description: "
+     description: "Trigger codebase reindex. Returns IMMEDIATELY (non-blocking). Poll f_get_status until index.status='ready' before calling a_semantic_search. Do NOT search while indexing.",
      inputSchema: {
        type: "object",
        properties: {
@@ -959,41 +959,35 @@ export function getToolDefinition() {
  // Tool handler
  export async function handleToolCall(request, indexer) {
    const force = request.params.arguments?.force || false;
-   const result = await indexer.indexAll(force);

-   //
-   if (
+   // Guard: already indexing
+   if (indexer.isIndexing) {
+     const status = indexer.getIndexingStatus();
      return {
        content: [{
          type: "text",
-         text:
+         text: JSON.stringify({
+           accepted: false,
+           status: "rejected",
+           message: "Indexing already in progress. Use f_get_status to poll.",
+           progress: status
+         }, null, 2)
        }]
      };
    }

-   //
-
-   const stats = {
-     totalChunks: result?.totalChunks ?? cacheStats.totalChunks,
-     totalFiles: result?.totalFiles ?? cacheStats.totalFiles,
-     filesProcessed: result?.filesProcessed ?? 0,
-     chunksCreated: result?.chunksCreated ?? 0
-   };
-
-   let message = result?.message
-     ? `Codebase reindexed successfully.\n\n${result.message}`
-     : `Codebase reindexed successfully.`;
-
-   message += `\n\nStatistics:\n- Total files in index: ${stats.totalFiles}\n- Total code chunks: ${stats.totalChunks}`;
-
-   if (stats.filesProcessed > 0) {
-     message += `\n- Files processed this run: ${stats.filesProcessed}\n- Chunks created this run: ${stats.chunksCreated}`;
-   }
+   // Fire-and-forget — returns immediately
+   indexer.startBackgroundIndexing(force);

    return {
      content: [{
        type: "text",
-       text:
+       text: JSON.stringify({
+         accepted: true,
+         status: "started",
+         message: "Indexing started in background. Use f_get_status to poll progress.",
+         force
+       }, null, 2)
      }]
    };
  }
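The handler above only shows the call site of `indexer.startBackgroundIndexing(force)`. For orientation, here is a hypothetical sketch of the fire-and-forget pattern it implies, with the re-entrancy flag that the `isIndexing` guard reads; this is an illustration, not the package's actual CodebaseIndexer code.

```javascript
// Hypothetical illustration of a fire-and-forget background indexer with a guard flag.
class BackgroundIndexerSketch {
  constructor(indexAll) {
    this.indexAll = indexAll; // the existing (slow) incremental indexing routine
    this.isIndexing = false;
    this.lastError = null;
  }

  getIndexingStatus() {
    return { isIndexing: this.isIndexing, lastError: String(this.lastError ?? "") };
  }

  startBackgroundIndexing(force = false) {
    if (this.isIndexing) return; // caller already rejects, but stay safe against re-entry
    this.isIndexing = true;
    // Deliberately not awaited: the MCP handler returns while this promise runs.
    this.indexAll(force)
      .catch((err) => { this.lastError = err; })
      .finally(() => { this.isIndexing = false; });
  }
}
```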
package/lib/config.js
CHANGED
@@ -859,15 +859,15 @@ const DEFAULT_CONFIG = {
    semanticWeight: 0.7,
    exactMatchBoost: 1.5,
    smartIndexing: true,
-
+
    // Resource throttling (balanced performance/responsiveness)
    maxCpuPercent: 50, // Max CPU usage during indexing (default: 50%)
    batchDelay: 10, // Delay between batches in ms (default: 10ms)
    maxWorkers: 'auto', // Max worker threads ('auto' = 50% of cores, or specific number)
-
+
    // Startup behavior
-   autoIndexDelay:
-
+   autoIndexDelay: false, // Auto-index on startup: false = disabled (safe for multi-agent). Set to ms delay (e.g. 5000) for single-agent setups.
+
    // Progressive indexing
    incrementalSaveInterval: 5, // Save to cache every N batches
    allowPartialSearch: true // Allow searches while indexing is in progress
@@ -880,7 +880,7 @@ export async function loadConfig(workspaceDir = null) {
    // Determine the base directory for configuration
    let baseDir;
    let configPath;
-
+
    if (workspaceDir) {
      // Workspace mode: load config from workspace root
      baseDir = path.resolve(workspaceDir);
@@ -892,7 +892,7 @@ export async function loadConfig(workspaceDir = null) {
      baseDir = path.resolve(scriptDir, '..');
      configPath = path.join(baseDir, "config.json");
    }
-
+
    let userConfig = {};
    try {
      const configData = await fs.readFile(configPath, "utf-8");
@@ -904,9 +904,9 @@ export async function loadConfig(workspaceDir = null) {
        console.error(`[Config] No config.json found: ${configError.message}`);
      }
    }
-
+
    config = { ...DEFAULT_CONFIG, ...userConfig };
-
+
    // Set workspace-specific directories
    if (workspaceDir) {
      config.searchDirectory = baseDir;
@@ -915,7 +915,7 @@ export async function loadConfig(workspaceDir = null) {
      config.searchDirectory = path.resolve(baseDir, config.searchDirectory);
      config.cacheDirectory = path.resolve(baseDir, config.cacheDirectory);
    }
-
+
    // Smart project detection
    if (config.smartIndexing !== false) {
      const detector = new ProjectDetector(config.searchDirectory);
@@ -941,13 +941,13 @@ export async function loadConfig(workspaceDir = null) {
      }
      console.error(`[Config] Applied ${smartPatterns.length} smart ignore patterns`);
    }
-
+
    console.error("[Config] Loaded configuration from config.json");
  } catch (error) {
    console.error("[Config] Using default configuration (config.json not found or invalid)");
    console.error(`[Config] Error: ${error.message}`);
  }
-
+
  // Apply environment variable overrides (prefix: SMART_CODING_) with validation
  if (process.env.SMART_CODING_VERBOSE !== undefined) {
    const value = process.env.SMART_CODING_VERBOSE;
@@ -955,7 +955,7 @@ export async function loadConfig(workspaceDir = null) {
      config.verbose = value === 'true';
    }
  }
-
+
  if (process.env.SMART_CODING_BATCH_SIZE !== undefined) {
    const value = parseInt(process.env.SMART_CODING_BATCH_SIZE, 10);
    if (!isNaN(value) && value > 0 && value <= 1000) {
@@ -964,7 +964,7 @@ export async function loadConfig(workspaceDir = null) {
      console.error(`[Config] Invalid SMART_CODING_BATCH_SIZE: ${process.env.SMART_CODING_BATCH_SIZE}, using default`);
    }
  }
-
+
  if (process.env.SMART_CODING_MAX_FILE_SIZE !== undefined) {
    const value = parseInt(process.env.SMART_CODING_MAX_FILE_SIZE, 10);
    if (!isNaN(value) && value > 0) {
@@ -973,7 +973,7 @@ export async function loadConfig(workspaceDir = null) {
      console.error(`[Config] Invalid SMART_CODING_MAX_FILE_SIZE: ${process.env.SMART_CODING_MAX_FILE_SIZE}, using default`);
    }
  }
-
+
  if (process.env.SMART_CODING_CHUNK_SIZE !== undefined) {
    const value = parseInt(process.env.SMART_CODING_CHUNK_SIZE, 10);
    if (!isNaN(value) && value > 0 && value <= 100) {
@@ -982,7 +982,7 @@ export async function loadConfig(workspaceDir = null) {
      console.error(`[Config] Invalid SMART_CODING_CHUNK_SIZE: ${process.env.SMART_CODING_CHUNK_SIZE}, using default`);
    }
  }
-
+
  if (process.env.SMART_CODING_MAX_RESULTS !== undefined) {
    const value = parseInt(process.env.SMART_CODING_MAX_RESULTS, 10);
    if (!isNaN(value) && value > 0 && value <= 100) {
@@ -991,14 +991,14 @@ export async function loadConfig(workspaceDir = null) {
      console.error(`[Config] Invalid SMART_CODING_MAX_RESULTS: ${process.env.SMART_CODING_MAX_RESULTS}, using default`);
    }
  }
-
+
  if (process.env.SMART_CODING_SMART_INDEXING !== undefined) {
    const value = process.env.SMART_CODING_SMART_INDEXING;
    if (value === 'true' || value === 'false') {
      config.smartIndexing = value === 'true';
    }
  }
-
+
  if (process.env.SMART_CODING_WATCH_FILES !== undefined) {
    const value = process.env.SMART_CODING_WATCH_FILES;
    if (value === 'true' || value === 'false') {
@@ -1045,7 +1045,7 @@ export async function loadConfig(workspaceDir = null) {
      console.error(`[Config] Milvus collection: ${value}`);
    }
  }
-
+
  if (process.env.SMART_CODING_SEMANTIC_WEIGHT !== undefined) {
    const value = parseFloat(process.env.SMART_CODING_SEMANTIC_WEIGHT);
    if (!isNaN(value) && value >= 0 && value <= 1) {
@@ -1054,7 +1054,7 @@ export async function loadConfig(workspaceDir = null) {
      console.error(`[Config] Invalid SMART_CODING_SEMANTIC_WEIGHT: ${process.env.SMART_CODING_SEMANTIC_WEIGHT}, using default (must be 0-1)`);
    }
  }
-
+
  if (process.env.SMART_CODING_EXACT_MATCH_BOOST !== undefined) {
    const value = parseFloat(process.env.SMART_CODING_EXACT_MATCH_BOOST);
    if (!isNaN(value) && value >= 0) {
@@ -1063,7 +1063,7 @@ export async function loadConfig(workspaceDir = null) {
      console.error(`[Config] Invalid SMART_CODING_EXACT_MATCH_BOOST: ${process.env.SMART_CODING_EXACT_MATCH_BOOST}, using default`);
    }
  }
-
+
  if (process.env.SMART_CODING_EMBEDDING_MODEL !== undefined) {
    const value = process.env.SMART_CODING_EMBEDDING_MODEL.trim();
    if (value.length > 0) {
@@ -1180,7 +1180,7 @@ export async function loadConfig(workspaceDir = null) {
      console.error(`[Config] Invalid SMART_CODING_GEMINI_MAX_RETRIES: ${process.env.SMART_CODING_GEMINI_MAX_RETRIES}, using default (must be 0-10)`);
    }
  }
-
+
  if (process.env.SMART_CODING_WORKER_THREADS !== undefined) {
    const value = process.env.SMART_CODING_WORKER_THREADS.trim().toLowerCase();
    if (value === 'auto') {
@@ -1194,7 +1194,7 @@ export async function loadConfig(workspaceDir = null) {
      }
    }
  }
-
+
  // MRL embedding dimension
  if (process.env.SMART_CODING_EMBEDDING_DIMENSION !== undefined) {
    const value = parseInt(process.env.SMART_CODING_EMBEDDING_DIMENSION, 10);
@@ -1206,7 +1206,7 @@ export async function loadConfig(workspaceDir = null) {
      console.error(`[Config] Invalid SMART_CODING_EMBEDDING_DIMENSION: ${value}, using default (must be 64, 128, 256, 512, or 768)`);
    }
  }
-
+
  // Device selection
  if (process.env.SMART_CODING_DEVICE !== undefined) {
    const value = process.env.SMART_CODING_DEVICE.trim().toLowerCase();
@@ -1218,7 +1218,7 @@ export async function loadConfig(workspaceDir = null) {
      console.error(`[Config] Invalid SMART_CODING_DEVICE: ${value}, using default (must be 'cpu', 'webgpu', or 'auto')`);
    }
  }
-
+
  // Chunking mode
  if (process.env.SMART_CODING_CHUNKING_MODE !== undefined) {
    const value = process.env.SMART_CODING_CHUNKING_MODE.trim().toLowerCase();
@@ -1230,7 +1230,7 @@ export async function loadConfig(workspaceDir = null) {
      console.error(`[Config] Invalid SMART_CODING_CHUNKING_MODE: ${value}, using default (must be 'smart', 'ast', or 'line')`);
    }
  }
-
+
  // Resource throttling - Max CPU percent
  if (process.env.SMART_CODING_MAX_CPU_PERCENT !== undefined) {
    const value = parseInt(process.env.SMART_CODING_MAX_CPU_PERCENT, 10);
@@ -1241,7 +1241,7 @@ export async function loadConfig(workspaceDir = null) {
      console.error(`[Config] Invalid SMART_CODING_MAX_CPU_PERCENT: ${value}, using default (must be 10-100)`);
    }
  }
-
+
  // Resource throttling - Batch delay
  if (process.env.SMART_CODING_BATCH_DELAY !== undefined) {
    const value = parseInt(process.env.SMART_CODING_BATCH_DELAY, 10);
@@ -1252,7 +1252,7 @@ export async function loadConfig(workspaceDir = null) {
      console.error(`[Config] Invalid SMART_CODING_BATCH_DELAY: ${value}, using default (must be 0-5000)`);
    }
  }
-
+
  // Resource throttling - Max workers
  if (process.env.SMART_CODING_MAX_WORKERS !== undefined) {
    const value = process.env.SMART_CODING_MAX_WORKERS.trim().toLowerCase();
@@ -1275,13 +1275,16 @@ export async function loadConfig(workspaceDir = null) {
    if (value === 'false' || value === '0') {
      config.autoIndexDelay = false;
      console.error(`[Config] Auto-indexing disabled`);
+   } else if (value === 'true') {
+     config.autoIndexDelay = 5000;
+     console.error(`[Config] Auto-indexing enabled (5000ms delay)`);
    } else {
      const numValue = parseInt(value, 10);
      if (!isNaN(numValue) && numValue >= 0 && numValue <= 60000) {
        config.autoIndexDelay = numValue;
        console.error(`[Config] Auto-index delay: ${numValue}ms`);
      } else {
-       console.error(`[Config] Invalid SMART_CODING_AUTO_INDEX_DELAY: ${value}, using default (must be 0-60000 or 'false')`);
+       console.error(`[Config] Invalid SMART_CODING_AUTO_INDEX_DELAY: ${value}, using default (must be 0-60000, 'true', or 'false')`);
      }
    }
  }
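For reference, the accepted `SMART_CODING_AUTO_INDEX_DELAY` values after this change resolve as shown below. The helper is a small illustrative mirror of the parsing above, not part of the package.

```javascript
// Resolves an env value the same way the config parser above does.
function resolveAutoIndexDelay(value, fallback = false) {
  const v = String(value).trim().toLowerCase();
  if (v === "false" || v === "0") return false; // disabled (default, multi-agent safe)
  if (v === "true") return 5000;                // enabled with the 5000 ms default delay
  const ms = parseInt(v, 10);
  return !isNaN(ms) && ms >= 0 && ms <= 60000 ? ms : fallback;
}

// resolveAutoIndexDelay("true")  -> 5000
// resolveAutoIndexDelay("15000") -> 15000
// resolveAutoIndexDelay("oops")  -> false (default retained; the server logs a warning)
```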
package/package.json
CHANGED