smart-coding-mcp 1.2.4 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -168
- package/config.json +4 -3
- package/example.png +0 -0
- package/features/clear-cache.js +30 -7
- package/features/index-codebase.js +507 -37
- package/how-its-works.png +0 -0
- package/index.js +2 -2
- package/lib/cache.js +5 -0
- package/lib/config.js +29 -4
- package/lib/embedding-worker.js +67 -0
- package/lib/tokenizer.js +142 -0
- package/lib/utils.js +113 -25
- package/package.json +9 -3
- package/test/clear-cache.test.js +288 -0
- package/test/embedding-model.test.js +230 -0
- package/test/helpers.js +128 -0
- package/test/hybrid-search.test.js +243 -0
- package/test/index-codebase.test.js +246 -0
- package/test/integration.test.js +223 -0
- package/test/tokenizer.test.js +225 -0
- package/vitest.config.js +29 -0
package/README.md
CHANGED
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
# Smart Coding MCP
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[npm version](https://www.npmjs.com/package/smart-coding-mcp)
|
|
4
|
+
[npm downloads](https://www.npmjs.com/package/smart-coding-mcp)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[Node.js](https://nodejs.org/)
|
|
7
|
+
|
|
8
|
+
An extensible Model Context Protocol (MCP) server that provides intelligent semantic code search for AI assistants. Built with local AI models (RAG), inspired by Cursor's semantic search research.
|
|
4
9
|
|
|
5
10
|
## What This Does
|
|
6
11
|
|
|
@@ -8,6 +13,8 @@ AI coding assistants work better when they can find relevant code quickly. Tradi
|
|
|
8
13
|
|
|
9
14
|
This MCP server solves that by indexing your codebase with AI embeddings. Your AI assistant can search by meaning instead of exact keywords, finding relevant code even when the terminology differs.
|
|
10
15
|
|
|
16
|
+

|
|
17
|
+
|
|
11
18
|
## Why Use This
|
|
12
19
|
|
|
13
20
|
**Better Code Understanding**
|
|
@@ -36,6 +43,12 @@ Install globally via npm:
|
|
|
36
43
|
npm install -g smart-coding-mcp
|
|
37
44
|
```
|
|
38
45
|
|
|
46
|
+
To update to the latest version:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
npm update -g smart-coding-mcp
|
|
50
|
+
```
|
|
51
|
+
|
|
39
52
|
## Configuration
|
|
40
53
|
|
|
41
54
|
Add to your MCP configuration file. The location depends on your IDE and OS:
|
|
@@ -80,33 +93,23 @@ Add the server configuration to the `mcpServers` object in your config file:
|
|
|
80
93
|
}
|
|
81
94
|
```
|
|
82
95
|
|
|
83
|
-
### Option 3: Auto-Detect Current Directory
|
|
84
|
-
|
|
85
|
-
```json
|
|
86
|
-
{
|
|
87
|
-
"mcpServers": {
|
|
88
|
-
"smart-coding-mcp": {
|
|
89
|
-
"command": "smart-coding-mcp"
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
```
|
|
94
|
-
|
|
95
96
|
## Environment Variables
|
|
96
97
|
|
|
97
98
|
Override configuration settings via environment variables in your MCP config:
|
|
98
99
|
|
|
99
|
-
| Variable | Type | Default
|
|
100
|
-
| -------------------------------- | ------- |
|
|
101
|
-
| `SMART_CODING_VERBOSE` | boolean | `false`
|
|
102
|
-
| `SMART_CODING_BATCH_SIZE` | number | `100`
|
|
103
|
-
| `SMART_CODING_MAX_FILE_SIZE` | number | `1048576`
|
|
104
|
-
| `SMART_CODING_CHUNK_SIZE` | number | `15`
|
|
105
|
-
| `SMART_CODING_MAX_RESULTS` | number | `5`
|
|
106
|
-
| `SMART_CODING_SMART_INDEXING` | boolean | `true`
|
|
107
|
-
| `SMART_CODING_WATCH_FILES` | boolean | `false`
|
|
108
|
-
| `SMART_CODING_SEMANTIC_WEIGHT` | number | `0.7`
|
|
109
|
-
| `SMART_CODING_EXACT_MATCH_BOOST` | number | `1.5`
|
|
100
|
+
| Variable | Type | Default | Description |
|
|
101
|
+
| -------------------------------- | ------- | ------------------------- | ------------------------------------- |
|
|
102
|
+
| `SMART_CODING_VERBOSE` | boolean | `false` | Enable detailed logging |
|
|
103
|
+
| `SMART_CODING_BATCH_SIZE` | number | `100` | Files to process in parallel |
|
|
104
|
+
| `SMART_CODING_MAX_FILE_SIZE` | number | `1048576` | Max file size in bytes (1MB) |
|
|
105
|
+
| `SMART_CODING_CHUNK_SIZE` | number | `25` | Lines of code per chunk |
|
|
106
|
+
| `SMART_CODING_MAX_RESULTS` | number | `5` | Max search results |
|
|
107
|
+
| `SMART_CODING_SMART_INDEXING` | boolean | `true` | Enable smart project detection |
|
|
108
|
+
| `SMART_CODING_WATCH_FILES` | boolean | `false` | Enable file watching for auto-reindex |
|
|
109
|
+
| `SMART_CODING_SEMANTIC_WEIGHT` | number | `0.7` | Weight for semantic similarity (0-1) |
|
|
110
|
+
| `SMART_CODING_EXACT_MATCH_BOOST` | number | `1.5` | Boost for exact text matches |
|
|
111
|
+
| `SMART_CODING_EMBEDDING_MODEL` | string | `Xenova/all-MiniLM-L6-v2` | AI embedding model to use |
|
|
112
|
+
| `SMART_CODING_WORKER_THREADS` | string | `auto` | Worker threads (`auto` or 1-32) |
|
|
110
113
|
|
|
111
114
|
**Example with environment variables:**
|
|
112
115
|
|
|
@@ -160,60 +163,7 @@ The server indexes your code in four steps:
|
|
|
160
163
|
|
|
161
164
|
When you search, your query is converted to the same vector format and compared against all code chunks using cosine similarity. The most relevant matches are returned.
|
|
162
165
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
The server detects your project type by looking for marker files and automatically applies appropriate ignore patterns:
|
|
166
|
-
|
|
167
|
-
**JavaScript/Node** (package.json found)
|
|
168
|
-
|
|
169
|
-
- Ignores: node_modules, dist, build, .next, coverage
|
|
170
|
-
|
|
171
|
-
**Python** (requirements.txt or pyproject.toml)
|
|
172
|
-
|
|
173
|
-
- Ignores: `__pycache__`, venv, .pytest_cache, .tox
|
|
174
|
-
|
|
175
|
-
**Android** (build.gradle)
|
|
176
|
-
|
|
177
|
-
- Ignores: .gradle, build artifacts, generated code
|
|
178
|
-
|
|
179
|
-
**iOS** (Podfile)
|
|
180
|
-
|
|
181
|
-
- Ignores: Pods, DerivedData, xcuserdata
|
|
182
|
-
|
|
183
|
-
**And more**: Go, PHP, Rust, Ruby, .NET
|
|
184
|
-
|
|
185
|
-
This typically reduces indexed file count by 100x. A project with 50,000 files (including node_modules) indexes just 500 actual source files.
|
|
186
|
-
|
|
187
|
-
## Configuration
|
|
188
|
-
|
|
189
|
-
The server works out of the box with sensible defaults. Create a `config.json` file in your workspace to customize:
|
|
190
|
-
|
|
191
|
-
```json
|
|
192
|
-
{
|
|
193
|
-
"searchDirectory": ".",
|
|
194
|
-
"fileExtensions": ["js", "ts", "py", "java", "go"],
|
|
195
|
-
"excludePatterns": ["**/my-custom-ignore/**"],
|
|
196
|
-
"smartIndexing": true,
|
|
197
|
-
"verbose": false,
|
|
198
|
-
"enableCache": true,
|
|
199
|
-
"cacheDirectory": "./.smart-coding-cache",
|
|
200
|
-
"watchFiles": true,
|
|
201
|
-
"chunkSize": 15,
|
|
202
|
-
"batchSize": 100,
|
|
203
|
-
"maxFileSize": 1048576,
|
|
204
|
-
"maxResults": 5
|
|
205
|
-
}
|
|
206
|
-
```
|
|
207
|
-
|
|
208
|
-
**Key options:**
|
|
209
|
-
|
|
210
|
-
- `smartIndexing`: Enable automatic project type detection and smart ignore patterns (default: true)
|
|
211
|
-
- `verbose`: Show detailed indexing logs (default: false)
|
|
212
|
-
- `watchFiles`: Automatically reindex when files change (default: true)
|
|
213
|
-
- `enableCache`: Cache embeddings to disk (default: true)
|
|
214
|
-
- `chunkSize`: Lines of code per chunk - smaller = more precise, larger = more context (default: 15)
|
|
215
|
-
- `batchSize`: Number of files to process in parallel (default: 100)
|
|
216
|
-
- `maxFileSize`: Skip files larger than this size in bytes (default: 1MB)
|
|
166
|
+

|
|
217
167
|
|
|
218
168
|
## Examples
|
|
219
169
|
|
|
@@ -243,85 +193,6 @@ Query: "error handling and exceptions"
|
|
|
243
193
|
|
|
244
194
|
Finds all try/catch blocks and error handling patterns.
|
|
245
195
|
|
|
246
|
-
## Performance
|
|
247
|
-
|
|
248
|
-
Tested on a typical JavaScript project:
|
|
249
|
-
|
|
250
|
-
| Metric | Without Smart Indexing | With Smart Indexing |
|
|
251
|
-
| -------------- | ---------------------- | ------------------- |
|
|
252
|
-
| Files scanned | 50,000+ | 500 |
|
|
253
|
-
| Indexing time | 10+ min | 2-3 min |
|
|
254
|
-
| Memory usage | 2GB+ | ~200MB |
|
|
255
|
-
| Search latency | N/A | <100ms |
|
|
256
|
-
|
|
257
|
-
## Supported File Types
|
|
258
|
-
|
|
259
|
-
Languages: JavaScript, TypeScript, Python, Java, Kotlin, Scala, C, C++, C#, Go, Rust, Ruby, PHP, Swift, Shell
|
|
260
|
-
|
|
261
|
-
Web: HTML, CSS, SCSS, Sass, XML, SVG
|
|
262
|
-
|
|
263
|
-
Config/Data: JSON, YAML, TOML, SQL
|
|
264
|
-
|
|
265
|
-
Total: 36 file extensions
|
|
266
|
-
|
|
267
|
-
## Architecture
|
|
268
|
-
|
|
269
|
-
```
|
|
270
|
-
smart-coding-mcp/
|
|
271
|
-
├── index.js # MCP server entry point
|
|
272
|
-
├── lib/
|
|
273
|
-
│ ├── config.js # Configuration + smart detection
|
|
274
|
-
│ ├── cache.js # Embeddings persistence
|
|
275
|
-
│ ├── utils.js # Smart chunking
|
|
276
|
-
│ ├── ignore-patterns.js # Language-specific patterns
|
|
277
|
-
│ └── project-detector.js # Project type detection
|
|
278
|
-
└── features/
|
|
279
|
-
├── hybrid-search.js # Semantic + exact match search
|
|
280
|
-
├── index-codebase.js # File indexing + watching
|
|
281
|
-
└── clear-cache.js # Cache management
|
|
282
|
-
```
|
|
283
|
-
|
|
284
|
-
The modular design makes it easy to add new features. See ARCHITECTURE.md for implementation details.
|
|
285
|
-
|
|
286
|
-
## Troubleshooting
|
|
287
|
-
|
|
288
|
-
**"Server can't find config.json"**
|
|
289
|
-
|
|
290
|
-
Make sure `cwd` is set in your MCP configuration to the full path of smart-coding-mcp.
|
|
291
|
-
|
|
292
|
-
**"Indexing takes too long"**
|
|
293
|
-
|
|
294
|
-
- Verify `smartIndexing` is enabled
|
|
295
|
-
- Add more patterns to `excludePatterns`
|
|
296
|
-
- Reduce `fileExtensions` to only what you need
|
|
297
|
-
|
|
298
|
-
**"Search results aren't relevant"**
|
|
299
|
-
|
|
300
|
-
- Try more specific queries
|
|
301
|
-
- Increase `maxResults` to see more options
|
|
302
|
-
- Run `index_codebase` to force a full reindex
|
|
303
|
-
|
|
304
|
-
**"Cache corruption errors"**
|
|
305
|
-
|
|
306
|
-
Use the `clear_cache` tool or run:
|
|
307
|
-
|
|
308
|
-
```bash
|
|
309
|
-
npm run clear-cache
|
|
310
|
-
```
|
|
311
|
-
|
|
312
|
-
## CLI Commands
|
|
313
|
-
|
|
314
|
-
```bash
|
|
315
|
-
# Start the server
|
|
316
|
-
npm start
|
|
317
|
-
|
|
318
|
-
# Development mode with auto-restart
|
|
319
|
-
npm run dev
|
|
320
|
-
|
|
321
|
-
# Clear embeddings cache
|
|
322
|
-
npm run clear-cache
|
|
323
|
-
```
|
|
324
|
-
|
|
325
196
|
## Privacy
|
|
326
197
|
|
|
327
198
|
- AI model runs entirely on your machine
|
|
@@ -353,17 +224,6 @@ This project builds on research from Cursor showing that semantic search improve
|
|
|
353
224
|
|
|
354
225
|
See: https://cursor.com/blog/semsearch
|
|
355
226
|
|
|
356
|
-
## Contributing
|
|
357
|
-
|
|
358
|
-
Contributions are welcome. See CONTRIBUTING.md for guidelines.
|
|
359
|
-
|
|
360
|
-
Potential areas for improvement:
|
|
361
|
-
|
|
362
|
-
- Additional language support
|
|
363
|
-
- Code complexity analysis
|
|
364
|
-
- Refactoring pattern detection
|
|
365
|
-
- Documentation generation
|
|
366
|
-
|
|
367
227
|
## License
|
|
368
228
|
|
|
369
229
|
MIT License
|
package/config.json
CHANGED
|
@@ -50,8 +50,8 @@
|
|
|
50
50
|
"**/.smart-coding-cache/**"
|
|
51
51
|
],
|
|
52
52
|
"smartIndexing": true,
|
|
53
|
-
"chunkSize":
|
|
54
|
-
"chunkOverlap":
|
|
53
|
+
"chunkSize": 25,
|
|
54
|
+
"chunkOverlap": 5,
|
|
55
55
|
"batchSize": 100,
|
|
56
56
|
"maxFileSize": 1048576,
|
|
57
57
|
"maxResults": 5,
|
|
@@ -61,5 +61,6 @@
|
|
|
61
61
|
"verbose": false,
|
|
62
62
|
"embeddingModel": "Xenova/all-MiniLM-L6-v2",
|
|
63
63
|
"semanticWeight": 0.7,
|
|
64
|
-
"exactMatchBoost": 1.5
|
|
64
|
+
"exactMatchBoost": 1.5,
|
|
65
|
+
"workerThreads": "auto"
|
|
65
66
|
}
|
package/example.png
ADDED
|
Binary file
|
package/features/clear-cache.js
CHANGED
|
@@ -1,16 +1,39 @@
|
|
|
1
1
|
export class CacheClearer {
|
|
2
|
-
constructor(embedder, cache, config) {
|
|
2
|
+
constructor(embedder, cache, config, indexer) {
|
|
3
3
|
this.cache = cache;
|
|
4
4
|
this.config = config;
|
|
5
|
+
this.indexer = indexer;
|
|
6
|
+
this.isClearing = false;
|
|
5
7
|
}
|
|
6
8
|
|
|
7
9
|
async execute() {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
// Check if indexing is in progress
|
|
11
|
+
if (this.indexer && this.indexer.isIndexing) {
|
|
12
|
+
throw new Error("Cannot clear cache while indexing is in progress. Please wait for indexing to complete.");
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
// Check if cache is currently being saved (race condition prevention)
|
|
16
|
+
if (this.cache.isSaving) {
|
|
17
|
+
throw new Error("Cannot clear cache while cache is being saved. Please try again in a moment.");
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
// Check if a clear operation is already in progress (prevent concurrent clears)
|
|
21
|
+
if (this.isClearing) {
|
|
22
|
+
throw new Error("Cache clear operation already in progress. Please wait for it to complete.");
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
this.isClearing = true;
|
|
26
|
+
|
|
27
|
+
try {
|
|
28
|
+
await this.cache.clear();
|
|
29
|
+
return {
|
|
30
|
+
success: true,
|
|
31
|
+
message: `Cache cleared successfully. Next indexing will be a full rebuild.`,
|
|
32
|
+
cacheDirectory: this.config.cacheDirectory
|
|
33
|
+
};
|
|
34
|
+
} finally {
|
|
35
|
+
this.isClearing = false;
|
|
36
|
+
}
|
|
14
37
|
}
|
|
15
38
|
}
|
|
16
39
|
|