src-mcp 1.0.2 → 1.0.3
This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.
package/README.md (CHANGED)

@@ -51,7 +51,6 @@ SRC indexes your codebase into semantic, searchable chunks that LLMs actually un
 | Feature | Description |
 |---------|-------------|
 | **Hybrid Search** | Vector + BM25 + RRF fusion for optimal results |
-| **LLM Re-ranking** | AI-powered relevance optimization |
 | **Call Graph** | Shows who calls what and what calls who |
 | **Cross-file Context** | Resolves imports and path aliases automatically |
 | **Incremental Updates** | SHA-256 hash detection for fast updates |
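The "Incremental Updates" row refers to content hashing. As a rough illustration of the idea (a minimal sketch, not the package's actual code; `hashFile` and `needsReindex` are hypothetical names), SHA-256 change detection can be built on Node's built-in crypto module:

```ts
import { createHash } from "node:crypto";
import { readFileSync } from "node:fs";

// Hex SHA-256 digest of a file's current contents.
function hashFile(filePath: string): string {
  return createHash("sha256").update(readFileSync(filePath)).digest("hex");
}

// Compare against the digest stored at index time; only files whose
// hash changed need to be re-chunked and re-embedded.
function needsReindex(filePath: string, storedHash: string | undefined): boolean {
  return storedHash === undefined || hashFile(filePath) !== storedHash;
}
```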
@@ -74,12 +73,11 @@ SRC indexes your codebase into semantic, searchable chunks that LLMs actually un
 
 ### 1. Install Ollama
 
-SRC requires [Ollama](https://ollama.com) for embeddings
+SRC requires [Ollama](https://ollama.com) for embeddings:
 
 ```bash
 # Install from https://ollama.com, then:
 ollama pull nomic-embed-text
-ollama pull qwen2.5:1.5b
 ```
 
 ### 2. Install SRC
@@ -152,7 +150,6 @@ src-mcp get_index_status
 |------|----------|---------|-------------|
 | `search_code` | `--limit` | 10 | Max results |
 | `search_code` | `--mode` | hybrid | `hybrid` / `vector` / `fts` |
-| `search_code` | `--rerank` | true | LLM re-ranking |
 | `index_codebase` | `--concurrency` | 4 | Parallel workers |
 | `index_codebase` | `--force` | false | Re-index if exists |
 
@@ -234,7 +231,6 @@ Hybrid search with vector similarity, BM25 keyword matching, and RRF fusion.
 | `limit` | number | No | `10` | Maximum results to return |
 | `threshold` | number | No | — | Distance threshold (0-2, vector mode only) |
 | `mode` | enum | No | `hybrid` | Search mode: `hybrid`, `vector`, or `fts` |
-| `rerank` | boolean | No | `true` | Enable LLM re-ranking |
 | `includeCallContext` | boolean | No | `true` | Include caller/callee information |
 
 **Search Modes:**
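With `rerank` removed, a `search_code` call uses only the remaining parameters, for example (illustrative query and values, in the call notation this README uses elsewhere):

```
search_code(query: "jwt token validation", mode: "hybrid", limit: 5, includeCallContext: true)
```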
@@ -405,7 +401,6 @@ All settings can be configured via environment variables:
 | `CHUNK_SIZE` | Characters per chunk | `1000` |
 | `CHUNK_OVERLAP` | Overlap between chunks | `200` |
 | `EMBEDDING_BATCH_SIZE` | Batch size for embedding | `10` |
-| `RERANK_MODEL` | Model for re-ranking | `qwen2.5:1.5b` |
 | `LOG_LEVEL` | Log verbosity | `info` |
 
 **Example:**
@@ -576,7 +571,7 @@ Source Files → Semantic Chunking → AST Enrichment → Cross-file Context →
 
 ```
 Query → Embed Query → Vector Search ─┐
-                                     ├→ RRF Fusion →
+                                     ├→ RRF Fusion → Add Call Context → Results
 Query → Tokenize ───→ BM25 Search ───┘
 ```
 
@@ -586,9 +581,8 @@ Query → Tokenize ───→ BM25 Search ───┘
 2. **Vector Search** — Find semantically similar chunks (cosine similarity)
 3. **BM25 Search** — Find keyword matches (term frequency)
 4. **RRF Fusion** — Combine rankings with Reciprocal Rank Fusion (k=60)
-5. **
-6. **
-7. **Return** — Ranked results with full context
+5. **Call Context** — Add caller/callee information from call graph
+6. **Return** — Ranked results with full context
 
 ### Technical Specifications
 
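For step 4, Reciprocal Rank Fusion scores each chunk as the sum of 1/(k + rank) over the vector and BM25 rankings, with k=60 as noted above. A minimal sketch of the idea (not the package's implementation; the function name is illustrative):

```ts
// Fuse two ranked lists of chunk IDs with Reciprocal Rank Fusion.
// Each chunk contributes 1 / (k + rank) per list it appears in,
// using 1-based ranks; higher fused scores come first.
function rrfFuse(vectorRanking: string[], bm25Ranking: string[], k = 60): string[] {
  const scores = new Map<string, number>();
  for (const ranking of [vectorRanking, bm25Ranking]) {
    ranking.forEach((id, index) => {
      scores.set(id, (scores.get(id) ?? 0) + 1 / (k + index + 1));
    });
  }
  return [...scores.entries()]
    .sort((a, b) => b[1] - a[1])
    .map(([id]) => id);
}
```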
@@ -596,7 +590,6 @@ Query → Tokenize ───→ BM25 Search ───┘
 |-----------|---------------|
 | **Embedding Model** | nomic-embed-text (137M params) |
 | **Vector Dimensions** | 768 |
-| **Re-ranking Model** | qwen2.5:1.5b (1.5B params) |
 | **Chunk Size** | 1000 characters |
 | **Chunk Overlap** | 200 characters |
 | **Batch Size** | 10 embeddings per request |
@@ -612,7 +605,6 @@ Query → Tokenize ───→ BM25 Search ───┘
 | Feature | SRC | Basic MCPs |
 |---------|-----|------------|
 | **Search Method** | Hybrid (Vector + BM25 + RRF) | Keyword only or basic embedding |
-| **Re-ranking** | LLM-powered | None |
 | **Call Graph** | Full caller/callee context | None |
 | **Cross-file Context** | Resolves imports & path aliases | None |
 | **Incremental Updates** | SHA-256 hash detection | Full re-index required |
@@ -622,11 +614,10 @@ Query → Tokenize ───→ BM25 Search ───┘
 ### Key Advantages
 
 1. **Hybrid Search** — Combines semantic understanding with keyword precision
-2. **
-3. **
-4. **
-5. **
-6. **Semantic Chunking** — Splits at symbol boundaries, not arbitrary lines
+2. **Call Graph** — Understand code relationships, not just content
+3. **Cross-file Resolution** — Follows imports to provide complete context
+4. **Incremental Updates** — Only re-index what changed
+5. **Semantic Chunking** — Splits at symbol boundaries, not arbitrary lines
 
 ---
 
@@ -652,7 +643,6 @@ Error: model 'nomic-embed-text' not found
 **Solution:**
 ```bash
 ollama pull nomic-embed-text
-ollama pull qwen2.5:1.5b
 ```
 
 ### Index Already Exists
package/dist/bin.mjs (CHANGED)

@@ -1,5 +1,5 @@
 #!/usr/bin/env node
-import { a as logger, i as colors, n as features, o as EMBEDDING_CONFIG, r as createIndexWatcher, s as config, t as startServer } from "./server-
+import { a as logger, i as colors, n as features, o as EMBEDDING_CONFIG, r as createIndexWatcher, s as config, t as startServer } from "./server-DL8hfycz.mjs";
 import { defineCommand, runMain } from "citty";
 
 //#region src/cli/parser.ts
package/dist/index.mjs (CHANGED)

@@ -21,7 +21,7 @@ import "@langchain/textsplitters";
 const config = {
 	name: "src-mcp",
 	fullName: "SRC (Structured Repo Context)",
-	version: "1.0.2",
+	version: "1.0.3",
 	description: "MCP server for codebase analysis with Treesitter (SCM queries), AST parsing, and embedding-based indexing"
 };
 const nodeEnv = process.env.NODE_ENV;
@@ -40,8 +40,7 @@ const EMBEDDING_CONFIG = {
 	embeddingDimensions: Number(process.env.EMBEDDING_DIMENSIONS) || 768,
 	defaultChunkSize: Number(process.env.CHUNK_SIZE) || 1e3,
 	defaultChunkOverlap: Number(process.env.CHUNK_OVERLAP) || 200,
-	batchSize: Number(process.env.EMBEDDING_BATCH_SIZE) || 10,
-	rerankModel: process.env.RERANK_MODEL ?? "qwen2.5:1.5b"
+	batchSize: Number(process.env.EMBEDDING_BATCH_SIZE) || 10
 };
 /**
  * Enrichment configuration for cross-file context
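One behavior of the `Number(...) || fallback` pattern used throughout this config is worth noting: the fallback applies when the variable is unset or non-numeric (both yield `NaN`), and also when it is set to `0`, since both `NaN` and `0` are falsy:

```ts
Number(undefined) || 10; // NaN || 10 → 10 (variable unset)
Number("abc") || 10;     // NaN || 10 → 10 (non-numeric value)
Number("0") || 10;       // 0 || 10 → 10 (zero falls back too)
```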
@@ -2387,8 +2386,9 @@ var IndexWatcher = class {
 	async start() {
 		const health = await this.ollamaClient.healthCheck();
 		if (!health.ok) throw new Error(health.error ?? "Ollama is not available");
+		const needsFullIndex = !this.vectorStore.exists();
 		await this.vectorStore.connect();
-		if (
+		if (needsFullIndex) await this.fullIndex();
 		this.watcher = watch(this.directory, {
 			ignored: (filePath) => {
 				const relativePath = path.relative(this.directory, filePath).replace(/\\/g, "/");
@@ -2468,96 +2468,6 @@ function createIndexWatcher(options) {
 	return new IndexWatcher(options);
 }
 
-//#endregion
-//#region src/core/embeddings/reranker.ts
-/**
- * Parse LLM response to extract relevance score
- */
-function parseScore(response) {
-	const match = /\b(\d+(?:\.\d+)?)\b/.exec(response);
-	if (match?.[1]) {
-		const score = parseFloat(match[1]);
-		if (score >= 0 && score <= 10) return score;
-		if (score > 10 && score <= 100) return score / 10;
-	}
-	return 5;
-}
-/**
- * Score a single query-document pair using Ollama
- */
-async function scoreResult(query, content, options) {
-	const model = options.model ?? "llama3.2";
-	const timeout = options.timeout ?? 3e4;
-	const prompt = `Rate the relevance of the following code snippet to the search query on a scale of 0-10.
-0 = completely irrelevant
-5 = somewhat relevant
-10 = highly relevant and directly answers the query
-
-Query: "${query}"
-
-Code:
-\`\`\`
-${content.slice(0, 1e3)}
-\`\`\`
-
-Respond with ONLY a number between 0 and 10.`;
-	try {
-		const response = await fetch(`${options.ollamaBaseUrl}/api/generate`, {
-			method: "POST",
-			headers: { "Content-Type": "application/json" },
-			body: JSON.stringify({
-				model,
-				prompt,
-				stream: false,
-				options: {
-					temperature: 0,
-					num_predict: 10
-				}
-			}),
-			signal: AbortSignal.timeout(timeout)
-		});
-		if (!response.ok) {
-			logger.warn(`Re-ranking request failed: ${response.statusText}`);
-			return 5;
-		}
-		return parseScore((await response.json()).response ?? "5");
-	} catch (error) {
-		logger.warn(`Re-ranking error: ${error instanceof Error ? error.message : String(error)}`);
-		return 5;
-	}
-}
-/**
- * Re-rank search results using LLM scoring
- *
- * Takes initial search results and re-scores them based on
- * semantic relevance to the query using an LLM.
- */
-async function rerank(query, results, options) {
-	const maxResults = options.maxResults ?? 20;
-	const toRerank = results.slice(0, maxResults);
-	if (toRerank.length === 0) return [];
-	logger.debug(`Re-ranking ${String(toRerank.length)} results for: ${query}`);
-	const batchSize = 5;
-	const rerankedResults = [];
-	for (let i = 0; i < toRerank.length; i += batchSize) {
-		const batch = toRerank.slice(i, i + batchSize);
-		const scores = await Promise.all(batch.map(async (result) => scoreResult(query, result.chunk.content, options)));
-		for (let j = 0; j < batch.length; j++) {
-			const result = batch[j];
-			const score = scores[j];
-			if (result !== void 0 && score !== void 0) rerankedResults.push({
-				...result,
-				originalScore: result.score,
-				rerankScore: score,
-				score
-			});
-		}
-	}
-	rerankedResults.sort((a, b) => b.rerankScore - a.rerankScore);
-	logger.debug(`Re-ranking complete, top score: ${String(rerankedResults[0]?.rerankScore ?? 0)}`);
-	return rerankedResults;
-}
-
 //#endregion
 //#region src/core/embeddings/callgraph.ts
 /**
@@ -3051,7 +2961,6 @@ const searchCodeSchema = z.object({
 		"fts",
 		"hybrid"
 	]).optional().default("hybrid").describe("Search mode: 'vector' (semantic only), 'fts' (keyword only), 'hybrid' (combined with RRF fusion)"),
-	rerank: z.boolean().optional().default(true).describe("Enable LLM re-ranking for improved relevance (enabled by default)"),
 	includeCallContext: z.boolean().optional().default(true).describe("Include caller/callee information for each result (uses cached call graph)")
 });
 /**
@@ -3114,7 +3023,7 @@ function formatResults(results, baseDir) {
  * Execute the search_code feature
  */
 async function execute$2(input) {
-	const { query, directory, limit, threshold, mode,
+	const { query, directory, limit, threshold, mode, includeCallContext } = input;
 	if (!fs.existsSync(directory)) return {
 		success: false,
 		error: `Directory not found: ${directory}`
@@ -3136,11 +3045,6 @@ async function execute$2(input) {
 	const queryVector = await ollamaClient.embed(query);
 	let results = await vectorStore.searchHybrid(queryVector, query, limit, { mode });
 	if (threshold !== void 0 && mode === "vector") results = results.filter((r) => r.score <= threshold);
-	if (enableRerank && results.length > 0) results = await rerank(query, results, {
-		ollamaBaseUrl: EMBEDDING_CONFIG.ollamaBaseUrl,
-		model: EMBEDDING_CONFIG.rerankModel,
-		maxResults: limit
-	});
 	vectorStore.close();
 	let formattedResults = formatResults(results, absoluteDir);
 	if (includeCallContext && formattedResults.length > 0) {
@@ -3713,7 +3617,6 @@ SRC is a semantic code search MCP server. It indexes codebases and provides inte
 - **Vector embeddings** for semantic similarity (understands meaning, not just keywords)
 - **BM25 keyword search** for exact matches
 - **Hybrid search** combining both with RRF fusion
-- **LLM re-ranking** for optimal relevance
 - **Call graph analysis** showing function relationships
 
 ## When to use SRC?
@@ -3781,7 +3684,6 @@ search_code(query: "your search query here")
 | query | string | required | Natural language search query |
 | limit | number | 10 | Max results to return |
 | mode | "hybrid" / "vector" / "fts" | "hybrid" | Search mode |
-| rerank | boolean | true | LLM re-ranking for better relevance |
 | includeCallContext | boolean | true | Include caller/callee info |
 | threshold | number | - | Distance threshold (vector mode only) |
 
@@ -3886,4 +3788,4 @@ async function startServer() {
 
 //#endregion
 export { logger as a, colors as i, features as n, EMBEDDING_CONFIG as o, createIndexWatcher as r, config as s, startServer as t };
-//# sourceMappingURL=server-
+//# sourceMappingURL=server-DL8hfycz.mjs.map