@iceinvein/code-intelligence-mcp 1.1.3 → 1.2.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +10 -7
- package/bin/run.js +4 -35
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -17,7 +17,7 @@ Unlike basic text search, this server builds a local knowledge graph to understa
|
|
|
17
17
|
* **Advanced Hybrid Search**: Combines **Tantivy** (keyword BM25) + **LanceDB** (semantic vector) + **Jina Code embeddings** (768-dim code-specific model) with Reciprocal Rank Fusion (RRF).
|
|
18
18
|
* **Cross-Encoder Reranking**: Always-on ORT-based reranker for precision result ranking.
|
|
19
19
|
* **Smart Context Assembly**: Token-aware budgeting with query-aware truncation that keeps relevant lines within context limits.
|
|
20
|
-
* **On-Device LLM Descriptions**: Automatically generates natural-language descriptions for every symbol using a local **Qwen2.5-Coder-1.5B** model (
|
|
20
|
+
* **On-Device LLM Descriptions**: Automatically generates natural-language descriptions for every symbol using a local **Qwen2.5-Coder-1.5B** model (llama.cpp with Metal GPU), enriching search with human-readable summaries.
|
|
21
21
|
* **PageRank Scoring**: Graph-based symbol importance scoring that identifies central, heavily-used components.
|
|
22
22
|
* **Learns from Feedback**: Optional learning system that adapts to user selections over time.
|
|
23
23
|
* **Production First**: Multi-layer test detection (file paths, symbol names, and AST-level `#[test]`/`mod tests` analysis) ensures implementation code ranks above test helpers.
|
|
@@ -134,18 +134,19 @@ Each client session is bound to its workspace root. The server maintains separat
|
|
|
134
134
|
|
|
135
135
|
### Data Storage
|
|
136
136
|
|
|
137
|
-
|
|
137
|
+
Both embedded (stdio) and standalone (HTTP) modes store all data in `~/.code-intelligence/`:
|
|
138
138
|
|
|
139
139
|
```text
|
|
140
140
|
~/.code-intelligence/
|
|
141
|
-
├── server.toml # Optional config file
|
|
142
|
-
├── models/ # Shared
|
|
143
|
-
│
|
|
141
|
+
├── server.toml # Optional config file (standalone only)
|
|
142
|
+
├── models/ # Shared models (loaded once, shared across repos)
|
|
143
|
+
│ ├── jina-code-onnx/ # Embedding model (~500MB)
|
|
144
|
+
│ └── qwen2.5-coder-1.5b-gguf/ # LLM model (~1.1GB)
|
|
144
145
|
├── logs/
|
|
145
146
|
│ └── server.log
|
|
146
147
|
└── repos/
|
|
147
148
|
├── registry.json # Tracks all known repos
|
|
148
|
-
├── a1b2c3d4e5f6a7b8/ # Per-repo data (SHA256 hash of path)
|
|
149
|
+
├── a1b2c3d4e5f6a7b8/ # Per-repo data (SHA256 hash of repo path)
|
|
149
150
|
│ ├── code-intelligence.db
|
|
150
151
|
│ ├── tantivy-index/
|
|
151
152
|
│ └── vectors/
|
|
@@ -153,6 +154,8 @@ All standalone data lives in `~/.code-intelligence/`:
|
|
|
153
154
|
└── ...
|
|
154
155
|
```
|
|
155
156
|
|
|
157
|
+
The same repo always maps to the same hash regardless of mode, so embedded and standalone modes can share the same index data.
|
|
158
|
+
|
|
156
159
|
### Configuration
|
|
157
160
|
|
|
158
161
|
Standalone mode is configured via `~/.code-intelligence/server.toml` (created on first run with defaults). Environment variables and CLI flags override TOML settings.
|
|
@@ -473,7 +476,7 @@ src/
|
|
|
473
476
|
│ └── standalone.rs # Standalone HTTP handler with session routing
|
|
474
477
|
├── tools/ # Tool definitions (23 MCP tools)
|
|
475
478
|
├── embeddings/ # Jina Code embedding model wrapper
|
|
476
|
-
├── llm/ # On-device LLM (Qwen2.5-Coder-1.5B via
|
|
479
|
+
├── llm/ # On-device LLM (Qwen2.5-Coder-1.5B via llama.cpp)
|
|
477
480
|
├── reranker/ # Cross-encoder ORT implementation
|
|
478
481
|
├── path/ # Cross-platform path normalization (camino)
|
|
479
482
|
├── text.rs # Text processing (synonym expansion, morphological variants)
|
package/bin/run.js
CHANGED
|
@@ -60,41 +60,10 @@ if (!env.METRICS_ENABLED) {
|
|
|
60
60
|
env.METRICS_ENABLED = 'false';
|
|
61
61
|
}
|
|
62
62
|
|
|
63
|
-
//
|
|
64
|
-
//
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
// Ensure .cimcp directory exists
|
|
68
|
-
if (!fs.existsSync(cimcpDir)) {
|
|
69
|
-
try {
|
|
70
|
-
fs.mkdirSync(cimcpDir, { recursive: true });
|
|
71
|
-
} catch (e) {
|
|
72
|
-
console.error(`Failed to create .cimcp directory at ${cimcpDir}:`, e.message);
|
|
73
|
-
// Continue, the server might handle it or fail later
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
if (!env.DB_PATH) env.DB_PATH = path.join(cimcpDir, 'code-intelligence.db');
|
|
78
|
-
if (!env.VECTOR_DB_PATH) env.VECTOR_DB_PATH = path.join(cimcpDir, 'vectors');
|
|
79
|
-
if (!env.TANTIVY_INDEX_PATH) env.TANTIVY_INDEX_PATH = path.join(cimcpDir, 'tantivy-index');
|
|
80
|
-
|
|
81
|
-
// Also set model dir - use GLOBAL cache to avoid downloading models for every project
|
|
82
|
-
// Models are shared across projects, but indexes remain local
|
|
83
|
-
if (!env.EMBEDDINGS_MODEL_DIR) {
|
|
84
|
-
// macOS: ~/Library/Application Support/cimcp/embeddings-cache
|
|
85
|
-
env.EMBEDDINGS_MODEL_DIR = path.join(os.homedir(), 'Library', 'Application Support', 'cimcp', 'embeddings-cache');
|
|
86
|
-
|
|
87
|
-
// Ensure global model cache directory exists
|
|
88
|
-
if (!fs.existsSync(env.EMBEDDINGS_MODEL_DIR)) {
|
|
89
|
-
try {
|
|
90
|
-
fs.mkdirSync(env.EMBEDDINGS_MODEL_DIR, { recursive: true });
|
|
91
|
-
} catch (e) {
|
|
92
|
-
console.error(`Failed to create global embeddings cache at ${env.EMBEDDINGS_MODEL_DIR}:`, e.message);
|
|
93
|
-
console.warn('Falling back to local project cache for this session');
|
|
94
|
-
env.EMBEDDINGS_MODEL_DIR = path.join(cimcpDir, 'embeddings-model');
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
}
|
|
63
|
+
// Per-repo data paths are auto-derived by the Rust binary from BASE_DIR.
|
|
64
|
+
// Indexes are stored under ~/.code-intelligence/repos/<hash>/.
|
|
65
|
+
// Models are shared across projects under ~/.code-intelligence/models/.
|
|
66
|
+
// No need to set DB_PATH, VECTOR_DB_PATH, or TANTIVY_INDEX_PATH here.
|
|
98
67
|
|
|
99
68
|
// Spawn the process
|
|
100
69
|
const child = spawn(BINARY_PATH, process.argv.slice(2), {
|