@softerist/heuristic-mcp 3.2.11 → 3.2.13
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +20 -357
- package/features/hybrid-search.js +47 -19
- package/features/index-codebase.js +66 -16
- package/lib/cache.js +125 -34
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,387 +1,50 @@
|
|
|
1
1
|
# Heuristic MCP Server
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Heuristic MCP adds smart code search to your editor or MCP client.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
## Requirements
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
- Node.js `18+`
|
|
8
|
+
- npm (for global install)
|
|
9
|
+
- Internet access at least once to download the embedding model (if install-time download is skipped, it downloads on first run)
|
|
10
|
+
- 64-bit Node.js recommended for native ONNX performance; on Windows, install Microsoft Visual C++ 2015-2022 Redistributable (x64) if native bindings fail
|
|
8
11
|
|
|
9
|
-
|
|
10
|
-
- Smart indexing: detects project type and applies smart ignore patterns on top of your excludes.
|
|
11
|
-
- Semantic search: find code by meaning, not just keywords.
|
|
12
|
-
- Find similar code: locate near-duplicate or related patterns from a snippet.
|
|
13
|
-
- Package version lookup: check latest versions from npm, PyPI, crates.io, Maven, and more.
|
|
14
|
-
- Workspace switching: change workspace at runtime without restarting the server.
|
|
15
|
-
- Recency ranking and call-graph boosting: surfaces fresh and related code.
|
|
16
|
-
- Optional ANN index: faster candidate retrieval for large codebases.
|
|
17
|
-
- Optional binary vector store: mmap-friendly cache format for large repos.
|
|
18
|
-
- Flexible embedding dimensions: MRL-compatible dimension reduction (64-768d) for speed/quality tradeoffs.
|
|
19
|
-
|
|
20
|
-
---
|
|
21
|
-
|
|
22
|
-
## Installation
|
|
23
|
-
|
|
24
|
-
Install globally (recommended):
|
|
12
|
+
## Install
|
|
25
13
|
|
|
26
14
|
```bash
|
|
27
15
|
npm install -g @softerist/heuristic-mcp
|
|
28
16
|
```
|
|
29
17
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
- Registration runs automatically (`scripts/postinstall.js`).
|
|
33
|
-
- Model pre-download is attempted (`scripts/download-model.js`). If offline, it will be skipped and downloaded on first run.
|
|
34
|
-
|
|
35
|
-
If auto-registration did not update your IDE config, run:
|
|
18
|
+
Then enable it for your client:
|
|
36
19
|
|
|
37
20
|
```bash
|
|
38
21
|
heuristic-mcp --start
|
|
39
22
|
```
|
|
40
23
|
|
|
41
|
-
|
|
24
|
+
If your editor was already open, reload it once.
|
|
42
25
|
|
|
43
|
-
##
|
|
26
|
+
## How It Works
|
|
44
27
|
|
|
45
|
-
The
|
|
28
|
+
1. The server scans your workspace and builds a searchable index of your code.
|
|
29
|
+
2. IDE AI models/MCP tools query that index using plain language so you can find relevant code quickly.
|
|
30
|
+
3. Results improve as your index stays up to date with project changes.
|
|
46
31
|
|
|
47
|
-
|
|
32
|
+
## Basic Commands
|
|
48
33
|
|
|
49
34
|
```bash
|
|
50
35
|
heuristic-mcp --status
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
Shows server PID(s) and cache stats.
|
|
54
|
-
|
|
55
|
-
### Logs
|
|
56
|
-
|
|
57
|
-
```bash
|
|
58
36
|
heuristic-mcp --logs
|
|
59
|
-
```
|
|
60
|
-
|
|
61
|
-
Tails the server log for the current workspace (defaults to last 200 lines and follows).
|
|
62
|
-
|
|
63
|
-
Optional flags:
|
|
64
|
-
|
|
65
|
-
```bash
|
|
66
|
-
heuristic-mcp --logs --tail 100
|
|
67
|
-
heuristic-mcp --logs --no-follow
|
|
68
|
-
```
|
|
69
|
-
|
|
70
|
-
### Version
|
|
71
|
-
|
|
72
|
-
```bash
|
|
73
|
-
heuristic-mcp --version
|
|
74
|
-
```
|
|
75
|
-
|
|
76
|
-
### Start/Stop
|
|
77
|
-
|
|
78
|
-
```bash
|
|
79
|
-
heuristic-mcp --start
|
|
80
|
-
heuristic-mcp --start antigravity
|
|
81
|
-
heuristic-mcp --start codex
|
|
82
|
-
heuristic-mcp --start cursor
|
|
83
|
-
heuristic-mcp --start vscode
|
|
84
|
-
heuristic-mcp --start windsurf
|
|
85
|
-
heuristic-mcp --start warp
|
|
86
|
-
heuristic-mcp --start "Claude Desktop"
|
|
87
37
|
heuristic-mcp --stop
|
|
88
38
|
```
|
|
89
39
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
Warp note: this package now targets `~/.warp/mcp_settings.json` (and `%APPDATA%\\Warp\\mcp_settings.json` on Windows when present). If no local Warp MCP config is writable yet, use Warp MCP settings/UI once to initialize it, then re-run `--start warp`.
|
|
93
|
-
|
|
94
|
-
### Clear Cache
|
|
95
|
-
|
|
96
|
-
```bash
|
|
97
|
-
heuristic-mcp --clear-cache
|
|
98
|
-
```
|
|
99
|
-
|
|
100
|
-
Clears the cache for the current working directory (or `--workspace` if provided) and removes stale cache directories without metadata.
|
|
101
|
-
|
|
102
|
-
---
|
|
103
|
-
|
|
104
|
-
## Configuration (`config.jsonc`)
|
|
105
|
-
|
|
106
|
-
Configuration is loaded from your workspace root when the server runs with `--workspace`. If not provided by the IDE, the server auto-detects workspace via environment variables and current working directory. In server mode, it falls back to the package `config.jsonc` (or `config.json`) and then your current working directory.
|
|
107
|
-
|
|
108
|
-
Example `config.jsonc`:
|
|
109
|
-
|
|
110
|
-
```json
|
|
111
|
-
{
|
|
112
|
-
"excludePatterns": ["**/legacy-code/**", "**/*.test.ts"],
|
|
113
|
-
"fileNames": ["Dockerfile", ".env.example", "Makefile"],
|
|
114
|
-
"indexing": {
|
|
115
|
-
"smartIndexing": true
|
|
116
|
-
},
|
|
117
|
-
"worker": {
|
|
118
|
-
"workerThreads": 0
|
|
119
|
-
},
|
|
120
|
-
"embedding": {
|
|
121
|
-
"embeddingModel": "jinaai/jina-embeddings-v2-base-code",
|
|
122
|
-
"embeddingBatchSize": null,
|
|
123
|
-
"embeddingProcessNumThreads": 8
|
|
124
|
-
},
|
|
125
|
-
"search": {
|
|
126
|
-
"recencyBoost": 0.1,
|
|
127
|
-
"recencyDecayDays": 30
|
|
128
|
-
},
|
|
129
|
-
"callGraph": {
|
|
130
|
-
"callGraphEnabled": true,
|
|
131
|
-
"callGraphBoost": 0.15
|
|
132
|
-
},
|
|
133
|
-
"ann": {
|
|
134
|
-
"annEnabled": true
|
|
135
|
-
},
|
|
136
|
-
"vectorStore": {
|
|
137
|
-
"vectorStoreFormat": "binary",
|
|
138
|
-
"vectorStoreContentMode": "external",
|
|
139
|
-
"vectorStoreLoadMode": "disk",
|
|
140
|
-
"contentCacheEntries": 256,
|
|
141
|
-
"vectorCacheEntries": 64
|
|
142
|
-
},
|
|
143
|
-
"memoryCleanup": {
|
|
144
|
-
"clearCacheAfterIndex": true
|
|
145
|
-
}
|
|
146
|
-
}
|
|
147
|
-
```
|
|
148
|
-
|
|
149
|
-
Preferred style is namespaced keys (shown above). Legacy top-level keys are still supported for backward compatibility.
|
|
150
|
-
|
|
151
|
-
### Embedding Model & Dimension Options
|
|
152
|
-
|
|
153
|
-
**Default model:** `jinaai/jina-embeddings-v2-base-code` (768 dimensions)
|
|
154
|
-
|
|
155
|
-
> **Important:** The default Jina model was **not** trained with Matryoshka Representation Learning (MRL). Dimension reduction (`embeddingDimension`) will significantly degrade search quality with this model. Only use dimension reduction with MRL-trained models.
|
|
156
|
-
|
|
157
|
-
For faster search with smaller embeddings, switch to an MRL-compatible model:
|
|
158
|
-
|
|
159
|
-
```json
|
|
160
|
-
{
|
|
161
|
-
"embedding": {
|
|
162
|
-
"embeddingModel": "nomic-ai/nomic-embed-text-v1.5",
|
|
163
|
-
"embeddingDimension": 128
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
```
|
|
167
|
-
|
|
168
|
-
**MRL-compatible models:**
|
|
169
|
-
|
|
170
|
-
- `nomic-ai/nomic-embed-text-v1.5` — recommended for 128d/256d
|
|
171
|
-
- Other models explicitly trained with Matryoshka loss
|
|
172
|
-
|
|
173
|
-
**embeddingDimension values:** `64 | 128 | 256 | 512 | 768 | null` (null = full dimensions)
|
|
174
|
-
|
|
175
|
-
Cache location:
|
|
176
|
-
|
|
177
|
-
- By default, the cache is stored in a global OS cache directory under `heuristic-mcp/<hash>`.
|
|
178
|
-
- You can override with `cacheDirectory` in your config file.
|
|
179
|
-
|
|
180
|
-
### Environment Variables
|
|
181
|
-
|
|
182
|
-
Selected overrides (prefix `SMART_CODING_`):
|
|
183
|
-
|
|
184
|
-
Environment overrides target runtime keys and are synced back into namespaces by `lib/config.js`.
|
|
185
|
-
|
|
186
|
-
- `SMART_CODING_VERBOSE=true|false` — enable detailed logging.
|
|
187
|
-
- `SMART_CODING_WORKER_THREADS=auto|N` — worker thread count.
|
|
188
|
-
- `SMART_CODING_BATCH_SIZE=100` — files per indexing batch.
|
|
189
|
-
- `SMART_CODING_CHUNK_SIZE=25` — lines per chunk.
|
|
190
|
-
- `SMART_CODING_MAX_RESULTS=5` — max search results.
|
|
191
|
-
- `SMART_CODING_EMBEDDING_BATCH_SIZE=64` — embedding batch size (1–256, overrides auto).
|
|
192
|
-
- `SMART_CODING_EMBEDDING_THREADS=8` — ONNX threads for the embedding child process.
|
|
193
|
-
- `SMART_CODING_RECENCY_BOOST=0.1` — boost for recently edited files.
|
|
194
|
-
- `SMART_CODING_RECENCY_DECAY_DAYS=30` — days until recency boost decays to 0.
|
|
195
|
-
- `SMART_CODING_ANN_ENABLED=true|false` — enable ANN index.
|
|
196
|
-
- `SMART_CODING_ANN_EF_SEARCH=64` — ANN search quality/speed tradeoff.
|
|
197
|
-
- `SMART_CODING_VECTOR_STORE_FORMAT=json|binary|sqlite` — on-disk vector store format.
|
|
198
|
-
- `SMART_CODING_VECTOR_STORE_CONTENT_MODE=external|inline` — where content is stored for binary format.
|
|
199
|
-
- `SMART_CODING_VECTOR_STORE_LOAD_MODE=memory|disk` — vector loading strategy.
|
|
200
|
-
- `SMART_CODING_CONTENT_CACHE_ENTRIES=256` — LRU entries for decoded content.
|
|
201
|
-
- `SMART_CODING_VECTOR_CACHE_ENTRIES=64` — LRU entries for vectors (disk mode).
|
|
202
|
-
- `SMART_CODING_CLEAR_CACHE_AFTER_INDEX=true|false` — drop in-memory vectors after indexing.
|
|
203
|
-
- `SMART_CODING_UNLOAD_MODEL_AFTER_INDEX=true|false` — unload embedding model after indexing to free RAM (~500MB-1GB).
|
|
204
|
-
- `SMART_CODING_EXPLICIT_GC=true|false` — opt-in to explicit GC (requires `--expose-gc`).
|
|
205
|
-
- `SMART_CODING_INCREMENTAL_GC_THRESHOLD_MB=2048` — RSS threshold for running incremental GC after watcher updates (requires explicit GC).
|
|
206
|
-
- `SMART_CODING_EMBEDDING_DIMENSION=64|128|256|512|768` — MRL dimension reduction (only for MRL-trained models).
|
|
207
|
-
|
|
208
|
-
See `lib/config.js` for the full list.
|
|
209
|
-
|
|
210
|
-
### Binary Vector Store
|
|
211
|
-
|
|
212
|
-
Set `vectorStore.vectorStoreFormat` to `binary` to use the on-disk binary cache. This keeps vectors and content out of JS heap
|
|
213
|
-
and reads on demand. Recommended for large repos.
|
|
214
|
-
|
|
215
|
-
- `vectorStore.vectorStoreContentMode=external` keeps content in the binary file and only loads for top-N results.
|
|
216
|
-
- `vectorStore.contentCacheEntries` controls the small in-memory LRU for decoded content strings.
|
|
217
|
-
- `vectorStore.vectorStoreLoadMode=disk` streams vectors from disk to reduce memory usage.
|
|
218
|
-
- `vectorStore.vectorCacheEntries` controls the small in-memory LRU for vectors when using disk mode.
|
|
219
|
-
- `memoryCleanup.clearCacheAfterIndex=true` drops in-memory vectors after indexing and reloads lazily on next query.
|
|
220
|
-
- `memoryCleanup.unloadModelAfterIndex=true` (default) unloads the embedding model after indexing to free ~500MB-1GB of RAM; the model will reload on the next search query.
|
|
221
|
-
- Note: `ann.annEnabled=true` with `vectorStore.vectorStoreLoadMode=disk` can increase disk reads during ANN rebuilds on large indexes.
|
|
222
|
-
|
|
223
|
-
### SQLite Vector Store
|
|
224
|
-
|
|
225
|
-
Set `vectorStore.vectorStoreFormat` to `sqlite` to use SQLite for persistence. This provides:
|
|
226
|
-
|
|
227
|
-
- ACID transactions for reliable writes
|
|
228
|
-
- Simpler concurrent access
|
|
229
|
-
- Standard database format for inspection
|
|
230
|
-
|
|
231
|
-
```json
|
|
232
|
-
{
|
|
233
|
-
"vectorStore": {
|
|
234
|
-
"vectorStoreFormat": "sqlite"
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
```
|
|
238
|
-
|
|
239
|
-
The vectors and content are stored in `vectors.sqlite` in your cache directory. You can inspect it with any SQLite browser.
|
|
240
|
-
`vectorStore.vectorStoreContentMode` and `vectorStore.vectorStoreLoadMode` are respected for SQLite (use `vectorStore.vectorStoreLoadMode=disk` to avoid loading vectors into memory).
|
|
241
|
-
|
|
242
|
-
**Tradeoffs vs Binary:**
|
|
243
|
-
|
|
244
|
-
- Slightly higher read overhead (SQL queries vs direct memory access)
|
|
245
|
-
- Better write reliability (transactions)
|
|
246
|
-
- Easier debugging (standard SQLite file)
|
|
247
|
-
|
|
248
|
-
### Benchmarking Search
|
|
249
|
-
|
|
250
|
-
Use the built-in script to compare memory vs latency tradeoffs:
|
|
251
|
-
|
|
252
|
-
```bash
|
|
253
|
-
node tools/scripts/benchmark-search.js --query "database connection" --runs 10
|
|
254
|
-
```
|
|
255
|
-
|
|
256
|
-
Compare modes quickly:
|
|
257
|
-
|
|
258
|
-
```bash
|
|
259
|
-
SMART_CODING_VECTOR_STORE_LOAD_MODE=memory node tools/scripts/benchmark-search.js --runs 10
|
|
260
|
-
SMART_CODING_VECTOR_STORE_LOAD_MODE=disk node tools/scripts/benchmark-search.js --runs 10
|
|
261
|
-
SMART_CODING_VECTOR_STORE_FORMAT=binary SMART_CODING_VECTOR_STORE_LOAD_MODE=disk node tools/scripts/benchmark-search.js --runs 10
|
|
262
|
-
```
|
|
263
|
-
|
|
264
|
-
Note: On small repos, disk mode may be slightly slower and show noisy RSS deltas; benefits are clearer on large indexes with a small `vectorStore.vectorCacheEntries`.
|
|
265
|
-
|
|
266
|
-
---
|
|
267
|
-
|
|
268
|
-
## MCP Tools Reference
|
|
269
|
-
|
|
270
|
-
### `a_semantic_search`
|
|
271
|
-
|
|
272
|
-
Find code by meaning. Ideal for natural language queries like "authentication logic" or "database queries".
|
|
273
|
-
|
|
274
|
-
### `b_index_codebase`
|
|
275
|
-
|
|
276
|
-
Manually trigger a full reindex. Useful after large code changes.
|
|
277
|
-
|
|
278
|
-
### `c_clear_cache`
|
|
279
|
-
|
|
280
|
-
Clear the embeddings cache and force reindex.
|
|
281
|
-
|
|
282
|
-
### `d_ann_config`
|
|
283
|
-
|
|
284
|
-
Configure the ANN (Approximate Nearest Neighbor) index. Actions: `stats`, `set_ef_search`, `rebuild`.
|
|
285
|
-
|
|
286
|
-
### `d_find_similar_code`
|
|
287
|
-
|
|
288
|
-
Find similar code patterns given a snippet. Useful for finding duplicates or refactoring opportunities.
|
|
289
|
-
|
|
290
|
-
### `e_check_package_version`
|
|
291
|
-
|
|
292
|
-
Fetch the latest version of a package from its official registry.
|
|
293
|
-
|
|
294
|
-
**Supported registries:**
|
|
295
|
-
|
|
296
|
-
- **npm** (default): `lodash`, `@types/node`
|
|
297
|
-
- **PyPI**: `pip:requests`, `pypi:django`
|
|
298
|
-
- **crates.io**: `cargo:serde`, `rust:tokio`
|
|
299
|
-
- **Maven**: `maven:org.springframework:spring-core`
|
|
300
|
-
- **Go**: `go:github.com/gin-gonic/gin`
|
|
301
|
-
- **RubyGems**: `gem:rails`
|
|
302
|
-
- **NuGet**: `nuget:Newtonsoft.Json`
|
|
303
|
-
- **Packagist**: `composer:laravel/framework`
|
|
304
|
-
- **Hex**: `hex:phoenix`
|
|
305
|
-
- **pub.dev**: `pub:flutter`
|
|
306
|
-
- **Homebrew**: `brew:node`
|
|
307
|
-
- **Conda**: `conda:numpy`
|
|
308
|
-
|
|
309
|
-
### `f_set_workspace`
|
|
310
|
-
|
|
311
|
-
Change the workspace directory at runtime. Updates search directory, cache location, and optionally triggers reindex.
|
|
312
|
-
|
|
313
|
-
The server also attempts this automatically before each tool call when it detects a new workspace path from environment variables (for example `CODEX_WORKSPACE`, `CODEX_PROJECT_ROOT`, `WORKSPACE_FOLDER`).
|
|
314
|
-
|
|
315
|
-
**Parameters:**
|
|
316
|
-
|
|
317
|
-
- `workspacePath` (required): Absolute path to the new workspace
|
|
318
|
-
- `reindex` (optional, default: `true`): Whether to trigger a full reindex
|
|
319
|
-
|
|
320
|
-
---
|
|
321
|
-
|
|
322
|
-
## Troubleshooting
|
|
323
|
-
|
|
324
|
-
**Server isn't starting**
|
|
325
|
-
|
|
326
|
-
1. Run `heuristic-mcp --status` to check config and cache status.
|
|
327
|
-
2. Run `heuristic-mcp --logs` to see startup errors.
|
|
328
|
-
|
|
329
|
-
**Native ONNX backend unavailable (falls back to WASM)**
|
|
330
|
-
|
|
331
|
-
If you see log lines like:
|
|
332
|
-
|
|
333
|
-
```
|
|
334
|
-
Native ONNX backend unavailable: The operating system cannot run %1.
|
|
335
|
-
...onnxruntime_binding.node. Falling back to WASM.
|
|
336
|
-
```
|
|
337
|
-
|
|
338
|
-
The server will automatically disable workers and force `embedding.embeddingProcessPerBatch` to reduce memory spikes, but you
|
|
339
|
-
should fix the native binding to restore stable memory usage:
|
|
340
|
-
|
|
341
|
-
- Ensure you are running **64-bit Node.js** (`node -p "process.arch"` should be `x64`).
|
|
342
|
-
- Install **Microsoft Visual C++ 2015–2022 Redistributable (x64)**.
|
|
343
|
-
- Reinstall dependencies (clears locked native binaries):
|
|
344
|
-
|
|
345
|
-
```bash
|
|
346
|
-
Remove-Item -Recurse -Force node_modules\\onnxruntime-node, node_modules\\.onnxruntime-node-* -ErrorAction SilentlyContinue
|
|
347
|
-
npm install
|
|
348
|
-
```
|
|
349
|
-
|
|
350
|
-
If you see a warning about **version mismatch** (e.g. "onnxruntime-node 1.23.x incompatible with transformers.js
|
|
351
|
-
expectation 1.14.x"), install the matching version:
|
|
352
|
-
|
|
353
|
-
```bash
|
|
354
|
-
npm install onnxruntime-node@1.14.0
|
|
355
|
-
```
|
|
356
|
-
|
|
357
|
-
**Search returns no results**
|
|
358
|
-
|
|
359
|
-
- Check `heuristic-mcp --status` for indexing progress.
|
|
360
|
-
- If indexing shows zero files, review `excludePatterns` and `fileExtensions`.
|
|
361
|
-
|
|
362
|
-
**Model download fails**
|
|
363
|
-
|
|
364
|
-
- The install step tries to pre-download the model, but it can be skipped offline.
|
|
365
|
-
- The server will download on first run; ensure network access at least once.
|
|
366
|
-
|
|
367
|
-
**Clear cache**
|
|
368
|
-
|
|
369
|
-
- Use the MCP tool `c_clear_cache`, run `heuristic-mcp --clear-cache`, or delete the cache directory. For local dev, run `npm run clean`.
|
|
370
|
-
|
|
371
|
-
**Inspect cache**
|
|
372
|
-
|
|
373
|
-
```bash
|
|
374
|
-
node tools/scripts/cache-stats.js --workspace <path>
|
|
375
|
-
```
|
|
376
|
-
|
|
377
|
-
**Stop doesn't stick**
|
|
378
|
-
|
|
379
|
-
- The IDE will auto-restart the server if it's still enabled in its config. `--stop` now disables the server entry for Antigravity, Cursor (including `~/.cursor/mcp.json`), Windsurf (`~/.codeium/windsurf/mcp_config.json`), Warp (`~/.warp/mcp_settings.json` and `%APPDATA%\\Warp\\mcp_settings.json` when present), Claude Desktop, and VS Code (when using common MCP settings keys). Restart the IDE after `--start` to re-enable.
|
|
40
|
+
Use `heuristic-mcp --status` first if something looks off.
|
|
41
|
+
Use `heuristic-mcp --cache` to see the cache status or file index progress.
|
|
380
42
|
|
|
381
|
-
|
|
43
|
+
## Advanced Docs
|
|
382
44
|
|
|
383
|
-
|
|
45
|
+
Detailed configuration, tool reference, troubleshooting, and release notes are in:
|
|
384
46
|
|
|
385
|
-
|
|
47
|
+
- [`docs/GUIDE.md`](docs/GUIDE.md)
|
|
48
|
+
- [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md)
|
|
386
49
|
|
|
387
50
|
License: MIT
|
|
package/features/hybrid-search.js
CHANGED
|
@@ -3,6 +3,7 @@ import fs from 'fs/promises';
|
|
|
3
3
|
import { dotSimilarity } from '../lib/utils.js';
|
|
4
4
|
import { extractSymbolsFromContent } from '../lib/call-graph.js';
|
|
5
5
|
import { embedQueryInChildProcess } from '../lib/embed-query-process.js';
|
|
6
|
+
import { normalizePathKey } from '../lib/path-utils.js';
|
|
6
7
|
import {
|
|
7
8
|
STAT_CONCURRENCY_LIMIT,
|
|
8
9
|
SEARCH_BATCH_SIZE,
|
|
@@ -27,6 +28,10 @@ function alignQueryVectorDimension(vector, targetDim) {
|
|
|
27
28
|
return sliced;
|
|
28
29
|
}
|
|
29
30
|
|
|
31
|
+
function toFileKey(file) {
|
|
32
|
+
return normalizePathKey(file);
|
|
33
|
+
}
|
|
34
|
+
|
|
30
35
|
export class HybridSearch {
|
|
31
36
|
constructor(embedder, cache, config) {
|
|
32
37
|
this.embedder = embedder;
|
|
@@ -36,6 +41,13 @@ export class HybridSearch {
|
|
|
36
41
|
this._lastAccess = new Map();
|
|
37
42
|
}
|
|
38
43
|
|
|
44
|
+
setFileModTime(file, mtimeMs) {
|
|
45
|
+
const key = toFileKey(file);
|
|
46
|
+
if (!key) return;
|
|
47
|
+
this.fileModTimes.set(key, mtimeMs);
|
|
48
|
+
this._lastAccess.set(key, Date.now());
|
|
49
|
+
}
|
|
50
|
+
|
|
39
51
|
async getChunkContent(chunkOrIndex) {
|
|
40
52
|
return await this.cache.getChunkContent(chunkOrIndex);
|
|
41
53
|
}
|
|
@@ -54,20 +66,28 @@ export class HybridSearch {
|
|
|
54
66
|
}
|
|
55
67
|
|
|
56
68
|
async populateFileModTimes(files) {
|
|
57
|
-
const
|
|
69
|
+
const uniqueFilesByKey = new Map();
|
|
70
|
+
for (const file of files) {
|
|
71
|
+
const key = toFileKey(file);
|
|
72
|
+
if (!key) continue;
|
|
73
|
+
if (!uniqueFilesByKey.has(key)) {
|
|
74
|
+
uniqueFilesByKey.set(key, file);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
58
77
|
const missing = [];
|
|
78
|
+
const now = Date.now();
|
|
59
79
|
|
|
60
|
-
for (const file of
|
|
61
|
-
if (!this.fileModTimes.has(
|
|
80
|
+
for (const [key, file] of uniqueFilesByKey) {
|
|
81
|
+
if (!this.fileModTimes.has(key)) {
|
|
62
82
|
const meta = this.cache.getFileMeta(file);
|
|
63
83
|
if (meta && typeof meta.mtimeMs === 'number') {
|
|
64
|
-
this.fileModTimes.set(
|
|
65
|
-
this._lastAccess.set(
|
|
84
|
+
this.fileModTimes.set(key, meta.mtimeMs);
|
|
85
|
+
this._lastAccess.set(key, now);
|
|
66
86
|
} else {
|
|
67
|
-
missing.push(file);
|
|
87
|
+
missing.push({ key, file });
|
|
68
88
|
}
|
|
69
89
|
} else {
|
|
70
|
-
this._lastAccess.set(
|
|
90
|
+
this._lastAccess.set(key, now);
|
|
71
91
|
}
|
|
72
92
|
}
|
|
73
93
|
|
|
@@ -79,13 +99,15 @@ export class HybridSearch {
|
|
|
79
99
|
|
|
80
100
|
const worker = async (startIdx) => {
|
|
81
101
|
for (let i = startIdx; i < missing.length; i += workerCount) {
|
|
82
|
-
const
|
|
102
|
+
const item = missing[i];
|
|
103
|
+
if (!item) continue;
|
|
104
|
+
const { key, file } = item;
|
|
83
105
|
try {
|
|
84
106
|
const stats = await fs.stat(file);
|
|
85
|
-
this.fileModTimes.set(
|
|
86
|
-
this._lastAccess.set(
|
|
107
|
+
this.fileModTimes.set(key, stats.mtimeMs);
|
|
108
|
+
this._lastAccess.set(key, Date.now());
|
|
87
109
|
} catch {
|
|
88
|
-
this.fileModTimes.set(
|
|
110
|
+
this.fileModTimes.set(key, null);
|
|
89
111
|
}
|
|
90
112
|
}
|
|
91
113
|
};
|
|
@@ -109,7 +131,10 @@ export class HybridSearch {
|
|
|
109
131
|
}
|
|
110
132
|
|
|
111
133
|
clearFileModTime(file) {
|
|
112
|
-
|
|
134
|
+
const key = toFileKey(file);
|
|
135
|
+
if (!key) return;
|
|
136
|
+
this.fileModTimes.delete(key);
|
|
137
|
+
this._lastAccess.delete(key);
|
|
113
138
|
}
|
|
114
139
|
|
|
115
140
|
async search(query, maxResults) {
|
|
@@ -259,11 +284,11 @@ export class HybridSearch {
|
|
|
259
284
|
await this.populateFileModTimes(candidates.map((chunk) => chunk.file));
|
|
260
285
|
} else {
|
|
261
286
|
for (const chunk of candidates) {
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
287
|
+
const chunkKey = toFileKey(chunk.file);
|
|
288
|
+
if (!chunkKey || this.fileModTimes.has(chunkKey)) continue;
|
|
289
|
+
const meta = this.cache.getFileMeta(chunk.file);
|
|
290
|
+
if (meta && typeof meta.mtimeMs === 'number') {
|
|
291
|
+
this.setFileModTime(chunk.file, meta.mtimeMs);
|
|
267
292
|
}
|
|
268
293
|
}
|
|
269
294
|
}
|
|
@@ -323,7 +348,8 @@ export class HybridSearch {
|
|
|
323
348
|
}
|
|
324
349
|
|
|
325
350
|
if (recencyBoostEnabled) {
|
|
326
|
-
const
|
|
351
|
+
const chunkKey = toFileKey(chunkInfo.file);
|
|
352
|
+
const mtime = chunkKey ? this.fileModTimes.get(chunkKey) : undefined;
|
|
327
353
|
if (typeof mtime === 'number') {
|
|
328
354
|
const ageMs = now - mtime;
|
|
329
355
|
const recencyFactor = Math.max(0, 1 - ageMs / recencyDecayMs);
|
|
@@ -380,7 +406,9 @@ export class HybridSearch {
|
|
|
380
406
|
const relatedFiles = await this.cache.getRelatedFiles(Array.from(symbolsFromTop));
|
|
381
407
|
|
|
382
408
|
for (const chunk of scoredChunks) {
|
|
383
|
-
const
|
|
409
|
+
const chunkKey = toFileKey(chunk.file);
|
|
410
|
+
const proximity =
|
|
411
|
+
relatedFiles.get(chunk.file) ?? (chunkKey ? relatedFiles.get(chunkKey) : undefined);
|
|
384
412
|
if (proximity) {
|
|
385
413
|
chunk.score += proximity * this.config.callGraphBoost;
|
|
386
414
|
}
|
|
package/features/index-codebase.js
CHANGED
|
@@ -10,6 +10,7 @@ import { fileURLToPath } from 'url';
|
|
|
10
10
|
import { smartChunk, hashContent } from '../lib/utils.js';
|
|
11
11
|
import { extractCallData } from '../lib/call-graph.js';
|
|
12
12
|
import { forceShutdownEmbeddingPool, isEmbeddingPoolActive } from '../lib/embed-query-process.js';
|
|
13
|
+
import { normalizePathKey } from '../lib/path-utils.js';
|
|
13
14
|
|
|
14
15
|
import ignore from 'ignore';
|
|
15
16
|
|
|
@@ -31,6 +32,10 @@ function normalizePath(value) {
|
|
|
31
32
|
return value.split(path.sep).join('/');
|
|
32
33
|
}
|
|
33
34
|
|
|
35
|
+
function toFileKey(value) {
|
|
36
|
+
return normalizePathKey(value);
|
|
37
|
+
}
|
|
38
|
+
|
|
34
39
|
function globToRegExp(pattern) {
|
|
35
40
|
let regex = '^';
|
|
36
41
|
for (let i = 0; i < pattern.length; i += 1) {
|
|
@@ -2149,7 +2154,14 @@ export class CodebaseIndexer {
|
|
|
2149
2154
|
if (this.server && this.server.hybridSearch && this.server.hybridSearch.fileModTimes) {
|
|
2150
2155
|
for (const stat of fileStats) {
|
|
2151
2156
|
if (stat && stat.file && typeof stat.mtimeMs === 'number') {
|
|
2152
|
-
this.server.hybridSearch.
|
|
2157
|
+
if (typeof this.server.hybridSearch.setFileModTime === 'function') {
|
|
2158
|
+
this.server.hybridSearch.setFileModTime(stat.file, stat.mtimeMs);
|
|
2159
|
+
} else {
|
|
2160
|
+
const key = toFileKey(stat.file);
|
|
2161
|
+
if (key) {
|
|
2162
|
+
this.server.hybridSearch.fileModTimes.set(key, stat.mtimeMs);
|
|
2163
|
+
}
|
|
2164
|
+
}
|
|
2153
2165
|
}
|
|
2154
2166
|
}
|
|
2155
2167
|
}
|
|
@@ -2233,7 +2245,16 @@ export class CodebaseIndexer {
|
|
|
2233
2245
|
|
|
2234
2246
|
this.sendProgress(5, 100, `Discovered ${files.length} files`);
|
|
2235
2247
|
|
|
2236
|
-
const
|
|
2248
|
+
const currentFileKeySet = new Set();
|
|
2249
|
+
const currentFilePathByKey = new Map();
|
|
2250
|
+
for (const file of files) {
|
|
2251
|
+
const key = toFileKey(file);
|
|
2252
|
+
if (!key) continue;
|
|
2253
|
+
currentFileKeySet.add(key);
|
|
2254
|
+
if (!currentFilePathByKey.has(key)) {
|
|
2255
|
+
currentFilePathByKey.set(key, file);
|
|
2256
|
+
}
|
|
2257
|
+
}
|
|
2237
2258
|
|
|
2238
2259
|
if (!force) {
|
|
2239
2260
|
const cachedFiles =
|
|
@@ -2241,7 +2262,8 @@ export class CodebaseIndexer {
|
|
|
2241
2262
|
let prunedCount = 0;
|
|
2242
2263
|
|
|
2243
2264
|
for (const cachedFile of cachedFiles) {
|
|
2244
|
-
|
|
2265
|
+
const cachedKey = toFileKey(cachedFile);
|
|
2266
|
+
if (!cachedKey || !currentFileKeySet.has(cachedKey)) {
|
|
2245
2267
|
this.cache.removeFileFromStore(cachedFile);
|
|
2246
2268
|
this.cache.deleteFileHash(cachedFile);
|
|
2247
2269
|
prunedCount++;
|
|
@@ -2254,26 +2276,48 @@ export class CodebaseIndexer {
|
|
|
2254
2276
|
}
|
|
2255
2277
|
}
|
|
2256
2278
|
|
|
2257
|
-
const prunedCallGraph = this.cache.pruneCallGraphData(
|
|
2279
|
+
const prunedCallGraph = this.cache.pruneCallGraphData(currentFileKeySet);
|
|
2258
2280
|
if (prunedCallGraph > 0 && this.config.verbose) {
|
|
2259
2281
|
console.info(`[Indexer] Pruned ${prunedCallGraph} call-graph entries`);
|
|
2260
2282
|
}
|
|
2261
2283
|
}
|
|
2262
2284
|
|
|
2263
2285
|
const filesToProcess = await this.preFilterFiles(files);
|
|
2264
|
-
const
|
|
2265
|
-
const
|
|
2286
|
+
const filesToProcessKeys = new Set();
|
|
2287
|
+
const filesToProcessByKey = new Map();
|
|
2288
|
+
for (const entry of filesToProcess) {
|
|
2289
|
+
const key = toFileKey(entry?.file);
|
|
2290
|
+
if (!key) continue;
|
|
2291
|
+
filesToProcessKeys.add(key);
|
|
2292
|
+
if (!filesToProcessByKey.has(key)) {
|
|
2293
|
+
filesToProcessByKey.set(key, entry);
|
|
2294
|
+
}
|
|
2295
|
+
}
|
|
2266
2296
|
|
|
2267
2297
|
if (this.config.callGraphEnabled && this.cache.getVectorStore().length > 0) {
|
|
2268
|
-
const
|
|
2269
|
-
const
|
|
2298
|
+
const cachedFileKeys = new Set();
|
|
2299
|
+
for (const chunk of this.cache.getVectorStore()) {
|
|
2300
|
+
const key = toFileKey(chunk?.file);
|
|
2301
|
+
if (key) cachedFileKeys.add(key);
|
|
2302
|
+
}
|
|
2303
|
+
const callDataFiles = new Set();
|
|
2304
|
+
for (const file of this.cache.getFileCallDataKeys()) {
|
|
2305
|
+
const key = toFileKey(file);
|
|
2306
|
+
if (key) callDataFiles.add(key);
|
|
2307
|
+
}
|
|
2270
2308
|
|
|
2271
2309
|
const missingCallData = [];
|
|
2272
|
-
for (const
|
|
2273
|
-
if (!callDataFiles.has(
|
|
2274
|
-
|
|
2275
|
-
|
|
2276
|
-
|
|
2310
|
+
for (const key of cachedFileKeys) {
|
|
2311
|
+
if (!callDataFiles.has(key) && currentFileKeySet.has(key)) {
|
|
2312
|
+
const existing = filesToProcessByKey.get(key);
|
|
2313
|
+
if (existing) {
|
|
2314
|
+
existing.force = true;
|
|
2315
|
+
continue;
|
|
2316
|
+
}
|
|
2317
|
+
const concretePath = currentFilePathByKey.get(key);
|
|
2318
|
+
if (concretePath) {
|
|
2319
|
+
missingCallData.push({ key, file: concretePath });
|
|
2320
|
+
}
|
|
2277
2321
|
}
|
|
2278
2322
|
}
|
|
2279
2323
|
|
|
@@ -2285,7 +2329,7 @@ export class CodebaseIndexer {
|
|
|
2285
2329
|
for (let i = 0; i < missingCallData.length; i += BATCH_SIZE) {
|
|
2286
2330
|
const batch = missingCallData.slice(i, i + BATCH_SIZE);
|
|
2287
2331
|
const results = await Promise.all(
|
|
2288
|
-
batch.map(async (file) => {
|
|
2332
|
+
batch.map(async ({ file }) => {
|
|
2289
2333
|
try {
|
|
2290
2334
|
const stats = await fs.stat(file);
|
|
2291
2335
|
if (!stats || typeof stats.isDirectory !== 'function') {
|
|
@@ -2304,9 +2348,15 @@ export class CodebaseIndexer {
|
|
|
2304
2348
|
|
|
2305
2349
|
for (const result of results) {
|
|
2306
2350
|
if (!result) continue;
|
|
2307
|
-
|
|
2351
|
+
const key = toFileKey(result.file);
|
|
2352
|
+
if (!key) continue;
|
|
2353
|
+
if (!filesToProcessKeys.has(key)) {
|
|
2308
2354
|
filesToProcess.push(result);
|
|
2309
|
-
|
|
2355
|
+
filesToProcessKeys.add(key);
|
|
2356
|
+
filesToProcessByKey.set(key, result);
|
|
2357
|
+
} else {
|
|
2358
|
+
const existing = filesToProcessByKey.get(key);
|
|
2359
|
+
if (existing) existing.force = existing.force || result.force === true;
|
|
2310
2360
|
}
|
|
2311
2361
|
}
|
|
2312
2362
|
}
|
package/lib/cache.js
CHANGED
|
@@ -9,6 +9,7 @@ import {
|
|
|
9
9
|
} from './vector-store-binary.js';
|
|
10
10
|
import { SqliteVectorStore } from './vector-store-sqlite.js';
|
|
11
11
|
import { isNonProjectDirectory } from './config.js';
|
|
12
|
+
import { normalizePathKey } from './path-utils.js';
|
|
12
13
|
import {
|
|
13
14
|
JSON_WORKER_THRESHOLD_BYTES,
|
|
14
15
|
ANN_DIMENSION_SAMPLE_SIZE,
|
|
@@ -226,6 +227,26 @@ function serializeFileHashEntry(entry) {
|
|
|
226
227
|
return normalizeFileHashEntry(entry);
|
|
227
228
|
}
|
|
228
229
|
|
|
230
|
+
function fileKey(filePath) {
|
|
231
|
+
return normalizePathKey(filePath);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
function numericOrNegInfinity(value) {
|
|
235
|
+
return Number.isFinite(value) ? value : Number.NEGATIVE_INFINITY;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
function shouldPreferFileHashEntry(candidate, current) {
|
|
239
|
+
const candidateMtime = numericOrNegInfinity(candidate?.mtimeMs);
|
|
240
|
+
const currentMtime = numericOrNegInfinity(current?.mtimeMs);
|
|
241
|
+
if (candidateMtime !== currentMtime) return candidateMtime > currentMtime;
|
|
242
|
+
|
|
243
|
+
const candidateSize = numericOrNegInfinity(candidate?.size);
|
|
244
|
+
const currentSize = numericOrNegInfinity(current?.size);
|
|
245
|
+
if (candidateSize !== currentSize) return candidateSize > currentSize;
|
|
246
|
+
|
|
247
|
+
return false;
|
|
248
|
+
}
|
|
249
|
+
|
|
229
250
|
function computeAnnCapacity(total, config) {
|
|
230
251
|
const factor = typeof config.annCapacityFactor === 'number' ? config.annCapacityFactor : 1.2;
|
|
231
252
|
const extra = Number.isInteger(config.annCapacityExtra) ? config.annCapacityExtra : 1024;
|
|
@@ -674,20 +695,29 @@ export class EmbeddingsCache {
|
|
|
674
695
|
|
|
675
696
|
const hasCacheData = Array.isArray(cacheData);
|
|
676
697
|
const hasHashData = hashData && typeof hashData === 'object';
|
|
698
|
+
let normalizedHashAliasCollapses = 0;
|
|
699
|
+
let normalizedCallGraphAliasCollapses = 0;
|
|
677
700
|
|
|
678
701
|
if (hasCacheData) {
|
|
702
|
+
const isWin32 = process.platform === 'win32';
|
|
679
703
|
const allowedExtensions = new Set(
|
|
680
|
-
(this.config.fileExtensions || []).map((ext) => `.${ext}`)
|
|
704
|
+
(this.config.fileExtensions || []).map((ext) => `.${String(ext).toLowerCase()}`)
|
|
705
|
+
);
|
|
706
|
+
const allowedFileNames = new Set(
|
|
707
|
+
(this.config.fileNames || []).map((name) =>
|
|
708
|
+
isWin32 ? String(name).toLowerCase() : String(name)
|
|
709
|
+
)
|
|
681
710
|
);
|
|
682
|
-
const allowedFileNames = new Set(this.config.fileNames || []);
|
|
683
711
|
const applyExtensionFilter = !this.binaryStore;
|
|
684
712
|
const shouldKeepFile = (filePath) => {
|
|
685
|
-
const ext = path.extname(filePath);
|
|
713
|
+
const ext = path.extname(filePath).toLowerCase();
|
|
686
714
|
if (allowedExtensions.has(ext)) return true;
|
|
687
|
-
|
|
715
|
+
const baseName = path.basename(filePath);
|
|
716
|
+
const normalizedBaseName = isWin32 ? baseName.toLowerCase() : baseName;
|
|
717
|
+
return allowedFileNames.has(normalizedBaseName);
|
|
688
718
|
};
|
|
689
719
|
|
|
690
|
-
const rawHashes = hasHashData ?
|
|
720
|
+
const rawHashes = hasHashData ? Object.entries(hashData) : [];
|
|
691
721
|
this.vectorStore = [];
|
|
692
722
|
this.fileHashes.clear();
|
|
693
723
|
|
|
@@ -707,8 +737,17 @@ export class EmbeddingsCache {
|
|
|
707
737
|
for (const [file, entry] of rawHashes) {
|
|
708
738
|
if (!applyExtensionFilter || shouldKeepFile(file)) {
|
|
709
739
|
const normalized = normalizeFileHashEntry(entry);
|
|
710
|
-
|
|
711
|
-
|
|
740
|
+
const key = fileKey(file);
|
|
741
|
+
if (normalized && key) {
|
|
742
|
+
const existing = this.fileHashes.get(key);
|
|
743
|
+
if (existing) {
|
|
744
|
+
normalizedHashAliasCollapses += 1;
|
|
745
|
+
if (shouldPreferFileHashEntry(normalized, existing)) {
|
|
746
|
+
this.fileHashes.set(key, normalized);
|
|
747
|
+
}
|
|
748
|
+
} else {
|
|
749
|
+
this.fileHashes.set(key, normalized);
|
|
750
|
+
}
|
|
712
751
|
}
|
|
713
752
|
}
|
|
714
753
|
}
|
|
@@ -739,11 +778,31 @@ export class EmbeddingsCache {
|
|
|
739
778
|
try {
|
|
740
779
|
const callGraphData = await fs.readFile(callGraphFile, 'utf8');
|
|
741
780
|
const parsed = JSON.parse(callGraphData);
|
|
742
|
-
|
|
781
|
+
const normalizedCallData = new Map();
|
|
782
|
+
if (parsed && typeof parsed === 'object') {
|
|
783
|
+
for (const [file, data] of Object.entries(parsed)) {
|
|
784
|
+
const key = fileKey(file);
|
|
785
|
+
if (!key) continue;
|
|
786
|
+
if (normalizedCallData.has(key)) {
|
|
787
|
+
normalizedCallGraphAliasCollapses += 1;
|
|
788
|
+
}
|
|
789
|
+
normalizedCallData.set(key, data);
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
this.fileCallData = normalizedCallData;
|
|
743
793
|
if (this.config.verbose) {
|
|
744
794
|
console.info(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
|
|
745
795
|
}
|
|
746
796
|
} catch {}
|
|
797
|
+
|
|
798
|
+
if (
|
|
799
|
+
this.config.verbose &&
|
|
800
|
+
(normalizedHashAliasCollapses > 0 || normalizedCallGraphAliasCollapses > 0)
|
|
801
|
+
) {
|
|
802
|
+
console.info(
|
|
803
|
+
`[Cache] Normalized path-key aliases on load (file-hashes=${normalizedHashAliasCollapses}, call-graph=${normalizedCallGraphAliasCollapses})`
|
|
804
|
+
);
|
|
805
|
+
}
|
|
747
806
|
} catch (error) {
|
|
748
807
|
console.warn('[Cache] Failed to load cache:', error.message);
|
|
749
808
|
this.clearInMemoryState();
|
|
@@ -943,8 +1002,9 @@ export class EmbeddingsCache {
|
|
|
943
1002
|
const hashEntries = {};
|
|
944
1003
|
for (const [file, entry] of this.fileHashes) {
|
|
945
1004
|
const serialized = serializeFileHashEntry(entry);
|
|
946
|
-
|
|
947
|
-
|
|
1005
|
+
const key = fileKey(file);
|
|
1006
|
+
if (serialized && key) {
|
|
1007
|
+
hashEntries[key] = serialized;
|
|
948
1008
|
}
|
|
949
1009
|
}
|
|
950
1010
|
|
|
@@ -955,9 +1015,15 @@ export class EmbeddingsCache {
|
|
|
955
1015
|
|
|
956
1016
|
const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
|
|
957
1017
|
if (this.fileCallData.size > 0) {
|
|
1018
|
+
const callGraphEntries = {};
|
|
1019
|
+
for (const [file, data] of this.fileCallData) {
|
|
1020
|
+
const key = fileKey(file);
|
|
1021
|
+
if (!key) continue;
|
|
1022
|
+
callGraphEntries[key] = data;
|
|
1023
|
+
}
|
|
958
1024
|
await fs.writeFile(
|
|
959
1025
|
callGraphFile,
|
|
960
|
-
JSON.stringify(
|
|
1026
|
+
JSON.stringify(callGraphEntries, null, 2)
|
|
961
1027
|
);
|
|
962
1028
|
} else {
|
|
963
1029
|
await fs.rm(callGraphFile, { force: true });
|
|
@@ -1071,7 +1137,9 @@ export class EmbeddingsCache {
|
|
|
1071
1137
|
}
|
|
1072
1138
|
|
|
1073
1139
|
getFileHash(file) {
|
|
1074
|
-
const
|
|
1140
|
+
const key = fileKey(file);
|
|
1141
|
+
if (!key) return undefined;
|
|
1142
|
+
const entry = this.fileHashes.get(key);
|
|
1075
1143
|
if (typeof entry === 'string') return entry;
|
|
1076
1144
|
return entry?.hash;
|
|
1077
1145
|
}
|
|
@@ -1095,23 +1163,31 @@ export class EmbeddingsCache {
|
|
|
1095
1163
|
if (!iterator) return;
|
|
1096
1164
|
for (const [file, entry] of iterator) {
|
|
1097
1165
|
const normalized = normalizeFileHashEntry(entry);
|
|
1098
|
-
|
|
1099
|
-
|
|
1166
|
+
const key = fileKey(file);
|
|
1167
|
+
if (normalized && key) {
|
|
1168
|
+
const existing = this.fileHashes.get(key);
|
|
1169
|
+
if (!existing || shouldPreferFileHashEntry(normalized, existing)) {
|
|
1170
|
+
this.fileHashes.set(key, normalized);
|
|
1171
|
+
}
|
|
1100
1172
|
}
|
|
1101
1173
|
}
|
|
1102
1174
|
}
|
|
1103
1175
|
|
|
1104
1176
|
setFileHash(file, hash, meta = null) {
|
|
1177
|
+
const key = fileKey(file);
|
|
1178
|
+
if (!key) return;
|
|
1105
1179
|
const entry = { hash };
|
|
1106
1180
|
if (meta && typeof meta === 'object') {
|
|
1107
1181
|
if (Number.isFinite(meta.mtimeMs)) entry.mtimeMs = meta.mtimeMs;
|
|
1108
1182
|
if (Number.isFinite(meta.size)) entry.size = meta.size;
|
|
1109
1183
|
}
|
|
1110
|
-
this.fileHashes.set(
|
|
1184
|
+
this.fileHashes.set(key, entry);
|
|
1111
1185
|
}
|
|
1112
1186
|
|
|
1113
1187
|
getFileMeta(file) {
|
|
1114
|
-
const
|
|
1188
|
+
const key = fileKey(file);
|
|
1189
|
+
if (!key) return null;
|
|
1190
|
+
const entry = this.fileHashes.get(key);
|
|
1115
1191
|
if (!entry) return null;
|
|
1116
1192
|
if (typeof entry === 'string') return { hash: entry };
|
|
1117
1193
|
return entry;
|
|
@@ -1194,16 +1270,20 @@ export class EmbeddingsCache {
|
|
|
1194
1270
|
}
|
|
1195
1271
|
|
|
1196
1272
|
deleteFileHash(file) {
|
|
1197
|
-
|
|
1273
|
+
const key = fileKey(file);
|
|
1274
|
+
if (!key) return;
|
|
1275
|
+
this.fileHashes.delete(key);
|
|
1198
1276
|
}
|
|
1199
1277
|
|
|
1200
1278
|
async removeFileFromStore(file) {
|
|
1201
1279
|
if (!Array.isArray(this.vectorStore)) return;
|
|
1280
|
+
const targetKey = fileKey(file);
|
|
1281
|
+
if (!targetKey) return;
|
|
1202
1282
|
|
|
1203
1283
|
let w = 0;
|
|
1204
1284
|
for (let r = 0; r < this.vectorStore.length; r++) {
|
|
1205
1285
|
const chunk = this.vectorStore[r];
|
|
1206
|
-
if (chunk.file !==
|
|
1286
|
+
if (fileKey(chunk.file) !== targetKey) {
|
|
1207
1287
|
chunk._index = w;
|
|
1208
1288
|
this.vectorStore[w++] = chunk;
|
|
1209
1289
|
}
|
|
@@ -1213,7 +1293,7 @@ export class EmbeddingsCache {
|
|
|
1213
1293
|
this.invalidateAnnIndex();
|
|
1214
1294
|
this.removeFileCallData(file);
|
|
1215
1295
|
|
|
1216
|
-
this.
|
|
1296
|
+
this.deleteFileHash(file);
|
|
1217
1297
|
}
|
|
1218
1298
|
|
|
1219
1299
|
addToStore(chunk) {
|
|
@@ -1627,10 +1707,15 @@ export class EmbeddingsCache {
|
|
|
1627
1707
|
|
|
1628
1708
|
pruneCallGraphData(validFiles) {
|
|
1629
1709
|
if (!validFiles || this.fileCallData.size === 0) return 0;
|
|
1710
|
+
const validKeys = new Set();
|
|
1711
|
+
for (const file of validFiles) {
|
|
1712
|
+
const key = fileKey(file);
|
|
1713
|
+
if (key) validKeys.add(key);
|
|
1714
|
+
}
|
|
1630
1715
|
|
|
1631
1716
|
let pruned = 0;
|
|
1632
1717
|
for (const file of Array.from(this.fileCallData.keys())) {
|
|
1633
|
-
if (!
|
|
1718
|
+
if (!validKeys.has(fileKey(file))) {
|
|
1634
1719
|
this.fileCallData.delete(file);
|
|
1635
1720
|
pruned++;
|
|
1636
1721
|
}
|
|
@@ -1641,11 +1726,15 @@ export class EmbeddingsCache {
|
|
|
1641
1726
|
}
|
|
1642
1727
|
|
|
1643
1728
|
getFileCallData(file) {
|
|
1644
|
-
|
|
1729
|
+
const key = fileKey(file);
|
|
1730
|
+
if (!key) return undefined;
|
|
1731
|
+
return this.fileCallData.get(key);
|
|
1645
1732
|
}
|
|
1646
1733
|
|
|
1647
1734
|
hasFileCallData(file) {
|
|
1648
|
-
|
|
1735
|
+
const key = fileKey(file);
|
|
1736
|
+
if (!key) return false;
|
|
1737
|
+
return this.fileCallData.has(key);
|
|
1649
1738
|
}
|
|
1650
1739
|
|
|
1651
1740
|
getFileCallDataKeys() {
|
|
@@ -1657,21 +1746,21 @@ export class EmbeddingsCache {
|
|
|
1657
1746
|
}
|
|
1658
1747
|
|
|
1659
1748
|
setFileCallData(file, data) {
|
|
1660
|
-
|
|
1749
|
+
const key = fileKey(file);
|
|
1750
|
+
if (!key) return;
|
|
1751
|
+
this.fileCallData.set(key, data);
|
|
1661
1752
|
this.callGraph = null;
|
|
1662
1753
|
}
|
|
1663
1754
|
|
|
1664
1755
|
setFileCallDataEntries(entries) {
|
|
1665
|
-
|
|
1666
|
-
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
if (
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
}
|
|
1674
|
-
}
|
|
1756
|
+
const normalized = new Map();
|
|
1757
|
+
const iterator = entries instanceof Map ? entries.entries() : Object.entries(entries || {});
|
|
1758
|
+
for (const [file, data] of iterator) {
|
|
1759
|
+
const key = fileKey(file);
|
|
1760
|
+
if (!key) continue;
|
|
1761
|
+
normalized.set(key, data);
|
|
1762
|
+
}
|
|
1763
|
+
this.fileCallData = normalized;
|
|
1675
1764
|
this.callGraph = null;
|
|
1676
1765
|
}
|
|
1677
1766
|
|
|
@@ -1681,7 +1770,9 @@ export class EmbeddingsCache {
|
|
|
1681
1770
|
}
|
|
1682
1771
|
|
|
1683
1772
|
removeFileCallData(file) {
|
|
1684
|
-
|
|
1773
|
+
const key = fileKey(file);
|
|
1774
|
+
if (!key) return;
|
|
1775
|
+
this.fileCallData.delete(key);
|
|
1685
1776
|
this.callGraph = null;
|
|
1686
1777
|
}
|
|
1687
1778
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@softerist/heuristic-mcp",
|
|
3
|
-
"version": "3.2.
|
|
3
|
+
"version": "3.2.13",
|
|
4
4
|
"description": "An enhanced MCP server providing intelligent semantic code search with find-similar-code, recency ranking, and improved chunking. Fork of smart-coding-mcp.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|