@softerist/heuristic-mcp 3.0.12 → 3.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -56
- package/config.jsonc +173 -102
- package/index.js +69 -57
- package/lib/cache.js +55 -26
- package/lib/config.js +528 -79
- package/lib/constants.js +27 -0
- package/lib/embed-query-process.js +7 -6
- package/lib/embedding-process.js +113 -27
- package/lib/embedding-worker.js +299 -180
- package/lib/project-detector.js +1 -1
- package/lib/vector-store-binary.js +64 -55
- package/lib/vector-store-sqlite.js +83 -73
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -99,31 +99,48 @@ Clears the cache for the current working directory (or `--workspace` if provided
|
|
|
99
99
|
|
|
100
100
|
Configuration is loaded from your workspace root when the server runs with `--workspace` (this is how IDEs launch it). In server mode, it falls back to the package `config.jsonc` (or `config.json`) and then your current working directory.
|
|
101
101
|
|
|
102
|
-
Example `config.jsonc`:
|
|
103
|
-
|
|
104
|
-
```json
|
|
105
|
-
{
|
|
106
|
-
"excludePatterns": ["**/legacy-code/**", "**/*.test.ts"],
|
|
107
|
-
"fileNames": ["Dockerfile", ".env.example", "Makefile"],
|
|
108
|
-
"
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
"
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
"
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
"
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
"
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
102
|
+
Example `config.jsonc`:
|
|
103
|
+
|
|
104
|
+
```json
|
|
105
|
+
{
|
|
106
|
+
"excludePatterns": ["**/legacy-code/**", "**/*.test.ts"],
|
|
107
|
+
"fileNames": ["Dockerfile", ".env.example", "Makefile"],
|
|
108
|
+
"indexing": {
|
|
109
|
+
"smartIndexing": true
|
|
110
|
+
},
|
|
111
|
+
"worker": {
|
|
112
|
+
"workerThreads": 0
|
|
113
|
+
},
|
|
114
|
+
"embedding": {
|
|
115
|
+
"embeddingModel": "jinaai/jina-embeddings-v2-base-code",
|
|
116
|
+
"embeddingBatchSize": null,
|
|
117
|
+
"embeddingProcessNumThreads": 8
|
|
118
|
+
},
|
|
119
|
+
"search": {
|
|
120
|
+
"recencyBoost": 0.1,
|
|
121
|
+
"recencyDecayDays": 30
|
|
122
|
+
},
|
|
123
|
+
"callGraph": {
|
|
124
|
+
"callGraphEnabled": true,
|
|
125
|
+
"callGraphBoost": 0.15
|
|
126
|
+
},
|
|
127
|
+
"ann": {
|
|
128
|
+
"annEnabled": true
|
|
129
|
+
},
|
|
130
|
+
"vectorStore": {
|
|
131
|
+
"vectorStoreFormat": "binary",
|
|
132
|
+
"vectorStoreContentMode": "external",
|
|
133
|
+
"vectorStoreLoadMode": "disk",
|
|
134
|
+
"contentCacheEntries": 256,
|
|
135
|
+
"vectorCacheEntries": 64
|
|
136
|
+
},
|
|
137
|
+
"memoryCleanup": {
|
|
138
|
+
"clearCacheAfterIndex": true
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
The preferred style is namespaced keys (shown above). Legacy top-level keys are still supported for backward compatibility.
|
|
127
144
|
|
|
128
145
|
### Embedding Model & Dimension Options
|
|
129
146
|
|
|
@@ -133,12 +150,14 @@ Example `config.jsonc`:
|
|
|
133
150
|
|
|
134
151
|
For faster search with smaller embeddings, switch to an MRL-compatible model:
|
|
135
152
|
|
|
136
|
-
```json
|
|
137
|
-
{
|
|
138
|
-
"
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
153
|
+
```json
|
|
154
|
+
{
|
|
155
|
+
"embedding": {
|
|
156
|
+
"embeddingModel": "nomic-ai/nomic-embed-text-v1.5",
|
|
157
|
+
"embeddingDimension": 128
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
```
|
|
142
161
|
|
|
143
162
|
**MRL-compatible models:**
|
|
144
163
|
- `nomic-ai/nomic-embed-text-v1.5` — recommended for 128d/256d
|
|
@@ -153,7 +172,9 @@ Cache location:
|
|
|
153
172
|
|
|
154
173
|
### Environment Variables
|
|
155
174
|
|
|
156
|
-
Selected overrides (prefix `SMART_CODING_`):
|
|
175
|
+
Selected overrides (prefix `SMART_CODING_`):
|
|
176
|
+
|
|
177
|
+
Environment variable overrides are applied to the runtime keys, and `lib/config.js` then syncs them back into the matching namespaces.
|
|
157
178
|
|
|
158
179
|
- `SMART_CODING_VERBOSE=true|false` — enable detailed logging.
|
|
159
180
|
- `SMART_CODING_WORKER_THREADS=auto|N` — worker thread count.
|
|
@@ -179,35 +200,37 @@ Selected overrides (prefix `SMART_CODING_`):
|
|
|
179
200
|
|
|
180
201
|
See `lib/config.js` for the full list.
|
|
181
202
|
|
|
182
|
-
### Binary Vector Store
|
|
183
|
-
|
|
184
|
-
Set `vectorStoreFormat` to `binary` to use the on-disk binary cache. This keeps vectors and content out of JS heap
|
|
185
|
-
and reads on demand. Recommended for large repos.
|
|
186
|
-
|
|
187
|
-
- `vectorStoreContentMode=external` keeps content in the binary file and only loads for top-N results.
|
|
188
|
-
- `contentCacheEntries` controls the small in-memory LRU for decoded content strings.
|
|
189
|
-
- `vectorStoreLoadMode=disk` streams vectors from disk to reduce memory usage.
|
|
190
|
-
- `vectorCacheEntries` controls the small in-memory LRU for vectors when using disk mode.
|
|
191
|
-
- `clearCacheAfterIndex=true` drops in-memory vectors after indexing and reloads lazily on next query.
|
|
192
|
-
- `unloadModelAfterIndex=true` (default) unloads the embedding model after indexing to free ~500MB-1GB of RAM; the model will reload on the next search query.
|
|
193
|
-
- Note: `annEnabled=true` with `vectorStoreLoadMode=disk` can increase disk reads during ANN rebuilds on large indexes.
|
|
203
|
+
### Binary Vector Store
|
|
204
|
+
|
|
205
|
+
Set `vectorStore.vectorStoreFormat` to `binary` to use the on-disk binary cache. This keeps vectors and content out of the JS heap
|
|
206
|
+
and reads them on demand. Recommended for large repos.
|
|
207
|
+
|
|
208
|
+
- `vectorStore.vectorStoreContentMode=external` keeps content in the binary file and only loads it for the top-N results.
|
|
209
|
+
- `vectorStore.contentCacheEntries` controls the small in-memory LRU for decoded content strings.
|
|
210
|
+
- `vectorStore.vectorStoreLoadMode=disk` streams vectors from disk to reduce memory usage.
|
|
211
|
+
- `vectorStore.vectorCacheEntries` controls the small in-memory LRU for vectors when using disk mode.
|
|
212
|
+
- `memoryCleanup.clearCacheAfterIndex=true` drops in-memory vectors after indexing and reloads lazily on next query.
|
|
213
|
+
- `memoryCleanup.unloadModelAfterIndex=true` (default) unloads the embedding model after indexing to free ~500MB-1GB of RAM; the model will reload on the next search query.
|
|
214
|
+
- Note: `ann.annEnabled=true` with `vectorStore.vectorStoreLoadMode=disk` can increase disk reads during ANN rebuilds on large indexes.
|
|
194
215
|
|
|
195
216
|
### SQLite Vector Store
|
|
196
217
|
|
|
197
|
-
Set `vectorStoreFormat` to `sqlite` to use SQLite for persistence. This provides:
|
|
218
|
+
Set `vectorStore.vectorStoreFormat` to `sqlite` to use SQLite for persistence. This provides:
|
|
198
219
|
|
|
199
220
|
- ACID transactions for reliable writes
|
|
200
221
|
- Simpler concurrent access
|
|
201
222
|
- Standard database format for inspection
|
|
202
223
|
|
|
203
|
-
```json
|
|
204
|
-
{
|
|
205
|
-
"
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
224
|
+
```json
|
|
225
|
+
{
|
|
226
|
+
"vectorStore": {
|
|
227
|
+
"vectorStoreFormat": "sqlite"
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
The vectors and content are stored in `vectors.sqlite` in your cache directory. You can inspect it with any SQLite browser.
|
|
233
|
+
`vectorStore.vectorStoreContentMode` and `vectorStore.vectorStoreLoadMode` are respected for SQLite (use `vectorStore.vectorStoreLoadMode=disk` to avoid loading vectors into memory).
|
|
211
234
|
|
|
212
235
|
**Tradeoffs vs Binary:**
|
|
213
236
|
- Slightly higher read overhead (SQL queries vs direct memory access)
|
|
@@ -230,7 +253,7 @@ SMART_CODING_VECTOR_STORE_LOAD_MODE=disk node tools/scripts/benchmark-search.js
|
|
|
230
253
|
SMART_CODING_VECTOR_STORE_FORMAT=binary SMART_CODING_VECTOR_STORE_LOAD_MODE=disk node tools/scripts/benchmark-search.js --runs 10
|
|
231
254
|
```
|
|
232
255
|
|
|
233
|
-
Note: On small repos, disk mode may be slightly slower and show noisy RSS deltas; benefits are clearer on large indexes with a small `vectorCacheEntries`.
|
|
256
|
+
Note: On small repos, disk mode may be slightly slower and show noisy RSS deltas; benefits are clearer on large indexes with a small `vectorStore.vectorCacheEntries`.
|
|
234
257
|
|
|
235
258
|
---
|
|
236
259
|
|
|
@@ -293,8 +316,8 @@ Native ONNX backend unavailable: The operating system cannot run %1.
|
|
|
293
316
|
...onnxruntime_binding.node. Falling back to WASM.
|
|
294
317
|
```
|
|
295
318
|
|
|
296
|
-
The server will automatically disable workers and force `embeddingProcessPerBatch` to reduce memory spikes, but you
|
|
297
|
-
should fix the native binding to restore stable memory usage:
|
|
319
|
+
The server will automatically disable workers and force `embedding.embeddingProcessPerBatch` to reduce memory spikes, but you
|
|
320
|
+
should fix the native binding to restore stable memory usage:
|
|
298
321
|
|
|
299
322
|
- Ensure you are running **64-bit Node.js** (`node -p "process.arch"` should be `x64`).
|
|
300
323
|
- Install **Microsoft Visual C++ 2015–2022 Redistributable (x64)**.
|
package/config.jsonc
CHANGED
|
@@ -615,109 +615,180 @@
|
|
|
615
615
|
"makefile.in",
|
|
616
616
|
],
|
|
617
617
|
// Glob patterns to exclude from indexing.
|
|
618
|
-
"excludePatterns": [
|
|
619
|
-
"**/node_modules/**",
|
|
620
|
-
"**/dist/**",
|
|
621
|
-
"**/build/**",
|
|
618
|
+
"excludePatterns": [
|
|
619
|
+
"**/node_modules/**",
|
|
620
|
+
"**/dist/**",
|
|
621
|
+
"**/build/**",
|
|
622
622
|
"**/.git/**",
|
|
623
623
|
"**/coverage/**",
|
|
624
624
|
"**/.next/**",
|
|
625
625
|
"**/target/**",
|
|
626
|
-
"**/vendor/**",
|
|
627
|
-
"**/.smart-coding-cache/**",
|
|
628
|
-
],
|
|
629
|
-
//
|
|
630
|
-
"
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
//
|
|
652
|
-
"
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
//
|
|
680
|
-
"
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
//
|
|
698
|
-
"
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
//
|
|
720
|
-
"
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
626
|
+
"**/vendor/**",
|
|
627
|
+
"**/.smart-coding-cache/**",
|
|
628
|
+
],
|
|
629
|
+
// Indexing controls.
|
|
630
|
+
"indexing": {
|
|
631
|
+
// Enable project-type detection + smart ignore patterns.
|
|
632
|
+
"smartIndexing": true,
|
|
633
|
+
// Lines per chunk.
|
|
634
|
+
"chunkSize": 16,
|
|
635
|
+
// Overlapping lines between chunks.
|
|
636
|
+
"chunkOverlap": 4,
|
|
637
|
+
// Files per indexing batch.
|
|
638
|
+
"batchSize": 50,
|
|
639
|
+
// Skip files larger than this many bytes.
|
|
640
|
+
"maxFileSize": 1048576,
|
|
641
|
+
// Maximum number of search results.
|
|
642
|
+
"maxResults": 5,
|
|
643
|
+
// Enable file watcher for incremental indexing.
|
|
644
|
+
"watchFiles": true,
|
|
645
|
+
},
|
|
646
|
+
// Logging and diagnostics.
|
|
647
|
+
"logging": {
|
|
648
|
+
// Enable verbose logging.
|
|
649
|
+
"verbose": true,
|
|
650
|
+
},
|
|
651
|
+
// Cache engine and serialization controls.
|
|
652
|
+
"cache": {
|
|
653
|
+
// Persist embeddings between sessions.
|
|
654
|
+
"enableCache": true,
|
|
655
|
+
// Assume vectors are finite (skip validation).
|
|
656
|
+
"cacheVectorAssumeFinite": true,
|
|
657
|
+
// Decimal precision for cached vectors (null = default).
|
|
658
|
+
"cacheVectorFloatDigits": null,
|
|
659
|
+
// Write stream highWaterMark for cache files.
|
|
660
|
+
"cacheWriteHighWaterMark": 262144,
|
|
661
|
+
// Flush threshold (chars) for JSON writer.
|
|
662
|
+
"cacheVectorFlushChars": 262144,
|
|
663
|
+
// Validate vectors contain only finite numbers.
|
|
664
|
+
"cacheVectorCheckFinite": true,
|
|
665
|
+
// Avoid mutating vectors during serialization.
|
|
666
|
+
"cacheVectorNoMutation": false,
|
|
667
|
+
// Join threshold for JSON array chunks.
|
|
668
|
+
"cacheVectorJoinThreshold": 8192,
|
|
669
|
+
// Chunk size for JSON join optimization.
|
|
670
|
+
"cacheVectorJoinChunkSize": 2048,
|
|
671
|
+
// Wait time for active readers before saving cache (ms).
|
|
672
|
+
"saveReaderWaitTimeoutMs": 5000,
|
|
673
|
+
},
|
|
674
|
+
// Stale cache cleanup policy.
|
|
675
|
+
"cacheCleanup": {
|
|
676
|
+
// Remove stale caches automatically.
|
|
677
|
+
"autoCleanup": true,
|
|
678
|
+
},
|
|
679
|
+
// Worker-thread controls.
|
|
680
|
+
"worker": {
|
|
681
|
+
// Number of embedding workers (0 disables).
|
|
682
|
+
// Windows + heavy Jina models are more stable with child-process embedding than worker pools.
|
|
683
|
+
"workerThreads": 0,
|
|
684
|
+
// Worker batch timeout in milliseconds.
|
|
685
|
+
"workerBatchTimeoutMs": 120000,
|
|
686
|
+
// Failures before worker circuit opens.
|
|
687
|
+
"workerFailureThreshold": 1,
|
|
688
|
+
// Cooldown before re-enabling workers (ms).
|
|
689
|
+
"workerFailureCooldownMs": 600000,
|
|
690
|
+
// Max chunks per worker batch.
|
|
691
|
+
"workerMaxChunksPerBatch": 20,
|
|
692
|
+
// Allow fallback to main-thread embeddings.
|
|
693
|
+
"allowSingleThreadFallback": false,
|
|
694
|
+
// Abort worker embedding batches after repeated consecutive embedding failures.
|
|
695
|
+
"failFastEmbeddingErrors": false,
|
|
696
|
+
},
|
|
697
|
+
// Embedding/runtime controls.
|
|
698
|
+
"embedding": {
|
|
699
|
+
// Embedding model identifier.
|
|
700
|
+
"embeddingModel": "jinaai/jina-embeddings-v2-base-code",
|
|
701
|
+
// Preload the embedding model on startup. Set to false when using child processes for memory savings.
|
|
702
|
+
"preloadEmbeddingModel": false,
|
|
703
|
+
// Use child process per batch for memory isolation.
|
|
704
|
+
"embeddingProcessPerBatch": true,
|
|
705
|
+
// Auto-enable child process when no workers + heavy model.
|
|
706
|
+
"autoEmbeddingProcessPerBatch": false,
|
|
707
|
+
// Override embedding batch size (null = auto).
|
|
708
|
+
"embeddingBatchSize": null,
|
|
709
|
+
// ONNX threads used by embedding child process.
|
|
710
|
+
// 6 is a practical balance on 24-thread desktop CPUs (high throughput, less contention than 8+); note that the value set below is 8.
|
|
711
|
+
"embeddingProcessNumThreads": 8,
|
|
712
|
+
// Embedding-child adaptive GC RSS threshold in MB (higher = less frequent GC).
|
|
713
|
+
"embeddingProcessGcRssThresholdMb": 2048,
|
|
714
|
+
// Minimum interval between embedding-child GC runs.
|
|
715
|
+
"embeddingProcessGcMinIntervalMs": 15000,
|
|
716
|
+
// Backstop GC cadence if threshold is not crossed.
|
|
717
|
+
"embeddingProcessGcMaxRequestsWithoutCollection": 8,
|
|
718
|
+
},
|
|
719
|
+
// Vector store backend controls.
|
|
720
|
+
"vectorStore": {
|
|
721
|
+
// Vector store format: json, binary, or sqlite.
|
|
722
|
+
"vectorStoreFormat": "binary",
|
|
723
|
+
// Content storage: external or inline.
|
|
724
|
+
"vectorStoreContentMode": "external",
|
|
725
|
+
// In-memory content cache entries (binary store).
|
|
726
|
+
"contentCacheEntries": 256,
|
|
727
|
+
// Vector loading mode: "disk" keeps RAM lower by streaming vectors as needed.
|
|
728
|
+
"vectorStoreLoadMode": "disk",
|
|
729
|
+
},
|
|
730
|
+
// Search scoring controls.
|
|
731
|
+
"search": {
|
|
732
|
+
// Weight for semantic similarity scoring.
|
|
733
|
+
"semanticWeight": 0.7,
|
|
734
|
+
// Score boost for exact text matches.
|
|
735
|
+
"exactMatchBoost": 1.5,
|
|
736
|
+
},
|
|
737
|
+
// Memory cleanup and memory-footprint controls.
|
|
738
|
+
"memoryCleanup": {
|
|
739
|
+
// Enable explicit GC (requires --expose-gc).
|
|
740
|
+
"enableExplicitGc": true,
|
|
741
|
+
// Drop in-memory vectors after indexing completes.
|
|
742
|
+
"clearCacheAfterIndex": true,
|
|
743
|
+
// Unload embedding model after indexing.
|
|
744
|
+
"unloadModelAfterIndex": true,
|
|
745
|
+
// Shutdown persistent query embedding child pool after indexing.
|
|
746
|
+
"shutdownQueryEmbeddingPoolAfterIndex": true,
|
|
747
|
+
// Unload embedding model after searches.
|
|
748
|
+
"unloadModelAfterSearch": true,
|
|
749
|
+
// Idle timeout before query embedding child process exits (ms).
|
|
750
|
+
"embeddingPoolIdleTimeoutMs": 2000,
|
|
751
|
+
// RSS threshold for optional incremental GC.
|
|
752
|
+
"incrementalGcThresholdMb": 512,
|
|
753
|
+
// Optional: print detailed phase memory traces for incremental indexing/update paths.
|
|
754
|
+
"incrementalMemoryProfile": false,
|
|
755
|
+
// Optional: recycle server process when RSS stays too high after incremental cleanup.
|
|
756
|
+
// Safe default is disabled.
|
|
757
|
+
"recycleServerOnHighRssAfterIncremental": false,
|
|
758
|
+
// RSS threshold (MB) for incremental recycle trigger.
|
|
759
|
+
"recycleServerOnHighRssThresholdMb": 4096,
|
|
760
|
+
// Minimum interval between recycle attempts (ms).
|
|
761
|
+
"recycleServerOnHighRssCooldownMs": 300000,
|
|
762
|
+
// Delay before recycle to let logs/responses flush (ms).
|
|
763
|
+
"recycleServerOnHighRssDelayMs": 2000,
|
|
764
|
+
},
|
|
765
|
+
// Call graph proximity boosting controls.
|
|
766
|
+
"callGraph": {
|
|
767
|
+
"callGraphEnabled": true,
|
|
768
|
+
"callGraphBoost": 0.15,
|
|
769
|
+
"callGraphMaxHops": 1,
|
|
770
|
+
},
|
|
771
|
+
// ANN index controls.
|
|
772
|
+
"ann": {
|
|
773
|
+
// Enable ANN index for large codebases.
|
|
774
|
+
"annEnabled": true,
|
|
775
|
+
// Minimum chunks required to build ANN.
|
|
776
|
+
"annMinChunks": 5000,
|
|
777
|
+
// Minimum ANN candidates to retrieve.
|
|
778
|
+
"annMinCandidates": 50,
|
|
779
|
+
// Maximum ANN candidates to retrieve.
|
|
780
|
+
"annMaxCandidates": 200,
|
|
781
|
+
// Scale ANN candidates by this multiplier.
|
|
782
|
+
"annCandidateMultiplier": 20,
|
|
783
|
+
// HNSW efConstruction value.
|
|
784
|
+
"annEfConstruction": 200,
|
|
785
|
+
// HNSW efSearch value.
|
|
786
|
+
"annEfSearch": 64,
|
|
787
|
+
// HNSW M parameter (graph degree).
|
|
788
|
+
"annM": 16,
|
|
789
|
+
// Persist ANN index to disk.
|
|
790
|
+
"annIndexCache": true,
|
|
791
|
+
// ANN distance metric.
|
|
792
|
+
"annMetric": "cosine",
|
|
793
|
+
},
|
|
794
|
+
}
|
package/index.js
CHANGED
|
@@ -53,7 +53,11 @@ import * as PackageVersionFeature from './features/package-version.js';
|
|
|
53
53
|
import * as SetWorkspaceFeature from './features/set-workspace.js';
|
|
54
54
|
import { handleListResources, handleReadResource } from './features/resources.js';
|
|
55
55
|
|
|
56
|
-
import {
|
|
56
|
+
import {
|
|
57
|
+
MEMORY_LOG_INTERVAL_MS,
|
|
58
|
+
ONNX_THREAD_LIMIT,
|
|
59
|
+
BACKGROUND_INDEX_DELAY_MS,
|
|
60
|
+
} from './lib/constants.js';
|
|
57
61
|
const PID_FILE_NAME = '.heuristic-mcp.pid';
|
|
58
62
|
|
|
59
63
|
async function readLogTail(logPath, maxLines = 2000) {
|
|
@@ -177,41 +181,46 @@ async function initialize(workspaceDir) {
|
|
|
177
181
|
}
|
|
178
182
|
}
|
|
179
183
|
|
|
180
|
-
// Skip gc check during tests (VITEST env is set)
|
|
181
|
-
const isTest = Boolean(process.env.VITEST || process.env.VITEST_WORKER_ID);
|
|
182
|
-
if (config.enableExplicitGc && typeof global.gc !== 'function' && !isTest) {
|
|
183
|
-
console.
|
|
184
|
-
'[Server]
|
|
185
|
-
);
|
|
186
|
-
console.
|
|
187
|
-
'[Server]
|
|
188
|
-
);
|
|
189
|
-
|
|
190
|
-
}
|
|
184
|
+
// Skip gc check during tests (VITEST env is set)
|
|
185
|
+
const isTest = Boolean(process.env.VITEST || process.env.VITEST_WORKER_ID);
|
|
186
|
+
if (config.enableExplicitGc && typeof global.gc !== 'function' && !isTest) {
|
|
187
|
+
console.warn(
|
|
188
|
+
'[Server] enableExplicitGc=true but this process was not started with --expose-gc; continuing with explicit GC disabled.'
|
|
189
|
+
);
|
|
190
|
+
console.warn(
|
|
191
|
+
'[Server] Tip: start with "npm start" or add --expose-gc to enable explicit GC again.'
|
|
192
|
+
);
|
|
193
|
+
config.enableExplicitGc = false;
|
|
194
|
+
}
|
|
191
195
|
|
|
192
196
|
let mainBackendConfigured = false;
|
|
193
197
|
let nativeOnnxAvailable = null;
|
|
194
|
-
const ensureMainOnnxBackend = () => {
|
|
195
|
-
if (mainBackendConfigured) return;
|
|
196
|
-
nativeOnnxAvailable = configureNativeOnnxBackend({
|
|
197
|
-
log: config.verbose ? console.info : null,
|
|
198
|
-
label: '[Server]',
|
|
199
|
-
threads: {
|
|
198
|
+
const ensureMainOnnxBackend = () => {
|
|
199
|
+
if (mainBackendConfigured) return;
|
|
200
|
+
nativeOnnxAvailable = configureNativeOnnxBackend({
|
|
201
|
+
log: config.verbose ? console.info : null,
|
|
202
|
+
label: '[Server]',
|
|
203
|
+
threads: {
|
|
200
204
|
intraOpNumThreads: ONNX_THREAD_LIMIT,
|
|
201
205
|
interOpNumThreads: 1,
|
|
202
206
|
},
|
|
203
207
|
});
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
208
|
+
mainBackendConfigured = true;
|
|
209
|
+
};
|
|
210
|
+
|
|
211
|
+
ensureMainOnnxBackend();
|
|
212
|
+
if (nativeOnnxAvailable === false) {
|
|
213
|
+
try {
|
|
214
|
+
const { env } = await getTransformers();
|
|
215
|
+
if (env?.backends?.onnx?.wasm) {
|
|
216
|
+
env.backends.onnx.wasm.numThreads = ONNX_THREAD_LIMIT;
|
|
217
|
+
}
|
|
218
|
+
} catch {
|
|
219
|
+
// ignore: fallback tuning is best effort
|
|
220
|
+
}
|
|
221
|
+
const status = getNativeOnnxStatus();
|
|
222
|
+
const reason = status?.message || 'onnxruntime-node not available';
|
|
223
|
+
console.warn(`[Server] Native ONNX backend unavailable (${reason}); using WASM backend.`);
|
|
215
224
|
console.warn(
|
|
216
225
|
'[Server] Auto-safety: disabling workers and forcing embeddingProcessPerBatch for memory isolation.'
|
|
217
226
|
);
|
|
@@ -243,9 +252,12 @@ async function initialize(workspaceDir) {
|
|
|
243
252
|
}
|
|
244
253
|
|
|
245
254
|
// Log effective configuration for debugging
|
|
246
|
-
console.info(
|
|
247
|
-
`[Server] Config: workerThreads=${config.workerThreads}, embeddingProcessPerBatch=${config.embeddingProcessPerBatch}`
|
|
248
|
-
);
|
|
255
|
+
console.info(
|
|
256
|
+
`[Server] Config: workerThreads=${config.workerThreads}, embeddingProcessPerBatch=${config.embeddingProcessPerBatch}`
|
|
257
|
+
);
|
|
258
|
+
console.info(
|
|
259
|
+
`[Server] Config: vectorStoreLoadMode=${config.vectorStoreLoadMode}, vectorCacheEntries=${config.vectorCacheEntries}`
|
|
260
|
+
);
|
|
249
261
|
|
|
250
262
|
if (pidPath) {
|
|
251
263
|
console.info(`[Server] PID file: ${pidPath}`);
|
|
@@ -330,25 +342,22 @@ async function initialize(workspaceDir) {
|
|
|
330
342
|
cachedEmbedderPromise = null;
|
|
331
343
|
return false;
|
|
332
344
|
}
|
|
333
|
-
};
|
|
334
|
-
|
|
335
|
-
embedder = lazyEmbedder;
|
|
336
|
-
unloadMainEmbedder = unloader; // Store in module scope for tool handler access
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
// NOTE: We no longer auto-load in verbose mode when preloadEmbeddingModel=false.
|
|
351
|
-
// The model will be loaded lazily on first search or by child processes during indexing.
|
|
345
|
+
};
|
|
346
|
+
|
|
347
|
+
embedder = lazyEmbedder;
|
|
348
|
+
unloadMainEmbedder = unloader; // Store in module scope for tool handler access
|
|
349
|
+
const preloadEmbeddingModel = async () => {
|
|
350
|
+
if (config.preloadEmbeddingModel === false) return;
|
|
351
|
+
try {
|
|
352
|
+
console.info('[Server] Preloading embedding model (background)...');
|
|
353
|
+
await embedder(' ');
|
|
354
|
+
} catch (err) {
|
|
355
|
+
console.warn(`[Server] Embedding model preload failed: ${err.message}`);
|
|
356
|
+
}
|
|
357
|
+
};
|
|
358
|
+
|
|
359
|
+
// NOTE: We no longer auto-load in verbose mode when preloadEmbeddingModel=false.
|
|
360
|
+
// The model will be loaded lazily on first search or by child processes during indexing.
|
|
352
361
|
|
|
353
362
|
// Initialize cache (load deferred until after server is ready)
|
|
354
363
|
cache = new EmbeddingsCache(config);
|
|
@@ -382,10 +391,13 @@ async function initialize(workspaceDir) {
|
|
|
382
391
|
// Attach hybridSearch to server for cross-feature access (e.g. cache invalidation)
|
|
383
392
|
server.hybridSearch = hybridSearch;
|
|
384
393
|
|
|
385
|
-
const startBackgroundTasks = async () => {
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
394
|
+
const startBackgroundTasks = async () => {
|
|
395
|
+
// Keep startup responsive: do not block server readiness on model preload.
|
|
396
|
+
void preloadEmbeddingModel();
|
|
397
|
+
|
|
398
|
+
try {
|
|
399
|
+
console.info('[Server] Loading cache (deferred)...');
|
|
400
|
+
await cache.load();
|
|
389
401
|
if (config.verbose) {
|
|
390
402
|
logMemory('[Server] Memory (after cache load)');
|
|
391
403
|
}
|
|
@@ -411,8 +423,8 @@ async function initialize(workspaceDir) {
|
|
|
411
423
|
.catch((err) => {
|
|
412
424
|
console.error('[Server] Background indexing error:', err.message);
|
|
413
425
|
});
|
|
414
|
-
},
|
|
415
|
-
};
|
|
426
|
+
}, BACKGROUND_INDEX_DELAY_MS);
|
|
427
|
+
};
|
|
416
428
|
|
|
417
429
|
return { startBackgroundTasks, config };
|
|
418
430
|
}
|