@softerist/heuristic-mcp 2.1.47 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/.agent/workflows/code-review.md +60 -0
  2. package/.prettierrc +7 -0
  3. package/ARCHITECTURE.md +105 -170
  4. package/CONTRIBUTING.md +32 -113
  5. package/GEMINI.md +73 -0
  6. package/LICENSE +21 -21
  7. package/README.md +161 -54
  8. package/config.json +876 -75
  9. package/debug-pids.js +27 -0
  10. package/eslint.config.js +36 -0
  11. package/features/ann-config.js +37 -26
  12. package/features/clear-cache.js +28 -19
  13. package/features/find-similar-code.js +142 -66
  14. package/features/hybrid-search.js +253 -93
  15. package/features/index-codebase.js +1455 -394
  16. package/features/lifecycle.js +813 -180
  17. package/features/register.js +58 -52
  18. package/index.js +450 -306
  19. package/lib/cache-ops.js +22 -0
  20. package/lib/cache-utils.js +68 -0
  21. package/lib/cache.js +1392 -587
  22. package/lib/call-graph.js +165 -50
  23. package/lib/cli.js +154 -0
  24. package/lib/config.js +462 -121
  25. package/lib/embedding-process.js +77 -0
  26. package/lib/embedding-worker.js +545 -30
  27. package/lib/ignore-patterns.js +61 -59
  28. package/lib/json-worker.js +14 -0
  29. package/lib/json-writer.js +344 -0
  30. package/lib/logging.js +88 -0
  31. package/lib/memory-logger.js +13 -0
  32. package/lib/project-detector.js +13 -17
  33. package/lib/server-lifecycle.js +38 -0
  34. package/lib/settings-editor.js +645 -0
  35. package/lib/tokenizer.js +207 -104
  36. package/lib/utils.js +273 -198
  37. package/lib/vector-store-binary.js +592 -0
  38. package/mcp_config.example.json +13 -0
  39. package/package.json +13 -2
  40. package/scripts/clear-cache.js +6 -17
  41. package/scripts/download-model.js +14 -9
  42. package/scripts/postinstall.js +5 -5
  43. package/search-configs.js +36 -0
  44. package/test/ann-config.test.js +179 -0
  45. package/test/ann-fallback.test.js +6 -6
  46. package/test/binary-store.test.js +69 -0
  47. package/test/cache-branches.test.js +120 -0
  48. package/test/cache-errors.test.js +264 -0
  49. package/test/cache-extra.test.js +300 -0
  50. package/test/cache-helpers.test.js +205 -0
  51. package/test/cache-hnsw-failure.test.js +40 -0
  52. package/test/cache-json-worker.test.js +190 -0
  53. package/test/cache-worker.test.js +102 -0
  54. package/test/cache.test.js +443 -0
  55. package/test/call-graph.test.js +103 -4
  56. package/test/clear-cache.test.js +69 -68
  57. package/test/code-review-workflow.test.js +50 -0
  58. package/test/config.test.js +418 -0
  59. package/test/coverage-gap.test.js +497 -0
  60. package/test/coverage-maximizer.test.js +236 -0
  61. package/test/debug-analysis.js +107 -0
  62. package/test/embedding-model.test.js +173 -103
  63. package/test/embedding-worker-extra.test.js +272 -0
  64. package/test/embedding-worker.test.js +158 -0
  65. package/test/features.test.js +139 -0
  66. package/test/final-boost.test.js +271 -0
  67. package/test/final-polish.test.js +183 -0
  68. package/test/final.test.js +95 -0
  69. package/test/find-similar-code.test.js +191 -0
  70. package/test/helpers.js +92 -11
  71. package/test/helpers.test.js +46 -0
  72. package/test/hybrid-search-basic.test.js +62 -0
  73. package/test/hybrid-search-branch.test.js +202 -0
  74. package/test/hybrid-search-callgraph.test.js +229 -0
  75. package/test/hybrid-search-extra.test.js +81 -0
  76. package/test/hybrid-search.test.js +484 -71
  77. package/test/index-cli.test.js +520 -0
  78. package/test/index-codebase-batch.test.js +119 -0
  79. package/test/index-codebase-branches.test.js +585 -0
  80. package/test/index-codebase-core.test.js +1032 -0
  81. package/test/index-codebase-edge-cases.test.js +254 -0
  82. package/test/index-codebase-errors.test.js +132 -0
  83. package/test/index-codebase-gap.test.js +239 -0
  84. package/test/index-codebase-lines.test.js +151 -0
  85. package/test/index-codebase-watcher.test.js +259 -0
  86. package/test/index-codebase-zone.test.js +259 -0
  87. package/test/index-codebase.test.js +371 -69
  88. package/test/index-memory.test.js +220 -0
  89. package/test/indexer-detailed.test.js +176 -0
  90. package/test/integration.test.js +148 -92
  91. package/test/json-worker.test.js +50 -0
  92. package/test/lifecycle.test.js +541 -0
  93. package/test/master.test.js +198 -0
  94. package/test/perfection.test.js +349 -0
  95. package/test/project-detector.test.js +65 -0
  96. package/test/register.test.js +262 -0
  97. package/test/tokenizer.test.js +55 -93
  98. package/test/ultra-maximizer.test.js +116 -0
  99. package/test/utils-branches.test.js +161 -0
  100. package/test/utils-extra.test.js +116 -0
  101. package/test/utils.test.js +131 -0
  102. package/test/verify_fixes.js +76 -0
  103. package/test/worker-errors.test.js +96 -0
  104. package/test/worker-init.test.js +102 -0
  105. package/test/worker_throttling.test.js +93 -0
  106. package/tools/scripts/benchmark-search.js +95 -0
  107. package/tools/scripts/cache-stats.js +71 -0
  108. package/tools/scripts/manual-search.js +34 -0
  109. package/vitest.config.js +19 -9
package/lib/config.js CHANGED
@@ -1,92 +1,327 @@
1
- import fs from "fs/promises";
2
- import path from "path";
3
- import os from "os";
4
- import crypto from "crypto";
5
- import { fileURLToPath } from "url";
6
- import { ProjectDetector } from "./project-detector.js";
1
+ import fs from 'fs/promises';
2
+ import path from 'path';
3
+ import os from 'os';
4
+ import crypto from 'crypto';
5
+ import { fileURLToPath } from 'url';
6
+ import { ProjectDetector } from './project-detector.js';
7
7
 
8
8
  const DEFAULT_CONFIG = {
9
- searchDirectory: ".",
9
+ searchDirectory: '.',
10
10
  fileExtensions: [
11
11
  // JavaScript/TypeScript
12
- "js", "ts", "jsx", "tsx", "mjs", "cjs",
12
+ 'js',
13
+ 'ts',
14
+ 'jsx',
15
+ 'tsx',
16
+ 'mjs',
17
+ 'cjs',
18
+ 'mts',
19
+ 'cts',
13
20
  // Styles
14
- "css", "scss", "sass", "less", "styl",
21
+ 'css',
22
+ 'scss',
23
+ 'sass',
24
+ 'less',
25
+ 'styl',
26
+ 'stylus',
27
+ 'postcss',
28
+ // Web Frameworks
29
+ 'vue',
30
+ 'svelte',
31
+ 'astro',
15
32
  // Markup
16
- "html", "htm", "xml", "svg",
33
+ 'html',
34
+ 'htm',
35
+ 'xml',
36
+ 'svg',
37
+ 'xhtml',
38
+ 'pug',
39
+ 'jade',
40
+ // Templating
41
+ 'handlebars',
42
+ 'hbs',
43
+ 'mustache',
44
+ 'ejs',
45
+ 'njk',
46
+ 'liquid',
17
47
  // Python
18
- "py", "pyw", "pyx",
19
- // Java/Kotlin/Scala
20
- "java", "kt", "kts", "scala",
21
- // C/C++
22
- "c", "cpp", "cc", "cxx", "h", "hpp", "hxx",
23
- // C#
24
- "cs", "csx",
25
- // Go
26
- "go",
27
- // Rust
28
- "rs",
48
+ 'py',
49
+ 'pyw',
50
+ 'pyx',
51
+ 'pxd',
52
+ 'pxi',
53
+ 'ipynb',
54
+ // Java/JVM
55
+ 'java',
56
+ 'kt',
57
+ 'kts',
58
+ 'groovy',
59
+ 'gvy',
60
+ 'gradle',
61
+ 'scala',
62
+ 'sbt',
63
+ 'clj',
64
+ 'cljs',
65
+ 'cljc',
66
+ 'edn',
67
+ // C/C++ family
68
+ 'c',
69
+ 'cpp',
70
+ 'cc',
71
+ 'cxx',
72
+ 'h',
73
+ 'hpp',
74
+ 'hxx',
75
+ 'h++',
76
+ 'm',
77
+ 'mm',
78
+ // .NET
79
+ 'cs',
80
+ 'csx',
81
+ 'vb',
82
+ 'vbs',
83
+ 'fs',
84
+ 'fsx',
85
+ 'fsi',
86
+ // System
87
+ 'go',
88
+ 'rs',
89
+ 'rlib',
90
+ 'swift',
29
91
  // Ruby
30
- "rb", "rake",
92
+ 'rb',
93
+ 'erb',
94
+ 'rake',
95
+ 'gemspec',
31
96
  // PHP
32
- "php", "phtml",
33
- // Swift
34
- "swift",
35
- // Shell scripts
36
- "sh", "bash", "zsh", "fish",
37
- // Config & Data
38
- "json", "yaml", "yml", "toml", "ini", "env",
39
- // Documentation
40
- "md", "mdx", "txt", "rst",
97
+ 'php',
98
+ 'phtml',
99
+ 'php3',
100
+ 'php4',
101
+ 'php5',
102
+ 'phps',
103
+ // Neural/AI/Data
104
+ 'r',
105
+ 'rmd',
106
+ 'jl',
107
+ // Shell/Config
108
+ 'sh',
109
+ 'bash',
110
+ 'zsh',
111
+ 'fish',
112
+ 'ksh',
113
+ 'csh',
114
+ 'tcsh',
115
+ 'bat',
116
+ 'cmd',
117
+ 'ps1',
118
+ 'psm1',
119
+ 'json',
120
+ 'json5',
121
+ 'jsonc',
122
+ 'yaml',
123
+ 'yml',
124
+ 'toml',
125
+ 'ini',
126
+ 'cfg',
127
+ 'conf',
128
+ 'properties',
129
+ 'env',
130
+ 'dockerfile',
131
+ 'containerfile',
132
+ 'makefile',
133
+ 'mk',
134
+ 'cmake',
135
+ 'jenkinsfile',
136
+ 'vagrantfile',
41
137
  // Database
42
- "sql",
43
- // Other
44
- "r", "R", "lua", "vim", "pl", "pm"
138
+ 'sql',
139
+ 'pgsql',
140
+ 'mysql',
141
+ 'sqlite',
142
+ // Docs
143
+ 'md',
144
+ 'markdown',
145
+ 'mdx',
146
+ 'txt',
147
+ 'rst',
148
+ 'adoc',
149
+ 'asciidoc',
150
+ 'tex',
151
+ 'latex',
152
+ // Functional/Other
153
+ 'lua',
154
+ 'pl',
155
+ 'pm',
156
+ 't',
157
+ 'dart',
158
+ 'el',
159
+ 'lisp',
160
+ 'lsp',
161
+ 'scm',
162
+ 'ss',
163
+ 'erl',
164
+ 'hrl',
165
+ 'ex',
166
+ 'exs',
167
+ 'hs',
168
+ 'lhs',
169
+ 'ml',
170
+ 'mli',
171
+ 'v',
172
+ 'vh',
173
+ 'sv',
174
+ 'svh',
175
+ 'coffee',
176
+ 'litcoffee',
177
+ // API/Structs
178
+ 'proto',
179
+ 'graphql',
180
+ 'gql',
181
+ 'sol',
182
+ 'vy',
183
+ 'tf',
184
+ 'tfvars',
185
+ 'hcl',
186
+ 'nix',
187
+ ],
188
+ fileNames: [
189
+ 'Dockerfile',
190
+ 'Containerfile',
191
+ 'Makefile',
192
+ 'Jenkinsfile',
193
+ 'Vagrantfile',
194
+ 'CMakeLists.txt',
195
+ '.gitignore',
196
+ '.env',
197
+ 'docker-compose.yml',
198
+ 'docker-compose.yaml',
45
199
  ],
46
200
  excludePatterns: [
47
- "**/node_modules/**",
48
- "**/dist/**",
49
- "**/build/**",
50
- "**/.git/**",
51
- "**/coverage/**",
52
- "**/.next/**",
53
- "**/target/**",
54
- "**/vendor/**",
55
- "**/.smart-coding-cache/**"
201
+ '**/node_modules/**',
202
+ '**/dist/**',
203
+ '**/build/**',
204
+ '**/.git/**',
205
+ '**/coverage/**',
206
+ '**/.next/**',
207
+ '**/target/**',
208
+ '**/vendor/**',
209
+ '**/.smart-coding-cache/**',
56
210
  ],
57
- chunkSize: 25, // Lines per chunk (larger = fewer embeddings = faster indexing)
58
- chunkOverlap: 5, // Overlap between chunks for context continuity
59
- batchSize: 100,
211
+ chunkSize: 16, // Lines per chunk (tuned for speed/memory balance)
212
+ chunkOverlap: 4, // Overlap between chunks for context continuity
213
+ batchSize: 50, // Number of files to process in a single indexing batch
60
214
  maxFileSize: 1048576, // 1MB - skip files larger than this
61
- maxResults: 5,
62
- enableCache: true,
215
+ prefilterContentMaxBytes: 512 * 1024, // 512KB - cache content during prefilter to avoid double reads
216
+ maxResults: 5, // Maximum number of semantic search results to return
217
+ enableCache: true, // Whether to persist and reload embeddings between sessions
218
+ autoCleanStaleCaches: true, // Automatically remove project caches not accessed for a long time
63
219
  cacheDirectory: null, // Will be set dynamically by loadConfig()
64
- watchFiles: true,
65
- verbose: false,
66
- workerThreads: "auto", // "auto" = CPU cores - 1, or set a number
67
- embeddingModel: "Xenova/all-MiniLM-L6-v2",
68
- semanticWeight: 0.7,
69
- exactMatchBoost: 1.5,
220
+ watchFiles: true, // Enable file system watcher to re-index changed files in real-time
221
+ verbose: false, // Enable detailed logging for debugging and progress tracking
222
+ workerThreads: 0, // 0 = run in main thread (no workers), "auto" = CPU cores - 1, or set a number
223
+ workerBatchTimeoutMs: 120000, // Timeout per worker batch before fallback (ms)
224
+ workerFailureThreshold: 1, // Open circuit after N worker failures
225
+ workerFailureCooldownMs: 10 * 60 * 1000, // Cooldown before retrying workers
226
+ workerMaxChunksPerBatch: 100, // Cap chunks per worker batch to reduce hang risk
227
+ allowSingleThreadFallback: false, // Allow fallback to main-thread embeddings if workers fail
228
+ embeddingProcessPerBatch: false, // Use child process per batch for memory isolation
229
+ embeddingModel: 'jinaai/jina-embeddings-v2-base-code', // AI model ID used for semantic search - can be changed with a lighter model for speed
230
+ preloadEmbeddingModel: true, // Preload the embedding model at startup (server mode)
231
+ vectorStoreFormat: 'binary', // json | binary (binary uses mmap-friendly on-disk store)
232
+ vectorStoreContentMode: 'external', // external = content loaded on-demand for binary store
233
+ contentCacheEntries: 256, // In-memory content cache entries for binary store
234
+ vectorStoreLoadMode: 'memory', // memory | disk (disk streams vectors from disk / memory is faster but requires more RAM)
235
+ vectorCacheEntries: 0, // In-memory vector cache entries for disk-backed loads
236
+ clearCacheAfterIndex: false, // Drop in-memory vectors after indexing completes
237
+ semanticWeight: 0.7, // Balance between semantic and keyword scores (0.0 to 1.0)
238
+ exactMatchBoost: 1.5, // Multiplier applied when an exact string match is found
70
239
  recencyBoost: 0.1, // Boost for recently modified files (max 0.1 added to score)
71
240
  recencyDecayDays: 30, // After this many days, recency boost is 0
72
- smartIndexing: true,
241
+ smartIndexing: true, // Enable automatic project type detection and smart ignore patterns
73
242
  callGraphEnabled: true, // Enable call graph extraction for proximity boosting
74
- callGraphBoost: 0.15, // Boost for files related via call graph (0-1)
75
- callGraphMaxHops: 1, // How many levels of calls to follow (1 = direct only)
76
- annEnabled: true,
77
- annMinChunks: 5000,
78
- annMinCandidates: 50,
79
- annMaxCandidates: 200,
80
- annCandidateMultiplier: 20,
81
- annEfConstruction: 200,
82
- annEfSearch: 64,
83
- annM: 16,
84
- annIndexCache: true,
85
- annMetric: "cosine"
243
+ callGraphBoost: 0.15, // Boost for files related via call graph (0-1)
244
+ callGraphMaxHops: 1, // How many levels of calls to follow (1 = direct only)
245
+ annEnabled: true, // Enable Approximate Nearest Neighbor (ANN) index for large codebases
246
+ annMinChunks: 5000, // Minimum number of chunks required to trigger ANN indexing
247
+ annMinCandidates: 50, // Minimum initial candidates to pull from ANN before refinement
248
+ annMaxCandidates: 200, // Hard limit on the number of ANN candidates to process
249
+ annCandidateMultiplier: 20, // Scale initial search depth based on requested maxResults
250
+ annEfConstruction: 200, // HNSW index construction quality (higher = better index, slower build)
251
+ annEfSearch: 64, // HNSW search parameter (higher = more accurate, slower search)
252
+ annM: 16, // Number of connections per element in HNSW index
253
+ annIndexCache: true, // Whether to cache the built HNSW index on disk
254
+ annMetric: 'cosine', // Distance metric for similarity (currently locked to cosine)
86
255
  };
87
256
 
88
257
  let config = { ...DEFAULT_CONFIG };
89
258
 
259
/**
 * Environment variables that may name the workspace directory.
 * They are consulted in array order by resolveWorkspaceDir(); the first one
 * that yields a usable path wins, so earlier entries take priority.
 */
const WORKSPACE_ENV_VARS = [
  // Explicit overrides for this server / generic MCP hosts
  'HEURISTIC_MCP_WORKSPACE',
  'MCP_WORKSPACE',
  // Generic workspace variables
  'WORKSPACE_FOLDER',
  'WORKSPACE_ROOT',
  // Client-specific variables
  'CURSOR_WORKSPACE',
  'CLAUDE_WORKSPACE',
  'ANTIGRAVITY_WORKSPACE',
  // Lowest priority
  'INIT_CWD',
];
269
+
270
/**
 * Entry names whose presence in a directory marks it as a project root.
 * findWorkspaceRoot() probes them in this order for every directory it visits.
 */
const WORKSPACE_MARKERS = [
  // Version control
  '.git',
  // JavaScript / Python / Go / Rust manifests
  'package.json',
  'pyproject.toml',
  'go.mod',
  'Cargo.toml',
  // JVM build files
  'pom.xml',
  'build.gradle',
  'build.gradle.kts',
  // Other dependency / build descriptors
  'requirements.txt',
  'Gemfile',
  'Makefile',
  'CMakeLists.txt',
];
284
+
285
/**
 * Report whether `filePath` is accessible on disk.
 *
 * @param {string} filePath - Path to probe with fs.access().
 * @returns {Promise<boolean>} true when fs.access() succeeds, false when it
 *   rejects for any reason (the error itself is discarded).
 */
async function pathExists(filePath) {
  return fs.access(filePath).then(
    () => true,
    () => false
  );
}
293
+
294
/**
 * Walk upward from `startDir` looking for a directory that contains any of
 * the WORKSPACE_MARKERS entries.
 *
 * @param {string} startDir - Directory to start from (resolved to absolute).
 * @returns {Promise<string>} The first directory, starting at `startDir` and
 *   moving toward the filesystem root, that contains a marker; the resolved
 *   `startDir` itself when no marker is found anywhere on the way up.
 */
async function findWorkspaceRoot(startDir) {
  const origin = path.resolve(startDir);
  let dir = origin;

  for (;;) {
    for (const marker of WORKSPACE_MARKERS) {
      const found = await pathExists(path.join(dir, marker));
      if (found) return dir;
    }

    // path.dirname() of the filesystem root returns the root itself.
    const up = path.dirname(dir);
    if (up === dir) return origin;
    dir = up;
  }
}
308
+
309
/**
 * Decide which directory should be treated as the workspace root.
 *
 * Resolution order:
 *   1. An explicit `workspaceDir` argument (resolved to an absolute path).
 *   2. process.cwd() when running under a test runner
 *      (VITEST === 'true' or NODE_ENV === 'test').
 *   3. The first WORKSPACE_ENV_VARS entry whose value names an existing
 *      directory.
 *   4. Whatever findWorkspaceRoot(process.cwd()) returns.
 *
 * @param {string|null|undefined} workspaceDir - Explicit workspace override.
 * @returns {Promise<string>} Absolute path of the chosen workspace directory.
 */
async function resolveWorkspaceDir(workspaceDir) {
  if (workspaceDir) return path.resolve(workspaceDir);
  if (process.env.VITEST === 'true' || process.env.NODE_ENV === 'test') {
    return path.resolve(process.cwd());
  }

  for (const key of WORKSPACE_ENV_VARS) {
    const raw = process.env[key];
    // Surrounding whitespace is never part of a real path; trim it so a
    // padded value is not rejected as "missing".
    const value = typeof raw === 'string' ? raw.trim() : '';
    // Skip unset values and unexpanded templates such as "${workspaceFolder}".
    if (!value || value.includes('${')) continue;

    const candidate = path.resolve(value);
    try {
      // A workspace must be a directory: a variable pointing at a regular
      // file would otherwise be accepted as the search root.
      const stats = await fs.stat(candidate);
      if (stats.isDirectory()) return candidate;
    } catch {
      // Missing or unreadable path: fall through to the next variable.
    }
  }

  return findWorkspaceRoot(process.cwd());
}
324
+
90
325
  export async function loadConfig(workspaceDir = null) {
91
326
  try {
92
327
  // Determine the base directory for configuration
@@ -96,27 +331,27 @@ export async function loadConfig(workspaceDir = null) {
96
331
  if (workspaceDir) {
97
332
  // Workspace mode: load config from workspace root
98
333
  baseDir = path.resolve(workspaceDir);
99
- configPath = path.join(baseDir, "config.json");
100
- console.error(`[Config] Workspace mode: ${baseDir}`);
334
+ configPath = path.join(baseDir, 'config.json');
335
+ console.info(`[Config] Workspace mode: ${baseDir}`);
101
336
  } else {
102
337
  // Server mode: load config from server directory for global settings,
103
338
  // but use process.cwd() as base for searching if not specified otherwise
104
339
  const scriptDir = path.dirname(fileURLToPath(import.meta.url));
105
340
  const serverDir = path.resolve(scriptDir, '..');
106
- configPath = path.join(serverDir, "config.json");
107
- baseDir = process.cwd();
341
+ configPath = path.join(serverDir, 'config.json');
342
+ baseDir = await resolveWorkspaceDir(null);
108
343
  }
109
344
 
110
345
  let userConfig = {};
111
346
  try {
112
- const configData = await fs.readFile(configPath, "utf-8");
347
+ const configData = await fs.readFile(configPath, 'utf-8');
113
348
  userConfig = JSON.parse(configData);
114
- } catch (configError) {
349
+ } catch (_configError) {
115
350
  // If config not found in server dir, try CWD
116
351
  if (!workspaceDir) {
117
352
  try {
118
- const localConfigPath = path.join(baseDir, "config.json");
119
- const configData = await fs.readFile(localConfigPath, "utf-8");
353
+ const localConfigPath = path.join(baseDir, 'config.json');
354
+ const configData = await fs.readFile(localConfigPath, 'utf-8');
120
355
  userConfig = JSON.parse(configData);
121
356
  configPath = localConfigPath;
122
357
  } catch {
@@ -127,8 +362,14 @@ export async function loadConfig(workspaceDir = null) {
127
362
 
128
363
  config = { ...DEFAULT_CONFIG, ...userConfig };
129
364
 
130
- // Set search and cache directories
131
- config.searchDirectory = baseDir;
365
+ // Set search directory (respect user override when provided)
366
+ if (userConfig.searchDirectory) {
367
+ config.searchDirectory = path.isAbsolute(userConfig.searchDirectory)
368
+ ? userConfig.searchDirectory
369
+ : path.join(baseDir, userConfig.searchDirectory);
370
+ } else {
371
+ config.searchDirectory = baseDir;
372
+ }
132
373
 
133
374
  // Determine cache directory
134
375
  if (userConfig.cacheDirectory) {
@@ -139,12 +380,16 @@ export async function loadConfig(workspaceDir = null) {
139
380
  } else {
140
381
  // Use global cache directory to prevent cluttering project root
141
382
  // Hash the absolute path to ensure uniqueness per project
142
- const projectHash = crypto.createHash('md5').update(baseDir).digest('hex').slice(0, 12);
383
+ const projectHash = crypto
384
+ .createHash('md5')
385
+ .update(config.searchDirectory)
386
+ .digest('hex')
387
+ .slice(0, 12);
143
388
  const globalCacheRoot = getGlobalCacheDir();
144
- config.cacheDirectory = path.join(globalCacheRoot, "heuristic-mcp", projectHash);
389
+ config.cacheDirectory = path.join(globalCacheRoot, 'heuristic-mcp', projectHash);
145
390
 
146
391
  // Support legacy .smart-coding-cache if it already exists in the project root
147
- const legacyPath = path.join(baseDir, ".smart-coding-cache");
392
+ const legacyPath = path.join(baseDir, '.smart-coding-cache');
148
393
  try {
149
394
  const stats = await fs.stat(legacyPath);
150
395
  if (stats.isDirectory()) {
@@ -168,22 +413,19 @@ export async function loadConfig(workspaceDir = null) {
168
413
 
169
414
  // Merge smart patterns with user patterns (user patterns take precedence)
170
415
  const userPatterns = userConfig.excludePatterns || [];
171
- config.excludePatterns = [
172
- ...smartPatterns,
173
- ...userPatterns
174
- ];
416
+ config.excludePatterns = [...smartPatterns, ...userPatterns];
175
417
 
176
- console.error(`[Config] Smart indexing: ${detectedTypes.join(', ')}`);
177
- console.error(`[Config] Applied ${smartPatterns.length} smart ignore patterns`);
418
+ console.info(`[Config] Smart indexing: ${detectedTypes.join(', ')}`);
419
+ console.info(`[Config] Applied ${smartPatterns.length} smart ignore patterns`);
178
420
  } else {
179
- console.error("[Config] No project markers detected, using default patterns");
421
+ console.info('[Config] No project markers detected, using default patterns');
180
422
  }
181
423
  }
182
424
 
183
- console.error("[Config] Loaded configuration from config.json");
425
+ console.info('[Config] Loaded configuration from config.json');
184
426
  } catch (error) {
185
- console.error("[Config] Using default configuration (config.json not found or invalid)");
186
- console.error(`[Config] Error: ${error.message}`);
427
+ console.warn('[Config] Using default configuration (config.json not found or invalid)');
428
+ console.warn(`[Config] Error: ${error.message}`);
187
429
  }
188
430
 
189
431
  // Apply environment variable overrides (prefix: SMART_CODING_) with validation
@@ -199,7 +441,9 @@ export async function loadConfig(workspaceDir = null) {
199
441
  if (!isNaN(value) && value > 0 && value <= 1000) {
200
442
  config.batchSize = value;
201
443
  } else {
202
- console.error(`[Config] Invalid SMART_CODING_BATCH_SIZE: ${process.env.SMART_CODING_BATCH_SIZE}, using default`);
444
+ console.warn(
445
+ `[Config] Invalid SMART_CODING_BATCH_SIZE: ${process.env.SMART_CODING_BATCH_SIZE}, using default`
446
+ );
203
447
  }
204
448
  }
205
449
 
@@ -208,7 +452,9 @@ export async function loadConfig(workspaceDir = null) {
208
452
  if (!isNaN(value) && value > 0) {
209
453
  config.maxFileSize = value;
210
454
  } else {
211
- console.error(`[Config] Invalid SMART_CODING_MAX_FILE_SIZE: ${process.env.SMART_CODING_MAX_FILE_SIZE}, using default`);
455
+ console.warn(
456
+ `[Config] Invalid SMART_CODING_MAX_FILE_SIZE: ${process.env.SMART_CODING_MAX_FILE_SIZE}, using default`
457
+ );
212
458
  }
213
459
  }
214
460
 
@@ -217,7 +463,9 @@ export async function loadConfig(workspaceDir = null) {
217
463
  if (!isNaN(value) && value > 0 && value <= 100) {
218
464
  config.chunkSize = value;
219
465
  } else {
220
- console.error(`[Config] Invalid SMART_CODING_CHUNK_SIZE: ${process.env.SMART_CODING_CHUNK_SIZE}, using default`);
466
+ console.warn(
467
+ `[Config] Invalid SMART_CODING_CHUNK_SIZE: ${process.env.SMART_CODING_CHUNK_SIZE}, using default`
468
+ );
221
469
  }
222
470
  }
223
471
 
@@ -226,7 +474,9 @@ export async function loadConfig(workspaceDir = null) {
226
474
  if (!isNaN(value) && value > 0 && value <= 100) {
227
475
  config.maxResults = value;
228
476
  } else {
229
- console.error(`[Config] Invalid SMART_CODING_MAX_RESULTS: ${process.env.SMART_CODING_MAX_RESULTS}, using default`);
477
+ console.warn(
478
+ `[Config] Invalid SMART_CODING_MAX_RESULTS: ${process.env.SMART_CODING_MAX_RESULTS}, using default`
479
+ );
230
480
  }
231
481
  }
232
482
 
@@ -242,7 +492,9 @@ export async function loadConfig(workspaceDir = null) {
242
492
  if (!isNaN(value) && value >= 0 && value <= 1) {
243
493
  config.recencyBoost = value;
244
494
  } else {
245
- console.error(`[Config] Invalid SMART_CODING_RECENCY_BOOST: ${process.env.SMART_CODING_RECENCY_BOOST}, using default`);
495
+ console.warn(
496
+ `[Config] Invalid SMART_CODING_RECENCY_BOOST: ${process.env.SMART_CODING_RECENCY_BOOST}, using default`
497
+ );
246
498
  }
247
499
  }
248
500
 
@@ -251,7 +503,9 @@ export async function loadConfig(workspaceDir = null) {
251
503
  if (!isNaN(value) && value > 0 && value <= 365) {
252
504
  config.recencyDecayDays = value;
253
505
  } else {
254
- console.error(`[Config] Invalid SMART_CODING_RECENCY_DECAY_DAYS: ${process.env.SMART_CODING_RECENCY_DECAY_DAYS}, using default`);
506
+ console.warn(
507
+ `[Config] Invalid SMART_CODING_RECENCY_DECAY_DAYS: ${process.env.SMART_CODING_RECENCY_DECAY_DAYS}, using default`
508
+ );
255
509
  }
256
510
  }
257
511
 
@@ -267,7 +521,9 @@ export async function loadConfig(workspaceDir = null) {
267
521
  if (!isNaN(value) && value >= 0 && value <= 1) {
268
522
  config.semanticWeight = value;
269
523
  } else {
270
- console.error(`[Config] Invalid SMART_CODING_SEMANTIC_WEIGHT: ${process.env.SMART_CODING_SEMANTIC_WEIGHT}, using default (must be 0-1)`);
524
+ console.warn(
525
+ `[Config] Invalid SMART_CODING_SEMANTIC_WEIGHT: ${process.env.SMART_CODING_SEMANTIC_WEIGHT}, using default (must be 0-1)`
526
+ );
271
527
  }
272
528
  }
273
529
 
@@ -276,7 +532,9 @@ export async function loadConfig(workspaceDir = null) {
276
532
  if (!isNaN(value) && value >= 0) {
277
533
  config.exactMatchBoost = value;
278
534
  } else {
279
- console.error(`[Config] Invalid SMART_CODING_EXACT_MATCH_BOOST: ${process.env.SMART_CODING_EXACT_MATCH_BOOST}, using default`);
535
+ console.warn(
536
+ `[Config] Invalid SMART_CODING_EXACT_MATCH_BOOST: ${process.env.SMART_CODING_EXACT_MATCH_BOOST}, using default`
537
+ );
280
538
  }
281
539
  }
282
540
 
@@ -284,7 +542,80 @@ export async function loadConfig(workspaceDir = null) {
284
542
  const value = process.env.SMART_CODING_EMBEDDING_MODEL.trim();
285
543
  if (value.length > 0) {
286
544
  config.embeddingModel = value;
287
- console.error(`[Config] Using custom embedding model: ${value}`);
545
+ console.info(`[Config] Using custom embedding model: ${value}`);
546
+ }
547
+ }
548
+
549
+ if (process.env.SMART_CODING_PRELOAD_EMBEDDING_MODEL !== undefined) {
550
+ const value = process.env.SMART_CODING_PRELOAD_EMBEDDING_MODEL;
551
+ if (value === 'true' || value === 'false') {
552
+ config.preloadEmbeddingModel = value === 'true';
553
+ } else {
554
+ console.warn(
555
+ `[Config] Invalid SMART_CODING_PRELOAD_EMBEDDING_MODEL: ${value}, using default`
556
+ );
557
+ }
558
+ }
559
+
560
+ if (process.env.SMART_CODING_VECTOR_STORE_FORMAT !== undefined) {
561
+ const value = process.env.SMART_CODING_VECTOR_STORE_FORMAT.trim().toLowerCase();
562
+ if (value === 'json' || value === 'binary') {
563
+ config.vectorStoreFormat = value;
564
+ } else {
565
+ console.warn(
566
+ `[Config] Invalid SMART_CODING_VECTOR_STORE_FORMAT: ${value}, using default`
567
+ );
568
+ }
569
+ }
570
+
571
+ if (process.env.SMART_CODING_VECTOR_STORE_CONTENT_MODE !== undefined) {
572
+ const value = process.env.SMART_CODING_VECTOR_STORE_CONTENT_MODE.trim().toLowerCase();
573
+ if (value === 'external' || value === 'inline') {
574
+ config.vectorStoreContentMode = value;
575
+ } else {
576
+ console.warn(
577
+ `[Config] Invalid SMART_CODING_VECTOR_STORE_CONTENT_MODE: ${value}, using default`
578
+ );
579
+ }
580
+ }
581
+
582
+ if (process.env.SMART_CODING_VECTOR_STORE_LOAD_MODE !== undefined) {
583
+ const value = process.env.SMART_CODING_VECTOR_STORE_LOAD_MODE.trim().toLowerCase();
584
+ if (value === 'memory' || value === 'disk') {
585
+ config.vectorStoreLoadMode = value;
586
+ } else {
587
+ console.warn(
588
+ `[Config] Invalid SMART_CODING_VECTOR_STORE_LOAD_MODE: ${value}, using default`
589
+ );
590
+ }
591
+ }
592
+
593
+ if (process.env.SMART_CODING_CLEAR_CACHE_AFTER_INDEX !== undefined) {
594
+ const value = process.env.SMART_CODING_CLEAR_CACHE_AFTER_INDEX;
595
+ if (value === 'true' || value === 'false') {
596
+ config.clearCacheAfterIndex = value === 'true';
597
+ }
598
+ }
599
+
600
+ if (process.env.SMART_CODING_CONTENT_CACHE_ENTRIES !== undefined) {
601
+ const value = parseInt(process.env.SMART_CODING_CONTENT_CACHE_ENTRIES, 10);
602
+ if (!isNaN(value) && value >= 0 && value <= 10000) {
603
+ config.contentCacheEntries = value;
604
+ } else {
605
+ console.warn(
606
+ `[Config] Invalid SMART_CODING_CONTENT_CACHE_ENTRIES: ${process.env.SMART_CODING_CONTENT_CACHE_ENTRIES}, using default`
607
+ );
608
+ }
609
+ }
610
+
611
+ if (process.env.SMART_CODING_VECTOR_CACHE_ENTRIES !== undefined) {
612
+ const value = parseInt(process.env.SMART_CODING_VECTOR_CACHE_ENTRIES, 10);
613
+ if (!isNaN(value) && value >= 0 && value <= 100000) {
614
+ config.vectorCacheEntries = value;
615
+ } else {
616
+ console.warn(
617
+ `[Config] Invalid SMART_CODING_VECTOR_CACHE_ENTRIES: ${process.env.SMART_CODING_VECTOR_CACHE_ENTRIES}, using default`
618
+ );
288
619
  }
289
620
  }
290
621
 
@@ -294,10 +625,12 @@ export async function loadConfig(workspaceDir = null) {
294
625
  config.workerThreads = 'auto';
295
626
  } else {
296
627
  const numValue = parseInt(value, 10);
297
- if (!isNaN(numValue) && numValue >= 1 && numValue <= 32) {
628
+ if (!isNaN(numValue) && numValue >= 0 && numValue <= 32) {
298
629
  config.workerThreads = numValue;
299
630
  } else {
300
- console.error(`[Config] Invalid SMART_CODING_WORKER_THREADS: ${value}, using default (must be 'auto' or 1-32)`);
631
+ console.warn(
632
+ `[Config] Invalid SMART_CODING_WORKER_THREADS: ${value}, using default (must be 'auto' or 0-32)`
633
+ );
301
634
  }
302
635
  }
303
636
  }
@@ -314,7 +647,9 @@ export async function loadConfig(workspaceDir = null) {
314
647
  if (!isNaN(value) && value >= 0) {
315
648
  config.annMinChunks = value;
316
649
  } else {
317
- console.error(`[Config] Invalid SMART_CODING_ANN_MIN_CHUNKS: ${process.env.SMART_CODING_ANN_MIN_CHUNKS}, using default`);
650
+ console.warn(
651
+ `[Config] Invalid SMART_CODING_ANN_MIN_CHUNKS: ${process.env.SMART_CODING_ANN_MIN_CHUNKS}, using default`
652
+ );
318
653
  }
319
654
  }
320
655
 
@@ -323,7 +658,9 @@ export async function loadConfig(workspaceDir = null) {
323
658
  if (!isNaN(value) && value >= 0) {
324
659
  config.annMinCandidates = value;
325
660
  } else {
326
- console.error(`[Config] Invalid SMART_CODING_ANN_MIN_CANDIDATES: ${process.env.SMART_CODING_ANN_MIN_CANDIDATES}, using default`);
661
+ console.warn(
662
+ `[Config] Invalid SMART_CODING_ANN_MIN_CANDIDATES: ${process.env.SMART_CODING_ANN_MIN_CANDIDATES}, using default`
663
+ );
327
664
  }
328
665
  }
329
666
 
@@ -332,7 +669,9 @@ export async function loadConfig(workspaceDir = null) {
332
669
  if (!isNaN(value) && value > 0) {
333
670
  config.annMaxCandidates = value;
334
671
  } else {
335
- console.error(`[Config] Invalid SMART_CODING_ANN_MAX_CANDIDATES: ${process.env.SMART_CODING_ANN_MAX_CANDIDATES}, using default`);
672
+ console.warn(
673
+ `[Config] Invalid SMART_CODING_ANN_MAX_CANDIDATES: ${process.env.SMART_CODING_ANN_MAX_CANDIDATES}, using default`
674
+ );
336
675
  }
337
676
  }
338
677
 
@@ -341,7 +680,9 @@ export async function loadConfig(workspaceDir = null) {
341
680
  if (!isNaN(value) && value > 0) {
342
681
  config.annCandidateMultiplier = value;
343
682
  } else {
344
- console.error(`[Config] Invalid SMART_CODING_ANN_CANDIDATE_MULTIPLIER: ${process.env.SMART_CODING_ANN_CANDIDATE_MULTIPLIER}, using default`);
683
+ console.warn(
684
+ `[Config] Invalid SMART_CODING_ANN_CANDIDATE_MULTIPLIER: ${process.env.SMART_CODING_ANN_CANDIDATE_MULTIPLIER}, using default`
685
+ );
345
686
  }
346
687
  }
347
688
 
@@ -350,7 +691,9 @@ export async function loadConfig(workspaceDir = null) {
350
691
  if (!isNaN(value) && value > 0) {
351
692
  config.annEfConstruction = value;
352
693
  } else {
353
- console.error(`[Config] Invalid SMART_CODING_ANN_EF_CONSTRUCTION: ${process.env.SMART_CODING_ANN_EF_CONSTRUCTION}, using default`);
694
+ console.warn(
695
+ `[Config] Invalid SMART_CODING_ANN_EF_CONSTRUCTION: ${process.env.SMART_CODING_ANN_EF_CONSTRUCTION}, using default`
696
+ );
354
697
  }
355
698
  }
356
699
 
@@ -359,7 +702,9 @@ export async function loadConfig(workspaceDir = null) {
359
702
  if (!isNaN(value) && value > 0) {
360
703
  config.annEfSearch = value;
361
704
  } else {
362
- console.error(`[Config] Invalid SMART_CODING_ANN_EF_SEARCH: ${process.env.SMART_CODING_ANN_EF_SEARCH}, using default`);
705
+ console.warn(
706
+ `[Config] Invalid SMART_CODING_ANN_EF_SEARCH: ${process.env.SMART_CODING_ANN_EF_SEARCH}, using default`
707
+ );
363
708
  }
364
709
  }
365
710
 
@@ -368,7 +713,9 @@ export async function loadConfig(workspaceDir = null) {
368
713
  if (!isNaN(value) && value > 0 && value <= 64) {
369
714
  config.annM = value;
370
715
  } else {
371
- console.error(`[Config] Invalid SMART_CODING_ANN_M: ${process.env.SMART_CODING_ANN_M}, using default`);
716
+ console.warn(
717
+ `[Config] Invalid SMART_CODING_ANN_M: ${process.env.SMART_CODING_ANN_M}, using default`
718
+ );
372
719
  }
373
720
  }
374
721
 
@@ -381,24 +728,18 @@ export async function loadConfig(workspaceDir = null) {
381
728
 
382
729
  if (process.env.SMART_CODING_ANN_METRIC !== undefined) {
383
730
  const value = process.env.SMART_CODING_ANN_METRIC.trim().toLowerCase();
384
- if (value === "cosine" || value === "ip" || value === "l2") {
731
+ if (value === 'cosine' || value === 'ip' || value === 'l2') {
385
732
  config.annMetric = value;
386
733
  } else {
387
- console.error(`[Config] Invalid SMART_CODING_ANN_METRIC: ${process.env.SMART_CODING_ANN_METRIC}, using default`);
734
+ console.warn(
735
+ `[Config] Invalid SMART_CODING_ANN_METRIC: ${process.env.SMART_CODING_ANN_METRIC}, using default`
736
+ );
388
737
  }
389
738
  }
390
739
 
391
- if (config.annMetric !== "cosine") {
392
- console.error(`[Config] ANN metric locked to cosine, overriding "${config.annMetric}"`);
393
- config.annMetric = "cosine";
394
- }
395
-
396
- // Safety cap for auto workers
397
- if (config.workerThreads === 'auto') {
398
- // Cap at 4 workers max by default to prevent OOM (each model ~150MB)
399
- // Users can override this by setting a specific number
400
- const cpuCount = process.env.UV_THREADPOOL_SIZE || 4; // Node doesn't expose os.cpus() in some envs
401
- // Actual logic happens in index-codebase.js, but we document the intent here
740
+ if (config.annMetric !== 'cosine') {
741
+ console.warn(`[Config] ANN metric locked to cosine, overriding "${config.annMetric}"`);
742
+ config.annMetric = 'cosine';
402
743
  }
403
744
 
404
745
  return config;