@softerist/heuristic-mcp 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +287 -0
- package/CONTRIBUTING.md +308 -0
- package/LICENSE +21 -0
- package/README.md +249 -0
- package/config.json +66 -0
- package/example.png +0 -0
- package/features/clear-cache.js +75 -0
- package/features/find-similar-code.js +127 -0
- package/features/hybrid-search.js +173 -0
- package/features/index-codebase.js +811 -0
- package/how-its-works.png +0 -0
- package/index.js +208 -0
- package/lib/cache.js +163 -0
- package/lib/config.js +257 -0
- package/lib/embedding-worker.js +67 -0
- package/lib/ignore-patterns.js +314 -0
- package/lib/project-detector.js +75 -0
- package/lib/tokenizer.js +142 -0
- package/lib/utils.js +301 -0
- package/package.json +65 -0
- package/scripts/clear-cache.js +31 -0
- package/test/clear-cache.test.js +288 -0
- package/test/embedding-model.test.js +230 -0
- package/test/helpers.js +128 -0
- package/test/hybrid-search.test.js +243 -0
- package/test/index-codebase.test.js +246 -0
- package/test/integration.test.js +223 -0
- package/test/tokenizer.test.js +225 -0
- package/vitest.config.js +29 -0
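For orientation, here is a minimal sketch of how an MCP client could launch this package's server over stdio. It is not part of the published diff: the client-side API usage (Client, StdioClientTransport) and the install path are assumptions based on the MCP SDK and on how index.js parses the --workspace flag.

// Sketch only: connecting to the heuristic-mcp server from an MCP client (assumed SDK client API).
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";

const transport = new StdioClientTransport({
  command: "node",
  args: ["node_modules/@softerist/heuristic-mcp/index.js", "--workspace", "/path/to/project"]
});

const client = new Client({ name: "example-client", version: "1.0.0" }, { capabilities: {} });
await client.connect(transport);

// List the tools the server registers (search, indexing, cache clearing, similar-code lookup).
const { tools } = await client.listTools();
console.log(tools.map(t => t.name));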
package/index.js
ADDED
@@ -0,0 +1,208 @@

#!/usr/bin/env node
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { CallToolRequestSchema, ListToolsRequestSchema } from "@modelcontextprotocol/sdk/types.js";
import { pipeline } from "@xenova/transformers";
import fs from "fs/promises";
import { createRequire } from "module";

// Import package.json for version
const require = createRequire(import.meta.url);
const packageJson = require("./package.json");

import { loadConfig } from "./lib/config.js";
import { EmbeddingsCache } from "./lib/cache.js";
import { CodebaseIndexer } from "./features/index-codebase.js";
import { HybridSearch } from "./features/hybrid-search.js";

import * as IndexCodebaseFeature from "./features/index-codebase.js";
import * as HybridSearchFeature from "./features/hybrid-search.js";
import * as ClearCacheFeature from "./features/clear-cache.js";
import * as FindSimilarCodeFeature from "./features/find-similar-code.js";

// Parse workspace from command line arguments
const args = process.argv.slice(2);
const workspaceIndex = args.findIndex(arg => arg.startsWith('--workspace'));
let workspaceDir = null;

if (workspaceIndex !== -1) {
  const arg = args[workspaceIndex];
  let rawWorkspace = null;

  if (arg.includes('=')) {
    rawWorkspace = arg.split('=')[1];
  } else if (workspaceIndex + 1 < args.length) {
    rawWorkspace = args[workspaceIndex + 1];
  }

  // Check if IDE variable wasn't expanded (contains ${})
  if (rawWorkspace && rawWorkspace.includes('${')) {
    console.error(`[Server] IDE variable not expanded: ${rawWorkspace}, using current directory`);
    workspaceDir = process.cwd();
  } else if (rawWorkspace) {
    workspaceDir = rawWorkspace;
  }

  if (workspaceDir) {
    console.error(`[Server] Workspace mode: ${workspaceDir}`);
  }
}

// Global state
let embedder = null;
let cache = null;
let indexer = null;
let hybridSearch = null;
let config = null;

// Feature registry - ordered by priority (semantic_search first as primary tool)
const features = [
  {
    module: HybridSearchFeature,
    instance: null,
    handler: HybridSearchFeature.handleToolCall
  },
  {
    module: IndexCodebaseFeature,
    instance: null,
    handler: IndexCodebaseFeature.handleToolCall
  },
  {
    module: ClearCacheFeature,
    instance: null,
    handler: ClearCacheFeature.handleToolCall
  },
  {
    module: FindSimilarCodeFeature,
    instance: null,
    handler: FindSimilarCodeFeature.handleToolCall
  }
];

// Initialize application
async function initialize() {
  // Load configuration with workspace support
  config = await loadConfig(workspaceDir);

  // Ensure search directory exists
  try {
    await fs.access(config.searchDirectory);
  } catch {
    console.error(`[Server] Error: Search directory "${config.searchDirectory}" does not exist`);
    process.exit(1);
  }

  // Load AI model
  console.error("[Server] Loading AI embedding model (this may take time on first run)...");
  embedder = await pipeline("feature-extraction", config.embeddingModel);

  // Initialize cache
  cache = new EmbeddingsCache(config);
  await cache.load();

  // Initialize features
  indexer = new CodebaseIndexer(embedder, cache, config, server);
  hybridSearch = new HybridSearch(embedder, cache, config);
  const cacheClearer = new ClearCacheFeature.CacheClearer(embedder, cache, config, indexer);
  const findSimilarCode = new FindSimilarCodeFeature.FindSimilarCode(embedder, cache, config);

  // Store feature instances (matches features array order)
  features[0].instance = hybridSearch;
  features[1].instance = indexer;
  features[2].instance = cacheClearer;
  features[3].instance = findSimilarCode;

  // Attach hybridSearch to server for cross-feature access (e.g. cache invalidation)
  server.hybridSearch = hybridSearch;

  // Start indexing in background (non-blocking)
  console.error("[Server] Starting background indexing...");
  indexer.indexAll().then(() => {
    // Only start file watcher if explicitly enabled in config
    if (config.watchFiles) {
      indexer.setupFileWatcher();
    }
  }).catch(err => {
    console.error("[Server] Background indexing error:", err.message);
  });
}

// Setup MCP server
const server = new Server(
  {
    name: "heuristic-mcp",
    version: packageJson.version
  },
  {
    capabilities: {
      tools: {}
    }
  }
);

// Register tools from all features
server.setRequestHandler(ListToolsRequestSchema, async () => {
  const tools = [];

  for (const feature of features) {
    const toolDef = feature.module.getToolDefinition(config);
    tools.push(toolDef);
  }

  return { tools };
});

// Handle tool calls
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  for (const feature of features) {
    const toolDef = feature.module.getToolDefinition(config);

    if (request.params.name === toolDef.name) {
      return await feature.handler(request, feature.instance);
    }
  }

  return {
    content: [{
      type: "text",
      text: `Unknown tool: ${request.params.name}`
    }]
  };
});

// Main entry point
async function main() {
  await initialize();

  const transport = new StdioServerTransport();
  await server.connect(transport);

  console.error("[Server] Smart Coding MCP server ready!");
}

// Graceful shutdown
process.on('SIGINT', async () => {
  console.error("\n[Server] Shutting down gracefully...");

  // Stop file watcher
  if (indexer && indexer.watcher) {
    await indexer.watcher.close();
    console.error("[Server] File watcher stopped");
  }

  // Save cache
  if (cache) {
    await cache.save();
    console.error("[Server] Cache saved");
  }

  console.error("[Server] Goodbye!");
  process.exit(0);
});

process.on('SIGTERM', async () => {
  console.error("\n[Server] Received SIGTERM, shutting down...");
  process.exit(0);
});

main().catch(console.error);
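index.js wires features through a common shape: each feature module exports getToolDefinition(config) and handleToolCall(request, instance), plus a class that initialize() constructs and stores on the registry. Below is a minimal sketch of a module satisfying that contract, inferred from how index.js consumes the feature modules; the file name, class, and tool name are hypothetical and not shipped with the package.

// Hypothetical features/hello.js, shaped to match how index.js calls feature modules.
export class Hello {
  constructor(embedder, cache, config) {
    this.config = config;
  }

  async run(name) {
    return `Hello, ${name}`;
  }
}

export function getToolDefinition(config) {
  return {
    name: "hello",
    description: "Example tool (hypothetical, for illustration only)",
    inputSchema: {
      type: "object",
      properties: { name: { type: "string" } },
      required: ["name"]
    }
  };
}

export async function handleToolCall(request, instance) {
  const text = await instance.run(request.params.arguments.name);
  return { content: [{ type: "text", text }] };
}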
package/lib/cache.js
ADDED
@@ -0,0 +1,163 @@

import fs from "fs/promises";
import path from "path";

const CACHE_META_VERSION = 1;
const CACHE_META_FILE = "meta.json";

export class EmbeddingsCache {
  constructor(config) {
    this.config = config;
    this.vectorStore = [];
    this.fileHashes = new Map();
    this.isSaving = false;
    this.cacheMeta = {
      version: CACHE_META_VERSION,
      embeddingModel: config.embeddingModel
    };
  }

  async load() {
    if (!this.config.enableCache) return;

    try {
      await fs.mkdir(this.config.cacheDirectory, { recursive: true });
      const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
      const hashFile = path.join(this.config.cacheDirectory, "file-hashes.json");
      const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);

      const [metaData, cacheData, hashData] = await Promise.all([
        fs.readFile(metaFile, "utf-8").catch(() => null),
        fs.readFile(cacheFile, "utf-8").catch(() => null),
        fs.readFile(hashFile, "utf-8").catch(() => null)
      ]);

      if (!metaData && !cacheData && !hashData) {
        return;
      }

      if (!metaData) {
        console.error("[Cache] Missing cache metadata, ignoring cache");
        return;
      }

      let meta = null;
      try {
        meta = JSON.parse(metaData);
      } catch {
        console.error("[Cache] Invalid cache metadata, ignoring cache");
        return;
      }

      if (meta?.version !== CACHE_META_VERSION) {
        console.error(`[Cache] Cache version mismatch (${meta?.version}), ignoring cache`);
        return;
      }

      if (meta?.embeddingModel !== this.config.embeddingModel) {
        console.error(`[Cache] Embedding model changed, ignoring cache (${meta?.embeddingModel} -> ${this.config.embeddingModel})`);
        return;
      }

      this.cacheMeta = meta;

      if (cacheData && hashData) {
        const rawVectorStore = JSON.parse(cacheData);
        const rawHashes = new Map(Object.entries(JSON.parse(hashData)));

        // Filter cache to only include files matching current extensions
        const allowedExtensions = this.config.fileExtensions.map(ext => `.${ext}`);

        this.vectorStore = rawVectorStore.filter(chunk => {
          const ext = path.extname(chunk.file);
          return allowedExtensions.includes(ext);
        });

        // Only keep hashes for files matching current extensions
        for (const [file, hash] of rawHashes) {
          const ext = path.extname(file);
          if (allowedExtensions.includes(ext)) {
            this.fileHashes.set(file, hash);
          }
        }

        const filtered = rawVectorStore.length - this.vectorStore.length;
        if (filtered > 0) {
          console.error(`[Cache] Filtered ${filtered} outdated cache entries`);
        }
        console.error(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
      }
    } catch (error) {
      console.error("[Cache] Failed to load cache:", error.message);
    }
  }

  async save() {
    if (!this.config.enableCache) return;

    this.isSaving = true;

    try {
      await fs.mkdir(this.config.cacheDirectory, { recursive: true });
      const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
      const hashFile = path.join(this.config.cacheDirectory, "file-hashes.json");
      const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
      this.cacheMeta = {
        version: CACHE_META_VERSION,
        embeddingModel: this.config.embeddingModel
      };

      await Promise.all([
        fs.writeFile(cacheFile, JSON.stringify(this.vectorStore, null, 2)),
        fs.writeFile(hashFile, JSON.stringify(Object.fromEntries(this.fileHashes), null, 2)),
        fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2))
      ]);
    } catch (error) {
      console.error("[Cache] Failed to save cache:", error.message);
    } finally {
      this.isSaving = false;
    }
  }

  getVectorStore() {
    return this.vectorStore;
  }

  setVectorStore(store) {
    this.vectorStore = store;
  }

  getFileHash(file) {
    return this.fileHashes.get(file);
  }

  setFileHash(file, hash) {
    this.fileHashes.set(file, hash);
  }

  deleteFileHash(file) {
    this.fileHashes.delete(file);
  }

  removeFileFromStore(file) {
    this.vectorStore = this.vectorStore.filter(chunk => chunk.file !== file);
  }

  addToStore(chunk) {
    this.vectorStore.push(chunk);
  }

  async clear() {
    if (!this.config.enableCache) return;

    try {
      await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
      this.vectorStore = [];
      this.fileHashes = new Map();
      console.error(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
    } catch (error) {
      console.error("[Cache] Failed to clear cache:", error.message);
      throw error;
    }
  }
}
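As a usage illustration only (not part of the package), EmbeddingsCache can be exercised on its own with a minimal config object; the field values below are assumptions chosen just to satisfy the constructor and the load/save paths shown above.

import { EmbeddingsCache } from "./lib/cache.js";

// Minimal config stub covering the fields cache.js reads (values are illustrative).
const config = {
  enableCache: true,
  cacheDirectory: "./.smart-coding-cache",
  fileExtensions: ["js", "ts"],
  embeddingModel: "Xenova/all-MiniLM-L6-v2"
};

const cache = new EmbeddingsCache(config);
await cache.load(); // no-op on first run; validates meta.json on later runs

cache.addToStore({ file: "src/app.js", startLine: 1, endLine: 25, content: "const x = 1;", vector: [0.1, 0.2, 0.3] });
cache.setFileHash("src/app.js", "abc123");

await cache.save(); // writes embeddings.json, file-hashes.json, and meta.json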
package/lib/config.js
ADDED
@@ -0,0 +1,257 @@

import fs from "fs/promises";
import path from "path";
import { fileURLToPath } from "url";
import { ProjectDetector } from "./project-detector.js";

const DEFAULT_CONFIG = {
  searchDirectory: ".",
  fileExtensions: [
    // JavaScript/TypeScript
    "js", "ts", "jsx", "tsx", "mjs", "cjs",
    // Styles
    "css", "scss", "sass", "less", "styl",
    // Markup
    "html", "htm", "xml", "svg",
    // Python
    "py", "pyw", "pyx",
    // Java/Kotlin/Scala
    "java", "kt", "kts", "scala",
    // C/C++
    "c", "cpp", "cc", "cxx", "h", "hpp", "hxx",
    // C#
    "cs", "csx",
    // Go
    "go",
    // Rust
    "rs",
    // Ruby
    "rb", "rake",
    // PHP
    "php", "phtml",
    // Swift
    "swift",
    // Shell scripts
    "sh", "bash", "zsh", "fish",
    // Config & Data
    "json", "yaml", "yml", "toml", "ini", "env",
    // Documentation
    "md", "mdx", "txt", "rst",
    // Database
    "sql",
    // Other
    "r", "R", "lua", "vim", "pl", "pm"
  ],
  excludePatterns: [
    "**/node_modules/**",
    "**/dist/**",
    "**/build/**",
    "**/.git/**",
    "**/coverage/**",
    "**/.next/**",
    "**/target/**",
    "**/vendor/**",
    "**/.smart-coding-cache/**"
  ],
  chunkSize: 25, // Lines per chunk (larger = fewer embeddings = faster indexing)
  chunkOverlap: 5, // Overlap between chunks for context continuity
  batchSize: 100,
  maxFileSize: 1048576, // 1MB - skip files larger than this
  maxResults: 5,
  enableCache: true,
  cacheDirectory: "./.smart-coding-cache",
  watchFiles: true,
  verbose: false,
  workerThreads: "auto", // "auto" = CPU cores - 1, or set a number
  embeddingModel: "Xenova/all-MiniLM-L6-v2",
  semanticWeight: 0.7,
  exactMatchBoost: 1.5,
  recencyBoost: 0.1, // Boost for recently modified files (max 0.1 added to score)
  recencyDecayDays: 30, // After this many days, recency boost is 0
  smartIndexing: true
};

let config = { ...DEFAULT_CONFIG };

export async function loadConfig(workspaceDir = null) {
  try {
    // Determine the base directory for configuration
    let baseDir;
    let configPath;

    if (workspaceDir) {
      // Workspace mode: load config from workspace root
      baseDir = path.resolve(workspaceDir);
      configPath = path.join(baseDir, "config.json");
      console.error(`[Config] Workspace mode: ${baseDir}`);
    } else {
      // Server mode: load config from server directory
      const scriptDir = path.dirname(fileURLToPath(import.meta.url));
      baseDir = path.resolve(scriptDir, '..');
      configPath = path.join(baseDir, "config.json");
    }

    let userConfig = {};
    try {
      const configData = await fs.readFile(configPath, "utf-8");
      userConfig = JSON.parse(configData);
    } catch (configError) {
      if (workspaceDir) {
        console.error(`[Config] No config.json in workspace, using defaults`);
      } else {
        console.error(`[Config] No config.json found: ${configError.message}`);
      }
    }

    config = { ...DEFAULT_CONFIG, ...userConfig };

    // Set workspace-specific directories
    if (workspaceDir) {
      config.searchDirectory = baseDir;
      config.cacheDirectory = path.join(baseDir, ".smart-coding-cache");
    } else {
      config.searchDirectory = path.resolve(baseDir, config.searchDirectory);
      config.cacheDirectory = path.resolve(baseDir, config.cacheDirectory);
    }

    // Smart project detection
    if (config.smartIndexing !== false) {
      const detector = new ProjectDetector(config.searchDirectory);
      const detectedTypes = await detector.detectProjectTypes();

      if (detectedTypes.length > 0) {
        const smartPatterns = detector.getSmartIgnorePatterns();

        // Merge smart patterns with user patterns (user patterns take precedence)
        const userPatterns = userConfig.excludePatterns || [];
        config.excludePatterns = [
          ...smartPatterns,
          ...userPatterns
        ];

        console.error(`[Config] Smart indexing: ${detectedTypes.join(', ')}`);
        console.error(`[Config] Applied ${smartPatterns.length} smart ignore patterns`);
      } else {
        console.error("[Config] No project markers detected, using default patterns");
      }
    }

    console.error("[Config] Loaded configuration from config.json");
  } catch (error) {
    console.error("[Config] Using default configuration (config.json not found or invalid)");
    console.error(`[Config] Error: ${error.message}`);
  }

  // Apply environment variable overrides (prefix: SMART_CODING_) with validation
  if (process.env.SMART_CODING_VERBOSE !== undefined) {
    const value = process.env.SMART_CODING_VERBOSE;
    if (value === 'true' || value === 'false') {
      config.verbose = value === 'true';
    }
  }

  if (process.env.SMART_CODING_BATCH_SIZE !== undefined) {
    const value = parseInt(process.env.SMART_CODING_BATCH_SIZE, 10);
    if (!isNaN(value) && value > 0 && value <= 1000) {
      config.batchSize = value;
    } else {
      console.error(`[Config] Invalid SMART_CODING_BATCH_SIZE: ${process.env.SMART_CODING_BATCH_SIZE}, using default`);
    }
  }

  if (process.env.SMART_CODING_MAX_FILE_SIZE !== undefined) {
    const value = parseInt(process.env.SMART_CODING_MAX_FILE_SIZE, 10);
    if (!isNaN(value) && value > 0) {
      config.maxFileSize = value;
    } else {
      console.error(`[Config] Invalid SMART_CODING_MAX_FILE_SIZE: ${process.env.SMART_CODING_MAX_FILE_SIZE}, using default`);
    }
  }

  if (process.env.SMART_CODING_CHUNK_SIZE !== undefined) {
    const value = parseInt(process.env.SMART_CODING_CHUNK_SIZE, 10);
    if (!isNaN(value) && value > 0 && value <= 100) {
      config.chunkSize = value;
    } else {
      console.error(`[Config] Invalid SMART_CODING_CHUNK_SIZE: ${process.env.SMART_CODING_CHUNK_SIZE}, using default`);
    }
  }

  if (process.env.SMART_CODING_MAX_RESULTS !== undefined) {
    const value = parseInt(process.env.SMART_CODING_MAX_RESULTS, 10);
    if (!isNaN(value) && value > 0 && value <= 100) {
      config.maxResults = value;
    } else {
      console.error(`[Config] Invalid SMART_CODING_MAX_RESULTS: ${process.env.SMART_CODING_MAX_RESULTS}, using default`);
    }
  }

  if (process.env.SMART_CODING_SMART_INDEXING !== undefined) {
    const value = process.env.SMART_CODING_SMART_INDEXING;
    if (value === 'true' || value === 'false') {
      config.smartIndexing = value === 'true';
    }
  }

  if (process.env.SMART_CODING_WATCH_FILES !== undefined) {
    const value = process.env.SMART_CODING_WATCH_FILES;
    if (value === 'true' || value === 'false') {
      config.watchFiles = value === 'true';
    }
  }

  if (process.env.SMART_CODING_SEMANTIC_WEIGHT !== undefined) {
    const value = parseFloat(process.env.SMART_CODING_SEMANTIC_WEIGHT);
    if (!isNaN(value) && value >= 0 && value <= 1) {
      config.semanticWeight = value;
    } else {
      console.error(`[Config] Invalid SMART_CODING_SEMANTIC_WEIGHT: ${process.env.SMART_CODING_SEMANTIC_WEIGHT}, using default (must be 0-1)`);
    }
  }

  if (process.env.SMART_CODING_EXACT_MATCH_BOOST !== undefined) {
    const value = parseFloat(process.env.SMART_CODING_EXACT_MATCH_BOOST);
    if (!isNaN(value) && value >= 0) {
      config.exactMatchBoost = value;
    } else {
      console.error(`[Config] Invalid SMART_CODING_EXACT_MATCH_BOOST: ${process.env.SMART_CODING_EXACT_MATCH_BOOST}, using default`);
    }
  }

  if (process.env.SMART_CODING_EMBEDDING_MODEL !== undefined) {
    const value = process.env.SMART_CODING_EMBEDDING_MODEL.trim();
    if (value.length > 0) {
      config.embeddingModel = value;
      console.error(`[Config] Using custom embedding model: ${value}`);
    }
  }

  if (process.env.SMART_CODING_WORKER_THREADS !== undefined) {
    const value = process.env.SMART_CODING_WORKER_THREADS.trim().toLowerCase();
    if (value === 'auto') {
      config.workerThreads = 'auto';
    } else {
      const numValue = parseInt(value, 10);
      if (!isNaN(numValue) && numValue >= 1 && numValue <= 32) {
        config.workerThreads = numValue;
      } else {
        console.error(`[Config] Invalid SMART_CODING_WORKER_THREADS: ${value}, using default (must be 'auto' or 1-32)`);
      }
    }
  }

  // Safety cap for auto workers
  if (config.workerThreads === 'auto') {
    // Cap at 4 workers max by default to prevent OOM (each model ~150MB)
    // Users can override this by setting a specific number
    const cpuCount = process.env.UV_THREADPOOL_SIZE || 4; // Node doesn't expose os.cpus() in some envs
    // Actual logic happens in index-codebase.js, but we document the intent here
  }

  return config;
}

export function getConfig() {
  return config;
}

export { DEFAULT_CONFIG };
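A small sketch of the environment-variable overrides that loadConfig() validates (prefix SMART_CODING_); the specific values below are arbitrary examples, not defaults from the package.

import { loadConfig } from "./lib/config.js";

// Overrides are read from process.env and validated against the ranges enforced in config.js.
process.env.SMART_CODING_CHUNK_SIZE = "50";        // accepted: 1-100
process.env.SMART_CODING_SEMANTIC_WEIGHT = "0.5";  // accepted: 0-1
process.env.SMART_CODING_BATCH_SIZE = "5000";      // rejected (> 1000), default of 100 kept

const config = await loadConfig();
console.log(config.chunkSize, config.semanticWeight, config.batchSize); // 50 0.5 100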
package/lib/embedding-worker.js
ADDED

@@ -0,0 +1,67 @@

import { parentPort, workerData } from "worker_threads";
import { pipeline } from "@xenova/transformers";

let embedder = null;

// Initialize the embedding model once when worker starts
async function initializeEmbedder() {
  if (!embedder) {
    embedder = await pipeline("feature-extraction", workerData.embeddingModel);
  }
  return embedder;
}

/**
 * Process chunks with optimized single-text embedding
 * Note: Batch processing with transformers.js WASM backend doesn't improve speed
 * because it loops internally. Single calls are actually faster.
 */
async function processChunks(chunks) {
  const embedder = await initializeEmbedder();
  const results = [];

  for (const chunk of chunks) {
    try {
      const output = await embedder(chunk.text, { pooling: "mean", normalize: true });
      results.push({
        file: chunk.file,
        startLine: chunk.startLine,
        endLine: chunk.endLine,
        content: chunk.text,
        vector: Array.from(output.data),
        success: true
      });
    } catch (error) {
      results.push({
        file: chunk.file,
        startLine: chunk.startLine,
        endLine: chunk.endLine,
        error: error.message,
        success: false
      });
    }
  }

  return results;
}

// Listen for messages from main thread
parentPort.on("message", async (message) => {
  if (message.type === "process") {
    try {
      const results = await processChunks(message.chunks);
      parentPort.postMessage({ type: "results", results, batchId: message.batchId });
    } catch (error) {
      parentPort.postMessage({ type: "error", error: error.message, batchId: message.batchId });
    }
  } else if (message.type === "shutdown") {
    process.exit(0);
  }
});

// Signal that worker is ready
initializeEmbedder().then(() => {
  parentPort.postMessage({ type: "ready" });
}).catch((error) => {
  parentPort.postMessage({ type: "error", error: error.message });
});
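The worker's message protocol (ready / process / results / error / shutdown) implies a driver roughly like the sketch below. The package's actual pool logic lives in features/index-codebase.js, which is not shown in this excerpt, so treat this as an assumed minimal driver rather than the package's implementation.

import { Worker } from "worker_threads";

// Assumed minimal driver for lib/embedding-worker.js, based only on its message protocol.
const worker = new Worker(new URL("./lib/embedding-worker.js", import.meta.url), {
  workerData: { embeddingModel: "Xenova/all-MiniLM-L6-v2" }
});

worker.on("message", (msg) => {
  if (msg.type === "ready") {
    worker.postMessage({
      type: "process",
      batchId: 1,
      chunks: [{ file: "src/app.js", startLine: 1, endLine: 25, text: "const x = 1;" }]
    });
  } else if (msg.type === "results") {
    console.log(`Batch ${msg.batchId}: ${msg.results.length} chunks embedded`);
    worker.postMessage({ type: "shutdown" });
  } else if (msg.type === "error") {
    console.error("Worker error:", msg.error);
  }
});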