@softerist/heuristic-mcp 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js ADDED
@@ -0,0 +1,208 @@
+ #!/usr/bin/env node
+ import { Server } from "@modelcontextprotocol/sdk/server/index.js";
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+ import { CallToolRequestSchema, ListToolsRequestSchema } from "@modelcontextprotocol/sdk/types.js";
+ import { pipeline } from "@xenova/transformers";
+ import fs from "fs/promises";
+ import { createRequire } from "module";
+
+ // Import package.json for version
+ const require = createRequire(import.meta.url);
+ const packageJson = require("./package.json");
+
+ import { loadConfig } from "./lib/config.js";
+ import { EmbeddingsCache } from "./lib/cache.js";
+ import { CodebaseIndexer } from "./features/index-codebase.js";
+ import { HybridSearch } from "./features/hybrid-search.js";
+
+ import * as IndexCodebaseFeature from "./features/index-codebase.js";
+ import * as HybridSearchFeature from "./features/hybrid-search.js";
+ import * as ClearCacheFeature from "./features/clear-cache.js";
+ import * as FindSimilarCodeFeature from "./features/find-similar-code.js";
+
+ // Parse workspace from command line arguments
+ const args = process.argv.slice(2);
+ const workspaceIndex = args.findIndex(arg => arg.startsWith('--workspace'));
+ let workspaceDir = null;
+
+ if (workspaceIndex !== -1) {
+   const arg = args[workspaceIndex];
+   let rawWorkspace = null;
+
+   if (arg.includes('=')) {
+     rawWorkspace = arg.split('=')[1];
+   } else if (workspaceIndex + 1 < args.length) {
+     rawWorkspace = args[workspaceIndex + 1];
+   }
+
+   // Check if IDE variable wasn't expanded (contains ${})
+   if (rawWorkspace && rawWorkspace.includes('${')) {
+     console.error(`[Server] IDE variable not expanded: ${rawWorkspace}, using current directory`);
+     workspaceDir = process.cwd();
+   } else if (rawWorkspace) {
+     workspaceDir = rawWorkspace;
+   }
+
+   if (workspaceDir) {
+     console.error(`[Server] Workspace mode: ${workspaceDir}`);
+   }
+ }
+
+ // Global state
+ let embedder = null;
+ let cache = null;
+ let indexer = null;
+ let hybridSearch = null;
+ let config = null;
+
+ // Feature registry - ordered by priority (semantic_search first as primary tool)
+ const features = [
+   {
+     module: HybridSearchFeature,
+     instance: null,
+     handler: HybridSearchFeature.handleToolCall
+   },
+   {
+     module: IndexCodebaseFeature,
+     instance: null,
+     handler: IndexCodebaseFeature.handleToolCall
+   },
+   {
+     module: ClearCacheFeature,
+     instance: null,
+     handler: ClearCacheFeature.handleToolCall
+   },
+   {
+     module: FindSimilarCodeFeature,
+     instance: null,
+     handler: FindSimilarCodeFeature.handleToolCall
+   }
+ ];
+
+ // Initialize application
+ async function initialize() {
+   // Load configuration with workspace support
+   config = await loadConfig(workspaceDir);
+
+   // Ensure search directory exists
+   try {
+     await fs.access(config.searchDirectory);
+   } catch {
+     console.error(`[Server] Error: Search directory "${config.searchDirectory}" does not exist`);
+     process.exit(1);
+   }
+
+   // Load AI model
+   console.error("[Server] Loading AI embedding model (this may take time on first run)...");
+   embedder = await pipeline("feature-extraction", config.embeddingModel);
+
+   // Initialize cache
+   cache = new EmbeddingsCache(config);
+   await cache.load();
+
+   // Initialize features
+   indexer = new CodebaseIndexer(embedder, cache, config, server);
+   hybridSearch = new HybridSearch(embedder, cache, config);
+   const cacheClearer = new ClearCacheFeature.CacheClearer(embedder, cache, config, indexer);
+   const findSimilarCode = new FindSimilarCodeFeature.FindSimilarCode(embedder, cache, config);
+
+   // Store feature instances (matches features array order)
+   features[0].instance = hybridSearch;
+   features[1].instance = indexer;
+   features[2].instance = cacheClearer;
+   features[3].instance = findSimilarCode;
+
+   // Attach hybridSearch to server for cross-feature access (e.g. cache invalidation)
+   server.hybridSearch = hybridSearch;
+
+   // Start indexing in background (non-blocking)
+   console.error("[Server] Starting background indexing...");
+   indexer.indexAll().then(() => {
+     // Only start file watcher if explicitly enabled in config
+     if (config.watchFiles) {
+       indexer.setupFileWatcher();
+     }
+   }).catch(err => {
+     console.error("[Server] Background indexing error:", err.message);
+   });
+ }
+
+ // Setup MCP server
+ const server = new Server(
+   {
+     name: "heuristic-mcp",
+     version: packageJson.version
+   },
+   {
+     capabilities: {
+       tools: {}
+     }
+   }
+ );
+
+ // Register tools from all features
+ server.setRequestHandler(ListToolsRequestSchema, async () => {
+   const tools = [];
+
+   for (const feature of features) {
+     const toolDef = feature.module.getToolDefinition(config);
+     tools.push(toolDef);
+   }
+
+   return { tools };
+ });
+
+ // Handle tool calls
+ server.setRequestHandler(CallToolRequestSchema, async (request) => {
+   for (const feature of features) {
+     const toolDef = feature.module.getToolDefinition(config);
+
+     if (request.params.name === toolDef.name) {
+       return await feature.handler(request, feature.instance);
+     }
+   }
+
+   return {
+     content: [{
+       type: "text",
+       text: `Unknown tool: ${request.params.name}`
+     }]
+   };
+ });
+
+ // Main entry point
+ async function main() {
+   await initialize();
+
+   const transport = new StdioServerTransport();
+   await server.connect(transport);
+
+   console.error("[Server] Smart Coding MCP server ready!");
+ }
+
+ // Graceful shutdown
+ process.on('SIGINT', async () => {
+   console.error("\n[Server] Shutting down gracefully...");
+
+   // Stop file watcher
+   if (indexer && indexer.watcher) {
+     await indexer.watcher.close();
+     console.error("[Server] File watcher stopped");
+   }
+
+   // Save cache
+   if (cache) {
+     await cache.save();
+     console.error("[Server] Cache saved");
+   }
+
+   console.error("[Server] Goodbye!");
+   process.exit(0);
+ });
+
+ process.on('SIGTERM', async () => {
+   console.error("\n[Server] Received SIGTERM, shutting down...");
+   process.exit(0);
+ });
+
+ main().catch(console.error);
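The entry point above parses an optional --workspace flag (tolerating unexpanded IDE placeholders like ${workspaceFolder}), loads the embedding model, and exposes each feature module as one MCP tool over stdio. A minimal client sketch, assuming the package exposes index.js as its bin and assuming the standard @modelcontextprotocol/sdk client API; the workspace path is hypothetical:

  import { Client } from "@modelcontextprotocol/sdk/client/index.js";
  import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";

  // Spawn the server over stdio with an explicit workspace root (hypothetical path).
  const transport = new StdioClientTransport({
    command: "npx",
    args: ["@softerist/heuristic-mcp", "--workspace=/path/to/project"]
  });

  const client = new Client({ name: "demo-client", version: "0.0.1" }, { capabilities: {} });
  await client.connect(transport);

  // Each feature module contributes one tool definition via getToolDefinition().
  const { tools } = await client.listTools();
  console.log(tools.map(tool => tool.name));
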
package/lib/cache.js ADDED
@@ -0,0 +1,163 @@
+ import fs from "fs/promises";
+ import path from "path";
+
+ const CACHE_META_VERSION = 1;
+ const CACHE_META_FILE = "meta.json";
+
+ export class EmbeddingsCache {
+   constructor(config) {
+     this.config = config;
+     this.vectorStore = [];
+     this.fileHashes = new Map();
+     this.isSaving = false;
+     this.cacheMeta = {
+       version: CACHE_META_VERSION,
+       embeddingModel: config.embeddingModel
+     };
+   }
+
+   async load() {
+     if (!this.config.enableCache) return;
+
+     try {
+       await fs.mkdir(this.config.cacheDirectory, { recursive: true });
+       const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
+       const hashFile = path.join(this.config.cacheDirectory, "file-hashes.json");
+       const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
+
+       const [metaData, cacheData, hashData] = await Promise.all([
+         fs.readFile(metaFile, "utf-8").catch(() => null),
+         fs.readFile(cacheFile, "utf-8").catch(() => null),
+         fs.readFile(hashFile, "utf-8").catch(() => null)
+       ]);
+
+       if (!metaData && !cacheData && !hashData) {
+         return;
+       }
+
+       if (!metaData) {
+         console.error("[Cache] Missing cache metadata, ignoring cache");
+         return;
+       }
+
+       let meta = null;
+       try {
+         meta = JSON.parse(metaData);
+       } catch {
+         console.error("[Cache] Invalid cache metadata, ignoring cache");
+         return;
+       }
+
+       if (meta?.version !== CACHE_META_VERSION) {
+         console.error(`[Cache] Cache version mismatch (${meta?.version}), ignoring cache`);
+         return;
+       }
+
+       if (meta?.embeddingModel !== this.config.embeddingModel) {
+         console.error(`[Cache] Embedding model changed, ignoring cache (${meta?.embeddingModel} -> ${this.config.embeddingModel})`);
+         return;
+       }
+
+       this.cacheMeta = meta;
+
+       if (cacheData && hashData) {
+         const rawVectorStore = JSON.parse(cacheData);
+         const rawHashes = new Map(Object.entries(JSON.parse(hashData)));
+
+         // Filter cache to only include files matching current extensions
+         const allowedExtensions = this.config.fileExtensions.map(ext => `.${ext}`);
+
+         this.vectorStore = rawVectorStore.filter(chunk => {
+           const ext = path.extname(chunk.file);
+           return allowedExtensions.includes(ext);
+         });
+
+         // Only keep hashes for files matching current extensions
+         for (const [file, hash] of rawHashes) {
+           const ext = path.extname(file);
+           if (allowedExtensions.includes(ext)) {
+             this.fileHashes.set(file, hash);
+           }
+         }
+
+         const filtered = rawVectorStore.length - this.vectorStore.length;
+         if (filtered > 0) {
+           console.error(`[Cache] Filtered ${filtered} outdated cache entries`);
+         }
+         console.error(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
+       }
+     } catch (error) {
+       console.error("[Cache] Failed to load cache:", error.message);
+     }
+   }
+
+   async save() {
+     if (!this.config.enableCache) return;
+
+     this.isSaving = true;
+
+     try {
+       await fs.mkdir(this.config.cacheDirectory, { recursive: true });
+       const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
+       const hashFile = path.join(this.config.cacheDirectory, "file-hashes.json");
+       const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
+       this.cacheMeta = {
+         version: CACHE_META_VERSION,
+         embeddingModel: this.config.embeddingModel
+       };
+
+       await Promise.all([
+         fs.writeFile(cacheFile, JSON.stringify(this.vectorStore, null, 2)),
+         fs.writeFile(hashFile, JSON.stringify(Object.fromEntries(this.fileHashes), null, 2)),
+         fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2))
+       ]);
+     } catch (error) {
+       console.error("[Cache] Failed to save cache:", error.message);
+     } finally {
+       this.isSaving = false;
+     }
+   }
+
+   getVectorStore() {
+     return this.vectorStore;
+   }
+
+   setVectorStore(store) {
+     this.vectorStore = store;
+   }
+
+   getFileHash(file) {
+     return this.fileHashes.get(file);
+   }
+
+   setFileHash(file, hash) {
+     this.fileHashes.set(file, hash);
+   }
+
+   deleteFileHash(file) {
+     this.fileHashes.delete(file);
+   }
+
+   removeFileFromStore(file) {
+     this.vectorStore = this.vectorStore.filter(chunk => chunk.file !== file);
+   }
+
+   addToStore(chunk) {
+     this.vectorStore.push(chunk);
+   }
+
+   async clear() {
+     if (!this.config.enableCache) return;
+
+     try {
+       await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
+       this.vectorStore = [];
+       this.fileHashes = new Map();
+       console.error(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
+     } catch (error) {
+       console.error("[Cache] Failed to clear cache:", error.message);
+       throw error;
+     }
+   }
+ }
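The meta.json guard above means a cache written under one embedding model or format version is silently discarded rather than reused. A minimal round-trip sketch, assuming only the config fields EmbeddingsCache actually reads; the chunk values are placeholders:

  import { EmbeddingsCache } from "./lib/cache.js";

  const config = {
    enableCache: true,
    cacheDirectory: "./.smart-coding-cache",
    embeddingModel: "Xenova/all-MiniLM-L6-v2",
    fileExtensions: ["js", "ts"]
  };

  const cache = new EmbeddingsCache(config);
  await cache.load(); // first run: all three cache files are absent, so this is a no-op

  // Chunk shape mirrors what the indexer stores: file, line range, content, vector.
  cache.addToStore({ file: "src/app.js", startLine: 1, endLine: 25, content: "/* ... */", vector: [0.1, 0.2] });
  cache.setFileHash("src/app.js", "placeholder-hash");
  await cache.save(); // writes embeddings.json, file-hashes.json, and meta.json

  // A later load() under a different config.embeddingModel logs
  // "[Cache] Embedding model changed, ignoring cache" and starts empty.
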
package/lib/config.js ADDED
@@ -0,0 +1,257 @@
+ import fs from "fs/promises";
+ import path from "path";
+ import { fileURLToPath } from "url";
+ import { ProjectDetector } from "./project-detector.js";
+
+ const DEFAULT_CONFIG = {
+   searchDirectory: ".",
+   fileExtensions: [
+     // JavaScript/TypeScript
+     "js", "ts", "jsx", "tsx", "mjs", "cjs",
+     // Styles
+     "css", "scss", "sass", "less", "styl",
+     // Markup
+     "html", "htm", "xml", "svg",
+     // Python
+     "py", "pyw", "pyx",
+     // Java/Kotlin/Scala
+     "java", "kt", "kts", "scala",
+     // C/C++
+     "c", "cpp", "cc", "cxx", "h", "hpp", "hxx",
+     // C#
+     "cs", "csx",
+     // Go
+     "go",
+     // Rust
+     "rs",
+     // Ruby
+     "rb", "rake",
+     // PHP
+     "php", "phtml",
+     // Swift
+     "swift",
+     // Shell scripts
+     "sh", "bash", "zsh", "fish",
+     // Config & Data
+     "json", "yaml", "yml", "toml", "ini", "env",
+     // Documentation
+     "md", "mdx", "txt", "rst",
+     // Database
+     "sql",
+     // Other
+     "r", "R", "lua", "vim", "pl", "pm"
+   ],
+   excludePatterns: [
+     "**/node_modules/**",
+     "**/dist/**",
+     "**/build/**",
+     "**/.git/**",
+     "**/coverage/**",
+     "**/.next/**",
+     "**/target/**",
+     "**/vendor/**",
+     "**/.smart-coding-cache/**"
+   ],
+   chunkSize: 25, // Lines per chunk (larger = fewer embeddings = faster indexing)
+   chunkOverlap: 5, // Overlap between chunks for context continuity
+   batchSize: 100,
+   maxFileSize: 1048576, // 1MB - skip files larger than this
+   maxResults: 5,
+   enableCache: true,
+   cacheDirectory: "./.smart-coding-cache",
+   watchFiles: true,
+   verbose: false,
+   workerThreads: "auto", // "auto" = CPU cores - 1, or set a number
+   embeddingModel: "Xenova/all-MiniLM-L6-v2",
+   semanticWeight: 0.7,
+   exactMatchBoost: 1.5,
+   recencyBoost: 0.1, // Boost for recently modified files (max 0.1 added to score)
+   recencyDecayDays: 30, // After this many days, recency boost is 0
+   smartIndexing: true
+ };
+
+ let config = { ...DEFAULT_CONFIG };
+
+ export async function loadConfig(workspaceDir = null) {
+   try {
+     // Determine the base directory for configuration
+     let baseDir;
+     let configPath;
+
+     if (workspaceDir) {
+       // Workspace mode: load config from workspace root
+       baseDir = path.resolve(workspaceDir);
+       configPath = path.join(baseDir, "config.json");
+       console.error(`[Config] Workspace mode: ${baseDir}`);
+     } else {
+       // Server mode: load config from server directory
+       const scriptDir = path.dirname(fileURLToPath(import.meta.url));
+       baseDir = path.resolve(scriptDir, '..');
+       configPath = path.join(baseDir, "config.json");
+     }
+
+     let userConfig = {};
+     try {
+       const configData = await fs.readFile(configPath, "utf-8");
+       userConfig = JSON.parse(configData);
+     } catch (configError) {
+       if (workspaceDir) {
+         console.error(`[Config] No config.json in workspace, using defaults`);
+       } else {
+         console.error(`[Config] No config.json found: ${configError.message}`);
+       }
+     }
+
+     config = { ...DEFAULT_CONFIG, ...userConfig };
+
+     // Set workspace-specific directories
+     if (workspaceDir) {
+       config.searchDirectory = baseDir;
+       config.cacheDirectory = path.join(baseDir, ".smart-coding-cache");
+     } else {
+       config.searchDirectory = path.resolve(baseDir, config.searchDirectory);
+       config.cacheDirectory = path.resolve(baseDir, config.cacheDirectory);
+     }
+
+     // Smart project detection
+     if (config.smartIndexing !== false) {
+       const detector = new ProjectDetector(config.searchDirectory);
+       const detectedTypes = await detector.detectProjectTypes();
+
+       if (detectedTypes.length > 0) {
+         const smartPatterns = detector.getSmartIgnorePatterns();
+
+         // Merge smart patterns with user patterns (user patterns take precedence)
+         const userPatterns = userConfig.excludePatterns || [];
+         config.excludePatterns = [
+           ...smartPatterns,
+           ...userPatterns
+         ];
+
+         console.error(`[Config] Smart indexing: ${detectedTypes.join(', ')}`);
+         console.error(`[Config] Applied ${smartPatterns.length} smart ignore patterns`);
+       } else {
+         console.error("[Config] No project markers detected, using default patterns");
+       }
+     }
+
+     console.error("[Config] Loaded configuration from config.json");
+   } catch (error) {
+     console.error("[Config] Using default configuration (config.json not found or invalid)");
+     console.error(`[Config] Error: ${error.message}`);
+   }
+
+   // Apply environment variable overrides (prefix: SMART_CODING_) with validation
+   if (process.env.SMART_CODING_VERBOSE !== undefined) {
+     const value = process.env.SMART_CODING_VERBOSE;
+     if (value === 'true' || value === 'false') {
+       config.verbose = value === 'true';
+     }
+   }
+
+   if (process.env.SMART_CODING_BATCH_SIZE !== undefined) {
+     const value = parseInt(process.env.SMART_CODING_BATCH_SIZE, 10);
+     if (!isNaN(value) && value > 0 && value <= 1000) {
+       config.batchSize = value;
+     } else {
+       console.error(`[Config] Invalid SMART_CODING_BATCH_SIZE: ${process.env.SMART_CODING_BATCH_SIZE}, using default`);
+     }
+   }
+
+   if (process.env.SMART_CODING_MAX_FILE_SIZE !== undefined) {
+     const value = parseInt(process.env.SMART_CODING_MAX_FILE_SIZE, 10);
+     if (!isNaN(value) && value > 0) {
+       config.maxFileSize = value;
+     } else {
+       console.error(`[Config] Invalid SMART_CODING_MAX_FILE_SIZE: ${process.env.SMART_CODING_MAX_FILE_SIZE}, using default`);
+     }
+   }
+
+   if (process.env.SMART_CODING_CHUNK_SIZE !== undefined) {
+     const value = parseInt(process.env.SMART_CODING_CHUNK_SIZE, 10);
+     if (!isNaN(value) && value > 0 && value <= 100) {
+       config.chunkSize = value;
+     } else {
+       console.error(`[Config] Invalid SMART_CODING_CHUNK_SIZE: ${process.env.SMART_CODING_CHUNK_SIZE}, using default`);
+     }
+   }
+
+   if (process.env.SMART_CODING_MAX_RESULTS !== undefined) {
+     const value = parseInt(process.env.SMART_CODING_MAX_RESULTS, 10);
+     if (!isNaN(value) && value > 0 && value <= 100) {
+       config.maxResults = value;
+     } else {
+       console.error(`[Config] Invalid SMART_CODING_MAX_RESULTS: ${process.env.SMART_CODING_MAX_RESULTS}, using default`);
+     }
+   }
+
+   if (process.env.SMART_CODING_SMART_INDEXING !== undefined) {
+     const value = process.env.SMART_CODING_SMART_INDEXING;
+     if (value === 'true' || value === 'false') {
+       config.smartIndexing = value === 'true';
+     }
+   }
+
+   if (process.env.SMART_CODING_WATCH_FILES !== undefined) {
+     const value = process.env.SMART_CODING_WATCH_FILES;
+     if (value === 'true' || value === 'false') {
+       config.watchFiles = value === 'true';
+     }
+   }
+
+   if (process.env.SMART_CODING_SEMANTIC_WEIGHT !== undefined) {
+     const value = parseFloat(process.env.SMART_CODING_SEMANTIC_WEIGHT);
+     if (!isNaN(value) && value >= 0 && value <= 1) {
+       config.semanticWeight = value;
+     } else {
+       console.error(`[Config] Invalid SMART_CODING_SEMANTIC_WEIGHT: ${process.env.SMART_CODING_SEMANTIC_WEIGHT}, using default (must be 0-1)`);
+     }
+   }
+
+   if (process.env.SMART_CODING_EXACT_MATCH_BOOST !== undefined) {
+     const value = parseFloat(process.env.SMART_CODING_EXACT_MATCH_BOOST);
+     if (!isNaN(value) && value >= 0) {
+       config.exactMatchBoost = value;
+     } else {
+       console.error(`[Config] Invalid SMART_CODING_EXACT_MATCH_BOOST: ${process.env.SMART_CODING_EXACT_MATCH_BOOST}, using default`);
+     }
+   }
+
+   if (process.env.SMART_CODING_EMBEDDING_MODEL !== undefined) {
+     const value = process.env.SMART_CODING_EMBEDDING_MODEL.trim();
+     if (value.length > 0) {
+       config.embeddingModel = value;
+       console.error(`[Config] Using custom embedding model: ${value}`);
+     }
+   }
+
+   if (process.env.SMART_CODING_WORKER_THREADS !== undefined) {
+     const value = process.env.SMART_CODING_WORKER_THREADS.trim().toLowerCase();
+     if (value === 'auto') {
+       config.workerThreads = 'auto';
+     } else {
+       const numValue = parseInt(value, 10);
+       if (!isNaN(numValue) && numValue >= 1 && numValue <= 32) {
+         config.workerThreads = numValue;
+       } else {
+         console.error(`[Config] Invalid SMART_CODING_WORKER_THREADS: ${value}, using default (must be 'auto' or 1-32)`);
+       }
+     }
+   }
+
+   // Safety cap for auto workers: when workerThreads is "auto", the pool is
+   // capped at 4 workers by default to prevent OOM (each model instance is
+   // ~150MB). Users can override by setting an explicit number; the actual
+   // resolution happens in index-codebase.js, this comment documents the intent.
+
+   return config;
+ }
+
+ export function getConfig() {
+   return config;
+ }
+
+ export { DEFAULT_CONFIG };
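Every SMART_CODING_* override above is validated before it is applied, and an invalid value falls back to the default instead of aborting startup. A short sketch of the resulting precedence (defaults, then config.json, then environment), using hypothetical values:

  import { loadConfig } from "./lib/config.js";

  process.env.SMART_CODING_MAX_RESULTS = "10";      // within 1-100: accepted
  process.env.SMART_CODING_CHUNK_SIZE = "500";      // above 100: rejected, default 25 kept
  process.env.SMART_CODING_WORKER_THREADS = "auto"; // resolved to an actual pool size in index-codebase.js

  const config = await loadConfig("/path/to/project"); // hypothetical workspace root
  console.log(config.maxResults, config.chunkSize);    // 10 25
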
@@ -0,0 +1,67 @@
+ import { parentPort, workerData } from "worker_threads";
+ import { pipeline } from "@xenova/transformers";
+
+ let embedder = null;
+
+ // Initialize the embedding model once when worker starts
+ async function initializeEmbedder() {
+   if (!embedder) {
+     embedder = await pipeline("feature-extraction", workerData.embeddingModel);
+   }
+   return embedder;
+ }
+
+ /**
+  * Process chunks with optimized single-text embedding
+  * Note: Batch processing with transformers.js WASM backend doesn't improve speed
+  * because it loops internally. Single calls are actually faster.
+  */
+ async function processChunks(chunks) {
+   const embedder = await initializeEmbedder();
+   const results = [];
+
+   for (const chunk of chunks) {
+     try {
+       const output = await embedder(chunk.text, { pooling: "mean", normalize: true });
+       results.push({
+         file: chunk.file,
+         startLine: chunk.startLine,
+         endLine: chunk.endLine,
+         content: chunk.text,
+         vector: Array.from(output.data),
+         success: true
+       });
+     } catch (error) {
+       results.push({
+         file: chunk.file,
+         startLine: chunk.startLine,
+         endLine: chunk.endLine,
+         error: error.message,
+         success: false
+       });
+     }
+   }
+
+   return results;
+ }
+
+ // Listen for messages from main thread
+ parentPort.on("message", async (message) => {
+   if (message.type === "process") {
+     try {
+       const results = await processChunks(message.chunks);
+       parentPort.postMessage({ type: "results", results, batchId: message.batchId });
+     } catch (error) {
+       parentPort.postMessage({ type: "error", error: error.message, batchId: message.batchId });
+     }
+   } else if (message.type === "shutdown") {
+     process.exit(0);
+   }
+ });
+
+ // Signal that worker is ready
+ initializeEmbedder().then(() => {
+   parentPort.postMessage({ type: "ready" });
+ }).catch((error) => {
+   parentPort.postMessage({ type: "error", error: error.message });
+ });
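This last hunk arrives without a file header in the diff, so its path inside the package is not shown; the message protocol it implements (ready / process / results / error / shutdown) is self-contained, though. A minimal driver sketch from the main thread, using only Node's worker_threads API and a hypothetical embedding-worker.js filename:

  import { Worker } from "worker_threads";

  // Hypothetical filename; the diff does not name this file.
  const worker = new Worker(new URL("./embedding-worker.js", import.meta.url), {
    workerData: { embeddingModel: "Xenova/all-MiniLM-L6-v2" }
  });

  worker.on("message", (msg) => {
    if (msg.type === "ready") {
      // Model loaded inside the worker; submit one batch of chunks to embed.
      worker.postMessage({
        type: "process",
        batchId: 1,
        chunks: [{ file: "src/app.js", startLine: 1, endLine: 25, text: "const x = 1;" }]
      });
    } else if (msg.type === "results") {
      console.log(`batch ${msg.batchId}: ${msg.results.length} chunks embedded`);
      worker.postMessage({ type: "shutdown" });
    } else if (msg.type === "error") {
      console.error("worker error:", msg.error);
    }
  });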