codebasesearch 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/mcp.js +17 -96
- package/package.json +1 -1
- package/src/search-worker.js +103 -0
- package/src/supervisor.js +124 -0
- package/scripts/patch-transformers.js +0 -42
package/mcp.js
CHANGED
|
@@ -22,13 +22,8 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
|
22
22
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
23
23
|
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
|
|
24
24
|
import { cwd } from 'process';
|
|
25
|
-
import { join,
|
|
26
|
-
import {
|
|
27
|
-
import { loadIgnorePatterns } from './src/ignore-parser.js';
|
|
28
|
-
import { scanRepository } from './src/scanner.js';
|
|
29
|
-
import { generateEmbeddings } from './src/embeddings.js';
|
|
30
|
-
import { initStore, upsertChunks, closeStore } from './src/store.js';
|
|
31
|
-
import { executeSearch } from './src/search.js';
|
|
25
|
+
import { join, existsSync, readFileSync, appendFileSync, writeFileSync } from 'fs';
|
|
26
|
+
import { supervisor } from './src/supervisor.js';
|
|
32
27
|
|
|
33
28
|
async function ensureIgnoreEntry(rootPath) {
|
|
34
29
|
const gitignorePath = join(rootPath, '.gitignore');
|
|
@@ -44,96 +39,10 @@ async function ensureIgnoreEntry(rootPath) {
|
|
|
44
39
|
writeFileSync(gitignorePath, `${entry}\n`);
|
|
45
40
|
}
|
|
46
41
|
} catch (e) {
|
|
47
|
-
// Ignore write errors
|
|
42
|
+
// Ignore write errors
|
|
48
43
|
}
|
|
49
44
|
}
|
|
50
45
|
|
|
51
|
-
class CodeSearchManager {
|
|
52
|
-
async search(repositoryPath, query) {
|
|
53
|
-
const absolutePath = resolve(repositoryPath);
|
|
54
|
-
|
|
55
|
-
if (!existsSync(absolutePath)) {
|
|
56
|
-
return {
|
|
57
|
-
error: `Repository path not found: ${absolutePath}`,
|
|
58
|
-
results: [],
|
|
59
|
-
};
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
try {
|
|
63
|
-
// Ensure .code-search/ is in .gitignore
|
|
64
|
-
await ensureIgnoreEntry(absolutePath);
|
|
65
|
-
|
|
66
|
-
// Load ignore patterns
|
|
67
|
-
const ignorePatterns = loadIgnorePatterns(absolutePath);
|
|
68
|
-
const dbPath = join(absolutePath, '.code-search');
|
|
69
|
-
|
|
70
|
-
// Initialize store
|
|
71
|
-
await initStore(dbPath);
|
|
72
|
-
|
|
73
|
-
// Scan repository
|
|
74
|
-
const chunks = scanRepository(absolutePath, ignorePatterns);
|
|
75
|
-
|
|
76
|
-
if (chunks.length === 0) {
|
|
77
|
-
await closeStore();
|
|
78
|
-
return {
|
|
79
|
-
query,
|
|
80
|
-
results: [],
|
|
81
|
-
message: 'No code chunks found in repository',
|
|
82
|
-
};
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
// Generate embeddings in batches
|
|
86
|
-
const batchSize = 32;
|
|
87
|
-
const chunkTexts = chunks.map(c => c.content);
|
|
88
|
-
const allEmbeddings = [];
|
|
89
|
-
|
|
90
|
-
for (let i = 0; i < chunkTexts.length; i += batchSize) {
|
|
91
|
-
const batchTexts = chunkTexts.slice(i, i + batchSize);
|
|
92
|
-
const batchEmbeddings = await generateEmbeddings(batchTexts);
|
|
93
|
-
allEmbeddings.push(...batchEmbeddings);
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
// Create chunks with embeddings
|
|
97
|
-
const chunksWithEmbeddings = chunks.map((chunk, idx) => ({
|
|
98
|
-
...chunk,
|
|
99
|
-
vector: allEmbeddings[idx],
|
|
100
|
-
}));
|
|
101
|
-
|
|
102
|
-
// Upsert to store
|
|
103
|
-
await upsertChunks(chunksWithEmbeddings);
|
|
104
|
-
|
|
105
|
-
// Execute search
|
|
106
|
-
const results = await executeSearch(query);
|
|
107
|
-
|
|
108
|
-
// Format results
|
|
109
|
-
const formattedResults = results.map((result, idx) => ({
|
|
110
|
-
rank: idx + 1,
|
|
111
|
-
file: result.file_path,
|
|
112
|
-
lines: `${result.line_start}-${result.line_end}`,
|
|
113
|
-
score: (result.score * 100).toFixed(1),
|
|
114
|
-
snippet: result.content.split('\n').slice(0, 3).join('\n'),
|
|
115
|
-
}));
|
|
116
|
-
|
|
117
|
-
await closeStore();
|
|
118
|
-
|
|
119
|
-
return {
|
|
120
|
-
query,
|
|
121
|
-
repository: absolutePath,
|
|
122
|
-
resultsCount: formattedResults.length,
|
|
123
|
-
results: formattedResults,
|
|
124
|
-
};
|
|
125
|
-
} catch (error) {
|
|
126
|
-
await closeStore().catch(() => {});
|
|
127
|
-
return {
|
|
128
|
-
error: error.message,
|
|
129
|
-
results: [],
|
|
130
|
-
};
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
const manager = new CodeSearchManager();
|
|
136
|
-
|
|
137
46
|
const server = new Server(
|
|
138
47
|
{
|
|
139
48
|
name: 'code-search-mcp',
|
|
@@ -205,7 +114,12 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
205
114
|
}
|
|
206
115
|
|
|
207
116
|
try {
|
|
208
|
-
|
|
117
|
+
await ensureIgnoreEntry(repositoryPath);
|
|
118
|
+
const result = await supervisor.sendRequest({
|
|
119
|
+
type: 'search',
|
|
120
|
+
query,
|
|
121
|
+
repositoryPath,
|
|
122
|
+
});
|
|
209
123
|
|
|
210
124
|
if (result.error) {
|
|
211
125
|
return {
|
|
@@ -267,10 +181,17 @@ const isMain = process.argv[1] && (
|
|
|
267
181
|
if (isMain) {
|
|
268
182
|
main().catch((error) => {
|
|
269
183
|
console.error('Server error:', error);
|
|
270
|
-
process.exit(1);
|
|
271
184
|
});
|
|
272
185
|
}
|
|
273
186
|
|
|
187
|
+
process.on('uncaughtException', (error) => {
|
|
188
|
+
console.error('Uncaught exception:', error);
|
|
189
|
+
});
|
|
190
|
+
|
|
191
|
+
process.on('unhandledRejection', (reason) => {
|
|
192
|
+
console.error('Unhandled rejection:', reason);
|
|
193
|
+
});
|
|
194
|
+
|
|
274
195
|
async function main() {
|
|
275
196
|
await startMcpServer();
|
|
276
197
|
}
|
package/package.json
CHANGED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { parentPort } from 'worker_threads';
|
|
2
|
+
import { resolve } from 'path';
|
|
3
|
+
import { existsSync } from 'fs';
|
|
4
|
+
import { loadIgnorePatterns } from './ignore-parser.js';
|
|
5
|
+
import { scanRepository } from './scanner.js';
|
|
6
|
+
import { generateEmbeddings } from './embeddings.js';
|
|
7
|
+
import { initStore, upsertChunks, closeStore } from './store.js';
|
|
8
|
+
import { executeSearch } from './search.js';
|
|
9
|
+
|
|
10
|
+
/**
 * Indexes a repository and runs a semantic search against it.
 *
 * The repository is scanned, every chunk is embedded in batches, the chunks
 * are upserted into the store under `<repo>/.code-search`, and the query is
 * then executed against that store.
 *
 * This function reports failures through the returned object instead of
 * throwing, so the caller can always forward the value as a reply.
 *
 * @param {string} repositoryPath - Path to the repository (resolved to absolute).
 * @param {string} query - Search query text.
 * @returns {Promise<object>} On success `{ query, repository, resultsCount, results }`;
 *   when nothing was scanned `{ query, results: [], message }`;
 *   on failure `{ error, results: [] }`.
 */
async function performSearch(repositoryPath, query) {
  const absolutePath = resolve(repositoryPath);

  if (!existsSync(absolutePath)) {
    return { error: 'Repository path not found', results: [] };
  }

  try {
    const ignorePatterns = loadIgnorePatterns(absolutePath);
    const dbPath = resolve(absolutePath, '.code-search');

    await initStore(dbPath);

    const chunks = scanRepository(absolutePath, ignorePatterns);
    if (chunks.length === 0) {
      return { query, results: [], message: 'No code chunks found' };
    }

    const batchSize = 32;
    const allEmbeddings = [];

    for (let i = 0; i < chunks.length; i += batchSize) {
      const batchTexts = chunks.slice(i, i + batchSize).map((c) => c.content);
      const batchEmbeddings = await generateEmbeddings(batchTexts);
      allEmbeddings.push(...batchEmbeddings);
    }

    // Chunks and vectors are paired by index; a short result would otherwise
    // silently store chunks with `vector: undefined`.
    if (allEmbeddings.length !== chunks.length) {
      throw new Error(
        `Embedding count mismatch: expected ${chunks.length}, got ${allEmbeddings.length}`,
      );
    }

    const chunksWithEmbeddings = chunks.map((chunk, idx) => ({
      ...chunk,
      vector: allEmbeddings[idx],
    }));

    await upsertChunks(chunksWithEmbeddings);
    const results = await executeSearch(query);

    return {
      query,
      repository: absolutePath,
      resultsCount: results.length,
      results: results.map((result, idx) => ({
        rank: idx + 1,
        file: result.file_path,
        lines: `${result.line_start}-${result.line_end}`,
        score: (result.score * 100).toFixed(1),
        // Only the first three lines of the chunk are returned as a preview.
        snippet: result.content.split('\n').slice(0, 3).join('\n'),
      })),
    };
  } catch (error) {
    return { error: error?.message ?? String(error), results: [] };
  } finally {
    // Close exactly once on every path. A failure while closing must not
    // replace a result that was already computed, so it is only logged.
    try {
      await closeStore();
    } catch (closeError) {
      console.error('[Worker] Failed to close store:', closeError?.message ?? closeError);
    }
  }
}
|
|
64
|
+
|
|
65
|
+
/**
 * Handles a single message from the supervisor and replies on `port`.
 *
 * Supported messages:
 *  - `{ type: 'health-check' }` -> replies `{ id: -1, type: 'pong' }`
 *  - `{ type: 'search', id, query, repositoryPath? }` -> replies `{ id, result }`
 * Any other type is answered with an error result so the sender is not left
 * waiting for a reply that never comes.
 *
 * @param {object} msg - Message received from the parent thread.
 * @param {{ postMessage: Function }} port - Port used to send the reply.
 * @returns {Promise<void>} Resolves after a reply was attempted; never rejects.
 */
async function handleWorkerMessage(msg, port) {
  // `??` rather than `||`: request id 0 is valid and must be echoed back,
  // otherwise the reply for the first request would carry id -1.
  const id = msg?.id ?? -1;

  try {
    if (msg?.type === 'health-check') {
      port.postMessage({ id: -1, type: 'pong' });
      return;
    }

    if (msg?.type === 'search') {
      const result = await performSearch(msg.repositoryPath || process.cwd(), msg.query);
      port.postMessage({ id, result });
      return;
    }

    port.postMessage({
      id,
      result: { error: `Unknown message type: ${String(msg?.type)}`, results: [] },
    });
  } catch (error) {
    const message = error?.message ?? String(error);
    console.error('[Worker] Uncaught error:', message);
    try {
      port.postMessage({ id, result: { error: message, results: [] } });
    } catch (e) {
      console.error('[Worker] Failed to send error response');
    }
  }
}

// Only wire up listeners when running as a worker thread (parentPort is null
// when this module is loaded on the main thread).
if (parentPort) {
  parentPort.on('message', (msg) => {
    // handleWorkerMessage catches its own errors, so the promise is not awaited.
    void handleWorkerMessage(msg, parentPort);
  });

  process.on('uncaughtException', (error) => {
    console.error('[Worker] Uncaught exception:', error.message);
    try {
      parentPort.postMessage({
        error: error.message,
        results: [],
      });
    } catch (e) {
      console.error('[Worker] Failed to report uncaught exception');
    }
  });

  process.on('unhandledRejection', (reason) => {
    console.error('[Worker] Unhandled rejection:', reason);
  });
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { Worker } from 'worker_threads';
|
|
2
|
+
import { resolve, dirname } from 'path';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
|
|
5
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
6
|
+
|
|
7
|
+
/**
 * Owns the search worker thread: starts it, restarts it with exponential
 * backoff when it fails, pings it periodically, and correlates requests with
 * replies by numeric id.
 *
 * Requests always settle with a plain result object; failures are reported as
 * `{ error, results: [] }` rather than as rejections.
 */
export class WorkerSupervisor {
  constructor() {
    this.worker = null;
    this.restartDelay = 1000;
    this.failureCount = 0;
    // id -> callback that settles the pending request with a result object.
    this.requestQueue = new Map();
    this.requestId = 0;
    this.healthCheckInterval = null;
    // Handle of the single pending restart timer, or null when none is pending.
    this.restartTimer = null;
    // Set by shutdown() so the exit caused by terminate() does not trigger a restart.
    this.isShuttingDown = false;
    this.startWorker();
  }

  /**
   * Spawns the worker thread and attaches its event listeners. If spawning
   * throws, a restart is scheduled instead.
   */
  startWorker() {
    // An explicit start re-enables automatic restarts after a shutdown().
    this.isShuttingDown = false;
    try {
      const workerPath = resolve(__dirname, 'search-worker.js');
      const worker = new Worker(workerPath);
      this.worker = worker;

      worker.on('message', this.handleMessage.bind(this));
      worker.on('error', (err) => {
        console.error('[Supervisor] Worker error:', err.message);
        this.handleWorkerGone(worker, true);
      });
      worker.on('exit', (code) => {
        console.error('[Supervisor] Worker exited with code:', code);
        const failed = code !== 0;
        if (failed && !this.isShuttingDown) {
          this.failureCount++;
        }
        this.handleWorkerGone(worker, failed);
      });

      // Backoff is NOT reset here: a successful spawn says nothing about
      // whether the worker survives start-up. It is reset in handleMessage
      // once the worker has actually replied.
      console.error('[Supervisor] Worker started');
      this.setupHealthCheck();
    } catch (e) {
      console.error('[Supervisor] Worker start failed:', e.message);
      this.scheduleRestart();
    }
  }

  /**
   * Cleans up after a worker that errored or exited: drops the reference,
   * fails the requests it can no longer answer, and optionally schedules a
   * restart.
   *
   * @param {Worker} worker - The worker the event came from.
   * @param {boolean} failed - Whether the worker ended abnormally.
   */
  handleWorkerGone(worker, failed) {
    // Ignore late events from a worker that has already been replaced.
    if (this.worker === worker) {
      this.worker = null;
      this.failPendingRequests('Worker exited before responding');
    }
    if (failed) {
      this.scheduleRestart();
    }
  }

  /**
   * Settles every pending request with an error result.
   *
   * @param {string} reason - Error text reported to the callers.
   */
  failPendingRequests(reason) {
    const pending = [...this.requestQueue.values()];
    this.requestQueue.clear();
    for (const settle of pending) {
      settle({ error: reason, results: [] });
    }
  }

  /**
   * (Re)starts the 30-second ping to the worker. Replies are ignored by
   * handleMessage apart from resetting the restart backoff.
   */
  setupHealthCheck() {
    if (this.healthCheckInterval) clearInterval(this.healthCheckInterval);
    this.healthCheckInterval = setInterval(() => {
      if (this.worker) {
        try {
          this.worker.postMessage({ type: 'health-check', id: -1 });
        } catch (e) {
          console.error('[Supervisor] Health check failed:', e.message);
        }
      }
    }, 30000);
  }

  /**
   * Schedules a worker restart after the current backoff delay and doubles
   * the delay (capped at 60 s). Does nothing while a restart is already
   * pending or after shutdown().
   */
  scheduleRestart() {
    // 'error' is followed by 'exit'; without this guard both would schedule
    // a restart and double the backoff twice for a single failure.
    if (this.isShuttingDown || this.restartTimer) return;

    const delay = Math.min(this.restartDelay, 60000);
    console.error('[Supervisor] Restart scheduled in', delay, 'ms');
    this.restartTimer = setTimeout(() => {
      this.restartTimer = null;
      if (!this.worker) {
        this.startWorker();
      }
    }, delay);
    this.restartDelay = Math.min(this.restartDelay * 2, 60000);
  }

  /**
   * Sends a request to the worker and waits for the matching reply.
   *
   * @param {object} data - Request payload; an `id` field is assigned by the supervisor.
   * @returns {Promise<object>} The worker's result, or `{ error, results: [] }`
   *   when the worker is unavailable, dies, cannot be reached, or does not
   *   answer within 10 minutes.
   */
  async sendRequest(data) {
    if (!this.worker) {
      return { error: 'Worker unavailable, restarting...', results: [] };
    }

    return new Promise((settle) => {
      const id = this.requestId++;

      const timeout = setTimeout(() => {
        if (this.requestQueue.delete(id)) {
          settle({ error: 'Request timeout', results: [] });
        }
      }, 600000);

      this.requestQueue.set(id, (result) => {
        clearTimeout(timeout);
        settle(result);
      });

      try {
        // `id` goes last so a stray `id` in the payload cannot overwrite the
        // correlation id.
        this.worker.postMessage({ ...data, id });
      } catch (e) {
        clearTimeout(timeout);
        this.requestQueue.delete(id);
        settle({ error: 'Worker communication failed', results: [] });
        this.scheduleRestart();
      }
    });
  }

  /**
   * Routes a worker message to the request waiting for it.
   *
   * @param {object} msg - Message from the worker; `id === -1` marks
   *   health-check traffic that has no waiting request.
   */
  handleMessage(msg) {
    // Any id-carrying reply shows the worker is up, so backoff starts over.
    if (typeof msg?.id === 'number') {
      this.restartDelay = 1000;
      this.failureCount = 0;
    }

    if (msg?.id === -1) return;

    const settle = this.requestQueue.get(msg?.id);
    if (settle) {
      this.requestQueue.delete(msg.id);
      settle(msg.result ?? msg);
    }
  }

  /**
   * Stops the health check and any pending restart, terminates the worker,
   * and fails every request that is still waiting.
   */
  shutdown() {
    this.isShuttingDown = true;

    if (this.healthCheckInterval) clearInterval(this.healthCheckInterval);
    this.healthCheckInterval = null;

    if (this.restartTimer) clearTimeout(this.restartTimer);
    this.restartTimer = null;

    if (this.worker) {
      const worker = this.worker;
      this.worker = null;
      try {
        // terminate() returns a promise; handle its rejection explicitly.
        Promise.resolve(worker.terminate()).catch((e) => {
          console.error('[Supervisor] Worker terminate failed:', e?.message ?? e);
        });
      } catch (e) {
        console.error('[Supervisor] Worker terminate failed:', e?.message ?? e);
      }
    }

    this.failPendingRequests('Supervisor shut down');
  }
}
|
|
123
|
+
|
|
124
|
+
export const supervisor = new WorkerSupervisor();
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import { fileURLToPath } from 'url';
|
|
4
|
-
|
|
5
|
-
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
6
|
-
|
|
7
|
-
// Patch @huggingface/transformers dist file for Windows compatibility
|
|
8
|
-
const distPath = path.join(__dirname, '..', 'node_modules', '@huggingface', 'transformers', 'dist', 'transformers.node.mjs');
|
|
9
|
-
|
|
10
|
-
if (!fs.existsSync(distPath)) {
|
|
11
|
-
console.log('transformers.node.mjs not found, skipping patch');
|
|
12
|
-
process.exit(0);
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
let distContent = fs.readFileSync(distPath, 'utf-8');
|
|
16
|
-
|
|
17
|
-
// Check if already patched
|
|
18
|
-
if (distContent.includes('SHARP_PATCHED_FOR_WINDOWS')) {
|
|
19
|
-
console.log('transformers.node.mjs already patched');
|
|
20
|
-
process.exit(0);
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
// Remove sharp import line
|
|
24
|
-
distContent = distContent.replace(
|
|
25
|
-
/import \* as __WEBPACK_EXTERNAL_MODULE_sharp__ from "sharp";\n/,
|
|
26
|
-
'// SHARP_PATCHED_FOR_WINDOWS: sharp removed\n'
|
|
27
|
-
);
|
|
28
|
-
|
|
29
|
-
// Replace sharp module exports with stub
|
|
30
|
-
distContent = distContent.replace(
|
|
31
|
-
/module\.exports = __WEBPACK_EXTERNAL_MODULE_sharp__;/g,
|
|
32
|
-
'module.exports = {};'
|
|
33
|
-
);
|
|
34
|
-
|
|
35
|
-
// Replace image processing error with fallback
|
|
36
|
-
distContent = distContent.replace(
|
|
37
|
-
/} else \{\s*throw new Error\('Unable to load image processing library\.'\);\s*\}/,
|
|
38
|
-
'} else {\n loadImageFunction = async () => { throw new Error(\'Image processing unavailable\'); };\n}'
|
|
39
|
-
);
|
|
40
|
-
|
|
41
|
-
fs.writeFileSync(distPath, distContent);
|
|
42
|
-
console.log('Successfully patched transformers.node.mjs for Windows compatibility');
|