code-graph-context 2.9.0 → 2.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +101 -26
- package/dist/cli/cli.js +250 -10
- package/dist/core/embeddings/embedding-sidecar.js +244 -0
- package/dist/core/embeddings/embeddings.service.js +60 -132
- package/dist/core/embeddings/local-embeddings.service.js +43 -0
- package/dist/core/embeddings/openai-embeddings.service.js +114 -0
- package/dist/mcp/handlers/graph-generator.handler.js +6 -5
- package/dist/mcp/mcp.server.js +5 -0
- package/dist/mcp/service-init.js +24 -3
- package/dist/mcp/tools/search-codebase.tool.js +37 -13
- package/dist/mcp/tools/session-note.tool.js +5 -6
- package/dist/storage/neo4j/neo4j.service.js +4 -4
- package/package.json +3 -1
- package/sidecar/embedding_server.py +147 -0
- package/sidecar/requirements.txt +5 -0
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Sidecar Manager
|
|
3
|
+
* Manages a Python FastAPI process that serves local embedding requests.
|
|
4
|
+
* The sidecar loads the model once and keeps it warm between requests.
|
|
5
|
+
*/
|
|
6
|
+
import { spawn } from 'child_process';
|
|
7
|
+
import { existsSync } from 'fs';
|
|
8
|
+
import { dirname, join } from 'path';
|
|
9
|
+
import { fileURLToPath } from 'url';
|
|
10
|
+
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

/**
 * Default sidecar configuration. Port and model are overridable via env.
 * An unparseable EMBEDDING_SIDECAR_PORT yields NaN, which is falsy, so the
 * `||` fallback to 8787 is intentional here.
 */
const DEFAULT_CONFIG = {
    port: parseInt(process.env.EMBEDDING_SIDECAR_PORT ?? '', 10) || 8787,
    host: '127.0.0.1',
    model: process.env.EMBEDDING_MODEL ?? 'Qodo/Qodo-Embed-1-1.5B',
    startupTimeoutMs: 120_000, // 2 min — first run downloads the model
    requestTimeoutMs: 60_000,
};

/**
 * Embedding Sidecar Manager
 * Manages a Python FastAPI (uvicorn) process that serves local embedding
 * requests. The sidecar loads the model once and keeps it warm between
 * requests. Lifecycle: lazy start on first embed, graceful SIGTERM stop
 * with a SIGKILL escalation after 5s.
 */
export class EmbeddingSidecar {
    process = null;       // child process handle; null when not running
    readyPromise = null;  // in-flight/completed startup; null when stopped
    config;
    _dimensions = null;   // reported by the sidecar /health endpoint
    stopping = false;
    constructor(config = {}) {
        this.config = { ...DEFAULT_CONFIG, ...config };
    }
    get baseUrl() {
        return `http://${this.config.host}:${this.config.port}`;
    }
    /** Vector dimensions reported by the sidecar, or null before the first health check. */
    get dimensions() {
        return this._dimensions;
    }
    get isRunning() {
        return this.process !== null && !this.stopping;
    }
    /**
     * Start the sidecar process. No-ops if already running — concurrent
     * callers share the same ready promise. Resolves when the server is
     * healthy; on failure, state is reset so a later start() retries cleanly.
     */
    async start() {
        if (this.readyPromise)
            return this.readyPromise;
        this.stopping = false;
        this.readyPromise = this.doStart();
        try {
            await this.readyPromise;
        }
        catch (err) {
            // Clean up on failed start so the next start() begins from scratch
            this.cleanup();
            throw err;
        }
    }
    /**
     * Resolve the Python interpreter — prefer the sidecar's venv, fall back
     * to system python3.
     * Fix: the venv layout differs per platform — Windows puts the
     * interpreter under .venv/Scripts/python.exe, POSIX under .venv/bin/.
     */
    resolvePython(sidecarDir) {
        const candidates = process.platform === 'win32'
            ? [join(sidecarDir, '.venv', 'Scripts', 'python.exe')]
            : [join(sidecarDir, '.venv', 'bin', 'python3'), join(sidecarDir, '.venv', 'bin', 'python')];
        for (const candidate of candidates) {
            if (existsSync(candidate))
                return candidate;
        }
        return 'python3';
    }
    async doStart() {
        // Reuse a server already listening on the port (e.g. a previous run)
        if (await this.checkHealth()) {
            console.error(`[embedding-sidecar] Server already running on ${this.baseUrl}`);
            return;
        }
        // sidecar/ lives at project root — go up from dist/core/embeddings/ or src/core/embeddings/
        const sidecarDir = join(__dirname, '..', '..', '..', 'sidecar');
        const python = this.resolvePython(sidecarDir);
        // Fix: verify the interpreter we are actually going to spawn,
        // not unconditionally the system python3.
        await this.verifyPython(python);
        console.error(`[embedding-sidecar] Starting on ${this.baseUrl} (python: ${python}, model: ${this.config.model})`);
        this.process = spawn(python, ['-m', 'uvicorn', 'embedding_server:app', '--host', this.config.host, '--port', String(this.config.port)], {
            cwd: sidecarDir,
            stdio: ['ignore', 'pipe', 'pipe'],
            env: {
                ...process.env,
                EMBEDDING_MODEL: this.config.model,
            },
        });
        // Forward stderr for visibility (model loading progress, errors)
        this.process.stderr?.on('data', (data) => {
            const line = data.toString().trim();
            if (line)
                console.error(`[embedding-sidecar] ${line}`);
        });
        this.process.on('error', (err) => {
            console.error(`[embedding-sidecar] Process error: ${err.message}`);
        });
        this.process.on('exit', (code, signal) => {
            if (!this.stopping) {
                console.error(`[embedding-sidecar] Process exited unexpectedly (code=${code}, signal=${signal})`);
            }
            this.cleanup();
        });
        // Poll until healthy
        await this.waitForHealthy();
    }
    /**
     * Check that the given interpreter exists and runs, rejecting with an
     * actionable message when Python is missing entirely.
     * @param {string} [python='python3'] interpreter to probe; defaults to
     *   system python3 for backward compatibility.
     */
    async verifyPython(python = 'python3') {
        return new Promise((resolve, reject) => {
            const check = spawn(python, ['--version'], { stdio: 'pipe' });
            let output = '';
            check.stdout?.on('data', (d) => (output += d.toString()));
            check.stderr?.on('data', (d) => (output += d.toString()));
            check.on('error', () => {
                reject(new Error('python3 not found. Local embeddings require Python 3.10+.\n\n' +
                    'Install Python and the sidecar dependencies:\n' +
                    ' pip install -r sidecar/requirements.txt\n\n' +
                    'Or set OPENAI_ENABLED=true to use OpenAI instead.'));
            });
            check.on('close', (code) => {
                if (code !== 0) {
                    reject(new Error(`python3 check failed: ${output}`));
                }
                else {
                    resolve();
                }
            });
        });
    }
    /**
     * Poll /health until the sidecar responds or startupTimeoutMs elapses.
     * Fails fast if the child process dies during startup.
     */
    async waitForHealthy() {
        const start = Date.now();
        const pollInterval = 1000;
        while (Date.now() - start < this.config.startupTimeoutMs) {
            if (this.stopping)
                throw new Error('Sidecar stopped during startup');
            // Fix: the 'exit' handler nulls this.process via cleanup(); without
            // this check we would keep polling until the full startup timeout.
            if (this.process === null) {
                throw new Error('Sidecar process exited during startup');
            }
            if (this.process.exitCode !== null && this.process.exitCode !== undefined) {
                throw new Error(`Sidecar process exited during startup with code ${this.process.exitCode}`);
            }
            if (await this.checkHealth()) {
                console.error(`[embedding-sidecar] Ready (${Date.now() - start}ms)`);
                return;
            }
            await new Promise((r) => setTimeout(r, pollInterval));
        }
        throw new Error(`Embedding sidecar failed to start within ${this.config.startupTimeoutMs}ms.\n` +
            'This usually means the model is still downloading or dependencies are missing.\n\n' +
            'Try running manually:\n' +
            ' cd sidecar && python3 -m uvicorn embedding_server:app --host 127.0.0.1 --port 8787');
    }
    /**
     * One-shot /health probe with a 2s abort. Also caches the reported
     * embedding dimensions as a side effect. Never throws — returns false
     * on any failure.
     */
    async checkHealth() {
        try {
            const controller = new AbortController();
            const timeout = setTimeout(() => controller.abort(), 2000);
            const res = await fetch(`${this.baseUrl}/health`, { signal: controller.signal });
            clearTimeout(timeout);
            if (res.ok) {
                const data = (await res.json());
                if (data.dimensions)
                    this._dimensions = data.dimensions;
                return data.status === 'ok';
            }
            return false;
        }
        catch {
            return false;
        }
    }
    /**
     * Embed an array of texts. Lazily starts the sidecar if not running.
     * @returns array of embedding vectors in input order
     * @throws on sidecar/HTTP failure, or with a timeout message on abort
     */
    async embed(texts) {
        await this.start();
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), this.config.requestTimeoutMs);
        try {
            const res = await fetch(`${this.baseUrl}/embed`, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ texts }),
                signal: controller.signal,
            });
            if (!res.ok) {
                const detail = await res.text();
                throw new Error(`Sidecar embed failed (${res.status}): ${detail}`);
            }
            const data = (await res.json());
            if (data.dimensions)
                this._dimensions = data.dimensions;
            return data.embeddings;
        }
        catch (err) {
            if (err instanceof Error && err.name === 'AbortError') {
                throw new Error(`Embedding request timed out after ${this.config.requestTimeoutMs}ms`);
            }
            throw err;
        }
        finally {
            clearTimeout(timeout);
        }
    }
    /**
     * Embed a single text. Convenience wrapper.
     */
    async embedText(text) {
        const [embedding] = await this.embed([text]);
        return embedding;
    }
    /**
     * Stop the sidecar process: SIGTERM, then SIGKILL after 5s.
     */
    async stop() {
        this.stopping = true;
        if (this.process) {
            console.error('[embedding-sidecar] Stopping...');
            await new Promise((resolve) => {
                const forceKill = setTimeout(() => {
                    if (this.process) {
                        this.process.kill('SIGKILL');
                    }
                    resolve();
                }, 5000);
                // Fix: register the exit listener BEFORE signalling, so a fast
                // exit between kill() and listener attachment is not missed;
                // `once` avoids accumulating listeners across restarts.
                this.process?.once('exit', () => {
                    clearTimeout(forceKill);
                    resolve();
                });
                this.process?.kill('SIGTERM');
            });
        }
        this.cleanup();
    }
    /** Reset process/ready state so a later start() begins fresh. */
    cleanup() {
        this.process = null;
        this.readyPromise = null;
    }
}
|
|
230
|
+
/**
 * Module-level singleton — one warm sidecar shared by every tool call.
 * Created lazily on first access; torn down via stopEmbeddingSidecar().
 */
let sidecarInstance = null;
export const getEmbeddingSidecar = () => {
    if (sidecarInstance === null) {
        sidecarInstance = new EmbeddingSidecar();
    }
    return sidecarInstance;
};
export const stopEmbeddingSidecar = async () => {
    if (!sidecarInstance)
        return;
    await sidecarInstance.stop();
    sidecarInstance = null;
};
|
|
@@ -1,146 +1,74 @@
|
|
|
1
|
-
import OpenAI from 'openai';
|
|
2
|
-
import { debugLog } from '../../mcp/utils.js';
|
|
3
|
-
import { getTimeoutConfig } from '../config/timeouts.js';
|
|
4
1
|
/**
|
|
5
|
-
*
|
|
6
|
-
*
|
|
2
|
+
* Embeddings Service — barrel module
|
|
3
|
+
*
|
|
4
|
+
* Exports a common interface and a factory. Consumers do `new EmbeddingsService()`
|
|
5
|
+
* and get the right implementation based on OPENAI_ENABLED.
|
|
6
|
+
*
|
|
7
|
+
* OPENAI_ENABLED=true → OpenAI text-embedding-3-large (requires OPENAI_API_KEY)
|
|
8
|
+
* default → Local Python sidecar with Qodo-Embed-1-1.5B
|
|
7
9
|
*/
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
import { LocalEmbeddingsService } from './local-embeddings.service.js';
|
|
11
|
+
import { OpenAIEmbeddingsService } from './openai-embeddings.service.js';
|
|
12
|
+
// Re-export error classes so existing imports keep working
|
|
13
|
+
export { OpenAIConfigError, OpenAIAPIError } from './openai-embeddings.service.js';
|
|
14
|
+
// Batch defaults re-exported for callers (e.g. graph-generator.handler);
// provider implementations keep their own internal copies.
export const EMBEDDING_BATCH_CONFIG = {
    maxBatchSize: 100,
    delayBetweenBatchesMs: 500,
};
|
|
14
18
|
/**
|
|
15
|
-
*
|
|
19
|
+
* Known dimensions per model.
|
|
20
|
+
* For unlisted models, dimensions are detected at runtime from the sidecar health endpoint.
|
|
16
21
|
*/
|
|
17
|
-
export
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
22
|
+
/**
 * Known vector dimensions per embedding model. Models not listed here get
 * their dimensions detected at runtime from the sidecar health endpoint.
 */
export const EMBEDDING_DIMENSIONS = {
    // OpenAI models
    'text-embedding-3-large': 3072,
    'text-embedding-3-small': 1536,
    // Local models (via sidecar)
    'Qodo/Qodo-Embed-1-1.5B': 1536,
    'sentence-transformers/all-MiniLM-L6-v2': 384,
    'sentence-transformers/all-mpnet-base-v2': 768,
    'BAAI/bge-small-en-v1.5': 384,
    'BAAI/bge-base-en-v1.5': 768,
    'nomic-ai/nomic-embed-text-v1.5': 768,
};

/** True when the user has opted into OpenAI embeddings via OPENAI_ENABLED=true. */
export const isOpenAIEnabled = () => process.env.OPENAI_ENABLED?.toLowerCase() === 'true';

/**
 * Vector dimensions for the active embedding provider. Known models map
 * statically; unknown models fall back to the provider default (3072 for
 * OpenAI, 1536 for local) — local dimensions are then verified at runtime
 * when the sidecar starts and reports via /health.
 */
export const getEmbeddingDimensions = () => {
    const openai = isOpenAIEnabled();
    const model = openai
        ? (process.env.OPENAI_EMBEDDING_MODEL ?? 'text-embedding-3-large')
        : (process.env.EMBEDDING_MODEL ?? 'Qodo/Qodo-Embed-1-1.5B');
    return EMBEDDING_DIMENSIONS[model] ?? (openai ? 3072 : 1536);
};
|
|
51
|
+
/**
 * Provider-selecting facade. `new EmbeddingsService()` remains a drop-in
 * replacement everywhere it was used: it delegates to OpenAI when
 * OPENAI_ENABLED=true, otherwise to the local Python sidecar.
 */
export class EmbeddingsService {
    impl;
    constructor(model) {
        this.impl = isOpenAIEnabled()
            ? new OpenAIEmbeddingsService(model)
            : new LocalEmbeddingsService();
    }
    /** Embed one text via the active provider. */
    embedText(text) {
        return this.impl.embedText(text);
    }
    /** Embed many texts via the active provider. */
    embedTexts(texts) {
        return this.impl.embedTexts(texts);
    }
    /** Embed many texts with provider-appropriate batching. */
    embedTextsInBatches(texts, batchSize) {
        return this.impl.embedTextsInBatches(texts, batchSize);
    }
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local Embeddings Service
|
|
3
|
+
* Uses a Python sidecar running Qodo-Embed-1-1.5B (or configurable model).
|
|
4
|
+
* Default provider — no API key required.
|
|
5
|
+
*/
|
|
6
|
+
import { debugLog } from '../../mcp/utils.js';
|
|
7
|
+
import { getEmbeddingSidecar } from './embedding-sidecar.js';
|
|
8
|
+
const BATCH_CONFIG = {
    maxBatchSize: 8, // Small batches — 1.5B model on MPS OOMs at higher values on 16GB machines
};
/**
 * Local Embeddings Service
 * Uses the Python sidecar running Qodo-Embed-1-1.5B (or a configurable
 * model). Default provider — no API key required.
 */
export class LocalEmbeddingsService {
    /** Embed a single text via the shared sidecar. */
    async embedText(text) {
        const sidecar = getEmbeddingSidecar();
        return sidecar.embedText(text);
    }
    /** Embed many texts in one sidecar call. Returns [] for empty input. */
    async embedTexts(texts) {
        if (texts.length === 0)
            return [];
        const sidecar = getEmbeddingSidecar();
        return sidecar.embed(texts);
    }
    /**
     * Embed texts in capped batches. Callers (e.g. graph-generator) may
     * request 100 per batch, which OOMs the local model, so the effective
     * size is clamped to BATCH_CONFIG.maxBatchSize.
     * @param texts texts to embed; result preserves input order
     * @param batchSize requested batch size (clamped to [1, maxBatchSize])
     */
    async embedTextsInBatches(texts, batchSize = BATCH_CONFIG.maxBatchSize) {
        if (texts.length === 0)
            return [];
        // Clamp low as well — a zero/negative batchSize would loop forever.
        const safeBatchSize = Math.max(1, Math.min(batchSize, BATCH_CONFIG.maxBatchSize));
        await debugLog('Batch embedding started', { provider: 'local', textCount: texts.length });
        const sidecar = getEmbeddingSidecar();
        const results = [];
        const totalBatches = Math.ceil(texts.length / safeBatchSize);
        for (let i = 0; i < texts.length; i += safeBatchSize) {
            const batch = texts.slice(i, i + safeBatchSize);
            // Fix: index must divide by the clamped size — dividing by the
            // caller's batchSize made progress logging wrong whenever
            // batchSize exceeded the cap.
            const batchIndex = Math.floor(i / safeBatchSize) + 1;
            await debugLog('Embedding batch progress', {
                provider: 'local',
                batchIndex,
                totalBatches,
                batchSize: batch.length,
            });
            const batchResults = await sidecar.embed(batch);
            results.push(...batchResults);
        }
        return results;
    }
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI Embeddings Service
|
|
3
|
+
* Uses OpenAI's text-embedding API. Requires OPENAI_API_KEY.
|
|
4
|
+
* Opt-in via OPENAI_ENABLED=true.
|
|
5
|
+
*/
|
|
6
|
+
import OpenAI from 'openai';
|
|
7
|
+
import { debugLog } from '../../mcp/utils.js';
|
|
8
|
+
import { getTimeoutConfig } from '../config/timeouts.js';
|
|
9
|
+
/** Raised when OpenAI is selected but misconfigured (e.g. missing API key). */
export class OpenAIConfigError extends Error {
    constructor(message) {
        super(message);
        this.name = 'OpenAIConfigError';
    }
}
/** Raised for OpenAI API failures; carries the HTTP status when known. */
export class OpenAIAPIError extends Error {
    statusCode;
    constructor(message, statusCode) {
        super(message);
        this.statusCode = statusCode;
        this.name = 'OpenAIAPIError';
    }
}
const BATCH_CONFIG = {
    maxBatchSize: 100,
    delayBetweenBatchesMs: 500,
};
/**
 * OpenAI Embeddings Service
 * Uses OpenAI's text-embedding API. Requires OPENAI_API_KEY.
 * Opt-in via OPENAI_ENABLED=true.
 */
export class OpenAIEmbeddingsService {
    openai;
    model;
    constructor(model = 'text-embedding-3-large') {
        const apiKey = process.env.OPENAI_API_KEY;
        if (!apiKey) {
            throw new OpenAIConfigError('OPENAI_API_KEY environment variable is required.\n\n' +
                'To use semantic search features (search_codebase, natural_language_to_cypher), ' +
                'you need an OpenAI API key.\n\n' +
                'Set it in your environment:\n' +
                ' export OPENAI_API_KEY=sk-...\n\n' +
                'Or in .env file:\n' +
                ' OPENAI_API_KEY=sk-...\n\n' +
                'Alternative: Use local embeddings (default) which require no API key.');
        }
        const { openai: { embeddingTimeoutMs } } = getTimeoutConfig();
        this.openai = new OpenAI({
            apiKey,
            timeout: embeddingTimeoutMs,
            maxRetries: 2, // SDK-level retry for transient errors
        });
        this.model = model;
    }
    /** Embed a single text; API errors are normalized via wrapError. */
    async embedText(text) {
        try {
            const res = await this.openai.embeddings.create({
                model: this.model,
                input: text,
            });
            const [first] = res.data;
            return first.embedding;
        }
        catch (error) {
            throw this.wrapError(error);
        }
    }
    /** Embed many texts in one API call, restoring input order by index. */
    async embedTexts(texts) {
        if (texts.length === 0)
            return [];
        try {
            const res = await this.openai.embeddings.create({
                model: this.model,
                input: texts,
            });
            // OpenAI tags each item with its index; sort to restore order.
            const ordered = res.data.sort((a, b) => a.index - b.index);
            return ordered.map((d) => d.embedding);
        }
        catch (error) {
            throw this.wrapError(error);
        }
    }
    /**
     * Embed texts in batches with a rate-limit pause between API calls.
     * @param texts texts to embed; result preserves input order
     * @param batchSize texts per API call (default 100)
     */
    async embedTextsInBatches(texts, batchSize = BATCH_CONFIG.maxBatchSize) {
        await debugLog('Batch embedding started', { provider: 'openai', textCount: texts.length });
        const results = [];
        const totalBatches = Math.ceil(texts.length / batchSize);
        let offset = 0;
        while (offset < texts.length) {
            const batch = texts.slice(offset, offset + batchSize);
            const batchIndex = Math.floor(offset / batchSize) + 1;
            await debugLog('Embedding batch progress', {
                provider: 'openai',
                batchIndex,
                totalBatches,
                batchSize: batch.length,
            });
            const batchResults = await this.embedTexts(batch);
            results.push(...batchResults);
            offset += batchSize;
            // Rate-limit protection between batches
            if (offset < texts.length) {
                await new Promise((resolve) => setTimeout(resolve, BATCH_CONFIG.delayBetweenBatchesMs));
            }
        }
        return results;
    }
    /** Normalize SDK/network failures into OpenAIAPIError with guidance. */
    wrapError(error) {
        const message = error.message ?? '';
        if (error.code === 'ETIMEDOUT' || message.includes('timeout')) {
            return new OpenAIAPIError('OpenAI embedding request timed out. Consider increasing OPENAI_EMBEDDING_TIMEOUT_MS.');
        }
        if (error.status === 429) {
            return new OpenAIAPIError('OpenAI rate limit exceeded. Wait a few minutes and try again.\n' +
                'Check your usage at https://platform.openai.com/usage', 429);
        }
        if (error.status === 401) {
            return new OpenAIAPIError('OpenAI API key is invalid or expired.\nPlease check your OPENAI_API_KEY.', 401);
        }
        if (error.status === 402 || message.includes('quota') || message.includes('billing')) {
            return new OpenAIAPIError('OpenAI quota exceeded or billing issue.\n' +
                'Check billing at https://platform.openai.com/settings/organization/billing', 402);
        }
        return new OpenAIAPIError(`OpenAI embedding failed: ${error.message}`, error.status);
    }
}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* Handles importing parsed graph data into Neo4j with embeddings
|
|
4
4
|
*/
|
|
5
5
|
import fs from 'fs/promises';
|
|
6
|
-
import { EMBEDDING_BATCH_CONFIG } from '../../core/embeddings/embeddings.service.js';
|
|
6
|
+
import { EMBEDDING_BATCH_CONFIG, getEmbeddingDimensions } from '../../core/embeddings/embeddings.service.js';
|
|
7
7
|
import { QUERIES } from '../../storage/neo4j/neo4j.service.js';
|
|
8
8
|
import { DEFAULTS } from '../constants.js';
|
|
9
9
|
import { debugLog } from '../utils.js';
|
|
@@ -181,10 +181,11 @@ export class GraphGeneratorHandler {
|
|
|
181
181
|
}
|
|
182
182
|
}
|
|
183
183
|
async createVectorIndexes() {
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
await this.neo4jService.run(QUERIES.
|
|
187
|
-
await
|
|
184
|
+
const dims = getEmbeddingDimensions();
|
|
185
|
+
console.error(`Creating vector indexes (dimensions: ${dims})...`);
|
|
186
|
+
await this.neo4jService.run(QUERIES.CREATE_EMBEDDED_VECTOR_INDEX(dims));
|
|
187
|
+
await this.neo4jService.run(QUERIES.CREATE_SESSION_NOTES_VECTOR_INDEX(dims));
|
|
188
|
+
await debugLog('Vector indexes created', { dimensions: dims });
|
|
188
189
|
}
|
|
189
190
|
flattenProperties(properties) {
|
|
190
191
|
const flattened = {};
|
package/dist/mcp/mcp.server.js
CHANGED
|
@@ -16,6 +16,7 @@ const rootDir = join(__dirname, '..', '..');
|
|
|
16
16
|
dotenv.config({ path: join(rootDir, '.env'), quiet: true });
|
|
17
17
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
18
18
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
19
|
+
import { stopEmbeddingSidecar } from '../core/embeddings/embedding-sidecar.js';
|
|
19
20
|
import { MCP_SERVER_CONFIG, MESSAGES } from './constants.js';
|
|
20
21
|
import { performIncrementalParse } from './handlers/incremental-parse.handler.js';
|
|
21
22
|
import { initializeServices } from './service-init.js';
|
|
@@ -109,6 +110,7 @@ const shutdown = async (signal) => {
|
|
|
109
110
|
await logServerStats(`shutdown-${signal}`);
|
|
110
111
|
try {
|
|
111
112
|
await watchManager.stopAllWatchers();
|
|
113
|
+
await stopEmbeddingSidecar();
|
|
112
114
|
await debugLog('Shutdown complete', { signal });
|
|
113
115
|
}
|
|
114
116
|
catch (error) {
|
|
@@ -122,11 +124,13 @@ process.on('uncaughtException', async (error) => {
|
|
|
122
124
|
console.error(JSON.stringify({ level: 'error', message: 'Uncaught exception', error: String(error), stack: error.stack }));
|
|
123
125
|
await debugLog('Uncaught exception', { error: String(error), stack: error.stack });
|
|
124
126
|
await logServerStats('uncaught-exception');
|
|
127
|
+
await stopEmbeddingSidecar();
|
|
125
128
|
});
|
|
126
129
|
process.on('unhandledRejection', async (reason) => {
|
|
127
130
|
console.error(JSON.stringify({ level: 'error', message: 'Unhandled rejection', reason: String(reason) }));
|
|
128
131
|
await debugLog('Unhandled rejection', { reason: String(reason) });
|
|
129
132
|
await logServerStats('unhandled-rejection');
|
|
133
|
+
await stopEmbeddingSidecar();
|
|
130
134
|
});
|
|
131
135
|
// Log other process events that might indicate issues
|
|
132
136
|
process.on('warning', async (warning) => {
|
|
@@ -135,6 +139,7 @@ process.on('warning', async (warning) => {
|
|
|
135
139
|
process.on('beforeExit', async (code) => {
|
|
136
140
|
await debugLog('Process beforeExit', { code });
|
|
137
141
|
await logServerStats('before-exit');
|
|
142
|
+
await stopEmbeddingSidecar();
|
|
138
143
|
});
|
|
139
144
|
process.on('exit', (code) => {
|
|
140
145
|
// Note: Can't use async here, exit is synchronous
|