specmem-hardwicksoftware 3.7.9 → 3.7.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/claude-hooks/settings.json +73 -123
- package/claude-hooks/team-comms-enforcer.cjs +66 -55
- package/dist/codebase/fileReadWorker.js +90 -0
- package/dist/codebase/ingestion.js +231 -50
- package/dist/index.js +32 -0
- package/dist/mcp/embeddingServerManager.js +154 -0
- package/dist/team-members/teamCommsService.js +14 -0
- package/embedding-sandbox/frankenstein-embeddings.py +30 -6
- package/package.json +1 -1
- package/scripts/specmem-init.cjs +520 -157
package/scripts/specmem-init.cjs
CHANGED
@@ -2847,6 +2847,43 @@ async function coldStartEmbeddingDocker(projectPath, modelConfig, ui, codebaseRe
     return { serverRunning: false, warmupLatency: null, timeoutConfig };
   }
 
+  // EARLY BAIL-OUT: Check if Docker is actually usable before wasting time
+  // 1. Docker daemon must be running
+  // 2. The embedding image must exist locally (warm-start.sh uses specmem-embedding:latest)
+  // 3. OR there must be an existing container we can resume
+  // If none of these are true, skip Docker entirely — Stage 5 will use native Python.
+  try {
+    const { execSync } = require('child_process');
+    // Check if Docker daemon is accessible
+    try { execSync('docker info', { stdio: 'ignore', timeout: 5000 }); } catch {
+      initLog('[DOCKER] Docker daemon not accessible - skipping Docker stage');
+      ui.setSubStatus('⚠️ Docker not available - will use native Python');
+      return { serverRunning: false, warmupLatency: null, timeoutConfig };
+    }
+    // Check for existing specmem embedding containers (any state)
+    const containers = execSync(
+      `docker ps -a --filter "name=specmem-embedding" --format "{{.Names}}" 2>/dev/null`,
+      { encoding: 'utf8', timeout: 5000 }
+    ).trim();
+    if (!containers) {
+      // No containers — check if the image exists to cold-start from
+      const images = execSync(
+        `docker images -q specmem-embedding:latest 2>/dev/null`,
+        { encoding: 'utf8', timeout: 5000 }
+      ).trim();
+      if (!images) {
+        initLog('[DOCKER] No embedding containers and no specmem-embedding:latest image - skipping Docker');
+        ui.setSubStatus('⚠️ No Docker embedding image - will use native Python');
+        return { serverRunning: false, warmupLatency: null, timeoutConfig };
+      }
+    }
+    initLog(`[DOCKER] Found containers/image - proceeding with Docker warm-start`);
+  } catch (e) {
+    initLog(`[DOCKER] Pre-check failed (${e.message}) - skipping Docker`);
+    ui.setSubStatus('⚠️ Docker pre-check failed - will use native Python');
+    return { serverRunning: false, warmupLatency: null, timeoutConfig };
+  }
+
   // Spawn warm-start.sh with env vars for per-project socket
   const dockerProcess = spawn('bash', [warmStartScript], {
     cwd: path.dirname(warmStartScript),
@@ -2885,21 +2922,26 @@ async function coldStartEmbeddingDocker(projectPath, modelConfig, ui, codebaseRe
   });
 
   // Wait for Docker to start (up to 60s)
-  //
-
+  // DYNAMIC WAIT: Poll for Docker socket with adaptive intervals (max 5min)
+  const MAX_DOCKER_WAIT_MS = 300000; // 5 minute absolute cap
   const dockerStart = Date.now();
   ui.setStatus('Waiting for Docker container...');
   ui.setSubProgress(0.3);
 
-
+  let dockerPollInterval = 200; // start fast for Docker
+  while (Date.now() - dockerStart < MAX_DOCKER_WAIT_MS) {
     if (dockerStarted || fs.existsSync(socketPath)) {
       break;
     }
-    await new Promise(r => setTimeout(r,
-
-    //
-    if (
-
+    await new Promise(r => setTimeout(r, dockerPollInterval));
+    const elapsed = Date.now() - dockerStart;
+    // Slow down polling after 10s
+    if (elapsed > 10000) dockerPollInterval = Math.min(dockerPollInterval + 100, 2000);
+    // Progress update every ~5s
+    if (Math.floor(elapsed / 5000) !== Math.floor((elapsed - dockerPollInterval) / 5000)) {
+      const progress = Math.min(0.7, 0.3 + (elapsed / MAX_DOCKER_WAIT_MS) * 0.4);
+      ui.setSubProgress(progress);
+      ui.setSubStatus(`Waiting for Docker... (${Math.round(elapsed / 1000)}s)`);
     }
   }
 
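The dynamic wait above replaces a fixed-duration loop with adaptive polling: check frequently while a fast start is still likely, then back off to reduce churn, all under a hard deadline. A minimal standalone sketch of that pattern (the helper name and defaults are illustrative, not from the package):

    // Poll `isReady` until it returns true or the deadline passes.
    async function pollUntil(isReady, { maxWaitMs = 300000, slowMs = 2000 } = {}) {
      const start = Date.now();
      let interval = 200; // fast at first
      while (Date.now() - start < maxWaitMs) {
        if (await isReady()) return true;
        await new Promise(r => setTimeout(r, interval));
        // After 10s, widen the interval toward the ceiling to cut CPU and log churn
        if (Date.now() - start > 10000) interval = Math.min(interval + 100, slowMs);
      }
      return false; // deadline hit
    }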
@@ -3040,6 +3082,13 @@ async function coldStartEmbeddingDocker(projectPath, modelConfig, ui, codebaseRe
 // from the start. Previously, indexing only happened when MCP server started,
 // which meant the first session had no code search capability.
 
+// CRITICAL BUG FIX: Track the PID of the embedding server spawned by THIS init
+// process. killExistingEmbeddingServer() must NEVER kill our own child.
+// Without this, the init process spawns the server, the server writes its PID
+// to embedding.pid, and then a later call to killExistingEmbeddingServer()
+// reads that PID file and SIGTERMs our own child process.
+let spawnedEmbeddingPid = null;
+
 async function indexCodebase(projectPath, ui, embeddingResult) {
   ui.setStage(5, 'CODEBASE INDEXING');
 
@@ -3071,16 +3120,114 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
   initLog(`Socket paths: project=${projectSocketPath}, shared=${sharedSocketPath}`);
   initLog(`Project socket exists: ${fs.existsSync(projectSocketPath)}, Shared socket exists: ${fs.existsSync(sharedSocketPath)}`);
 
+  // SOCKET LIVENESS CHECK - validate socket is alive, not just that file exists
+  // Orphaned socket files (process died, socket file remains) cause ECONNREFUSED
+  // and stall the entire indexing pipeline.
+  function quickSocketAlive(sockPath) {
+    return new Promise((resolve) => {
+      const client = new net.Socket();
+      const timeout = setTimeout(() => { client.destroy(); resolve(false); }, 3000);
+      client.on('connect', () => { clearTimeout(timeout); client.destroy(); resolve(true); });
+      client.on('error', () => { clearTimeout(timeout); client.destroy(); resolve(false); });
+      client.connect(sockPath);
+    });
+  }
+
+  // DYNAMIC READINESS POLLING — no hardcoded timeouts.
+  // Polls for socket file + health check with adaptive intervals.
+  // Starts fast (500ms), slows down after 10s (2s intervals).
+  // Max wait: 300s (5 minutes). Returns true if server is ready.
+  const MAX_EMBED_WAIT_MS = 300000; // 5 minute absolute cap
+  async function waitForEmbeddingReady(sockPath, opts = {}) {
+    const { ui: _ui, label = 'server', logFn = initLog } = opts;
+    const start = Date.now();
+    let pollInterval = 500; // start fast
+    let lastLogTime = 0;
+
+    while (Date.now() - start < MAX_EMBED_WAIT_MS) {
+      const elapsed = Date.now() - start;
+
+      // Phase 1: Wait for socket FILE to appear
+      if (!fs.existsSync(sockPath)) {
+        if (elapsed - lastLogTime > 5000) {
+          const elapsedSec = Math.round(elapsed / 1000);
+          if (_ui) _ui.setSubStatus(`Waiting for ${label} socket... (${elapsedSec}s)`);
+          logFn(`[EMBED] Waiting for socket file: ${sockPath} (${elapsedSec}s elapsed)`);
+          lastLogTime = elapsed;
+        }
+        await new Promise(r => setTimeout(r, pollInterval));
+        if (elapsed > 10000) pollInterval = Math.min(pollInterval + 250, 2000);
+        continue;
+      }
+
+      // Phase 2: Socket file exists — check if server is actually responding
+      const alive = await quickSocketAlive(sockPath);
+      if (alive) {
+        const elapsedSec = Math.round((Date.now() - start) / 1000);
+        logFn(`[EMBED] ${label} ready after ${elapsedSec}s`);
+        if (_ui) _ui.setSubStatus(`✓ ${label} ready (${elapsedSec}s)`);
+        return true;
+      }
+
+      // Socket exists but not responding yet — server still warming up
+      if (elapsed - lastLogTime > 3000) {
+        const elapsedSec = Math.round(elapsed / 1000);
+        if (_ui) _ui.setSubStatus(`${label} warming up... (${elapsedSec}s)`);
+        lastLogTime = elapsed;
+      }
+      await new Promise(r => setTimeout(r, pollInterval));
+      if (elapsed > 10000) pollInterval = Math.min(pollInterval + 250, 2000);
+    }
+
+    const totalSec = Math.round((Date.now() - start) / 1000);
+    logFn(`[EMBED] ${label} failed to become ready after ${totalSec}s (max ${MAX_EMBED_WAIT_MS / 1000}s)`);
+    if (_ui) _ui.setSubStatus(`⚠️ ${label} not ready after ${totalSec}s`);
+    return false;
+  }
+
   // Project socket takes priority - it's the fresh one from Docker stage 4
   let activeSocketPath = null;
   if (fs.existsSync(projectSocketPath)) {
-
-
-
-
-
-
-
+    const alive = await quickSocketAlive(projectSocketPath);
+    if (alive) {
+      activeSocketPath = projectSocketPath;
+      initLog(`Using PROJECT socket: ${projectSocketPath} (verified alive)`);
+    } else {
+      // CRITICAL FIX: Don't remove socket if our spawned server owns it.
+      // Check the PID file to see if this socket belongs to our child process.
+      const pidFile = path.join(projectPath, 'specmem', 'sockets', 'embedding.pid');
+      let ownedByUs = false;
+      if (spawnedEmbeddingPid) {
+        try {
+          if (fs.existsSync(pidFile)) {
+            const pidContent = fs.readFileSync(pidFile, 'utf8').trim();
+            const filePid = parseInt(pidContent.split(':')[0], 10);
+            if (filePid === spawnedEmbeddingPid) {
+              ownedByUs = true;
+              initLog(`PROJECT socket not yet responsive but owned by our spawned PID ${spawnedEmbeddingPid} - keeping socket, server may still be warming up`);
+              activeSocketPath = projectSocketPath; // Trust our child, it's just warming up
+            }
+          }
+        } catch { /* ignore */ }
+      }
+      if (!ownedByUs) {
+        initLog(`PROJECT socket exists but DEAD (ECONNREFUSED) - removing orphaned socket: ${projectSocketPath}`);
+        try { fs.unlinkSync(projectSocketPath); } catch {}
+      }
+    }
+  }
+  if (!activeSocketPath && fs.existsSync(sharedSocketPath)) {
+    const alive = await quickSocketAlive(sharedSocketPath);
+    if (alive) {
+      activeSocketPath = sharedSocketPath;
+      initLog(`Using SHARED socket: ${sharedSocketPath} (verified alive)`);
+    } else {
+      initLog(`SHARED socket exists but DEAD - removing orphaned socket: ${sharedSocketPath}`);
+      try { fs.unlinkSync(sharedSocketPath); } catch {}
+    }
+  }
+  if (!activeSocketPath) {
+    initLog('WARNING: No live embedding socket found! Will spawn new server.');
   }
 
   // Check if embedding server is available from Stage 4
@@ -3088,6 +3235,8 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
   const socketExists = activeSocketPath !== null;
 
   // Helper: Kill any existing embedding server for this project before spawning a new one
+  // CRITICAL FIX: Never kill a server that was spawned by THIS init process.
+  // The spawnedEmbeddingPid variable tracks our child's PID to prevent self-kill.
   function killExistingEmbeddingServer(projectPath) {
     const pidFile = path.join(projectPath, 'specmem', 'sockets', 'embedding.pid');
     try {
@@ -3095,6 +3244,18 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
       const content = fs.readFileSync(pidFile, 'utf8').trim();
       const pid = parseInt(content.split(':')[0], 10);
       if (!pid || isNaN(pid)) return false;
+
+      // CRITICAL FIX: Never kill our own child process!
+      // This prevents the race condition where:
+      // 1. Init spawns embedding server
+      // 2. Server writes PID to embedding.pid
+      // 3. Later code path calls killExistingEmbeddingServer()
+      // 4. It reads the PID file and kills our own child
+      if (spawnedEmbeddingPid && pid === spawnedEmbeddingPid) {
+        initLog(`[EMBED] Skipping kill of PID ${pid} - this is OUR spawned server`);
+        return false;
+      }
+
       // Check if process is alive
       try { process.kill(pid, 0); } catch {
         // Process dead, clean up PID file
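The guard above pairs with the `process.kill(pid, 0)` idiom visible in the context lines: signal `0` performs no kill, it only checks whether the PID exists (throwing `ESRCH` if not). A small self-contained sketch of that probe (the helper name is illustrative, not from the package):

    // Returns true if `pid` refers to a live process.
    function isPidAlive(pid) {
      try {
        process.kill(pid, 0); // signal 0 = existence check, nothing is sent
        return true;
      } catch (err) {
        // EPERM means the process exists but is owned by another user
        return err.code === 'EPERM';
      }
    }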
@@ -3150,6 +3311,8 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
     safeMkdir(socketsDir);
 
     // Kill any existing embedding server before spawning a new one
+    // Reset spawnedEmbeddingPid since we're about to spawn a replacement
+    spawnedEmbeddingPid = null;
     killExistingEmbeddingServer(projectPath);
 
     // Clean up stale socket
@@ -3177,6 +3340,7 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
       cwd: path.dirname(embeddingScript),
       env: {
         ...process.env,
+        SPECMEM_EMBEDDING_SOCKET: projectSocketPath,
         SPECMEM_SOCKET_PATH: projectSocketPath,
         SPECMEM_PROJECT_PATH: projectPath
       },
@@ -3184,6 +3348,10 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
       stdio: ['ignore', embedLogFd, embedLogFd]
     });
 
+    // CRITICAL FIX: Track spawned PID so killExistingEmbeddingServer() won't kill it
+    spawnedEmbeddingPid = embeddingProcess.pid;
+    initLog(`[EMBED] Spawned embedding server with PID ${spawnedEmbeddingPid} - tracking to prevent self-kill`);
+
     // error handler BEFORE unref - prevents silent spawn failures
     embeddingProcess.on('error', (err) => {
       ui.setSubStatus('Embedding spawn error: ' + err.message);
@@ -3194,20 +3362,11 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
 
     embeddingProcess.unref();
 
-    //
-    // RELIABILITY FIX: Increased from 30s (60x500ms) to 60s (120x500ms)
-    // First-time model loading can take 20-30s, need enough buffer
+    // DYNAMIC WAIT: Poll for socket + health check with adaptive intervals (max 5min)
     ui.setSubStatus('Waiting for embedding server to start...');
-
-
-
-        activeSocketPath = projectSocketPath;
-        ui.setSubStatus('✓ Embedding server started!');
-        break;
-      }
-      if (i % 10 === 0) {
-        ui.setSubStatus(`Waiting for embedding server... (${i/2}s)`);
-      }
+    const serverReady = await waitForEmbeddingReady(projectSocketPath, { ui, label: 'Embedding server' });
+    if (serverReady) {
+      activeSocketPath = projectSocketPath;
     }
 
     if (!activeSocketPath) {
@@ -3267,14 +3426,23 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
       ui.setSubStatus('✓ Embedding server responding');
       initLog('Embedding socket test passed');
     } catch (e) {
-      ui.setSubStatus(`⚠️ Socket
-      initLog(`Embedding socket test FAILED: ${e.message}`, e);
+      ui.setSubStatus(`⚠️ Socket test failed: ${e.message} - retrying...`);
+      initLog(`Embedding socket test FAILED: ${e.message} - will retry before killing`, e);
 
-      //
+      // DYNAMIC READINESS POLL: Don't immediately kill — poll with adaptive intervals (max 5min)
+      const recovered = await waitForEmbeddingReady(activeSocketPath, { ui, label: 'Embedding server warmup' });
+
+      if (recovered) {
+        // Server is alive after retries - continue to indexing
+      } else {
+        // Server truly dead after 15s of retries - now kill and restart
+        // If this is our spawned server, it's been unresponsive for 15s - allow the kill
+        // by resetting spawnedEmbeddingPid (we're giving up on it)
       initLog('Attempting socket recovery - cleaning stale socket and restarting server...');
       ui.setStatus('Recovering embedding server...');
 
-      //
+      // Reset PID tracking since we're abandoning this server
+      spawnedEmbeddingPid = null;
       killExistingEmbeddingServer(projectPath);
 
       // Clean up the stale socket file
@@ -3313,49 +3481,15 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
       ui.setSubStatus('Waiting for Docker container to recover...');
       const recoverStart = Date.now();
 
-
-
-          break;
-        }
-        await new Promise(r => setTimeout(r, 500));
-        if (i % 10 === 0) {
-          ui.setSubStatus(`Recovery in progress... (${Math.round(i/2)}s)`);
-        }
-      }
-
+      // DYNAMIC WAIT: Poll for recovery with adaptive intervals (max 5min)
+      const recoveryReady = await waitForEmbeddingReady(projectSocketPath, { ui, label: 'Docker recovery' });
       const recoverLatency = Date.now() - recoverStart;
 
-      if (
+      if (recoveryReady) {
         initLog(`Docker recovery successful in ${recoverLatency}ms`);
-        ui.setSubStatus('✓ Embedding server recovered!');
         activeSocketPath = projectSocketPath;
-
-        // Verify the recovered socket is actually responding
-        try {
-          await new Promise((resolve, reject) => {
-            const verifyClient = new net.Socket();
-            let verifyData = '';
-            verifyClient.setTimeout(10000);
-            verifyClient.connect(projectSocketPath, () => {
-              verifyClient.write(JSON.stringify({ type: 'embed', text: 'recovery test' }) + '\n');
-            });
-            verifyClient.on('data', chunk => {
-              verifyData += chunk.toString();
-              if (verifyData.includes('embedding') || verifyData.includes('processing')) {
-                verifyClient.destroy();
-                resolve(true);
-              }
-            });
-            verifyClient.on('error', reject);
-            verifyClient.on('timeout', () => reject(new Error('timeout')));
-          });
-          initLog('Recovered socket verified - responding correctly');
-        } catch (verifyErr) {
-          initLog(`Recovered socket not responding: ${verifyErr.message}`);
-          activeSocketPath = null;
-        }
       } else {
-        initLog(`Docker recovery failed
+        initLog(`Docker recovery failed after ${recoverLatency}ms`);
         activeSocketPath = null;
       }
     } catch (recoverErr) {
@@ -3366,7 +3500,8 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
         initLog('warm-start.sh not found - cannot recover Docker container');
         activeSocketPath = null; // Mark as unavailable
       }
-    }
+      } // end else (server truly dead after retries)
+    } // end catch
     await qqms();
   }
 
@@ -3398,13 +3533,19 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
     initLog('Database connection test passed');
 
     // CRITICAL FIX: Create and set search_path to project schema for proper isolation
-    const schemaName = 'specmem_' + path.basename(projectPath).toLowerCase().replace(/[^a-z0-
+    const schemaName = 'specmem_' + path.basename(projectPath).toLowerCase().replace(/[^a-z0-9_]/g, '_').replace(/_+/g, '_').replace(/^_|_$/g, '');
 
     // Create schema if it doesn't exist
     await pool.query(`CREATE SCHEMA IF NOT EXISTS ${schemaName}`);
     initLog(`Project schema ensured: ${schemaName}`);
 
-    // Set search_path for
+    // Set search_path for ALL pool connections (not just the current one)
+    // pool.query() checks out different connections; SET only affects one.
+    // Using pool.on('connect') ensures every new connection gets the right search_path.
+    pool.on('connect', (client) => {
+      client.query(`SET search_path TO ${schemaName}, public`).catch(() => {});
+    });
+    // Also set it on the existing connection
     await pool.query(`SET search_path TO ${schemaName}, public`);
     initLog(`Database schema set to: ${schemaName}`);
     ui.setSubStatus(`Database connected (schema: ${schemaName})`);
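The `pool.on('connect')` change addresses a real node-postgres pitfall: each `pool.query()` may run on a different pooled client, and `SET search_path` is per-connection state, so setting it once covers only the one client that happened to execute it. A minimal sketch of the fix in isolation (connection string and schema name are placeholders):

    const { Pool } = require('pg');
    const pool = new Pool({ connectionString: process.env.DATABASE_URL });

    // Runs for every client the pool creates, so every connection
    // resolves unqualified table names against the project schema first.
    pool.on('connect', (client) => {
      client.query('SET search_path TO specmem_myproject, public').catch(() => {});
    });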
@@ -3514,6 +3655,50 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
       return results;
     }
 
+    // AUTO-CREATE codebase_files table if it doesn't exist
+    // CRITICAL: Init must not depend on MCP migrations having run first
+    // FIX: Use gen_random_uuid() (built-in PG13+) instead of uuid_generate_v4() (uuid-ossp extension)
+    // The uuid-ossp extension is installed in specmem_specmem schema, NOT public,
+    // so uuid_generate_v4() is unavailable when search_path is set to other project schemas.
+    // Also ensure vector extension exists in public schema (accessible to all project schemas).
+    try {
+      // vector extension must be in public schema so all project schemas can use vector type
+      await pool.query(`CREATE EXTENSION IF NOT EXISTS "vector" SCHEMA public`);
+      await pool.query(`
+        CREATE TABLE IF NOT EXISTS codebase_files (
+          id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+          file_path TEXT NOT NULL,
+          absolute_path TEXT NOT NULL,
+          file_name VARCHAR(255) NOT NULL,
+          extension VARCHAR(50),
+          language_id VARCHAR(50) NOT NULL DEFAULT 'unknown',
+          language_name VARCHAR(100) NOT NULL DEFAULT 'Unknown',
+          language_type VARCHAR(50) NOT NULL DEFAULT 'data',
+          content TEXT NOT NULL,
+          content_hash VARCHAR(64),
+          size_bytes INTEGER NOT NULL DEFAULT 0,
+          line_count INTEGER NOT NULL DEFAULT 0,
+          char_count INTEGER NOT NULL DEFAULT 0,
+          last_modified TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+          chunk_index INTEGER,
+          total_chunks INTEGER,
+          original_file_id UUID,
+          embedding vector(384),
+          project_path TEXT DEFAULT '/',
+          created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+          updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+          CONSTRAINT content_not_empty CHECK (length(content) > 0)
+        )
+      `);
+      await pool.query(`CREATE INDEX IF NOT EXISTS idx_codebase_files_content_hash ON codebase_files(content_hash)`);
+      await pool.query(`CREATE INDEX IF NOT EXISTS idx_codebase_files_path ON codebase_files(file_path)`);
+      await pool.query(`CREATE INDEX IF NOT EXISTS idx_codebase_files_project_path_file ON codebase_files(file_path, project_path)`);
+      await pool.query(`CREATE INDEX IF NOT EXISTS idx_codebase_files_project_path_hash ON codebase_files(project_path, content_hash)`);
+      initLog('[CODEBASE] Table codebase_files ensured');
+    } catch (e) {
+      initLog(`[CODEBASE] Table ensure warning: ${e.message}`);
+    }
+
     // Load existing hashes to skip unchanged files ONLY if they have embeddings
     // CRITICAL: Files without embeddings need to be re-indexed even if content matches!
     ui.setStatus('Checking existing index...');
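The `gen_random_uuid()` switch matters because that function has been built into PostgreSQL core since version 13, while `uuid_generate_v4()` comes from the `uuid-ossp` extension and resolves only when the extension's schema is on the current `search_path`. A short illustration (hypothetical queries):

    // Always resolves on PG13+, regardless of search_path:
    await pool.query('SELECT gen_random_uuid()');
    // Fails with "function uuid_generate_v4() does not exist" whenever
    // search_path omits the schema uuid-ossp was installed into:
    await pool.query('SELECT uuid_generate_v4()');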
@@ -3711,6 +3896,8 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
       const socketsDir = path.join(projectPath, 'specmem', 'sockets');
       if (!fs.existsSync(socketsDir)) fs.mkdirSync(socketsDir, { recursive: true });
       // Kill any existing embedding server before respawn
+      // Reset spawnedEmbeddingPid since we're about to spawn a replacement
+      spawnedEmbeddingPid = null;
       killExistingEmbeddingServer(projectPath);
       // Clean stale socket
       if (fs.existsSync(projectSocketPath)) {
@@ -3723,28 +3910,26 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
       const revalLogFd = fs.openSync(revalLogPath, 'a');
       const proc = spawn(pythonPath, [embeddingScript], {
         cwd: path.dirname(embeddingScript),
-        env: { ...process.env, SPECMEM_SOCKET_PATH: projectSocketPath, SPECMEM_PROJECT_PATH: projectPath },
+        env: { ...process.env, SPECMEM_EMBEDDING_SOCKET: projectSocketPath, SPECMEM_SOCKET_PATH: projectSocketPath, SPECMEM_PROJECT_PATH: projectPath },
         detached: true,
         stdio: ['ignore', revalLogFd, revalLogFd]
       });
+      // CRITICAL FIX: Track spawned PID so killExistingEmbeddingServer() won't kill it
+      spawnedEmbeddingPid = proc.pid;
+      initLog(`[EMBED] Revalidation spawned embedding server with PID ${spawnedEmbeddingPid} - tracking to prevent self-kill`);
       proc.on('error', () => {});
      fs.closeSync(revalLogFd);
      proc.unref();
-      //
-
-
-
-
-
-
-
-        consecutiveEmbeddingFailures = 0;
-        revalidateBackoffMs = 1000;
-        return true;
-      }
-    }
+      // DYNAMIC WAIT: Poll for readiness with adaptive intervals (max 5min)
+      const revalReady = await waitForEmbeddingReady(projectSocketPath, { label: 'Revalidation restart' });
+      if (revalReady) {
+        activeSocketPath = projectSocketPath;
+        initLog(`Embedding server auto-restarted successfully, socket at ${projectSocketPath}`);
+        consecutiveEmbeddingFailures = 0;
+        revalidateBackoffMs = 1000;
+        return true;
       }
-      initLog('Embedding server auto-restart:
+      initLog('Embedding server auto-restart: server did not become ready');
     }
   } catch (restartErr) {
     initLog(`Embedding server auto-restart failed: ${restartErr.message || restartErr}`);
@@ -4152,6 +4337,136 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
   let processed = 0;
   let lastUIUpdate = Date.now(); // throttle UI updates to reduce overhead
 
+  // Read maxConcurrent from model-config.json for parallel embedding batches
+  let embeddingMaxConcurrent = 3; // default
+  try {
+    const mcPath = path.join(projectPath, 'specmem', 'model-config.json');
+    if (fs.existsSync(mcPath)) {
+      const mc = JSON.parse(fs.readFileSync(mcPath, 'utf8'));
+      if (mc.embedding && mc.embedding.maxConcurrent) {
+        embeddingMaxConcurrent = mc.embedding.maxConcurrent;
+      }
+    }
+  } catch { /* use default */ }
+  initLog(`Embedding concurrency: ${embeddingMaxConcurrent} parallel batches`);
+
+  // STORE-THEN-EMBED: For large codebases (>1000 files), store files first
+  // then trigger Python server's batch processing (200 files/batch, direct DB)
+  if (files.length > 1000 && activeSocketPath) {
+    initLog(`Large codebase detected (${files.length} files) - using store-then-embed mode`);
+    ui.setStatus('Store-then-embed mode (large codebase)');
+    ui.setSubStatus('Phase 1: Storing files without embeddings...');
+
+    // Phase 1: Store all files without embeddings (fast - no socket calls)
+    await runWithConcurrency(files, async (filePath, idx) => {
+      try {
+        const relativePath = path.relative(projectPath, filePath);
+        const stats = fs.statSync(filePath);
+        if (stats.size > 500 * 1024) { results.filesSkipped++; return; }
+
+        // Binary check
+        const fd = fs.openSync(filePath, 'r');
+        const buf = Buffer.alloc(Math.min(8192, stats.size));
+        fs.readSync(fd, buf, 0, buf.length, 0);
+        fs.closeSync(fd);
+        if (buf.includes(0)) { results.filesSkipped++; return; }
+
+        const content = fs.readFileSync(filePath, 'utf-8');
+        const contentHash = crypto.createHash('sha256').update(content).digest('hex');
+
+        // Skip if already indexed with embedding
+        if (existingHashes.get(relativePath) === contentHash) {
+          results.filesSkipped++;
+          return;
+        }
+
+        const ext = path.extname(filePath).toLowerCase();
+        const lang = fileLanguageMap.get(filePath) || { id: 'unknown', name: 'Unknown' };
+        const lineCount = content.split('\n').length;
+        const fileId = uuidv4();
+
+        try {
+          await pool.query(`DELETE FROM codebase_files WHERE file_path = $1 AND project_path = $2`, [relativePath, projectPath]);
+        } catch { /* ignore */ }
+
+        await pool.query(`
+          INSERT INTO codebase_files (
+            id, file_path, absolute_path, file_name, extension,
+            language_id, language_name, content, content_hash,
+            size_bytes, line_count, project_path
+          ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
+        `, [
+          fileId, relativePath, filePath, path.basename(filePath), ext,
+          (lang.id || 'unknown').toLowerCase(), lang.name || 'Unknown',
+          content, contentHash, stats.size, lineCount, projectPath
+        ]);
+
+        results.filesIndexed++;
+        if (idx % 100 === 0) {
+          ui.setSubStatus(`Stored ${results.filesIndexed} / ${files.length} files...`);
+        }
+      } catch (e) {
+        results.errors.push(path.relative(projectPath, filePath) + ': ' + e.message);
+      }
+    }, parallelLimit);
+
+    initLog(`Phase 1 complete: ${results.filesIndexed} files stored without embeddings`);
+    ui.setSubStatus(`${results.filesIndexed} files stored, triggering server-side embedding...`);
+
+    // Phase 2: Trigger Python server's process_codebase endpoint
+    try {
+      const ssResult = await new Promise((resolve, reject) => {
+        const client = new net.Socket();
+        let buffer = '';
+        let settled = false;
+        const timeout = setTimeout(() => {
+          if (!settled) { settled = true; client.destroy(); reject(new Error('Server-side processing timeout (10min)')); }
+        }, 600000);
+        client.on('connect', () => {
+          client.write(JSON.stringify({ process_codebase: true, batch_size: 200, limit: 0, project_path: projectPath }) + '\n');
+        });
+        client.on('data', (data) => {
+          buffer += data.toString();
+          let newlineIdx;
+          while ((newlineIdx = buffer.indexOf('\n')) !== -1) {
+            if (settled) return;
+            const line = buffer.slice(0, newlineIdx);
+            buffer = buffer.slice(newlineIdx + 1);
+            try {
+              const resp = JSON.parse(line);
+              if (resp.error) { clearTimeout(timeout); settled = true; client.end(); reject(new Error(resp.error)); return; }
+              if (resp.status === 'processing') continue;
+              if (resp.total_processed !== undefined || resp.processed !== undefined) {
+                clearTimeout(timeout); settled = true; client.end(); resolve(resp); return;
+              }
+            } catch { /* keep waiting */ }
+          }
+        });
+        client.on('error', (e) => { clearTimeout(timeout); if (!settled) { settled = true; reject(e); } });
+        client.connect(activeSocketPath);
+      });
+
+      results.embeddingsGenerated = ssResult.total_processed || ssResult.processed || 0;
+      initLog(`Server-side embedding complete: ${results.embeddingsGenerated} embeddings generated`);
+      ui.setSubStatus(`Server-side: ${results.embeddingsGenerated} embeddings generated`);
+    } catch (ssErr) {
+      initLog(`Server-side embedding failed: ${ssErr.message} - falling back to client-side`);
+      ui.setSubStatus('Server-side failed, falling back to client-side...');
+      // Fall through to standard loop below (it will handle the remaining files)
+    }
+
+    // Skip the standard indexing loop
+    results.durationMs = Date.now() - startTime;
+    ui.enableFileFeed(false);
+    ui.slowRendering(0);
+    await pool.end();
+
+    initLog(`=== CODEBASE INDEXING COMPLETE (store-then-embed) ===`);
+    initLog(`Files: ${results.filesScanned} scanned, ${results.filesIndexed} indexed, ${results.embeddingsGenerated} embeddings`);
+    initLog(`Duration: ${results.durationMs}ms`);
+    return results;
+  }
+
   // Track current file for better progress display
   let currentFile = '';
   let currentFileChunk = 0;
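Both phases above lean on a `runWithConcurrency(items, worker, limit)` helper that the diff calls but never shows. A plausible minimal implementation of such a limiter (a sketch of the general technique, not the package's actual code):

    // Run worker(item, index) over all items with at most `limit` in flight.
    async function runWithConcurrency(items, worker, limit) {
      let next = 0;
      const lanes = Array.from({ length: Math.min(limit, items.length) }, async () => {
        while (next < items.length) {
          const idx = next++; // claiming an index is safe: JS is single-threaded between awaits
          await worker(items[idx], idx);
        }
      });
      await Promise.all(lanes);
    }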
@@ -4344,26 +4659,33 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
     try {
       ui.setSubStatus(`Generating ${fileEmbedTexts.length} embeddings in batch...`);
 
-      // Split into smaller batches
+      // Split into smaller batches
+      const embBatches = [];
       for (let j = 0; j < fileEmbedTexts.length; j += EMBEDDING_BATCH_SIZE) {
-
-
-
-
-
-
+        embBatches.push({ start: j, texts: fileEmbedTexts.slice(j, j + EMBEDDING_BATCH_SIZE) });
+      }
+
+      // Process embedding batches with concurrency (uses maxConcurrent from model config)
+      const embResults = new Array(embBatches.length);
+      await runWithConcurrency(embBatches, async (batch, batchIdx) => {
+        const batchNum = batchIdx + 1;
+        if (embBatches.length > 1) {
+          ui.setSubStatus(`Embedding batch ${batchNum}/${embBatches.length} (${batch.texts.length} files)...`);
        }
-
-
-        const fileIdx = j + k;
+        for (let k = 0; k < batch.texts.length; k++) {
+          const fileIdx = batch.start + k;
           if (fileDataList[fileIdx]) {
             const fd = fileDataList[fileIdx];
             const sizeKB = (fd.stats.size / 1024).toFixed(1);
             ui.addFileToFeed(fd.relativePath, 'embedding', `(${sizeKB}KB)`);
           }
         }
-
-
+        embResults[batchIdx] = await generateBatchEmbeddings(batch.texts);
+      }, embeddingMaxConcurrent);
+
+      // Flatten results in order
+      for (const br of embResults) {
+        if (br) fileEmbeddings.push(...br);
       }
 
       // Count successful embeddings
@@ -4377,7 +4699,6 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
     } catch (e) {
       initLog(`Batch embedding failed: ${e.message}`);
       results.embeddingsFailed = (results.embeddingsFailed || 0) + fileEmbedTexts.length;
-      // FIX: Categorize error types for better debugging
       results.errorTypes = results.errorTypes || {};
       const errorType = categorizeEmbeddingError(e);
       results.errorTypes[errorType] = (results.errorTypes[errorType] || 0) + 1;
@@ -4447,27 +4768,31 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
     let defEmbeddings = [];
     if (defEmbedTexts.length > 0 && activeSocketPath) {
       try {
-        // FIX: Show definition embedding progress
         ui.setSubStatus(`Embedding ${defEmbedTexts.length} definitions...`);
+        // Split into batches
+        const defBatches = [];
         for (let j = 0; j < defEmbedTexts.length; j += EMBEDDING_BATCH_SIZE) {
-
-
-
-
-
-
+          defBatches.push({ start: j, texts: defEmbedTexts.slice(j, j + EMBEDDING_BATCH_SIZE) });
+        }
+        // Process with concurrency
+        const defResults = new Array(defBatches.length);
+        await runWithConcurrency(defBatches, async (batch, batchIdx) => {
+          if (defBatches.length > 1) {
+            ui.setSubStatus(`[Defs] Batch ${batchIdx + 1}/${defBatches.length} (${batch.texts.length} defs)...`);
          }
-
-
-          const defIdx = j + k;
+          for (let k = 0; k < batch.texts.length; k++) {
+            const defIdx = batch.start + k;
             if (defDataList[defIdx]) {
               const dd = defDataList[defIdx];
               const defLabel = `${dd.def.type} ${dd.def.name}()`;
               ui.addFileToFeed(dd.relativePath, 'def', defLabel);
             }
           }
-
-
+          defResults[batchIdx] = await generateBatchEmbeddings(batch.texts);
+        }, embeddingMaxConcurrent);
+        // Flatten in order
+        for (const dr of defResults) {
+          if (dr) defEmbeddings.push(...dr);
        }
      } catch (e) {
        initLog(`Definition batch embedding failed: ${e.message}`);
@@ -4918,6 +5243,8 @@ async function extractSessions(projectPath, ui, embeddingResult = null) {
     safeMkdir(socketsDir);
 
     // Kill any existing embedding server before spawning for session extraction
+    // Reset spawnedEmbeddingPid since we're about to spawn a replacement
+    spawnedEmbeddingPid = null;
     killExistingEmbeddingServer(projectPath);
 
     // Clean up stale socket
@@ -4940,6 +5267,7 @@ async function extractSessions(projectPath, ui, embeddingResult = null) {
       cwd: path.dirname(embeddingScript),
       env: {
         ...process.env,
+        SPECMEM_EMBEDDING_SOCKET: projectSocketPath,
         SPECMEM_SOCKET_PATH: projectSocketPath,
         SPECMEM_PROJECT_PATH: projectPath
       },
@@ -4947,6 +5275,10 @@ async function extractSessions(projectPath, ui, embeddingResult = null) {
       stdio: ['ignore', sessEmbedLogFd, sessEmbedLogFd]
     });
 
+    // CRITICAL FIX: Track spawned PID so killExistingEmbeddingServer() won't kill it
+    spawnedEmbeddingPid = embeddingProcess.pid;
+    initLog(`[EMBED] Session extraction spawned embedding server with PID ${spawnedEmbeddingPid} - tracking to prevent self-kill`);
+
     // error handler BEFORE unref - prevents silent spawn failures
     embeddingProcess.on('error', (err) => {
       ui.setSubStatus('Embedding spawn error: ' + err.message);
@@ -4955,18 +5287,11 @@ async function extractSessions(projectPath, ui, embeddingResult = null) {
     fs.closeSync(sessEmbedLogFd);
     embeddingProcess.unref();
 
-    //
+    // DYNAMIC WAIT: Poll for readiness with adaptive intervals (max 5min)
     ui.setSubStatus('Waiting for embedding server to start...');
-
-
-
-        socketPath = projectSocketPath;
-        ui.setSubStatus('✓ Embedding server started!');
-        break;
-      }
-      if (i % 10 === 0) {
-        ui.setSubStatus(`Waiting for embedding server... (${i/2}s)`);
-      }
+    const sessReady = await waitForEmbeddingReady(projectSocketPath, { ui, label: 'Session embedding server' });
+    if (sessReady) {
+      socketPath = projectSocketPath;
     }
 
     if (!socketPath) {
@@ -5186,12 +5511,15 @@ async function extractSessions(projectPath, ui, embeddingResult = null) {
     await pool.query('SELECT 1');
 
     // CRITICAL FIX: Create and set search_path to project schema for proper isolation
-    const schemaName = 'specmem_' + path.basename(projectPath).toLowerCase().replace(/[^a-z0-
+    const schemaName = 'specmem_' + path.basename(projectPath).toLowerCase().replace(/[^a-z0-9_]/g, '_').replace(/_+/g, '_').replace(/^_|_$/g, '');
 
     // Create schema if it doesn't exist
     await pool.query(`CREATE SCHEMA IF NOT EXISTS ${schemaName}`);
 
-    // Set search_path for
+    // Set search_path for ALL pool connections (not just the current one)
+    pool.on('connect', (client) => {
+      client.query(`SET search_path TO ${schemaName}, public`).catch(() => {});
+    });
     await pool.query(`SET search_path TO ${schemaName}, public`);
     ui.setSubStatus(`Connected (schema: ${schemaName})`);
 
@@ -5995,26 +6323,44 @@ async function runAutoSetup(projectPath) {
   // SpecMem hooks take priority for same event types + matchers
   const mergedSettings = { ...existingSettings };
 
-  // Fix hardcoded
+  // Fix hardcoded paths in srcSettings for actual install environment
   const homeDir = os.homedir();
+  const pkgRoot = path.resolve(__dirname, '..');
   let srcSettingsStr = JSON.stringify(srcSettings);
   if (homeDir !== '/root') {
     srcSettingsStr = srcSettingsStr.replace(/\/root\//g, homeDir + '/');
     srcSettingsStr = srcSettingsStr.replace(/"\/root"/g, '"' + homeDir + '"');
   }
+  // Fix SPECMEM_PKG to point to actual install location (not dev /specmem)
+  srcSettingsStr = srcSettingsStr.replace(/"SPECMEM_PKG":\s*"\/specmem"/g, `"SPECMEM_PKG": "${pkgRoot}"`);
+  // Fix SPECMEM_HOME to use actual home directory
+  srcSettingsStr = srcSettingsStr.replace(/"SPECMEM_HOME":\s*"\/root\/.specmem"/g, `"SPECMEM_HOME": "${path.join(homeDir, '.specmem')}"`);
   const fixedSrcSettings = JSON.parse(srcSettingsStr);
 
   if (fixedSrcSettings.hooks) {
     mergedSettings.hooks = mergeHooksDeep(existingSettings.hooks || {}, fixedSrcSettings.hooks);
   }
 
-  // Helper
+  // Helper: check if a hook command belongs to specmem
+  function isSpecmemHookCmd(hookEntry) {
+    const cmd = (hookEntry.command || '');
+    return cmd.includes('specmem') || cmd.includes('team-comms-enforcer') ||
+      cmd.includes('agent-loading-hook') || cmd.includes('agent-output-interceptor') ||
+      cmd.includes('task-progress-hook') || cmd.includes('subagent-loading-hook') ||
+      cmd.includes('use-code-pointers') || cmd.includes('post-write-memory-hook') ||
+      cmd.includes('bullshit-radar') || cmd.includes('input-aware-improver') ||
+      cmd.includes('smart-context-hook');
+  }
+
+  // Deep merge hooks: specmem hooks take priority per-matcher, but user's
+  // custom (non-specmem) hooks within the same matcher are preserved.
+  // On re-init, old specmem hooks are cleaned up and replaced with new ones.
   function mergeHooksDeep(existingHooks, specmemHooks) {
     const merged = {};
 
-    // Copy all existing event types
+    // Copy all existing event types (deep clone to avoid mutations)
    for (const eventType of Object.keys(existingHooks)) {
-      merged[eventType] =
+      merged[eventType] = JSON.parse(JSON.stringify(existingHooks[eventType]));
    }
 
    // Process each specmem event type
@@ -6022,44 +6368,61 @@ async function runAutoSetup(projectPath) {
     const specmemGroups = specmemHooks[eventType];
 
     if (!merged[eventType]) {
-      // No existing hooks for this event type, use specmem's
       merged[eventType] = specmemGroups;
       continue;
     }
 
-    // Build
-    const
+    // Build specmem's desired state: one entry per matcher
+    const specmemByMatcher = new Map();
     for (const group of specmemGroups) {
-      const
-
+      const key = group.matcher || '__CATCHALL__';
+      if (!specmemByMatcher.has(key)) {
+        specmemByMatcher.set(key, { ...group, hooks: [...(group.hooks || [])] });
+      } else {
+        // Consolidate duplicate matchers from source
+        specmemByMatcher.get(key).hooks.push(...(group.hooks || []));
+      }
     }
 
-    //
-
-
-
-    const
-
-
-
-
+    // Extract user's custom (non-specmem) hooks per matcher from existing config
+    const userHooksByMatcher = new Map();
+    for (const group of merged[eventType]) {
+      const key = group.matcher || '__CATCHALL__';
+      const userHooks = (group.hooks || []).filter(h => !isSpecmemHookCmd(h));
+      if (userHooks.length > 0) {
+        if (!userHooksByMatcher.has(key)) {
+          userHooksByMatcher.set(key, []);
+        }
+        userHooksByMatcher.get(key).push(...userHooks);
      }
+    }
 
-
-
-
-      groupStr.includes('/specmem/') ||
-      groupStr.includes('team-comms-enforcer') ||
-      groupStr.includes('smart-context-hook') ||
-      groupStr.includes('agent-loading-hook')) {
-      return false;
-    }
+    // Build final result for this event type
+    const result = [];
+    const handledMatchers = new Set();
 
-
-
+    // First: all specmem matchers (with user's custom hooks merged in)
+    for (const [key, group] of specmemByMatcher) {
+      const userHooks = userHooksByMatcher.get(key) || [];
+      result.push({ ...group, hooks: [...group.hooks, ...userHooks] });
+      handledMatchers.add(key);
+    }
+
+    // Then: existing matchers that specmem doesn't touch
+    for (const group of merged[eventType]) {
+      const key = group.matcher || '__CATCHALL__';
+      if (handledMatchers.has(key)) continue;
+      handledMatchers.add(key);
+
+      // Clean orphaned specmem hooks from non-specmem matchers
+      const cleanHooks = (group.hooks || []).filter(h => !isSpecmemHookCmd(h));
+      if (cleanHooks.length > 0) {
+        result.push({ ...group, hooks: cleanHooks });
+      }
+      // If 100% specmem hooks and specmem no longer uses this matcher, drop it
+    }
 
-
-    merged[eventType] = [...preservedGroups, ...specmemGroups];
+    merged[eventType] = result;
   }
 
   return merged;
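To make the merge semantics concrete, consider a hypothetical `PreToolUse` entry (matchers and commands invented for illustration): the user's settings hold a `Bash` matcher with one stale specmem hook plus one custom hook, and a `Write` matcher holding only a stale specmem hook, while specmem ships a fresh `Bash` hook.

    // Existing:  Bash  -> [old team-comms-enforcer, ./my-lint-gate.sh]
    //            Write -> [old post-write-memory-hook]
    // Specmem:   Bash  -> [new team-comms-enforcer]
    //
    // mergeHooksDeep output:
    //   Bash -> [new team-comms-enforcer, ./my-lint-gate.sh]  // specmem refreshed, user hook kept
    //   (Write is dropped entirely: it held nothing but orphaned specmem hooks)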