specmem-hardwicksoftware 3.7.9 → 3.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2847,6 +2847,43 @@ async function coldStartEmbeddingDocker(projectPath, modelConfig, ui, codebaseRe
2847
2847
  return { serverRunning: false, warmupLatency: null, timeoutConfig };
2848
2848
  }
2849
2849
 
2850
+ // EARLY BAIL-OUT: Check if Docker is actually usable before wasting time
2851
+ // 1. Docker daemon must be running
2852
+ // 2. The embedding image must exist locally (warm-start.sh uses specmem-embedding:latest)
2853
+ // 3. OR there must be an existing container we can resume
2854
+ // If none of these are true, skip Docker entirely — Stage 5 will use native Python.
2855
+ try {
2856
+ const { execSync } = require('child_process');
2857
+ // Check if Docker daemon is accessible
2858
+ try { execSync('docker info', { stdio: 'ignore', timeout: 5000 }); } catch {
2859
+ initLog('[DOCKER] Docker daemon not accessible - skipping Docker stage');
2860
+ ui.setSubStatus('⚠️ Docker not available - will use native Python');
2861
+ return { serverRunning: false, warmupLatency: null, timeoutConfig };
2862
+ }
2863
+ // Check for existing specmem embedding containers (any state)
2864
+ const containers = execSync(
2865
+ `docker ps -a --filter "name=specmem-embedding" --format "{{.Names}}" 2>/dev/null`,
2866
+ { encoding: 'utf8', timeout: 5000 }
2867
+ ).trim();
2868
+ if (!containers) {
2869
+ // No containers — check if the image exists to cold-start from
2870
+ const images = execSync(
2871
+ `docker images -q specmem-embedding:latest 2>/dev/null`,
2872
+ { encoding: 'utf8', timeout: 5000 }
2873
+ ).trim();
2874
+ if (!images) {
2875
+ initLog('[DOCKER] No embedding containers and no specmem-embedding:latest image - skipping Docker');
2876
+ ui.setSubStatus('⚠️ No Docker embedding image - will use native Python');
2877
+ return { serverRunning: false, warmupLatency: null, timeoutConfig };
2878
+ }
2879
+ }
2880
+ initLog(`[DOCKER] Found containers/image - proceeding with Docker warm-start`);
2881
+ } catch (e) {
2882
+ initLog(`[DOCKER] Pre-check failed (${e.message}) - skipping Docker`);
2883
+ ui.setSubStatus('⚠️ Docker pre-check failed - will use native Python');
2884
+ return { serverRunning: false, warmupLatency: null, timeoutConfig };
2885
+ }
2886
+
2850
2887
  // Spawn warm-start.sh with env vars for per-project socket
2851
2888
  const dockerProcess = spawn('bash', [warmStartScript], {
2852
2889
  cwd: path.dirname(warmStartScript),
@@ -2885,21 +2922,26 @@ async function coldStartEmbeddingDocker(projectPath, modelConfig, ui, codebaseRe
2885
2922
  });
2886
2923
 
2887
2924
  // Wait for Docker to start (up to 5 minutes - see MAX_DOCKER_WAIT_MS below)
2888
- // RELIABILITY FIX: Increased from 24s (120x200ms) to 60s (300x200ms) to match
2889
- // warm-start.sh wait times. First-time model loading can take 20-30s.
2925
+ // DYNAMIC WAIT: Poll for Docker socket with adaptive intervals (max 5min)
2926
+ const MAX_DOCKER_WAIT_MS = 300000; // 5 minute absolute cap
2890
2927
  const dockerStart = Date.now();
2891
2928
  ui.setStatus('Waiting for Docker container...');
2892
2929
  ui.setSubProgress(0.3);
2893
2930
 
2894
- for (let i = 0; i < 300; i++) {
2931
+ let dockerPollInterval = 200; // start fast for Docker
2932
+ while (Date.now() - dockerStart < MAX_DOCKER_WAIT_MS) {
2895
2933
  if (dockerStarted || fs.existsSync(socketPath)) {
2896
2934
  break;
2897
2935
  }
2898
- await new Promise(r => setTimeout(r, 200)); // Faster polling (200ms)
2899
-
2900
- // Smooth progress increment
2901
- if (i % 25 === 0) {
2902
- ui.setSubProgress(0.3 + (i / 300) * 0.4); // Progress from 30% to 70%
2936
+ await new Promise(r => setTimeout(r, dockerPollInterval));
2937
+ const elapsed = Date.now() - dockerStart;
2938
+ // Slow down polling after 10s
2939
+ if (elapsed > 10000) dockerPollInterval = Math.min(dockerPollInterval + 100, 2000);
2940
+ // Progress update every ~5s
2941
+ if (Math.floor(elapsed / 5000) !== Math.floor((elapsed - dockerPollInterval) / 5000)) {
2942
+ const progress = Math.min(0.7, 0.3 + (elapsed / MAX_DOCKER_WAIT_MS) * 0.4);
2943
+ ui.setSubProgress(progress);
2944
+ ui.setSubStatus(`Waiting for Docker... (${Math.round(elapsed / 1000)}s)`);
2903
2945
  }
2904
2946
  }
2905
2947
 
@@ -3040,6 +3082,13 @@ async function coldStartEmbeddingDocker(projectPath, modelConfig, ui, codebaseRe
3040
3082
  // from the start. Previously, indexing only happened when MCP server started,
3041
3083
  // which meant the first session had no code search capability.
3042
3084
 
3085
+ // CRITICAL BUG FIX: Track the PID of the embedding server spawned by THIS init
3086
+ // process. killExistingEmbeddingServer() must NEVER kill our own child.
3087
+ // Without this, the init process spawns the server, the server writes its PID
3088
+ // to embedding.pid, and then a later call to killExistingEmbeddingServer()
3089
+ // reads that PID file and SIGTERMs our own child process.
3090
+ let spawnedEmbeddingPid = null;
3091
+
3043
3092
  async function indexCodebase(projectPath, ui, embeddingResult) {
3044
3093
  ui.setStage(5, 'CODEBASE INDEXING');
3045
3094
 
@@ -3071,16 +3120,114 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3071
3120
  initLog(`Socket paths: project=${projectSocketPath}, shared=${sharedSocketPath}`);
3072
3121
  initLog(`Project socket exists: ${fs.existsSync(projectSocketPath)}, Shared socket exists: ${fs.existsSync(sharedSocketPath)}`);
3073
3122
 
3123
+ // SOCKET LIVENESS CHECK - validate that something accepts connections on the socket, not just that the file exists.
3124
+ // Orphaned socket files (process died, socket file remains) cause ECONNREFUSED
3125
+ // and stall the entire indexing pipeline. Resolves true on 'connect'; resolves false on 'error' or after a 3s timeout. Never rejects.
3126
+ function quickSocketAlive(sockPath) {
3127
+ return new Promise((resolve) => {
3128
+ const client = new net.Socket();
3129
+ const timeout = setTimeout(() => { client.destroy(); resolve(false); }, 3000);
3130
+ client.on('connect', () => { clearTimeout(timeout); client.destroy(); resolve(true); });
3131
+ client.on('error', () => { clearTimeout(timeout); client.destroy(); resolve(false); });
3132
+ client.connect(sockPath);
3133
+ });
3134
+ }
3135
+
3136
+ // DYNAMIC READINESS POLLING — no fixed iteration count; the only hard limit is MAX_EMBED_WAIT_MS.
3137
+ // Polls for the socket file, then runs a connect check (quickSocketAlive), with adaptive intervals.
3138
+ // Starts fast (500ms); once 10s have elapsed each wait grows by 250ms up to a 2s ceiling.
3139
+ // Max wait: 300s (5 minutes). Resolves true once the socket accepts a connection, false on timeout. opts: { ui, label = 'server', logFn = initLog }.
3140
+ const MAX_EMBED_WAIT_MS = 300000; // 5 minute absolute cap
3141
+ async function waitForEmbeddingReady(sockPath, opts = {}) {
3142
+ const { ui: _ui, label = 'server', logFn = initLog } = opts;
3143
+ const start = Date.now();
3144
+ let pollInterval = 500; // start fast
3145
+ let lastLogTime = 0;
3146
+
3147
+ while (Date.now() - start < MAX_EMBED_WAIT_MS) {
3148
+ const elapsed = Date.now() - start;
3149
+
3150
+ // Phase 1: Wait for socket FILE to appear (status/log throttled to one update per 5s)
3151
+ if (!fs.existsSync(sockPath)) {
3152
+ if (elapsed - lastLogTime > 5000) {
3153
+ const elapsedSec = Math.round(elapsed / 1000);
3154
+ if (_ui) _ui.setSubStatus(`Waiting for ${label} socket... (${elapsedSec}s)`);
3155
+ logFn(`[EMBED] Waiting for socket file: ${sockPath} (${elapsedSec}s elapsed)`);
3156
+ lastLogTime = elapsed;
3157
+ }
3158
+ await new Promise(r => setTimeout(r, pollInterval));
3159
+ if (elapsed > 10000) pollInterval = Math.min(pollInterval + 250, 2000);
3160
+ continue;
3161
+ }
3162
+
3163
+ // Phase 2: Socket file exists — check if server is actually accepting connections
3164
+ const alive = await quickSocketAlive(sockPath);
3165
+ if (alive) {
3166
+ const elapsedSec = Math.round((Date.now() - start) / 1000);
3167
+ logFn(`[EMBED] ${label} ready after ${elapsedSec}s`);
3168
+ if (_ui) _ui.setSubStatus(`✓ ${label} ready (${elapsedSec}s)`);
3169
+ return true;
3170
+ }
3171
+
3172
+ // Socket exists but not responding yet — server still warming up. NOTE: elapsed was sampled before the connect check, so it can lag by up to that check's 3s timeout.
3173
+ if (elapsed - lastLogTime > 3000) {
3174
+ const elapsedSec = Math.round(elapsed / 1000);
3175
+ if (_ui) _ui.setSubStatus(`${label} warming up... (${elapsedSec}s)`);
3176
+ lastLogTime = elapsed;
3177
+ }
3178
+ await new Promise(r => setTimeout(r, pollInterval));
3179
+ if (elapsed > 10000) pollInterval = Math.min(pollInterval + 250, 2000);
3180
+ }
3181
+
3182
+ const totalSec = Math.round((Date.now() - start) / 1000);
3183
+ logFn(`[EMBED] ${label} failed to become ready after ${totalSec}s (max ${MAX_EMBED_WAIT_MS / 1000}s)`);
3184
+ if (_ui) _ui.setSubStatus(`⚠️ ${label} not ready after ${totalSec}s`);
3185
+ return false;
3186
+ }
3187
+
3074
3188
  // Project socket takes priority - it's the fresh one from Docker stage 4
3075
3189
  let activeSocketPath = null;
3076
3190
  if (fs.existsSync(projectSocketPath)) {
3077
- activeSocketPath = projectSocketPath;
3078
- initLog(`Using PROJECT socket: ${projectSocketPath}`);
3079
- } else if (fs.existsSync(sharedSocketPath)) {
3080
- activeSocketPath = sharedSocketPath;
3081
- initLog(`Using SHARED socket: ${sharedSocketPath}`);
3082
- } else {
3083
- initLog('WARNING: No embedding socket found!');
3191
+ const alive = await quickSocketAlive(projectSocketPath);
3192
+ if (alive) {
3193
+ activeSocketPath = projectSocketPath;
3194
+ initLog(`Using PROJECT socket: ${projectSocketPath} (verified alive)`);
3195
+ } else {
3196
+ // CRITICAL FIX: Don't remove socket if our spawned server owns it.
3197
+ // Check the PID file to see if this socket belongs to our child process.
3198
+ const pidFile = path.join(projectPath, 'specmem', 'sockets', 'embedding.pid');
3199
+ let ownedByUs = false;
3200
+ if (spawnedEmbeddingPid) {
3201
+ try {
3202
+ if (fs.existsSync(pidFile)) {
3203
+ const pidContent = fs.readFileSync(pidFile, 'utf8').trim();
3204
+ const filePid = parseInt(pidContent.split(':')[0], 10);
3205
+ if (filePid === spawnedEmbeddingPid) {
3206
+ ownedByUs = true;
3207
+ initLog(`PROJECT socket not yet responsive but owned by our spawned PID ${spawnedEmbeddingPid} - keeping socket, server may still be warming up`);
3208
+ activeSocketPath = projectSocketPath; // Trust our child, it's just warming up
3209
+ }
3210
+ }
3211
+ } catch { /* ignore */ }
3212
+ }
3213
+ if (!ownedByUs) {
3214
+ initLog(`PROJECT socket exists but DEAD (ECONNREFUSED) - removing orphaned socket: ${projectSocketPath}`);
3215
+ try { fs.unlinkSync(projectSocketPath); } catch {}
3216
+ }
3217
+ }
3218
+ }
3219
+ if (!activeSocketPath && fs.existsSync(sharedSocketPath)) {
3220
+ const alive = await quickSocketAlive(sharedSocketPath);
3221
+ if (alive) {
3222
+ activeSocketPath = sharedSocketPath;
3223
+ initLog(`Using SHARED socket: ${sharedSocketPath} (verified alive)`);
3224
+ } else {
3225
+ initLog(`SHARED socket exists but DEAD - removing orphaned socket: ${sharedSocketPath}`);
3226
+ try { fs.unlinkSync(sharedSocketPath); } catch {}
3227
+ }
3228
+ }
3229
+ if (!activeSocketPath) {
3230
+ initLog('WARNING: No live embedding socket found! Will spawn new server.');
3084
3231
  }
3085
3232
 
3086
3233
  // Check if embedding server is available from Stage 4
@@ -3088,6 +3235,8 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3088
3235
  const socketExists = activeSocketPath !== null;
3089
3236
 
3090
3237
  // Helper: Kill any existing embedding server for this project before spawning a new one
3238
+ // CRITICAL FIX: Never kill a server that was spawned by THIS init process.
3239
+ // The spawnedEmbeddingPid variable tracks our child's PID to prevent self-kill.
3091
3240
  function killExistingEmbeddingServer(projectPath) {
3092
3241
  const pidFile = path.join(projectPath, 'specmem', 'sockets', 'embedding.pid');
3093
3242
  try {
@@ -3095,6 +3244,18 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3095
3244
  const content = fs.readFileSync(pidFile, 'utf8').trim();
3096
3245
  const pid = parseInt(content.split(':')[0], 10);
3097
3246
  if (!pid || isNaN(pid)) return false;
3247
+
3248
+ // CRITICAL FIX: Never kill our own child process!
3249
+ // This prevents the race condition where:
3250
+ // 1. Init spawns embedding server
3251
+ // 2. Server writes PID to embedding.pid
3252
+ // 3. Later code path calls killExistingEmbeddingServer()
3253
+ // 4. It reads the PID file and kills our own child
3254
+ if (spawnedEmbeddingPid && pid === spawnedEmbeddingPid) {
3255
+ initLog(`[EMBED] Skipping kill of PID ${pid} - this is OUR spawned server`);
3256
+ return false;
3257
+ }
3258
+
3098
3259
  // Check if process is alive
3099
3260
  try { process.kill(pid, 0); } catch {
3100
3261
  // Process dead, clean up PID file
@@ -3150,6 +3311,8 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3150
3311
  safeMkdir(socketsDir);
3151
3312
 
3152
3313
  // Kill any existing embedding server before spawning a new one
3314
+ // Reset spawnedEmbeddingPid since we're about to spawn a replacement
3315
+ spawnedEmbeddingPid = null;
3153
3316
  killExistingEmbeddingServer(projectPath);
3154
3317
 
3155
3318
  // Clean up stale socket
@@ -3177,6 +3340,7 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3177
3340
  cwd: path.dirname(embeddingScript),
3178
3341
  env: {
3179
3342
  ...process.env,
3343
+ SPECMEM_EMBEDDING_SOCKET: projectSocketPath,
3180
3344
  SPECMEM_SOCKET_PATH: projectSocketPath,
3181
3345
  SPECMEM_PROJECT_PATH: projectPath
3182
3346
  },
@@ -3184,6 +3348,10 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3184
3348
  stdio: ['ignore', embedLogFd, embedLogFd]
3185
3349
  });
3186
3350
 
3351
+ // CRITICAL FIX: Track spawned PID so killExistingEmbeddingServer() won't kill it
3352
+ spawnedEmbeddingPid = embeddingProcess.pid;
3353
+ initLog(`[EMBED] Spawned embedding server with PID ${spawnedEmbeddingPid} - tracking to prevent self-kill`);
3354
+
3187
3355
  // error handler BEFORE unref - prevents silent spawn failures
3188
3356
  embeddingProcess.on('error', (err) => {
3189
3357
  ui.setSubStatus('Embedding spawn error: ' + err.message);
@@ -3194,20 +3362,11 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3194
3362
 
3195
3363
  embeddingProcess.unref();
3196
3364
 
3197
- // Wait for socket to appear (up to 60s)
3198
- // RELIABILITY FIX: Increased from 30s (60x500ms) to 60s (120x500ms)
3199
- // First-time model loading can take 20-30s, need enough buffer
3365
+ // DYNAMIC WAIT: Poll for socket + health check with adaptive intervals (max 5min)
3200
3366
  ui.setSubStatus('Waiting for embedding server to start...');
3201
- for (let i = 0; i < 120; i++) {
3202
- await new Promise(r => setTimeout(r, 500));
3203
- if (fs.existsSync(projectSocketPath)) {
3204
- activeSocketPath = projectSocketPath;
3205
- ui.setSubStatus('✓ Embedding server started!');
3206
- break;
3207
- }
3208
- if (i % 10 === 0) {
3209
- ui.setSubStatus(`Waiting for embedding server... (${i/2}s)`);
3210
- }
3367
+ const serverReady = await waitForEmbeddingReady(projectSocketPath, { ui, label: 'Embedding server' });
3368
+ if (serverReady) {
3369
+ activeSocketPath = projectSocketPath;
3211
3370
  }
3212
3371
 
3213
3372
  if (!activeSocketPath) {
@@ -3267,14 +3426,23 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3267
3426
  ui.setSubStatus('✓ Embedding server responding');
3268
3427
  initLog('Embedding socket test passed');
3269
3428
  } catch (e) {
3270
- ui.setSubStatus(`⚠️ Socket exists but not responding: ${e.message}`);
3271
- initLog(`Embedding socket test FAILED: ${e.message}`, e);
3429
+ ui.setSubStatus(`⚠️ Socket test failed: ${e.message} - retrying...`);
3430
+ initLog(`Embedding socket test FAILED: ${e.message} - will retry before killing`, e);
3272
3431
 
3273
- // CRITICAL FIX: Stale socket exists but nothing listening - clean up and restart!
3432
+ // DYNAMIC READINESS POLL: Don't kill immediately - poll with adaptive intervals first (max 5min)
3433
+ const recovered = await waitForEmbeddingReady(activeSocketPath, { ui, label: 'Embedding server warmup' });
3434
+
3435
+ if (recovered) {
3436
+ // Server is alive after retries - continue to indexing
3437
+ } else {
3438
+ // Server still not ready after the readiness poll (up to 5min) - now kill and restart
3439
+ // If this is our spawned server, it stayed unresponsive for that entire poll - allow the kill
3440
+ // by resetting spawnedEmbeddingPid (we're giving up on it)
3274
3441
  initLog('Attempting socket recovery - cleaning stale socket and restarting server...');
3275
3442
  ui.setStatus('Recovering embedding server...');
3276
3443
 
3277
- // Kill any existing embedding server before recovery
3444
+ // Reset PID tracking since we're abandoning this server
3445
+ spawnedEmbeddingPid = null;
3278
3446
  killExistingEmbeddingServer(projectPath);
3279
3447
 
3280
3448
  // Clean up the stale socket file
@@ -3313,49 +3481,15 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3313
3481
  ui.setSubStatus('Waiting for Docker container to recover...');
3314
3482
  const recoverStart = Date.now();
3315
3483
 
3316
- for (let i = 0; i < 120; i++) {
3317
- if (fs.existsSync(projectSocketPath)) {
3318
- break;
3319
- }
3320
- await new Promise(r => setTimeout(r, 500));
3321
- if (i % 10 === 0) {
3322
- ui.setSubStatus(`Recovery in progress... (${Math.round(i/2)}s)`);
3323
- }
3324
- }
3325
-
3484
+ // DYNAMIC WAIT: Poll for recovery with adaptive intervals (max 5min)
3485
+ const recoveryReady = await waitForEmbeddingReady(projectSocketPath, { ui, label: 'Docker recovery' });
3326
3486
  const recoverLatency = Date.now() - recoverStart;
3327
3487
 
3328
- if (fs.existsSync(projectSocketPath)) {
3488
+ if (recoveryReady) {
3329
3489
  initLog(`Docker recovery successful in ${recoverLatency}ms`);
3330
- ui.setSubStatus('✓ Embedding server recovered!');
3331
3490
  activeSocketPath = projectSocketPath;
3332
-
3333
- // Verify the recovered socket is actually responding
3334
- try {
3335
- await new Promise((resolve, reject) => {
3336
- const verifyClient = new net.Socket();
3337
- let verifyData = '';
3338
- verifyClient.setTimeout(10000);
3339
- verifyClient.connect(projectSocketPath, () => {
3340
- verifyClient.write(JSON.stringify({ type: 'embed', text: 'recovery test' }) + '\n');
3341
- });
3342
- verifyClient.on('data', chunk => {
3343
- verifyData += chunk.toString();
3344
- if (verifyData.includes('embedding') || verifyData.includes('processing')) {
3345
- verifyClient.destroy();
3346
- resolve(true);
3347
- }
3348
- });
3349
- verifyClient.on('error', reject);
3350
- verifyClient.on('timeout', () => reject(new Error('timeout')));
3351
- });
3352
- initLog('Recovered socket verified - responding correctly');
3353
- } catch (verifyErr) {
3354
- initLog(`Recovered socket not responding: ${verifyErr.message}`);
3355
- activeSocketPath = null;
3356
- }
3357
3491
  } else {
3358
- initLog(`Docker recovery failed - socket not created after ${recoverLatency}ms`);
3492
+ initLog(`Docker recovery failed after ${recoverLatency}ms`);
3359
3493
  activeSocketPath = null;
3360
3494
  }
3361
3495
  } catch (recoverErr) {
@@ -3366,7 +3500,8 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3366
3500
  initLog('warm-start.sh not found - cannot recover Docker container');
3367
3501
  activeSocketPath = null; // Mark as unavailable
3368
3502
  }
3369
- }
3503
+ } // end else (server truly dead after retries)
3504
+ } // end catch
3370
3505
  await qqms();
3371
3506
  }
3372
3507
 
@@ -3398,13 +3533,19 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3398
3533
  initLog('Database connection test passed');
3399
3534
 
3400
3535
  // CRITICAL FIX: Create and set search_path to project schema for proper isolation
3401
- const schemaName = 'specmem_' + path.basename(projectPath).toLowerCase().replace(/[^a-z0-9]/g, '');
3536
+ const schemaName = 'specmem_' + path.basename(projectPath).toLowerCase().replace(/[^a-z0-9_]/g, '_').replace(/_+/g, '_').replace(/^_|_$/g, '');
3402
3537
 
3403
3538
  // Create schema if it doesn't exist
3404
3539
  await pool.query(`CREATE SCHEMA IF NOT EXISTS ${schemaName}`);
3405
3540
  initLog(`Project schema ensured: ${schemaName}`);
3406
3541
 
3407
- // Set search_path for this connection
3542
+ // Set search_path for ALL pool connections (not just the current one)
3543
+ // pool.query() checks out different connections; SET only affects one.
3544
+ // Using pool.on('connect') ensures every new connection gets the right search_path.
3545
+ pool.on('connect', (client) => {
3546
+ client.query(`SET search_path TO ${schemaName}, public`).catch(() => {});
3547
+ });
3548
+ // Also set it on the existing connection
3408
3549
  await pool.query(`SET search_path TO ${schemaName}, public`);
3409
3550
  initLog(`Database schema set to: ${schemaName}`);
3410
3551
  ui.setSubStatus(`Database connected (schema: ${schemaName})`);
@@ -3514,6 +3655,50 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3514
3655
  return results;
3515
3656
  }
3516
3657
 
3658
+ // AUTO-CREATE codebase_files table if it doesn't exist
3659
+ // CRITICAL: Init must not depend on MCP migrations having run first
3660
+ // FIX: Use gen_random_uuid() (built-in PG13+) instead of uuid_generate_v4() (uuid-ossp extension)
3661
+ // The uuid-ossp extension is installed in specmem_specmem schema, NOT public,
3662
+ // so uuid_generate_v4() is unavailable when search_path is set to other project schemas.
3663
+ // Also ensure vector extension exists in public schema (accessible to all project schemas).
3664
+ try {
3665
+ // vector extension must be in public schema so all project schemas can use vector type
3666
+ await pool.query(`CREATE EXTENSION IF NOT EXISTS "vector" SCHEMA public`);
3667
+ await pool.query(`
3668
+ CREATE TABLE IF NOT EXISTS codebase_files (
3669
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
3670
+ file_path TEXT NOT NULL,
3671
+ absolute_path TEXT NOT NULL,
3672
+ file_name VARCHAR(255) NOT NULL,
3673
+ extension VARCHAR(50),
3674
+ language_id VARCHAR(50) NOT NULL DEFAULT 'unknown',
3675
+ language_name VARCHAR(100) NOT NULL DEFAULT 'Unknown',
3676
+ language_type VARCHAR(50) NOT NULL DEFAULT 'data',
3677
+ content TEXT NOT NULL,
3678
+ content_hash VARCHAR(64),
3679
+ size_bytes INTEGER NOT NULL DEFAULT 0,
3680
+ line_count INTEGER NOT NULL DEFAULT 0,
3681
+ char_count INTEGER NOT NULL DEFAULT 0,
3682
+ last_modified TIMESTAMPTZ NOT NULL DEFAULT NOW(),
3683
+ chunk_index INTEGER,
3684
+ total_chunks INTEGER,
3685
+ original_file_id UUID,
3686
+ embedding vector(384),
3687
+ project_path TEXT DEFAULT '/',
3688
+ created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
3689
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
3690
+ CONSTRAINT content_not_empty CHECK (length(content) > 0)
3691
+ )
3692
+ `);
3693
+ await pool.query(`CREATE INDEX IF NOT EXISTS idx_codebase_files_content_hash ON codebase_files(content_hash)`);
3694
+ await pool.query(`CREATE INDEX IF NOT EXISTS idx_codebase_files_path ON codebase_files(file_path)`);
3695
+ await pool.query(`CREATE INDEX IF NOT EXISTS idx_codebase_files_project_path_file ON codebase_files(file_path, project_path)`);
3696
+ await pool.query(`CREATE INDEX IF NOT EXISTS idx_codebase_files_project_path_hash ON codebase_files(project_path, content_hash)`);
3697
+ initLog('[CODEBASE] Table codebase_files ensured');
3698
+ } catch (e) {
3699
+ initLog(`[CODEBASE] Table ensure warning: ${e.message}`);
3700
+ }
3701
+
3517
3702
  // Load existing hashes to skip unchanged files ONLY if they have embeddings
3518
3703
  // CRITICAL: Files without embeddings need to be re-indexed even if content matches!
3519
3704
  ui.setStatus('Checking existing index...');
@@ -3711,6 +3896,8 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3711
3896
  const socketsDir = path.join(projectPath, 'specmem', 'sockets');
3712
3897
  if (!fs.existsSync(socketsDir)) fs.mkdirSync(socketsDir, { recursive: true });
3713
3898
  // Kill any existing embedding server before respawn
3899
+ // Reset spawnedEmbeddingPid since we're about to spawn a replacement
3900
+ spawnedEmbeddingPid = null;
3714
3901
  killExistingEmbeddingServer(projectPath);
3715
3902
  // Clean stale socket
3716
3903
  if (fs.existsSync(projectSocketPath)) {
@@ -3723,28 +3910,26 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
3723
3910
  const revalLogFd = fs.openSync(revalLogPath, 'a');
3724
3911
  const proc = spawn(pythonPath, [embeddingScript], {
3725
3912
  cwd: path.dirname(embeddingScript),
3726
- env: { ...process.env, SPECMEM_SOCKET_PATH: projectSocketPath, SPECMEM_PROJECT_PATH: projectPath },
3913
+ env: { ...process.env, SPECMEM_EMBEDDING_SOCKET: projectSocketPath, SPECMEM_SOCKET_PATH: projectSocketPath, SPECMEM_PROJECT_PATH: projectPath },
3727
3914
  detached: true,
3728
3915
  stdio: ['ignore', revalLogFd, revalLogFd]
3729
3916
  });
3917
+ // CRITICAL FIX: Track spawned PID so killExistingEmbeddingServer() won't kill it
3918
+ spawnedEmbeddingPid = proc.pid;
3919
+ initLog(`[EMBED] Revalidation spawned embedding server with PID ${spawnedEmbeddingPid} - tracking to prevent self-kill`);
3730
3920
  proc.on('error', () => {});
3731
3921
  fs.closeSync(revalLogFd);
3732
3922
  proc.unref();
3733
- // Wait up to 15s for socket to appear
3734
- for (let i = 0; i < 30; i++) {
3735
- await new Promise(r => setTimeout(r, 500));
3736
- if (fs.existsSync(projectSocketPath)) {
3737
- activeSocketPath = projectSocketPath;
3738
- const healthy = await checkSocketHealth();
3739
- if (healthy) {
3740
- initLog(`Embedding server auto-restarted successfully, socket at ${projectSocketPath}`);
3741
- consecutiveEmbeddingFailures = 0;
3742
- revalidateBackoffMs = 1000;
3743
- return true;
3744
- }
3745
- }
3923
+ // DYNAMIC WAIT: Poll for readiness with adaptive intervals (max 5min)
3924
+ const revalReady = await waitForEmbeddingReady(projectSocketPath, { label: 'Revalidation restart' });
3925
+ if (revalReady) {
3926
+ activeSocketPath = projectSocketPath;
3927
+ initLog(`Embedding server auto-restarted successfully, socket at ${projectSocketPath}`);
3928
+ consecutiveEmbeddingFailures = 0;
3929
+ revalidateBackoffMs = 1000;
3930
+ return true;
3746
3931
  }
3747
- initLog('Embedding server auto-restart: socket did not appear within 15s');
3932
+ initLog('Embedding server auto-restart: server did not become ready');
3748
3933
  }
3749
3934
  } catch (restartErr) {
3750
3935
  initLog(`Embedding server auto-restart failed: ${restartErr.message || restartErr}`);
@@ -4152,6 +4337,136 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
4152
4337
  let processed = 0;
4153
4338
  let lastUIUpdate = Date.now(); // throttle UI updates to reduce overhead
4154
4339
 
4340
+ // Read maxConcurrent from model-config.json for parallel embedding batches
4341
+ let embeddingMaxConcurrent = 3; // default
4342
+ try {
4343
+ const mcPath = path.join(projectPath, 'specmem', 'model-config.json');
4344
+ if (fs.existsSync(mcPath)) {
4345
+ const mc = JSON.parse(fs.readFileSync(mcPath, 'utf8'));
4346
+ if (mc.embedding && mc.embedding.maxConcurrent) {
4347
+ embeddingMaxConcurrent = mc.embedding.maxConcurrent;
4348
+ }
4349
+ }
4350
+ } catch { /* use default */ }
4351
+ initLog(`Embedding concurrency: ${embeddingMaxConcurrent} parallel batches`);
4352
+
4353
+ // STORE-THEN-EMBED: For large codebases (>1000 files), store files first
4354
+ // then trigger Python server's batch processing (200 files/batch, direct DB)
4355
+ if (files.length > 1000 && activeSocketPath) {
4356
+ initLog(`Large codebase detected (${files.length} files) - using store-then-embed mode`);
4357
+ ui.setStatus('Store-then-embed mode (large codebase)');
4358
+ ui.setSubStatus('Phase 1: Storing files without embeddings...');
4359
+
4360
+ // Phase 1: Store all files without embeddings (fast - no socket calls)
4361
+ await runWithConcurrency(files, async (filePath, idx) => {
4362
+ try {
4363
+ const relativePath = path.relative(projectPath, filePath);
4364
+ const stats = fs.statSync(filePath);
4365
+ if (stats.size > 500 * 1024) { results.filesSkipped++; return; }
4366
+
4367
+ // Binary check
4368
+ const fd = fs.openSync(filePath, 'r');
4369
+ const buf = Buffer.alloc(Math.min(8192, stats.size));
4370
+ fs.readSync(fd, buf, 0, buf.length, 0);
4371
+ fs.closeSync(fd);
4372
+ if (buf.includes(0)) { results.filesSkipped++; return; }
4373
+
4374
+ const content = fs.readFileSync(filePath, 'utf-8');
4375
+ const contentHash = crypto.createHash('sha256').update(content).digest('hex');
4376
+
4377
+ // Skip if already indexed with embedding
4378
+ if (existingHashes.get(relativePath) === contentHash) {
4379
+ results.filesSkipped++;
4380
+ return;
4381
+ }
4382
+
4383
+ const ext = path.extname(filePath).toLowerCase();
4384
+ const lang = fileLanguageMap.get(filePath) || { id: 'unknown', name: 'Unknown' };
4385
+ const lineCount = content.split('\n').length;
4386
+ const fileId = uuidv4();
4387
+
4388
+ try {
4389
+ await pool.query(`DELETE FROM codebase_files WHERE file_path = $1 AND project_path = $2`, [relativePath, projectPath]);
4390
+ } catch { /* ignore */ }
4391
+
4392
+ await pool.query(`
4393
+ INSERT INTO codebase_files (
4394
+ id, file_path, absolute_path, file_name, extension,
4395
+ language_id, language_name, content, content_hash,
4396
+ size_bytes, line_count, project_path
4397
+ ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
4398
+ `, [
4399
+ fileId, relativePath, filePath, path.basename(filePath), ext,
4400
+ (lang.id || 'unknown').toLowerCase(), lang.name || 'Unknown',
4401
+ content, contentHash, stats.size, lineCount, projectPath
4402
+ ]);
4403
+
4404
+ results.filesIndexed++;
4405
+ if (idx % 100 === 0) {
4406
+ ui.setSubStatus(`Stored ${results.filesIndexed} / ${files.length} files...`);
4407
+ }
4408
+ } catch (e) {
4409
+ results.errors.push(path.relative(projectPath, filePath) + ': ' + e.message);
4410
+ }
4411
+ }, parallelLimit);
4412
+
4413
+ initLog(`Phase 1 complete: ${results.filesIndexed} files stored without embeddings`);
4414
+ ui.setSubStatus(`${results.filesIndexed} files stored, triggering server-side embedding...`);
4415
+
4416
+ // Phase 2: Trigger Python server's process_codebase endpoint
4417
+ try {
4418
+ const ssResult = await new Promise((resolve, reject) => {
4419
+ const client = new net.Socket();
4420
+ let buffer = '';
4421
+ let settled = false;
4422
+ const timeout = setTimeout(() => {
4423
+ if (!settled) { settled = true; client.destroy(); reject(new Error('Server-side processing timeout (10min)')); }
4424
+ }, 600000);
4425
+ client.on('connect', () => {
4426
+ client.write(JSON.stringify({ process_codebase: true, batch_size: 200, limit: 0, project_path: projectPath }) + '\n');
4427
+ });
4428
+ client.on('data', (data) => {
4429
+ buffer += data.toString();
4430
+ let newlineIdx;
4431
+ while ((newlineIdx = buffer.indexOf('\n')) !== -1) {
4432
+ if (settled) return;
4433
+ const line = buffer.slice(0, newlineIdx);
4434
+ buffer = buffer.slice(newlineIdx + 1);
4435
+ try {
4436
+ const resp = JSON.parse(line);
4437
+ if (resp.error) { clearTimeout(timeout); settled = true; client.end(); reject(new Error(resp.error)); return; }
4438
+ if (resp.status === 'processing') continue;
4439
+ if (resp.total_processed !== undefined || resp.processed !== undefined) {
4440
+ clearTimeout(timeout); settled = true; client.end(); resolve(resp); return;
4441
+ }
4442
+ } catch { /* keep waiting */ }
4443
+ }
4444
+ });
4445
+ client.on('error', (e) => { clearTimeout(timeout); if (!settled) { settled = true; reject(e); } });
4446
+ client.connect(activeSocketPath);
4447
+ });
4448
+
4449
+ results.embeddingsGenerated = ssResult.total_processed || ssResult.processed || 0;
4450
+ initLog(`Server-side embedding complete: ${results.embeddingsGenerated} embeddings generated`);
4451
+ ui.setSubStatus(`Server-side: ${results.embeddingsGenerated} embeddings generated`);
4452
+ } catch (ssErr) {
4453
+ initLog(`Server-side embedding failed: ${ssErr.message} - falling back to client-side`);
4454
+ ui.setSubStatus('Server-side failed, falling back to client-side...');
4455
+ // NOTE: despite the messages above, no client-side fallback runs - control continues to the early `return results` below, which skips the standard loop
4456
+ }
4457
+
4458
+ // Skip the standard indexing loop
4459
+ results.durationMs = Date.now() - startTime;
4460
+ ui.enableFileFeed(false);
4461
+ ui.slowRendering(0);
4462
+ await pool.end();
4463
+
4464
+ initLog(`=== CODEBASE INDEXING COMPLETE (store-then-embed) ===`);
4465
+ initLog(`Files: ${results.filesScanned} scanned, ${results.filesIndexed} indexed, ${results.embeddingsGenerated} embeddings`);
4466
+ initLog(`Duration: ${results.durationMs}ms`);
4467
+ return results;
4468
+ }
4469
+
4155
4470
  // Track current file for better progress display
4156
4471
  let currentFile = '';
4157
4472
  let currentFileChunk = 0;
@@ -4344,26 +4659,33 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
4344
4659
  try {
4345
4660
  ui.setSubStatus(`Generating ${fileEmbedTexts.length} embeddings in batch...`);
4346
4661
 
4347
- // Split into smaller batches if needed
4662
+ // Split into smaller batches
4663
+ const embBatches = [];
4348
4664
  for (let j = 0; j < fileEmbedTexts.length; j += EMBEDDING_BATCH_SIZE) {
4349
- const textBatch = fileEmbedTexts.slice(j, j + EMBEDDING_BATCH_SIZE);
4350
- // FIX: Show which batch we're processing
4351
- const batchNum = Math.floor(j / EMBEDDING_BATCH_SIZE) + 1;
4352
- const totalBatches = Math.ceil(fileEmbedTexts.length / EMBEDDING_BATCH_SIZE);
4353
- if (totalBatches > 1) {
4354
- ui.setSubStatus(`Embedding batch ${batchNum}/${totalBatches} (${textBatch.length} files)...`);
4665
+ embBatches.push({ start: j, texts: fileEmbedTexts.slice(j, j + EMBEDDING_BATCH_SIZE) });
4666
+ }
4667
+
4668
+ // Process embedding batches with concurrency (uses maxConcurrent from model config)
4669
+ const embResults = new Array(embBatches.length);
4670
+ await runWithConcurrency(embBatches, async (batch, batchIdx) => {
4671
+ const batchNum = batchIdx + 1;
4672
+ if (embBatches.length > 1) {
4673
+ ui.setSubStatus(`Embedding batch ${batchNum}/${embBatches.length} (${batch.texts.length} files)...`);
4355
4674
  }
4356
- // Show each file being embedded in this batch
4357
- for (let k = 0; k < textBatch.length; k++) {
4358
- const fileIdx = j + k;
4675
+ for (let k = 0; k < batch.texts.length; k++) {
4676
+ const fileIdx = batch.start + k;
4359
4677
  if (fileDataList[fileIdx]) {
4360
4678
  const fd = fileDataList[fileIdx];
4361
4679
  const sizeKB = (fd.stats.size / 1024).toFixed(1);
4362
4680
  ui.addFileToFeed(fd.relativePath, 'embedding', `(${sizeKB}KB)`);
4363
4681
  }
4364
4682
  }
4365
- const batchResults = await generateBatchEmbeddings(textBatch);
4366
- fileEmbeddings.push(...batchResults);
4683
+ embResults[batchIdx] = await generateBatchEmbeddings(batch.texts);
4684
+ }, embeddingMaxConcurrent);
4685
+
4686
+ // Flatten results in order
4687
+ for (const br of embResults) {
4688
+ if (br) fileEmbeddings.push(...br);
4367
4689
  }
4368
4690
 
4369
4691
  // Count successful embeddings
@@ -4377,7 +4699,6 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
4377
4699
  } catch (e) {
4378
4700
  initLog(`Batch embedding failed: ${e.message}`);
4379
4701
  results.embeddingsFailed = (results.embeddingsFailed || 0) + fileEmbedTexts.length;
4380
- // FIX: Categorize error types for better debugging
4381
4702
  results.errorTypes = results.errorTypes || {};
4382
4703
  const errorType = categorizeEmbeddingError(e);
4383
4704
  results.errorTypes[errorType] = (results.errorTypes[errorType] || 0) + 1;
@@ -4447,27 +4768,31 @@ async function indexCodebase(projectPath, ui, embeddingResult) {
4447
4768
  let defEmbeddings = [];
4448
4769
  if (defEmbedTexts.length > 0 && activeSocketPath) {
4449
4770
  try {
4450
- // FIX: Show definition embedding progress
4451
4771
  ui.setSubStatus(`Embedding ${defEmbedTexts.length} definitions...`);
4772
+ // Split into batches
4773
+ const defBatches = [];
4452
4774
  for (let j = 0; j < defEmbedTexts.length; j += EMBEDDING_BATCH_SIZE) {
4453
- const textBatch = defEmbedTexts.slice(j, j + EMBEDDING_BATCH_SIZE);
4454
- // FIX: Show batch progress for large definition sets
4455
- const batchNum = Math.floor(j / EMBEDDING_BATCH_SIZE) + 1;
4456
- const totalBatches = Math.ceil(defEmbedTexts.length / EMBEDDING_BATCH_SIZE);
4457
- if (totalBatches > 1) {
4458
- ui.setSubStatus(`[Defs] Batch ${batchNum}/${totalBatches} (${textBatch.length} defs)...`);
4775
+ defBatches.push({ start: j, texts: defEmbedTexts.slice(j, j + EMBEDDING_BATCH_SIZE) });
4776
+ }
4777
+ // Process with concurrency
4778
+ const defResults = new Array(defBatches.length);
4779
+ await runWithConcurrency(defBatches, async (batch, batchIdx) => {
4780
+ if (defBatches.length > 1) {
4781
+ ui.setSubStatus(`[Defs] Batch ${batchIdx + 1}/${defBatches.length} (${batch.texts.length} defs)...`);
4459
4782
  }
4460
- // Show each definition being embedded
4461
- for (let k = 0; k < textBatch.length; k++) {
4462
- const defIdx = j + k;
4783
+ for (let k = 0; k < batch.texts.length; k++) {
4784
+ const defIdx = batch.start + k;
4463
4785
  if (defDataList[defIdx]) {
4464
4786
  const dd = defDataList[defIdx];
4465
4787
  const defLabel = `${dd.def.type} ${dd.def.name}()`;
4466
4788
  ui.addFileToFeed(dd.relativePath, 'def', defLabel);
4467
4789
  }
4468
4790
  }
4469
- const batchResults = await generateBatchEmbeddings(textBatch);
4470
- defEmbeddings.push(...batchResults);
4791
+ defResults[batchIdx] = await generateBatchEmbeddings(batch.texts);
4792
+ }, embeddingMaxConcurrent);
4793
+ // Flatten in order
4794
+ for (const dr of defResults) {
4795
+ if (dr) defEmbeddings.push(...dr);
4471
4796
  }
4472
4797
  } catch (e) {
4473
4798
  initLog(`Definition batch embedding failed: ${e.message}`);
@@ -4918,6 +5243,8 @@ async function extractSessions(projectPath, ui, embeddingResult = null) {
4918
5243
  safeMkdir(socketsDir);
4919
5244
 
4920
5245
  // Kill any existing embedding server before spawning for session extraction
5246
+ // Reset spawnedEmbeddingPid since we're about to spawn a replacement
5247
+ spawnedEmbeddingPid = null;
4921
5248
  killExistingEmbeddingServer(projectPath);
4922
5249
 
4923
5250
  // Clean up stale socket
@@ -4940,6 +5267,7 @@ async function extractSessions(projectPath, ui, embeddingResult = null) {
4940
5267
  cwd: path.dirname(embeddingScript),
4941
5268
  env: {
4942
5269
  ...process.env,
5270
+ SPECMEM_EMBEDDING_SOCKET: projectSocketPath,
4943
5271
  SPECMEM_SOCKET_PATH: projectSocketPath,
4944
5272
  SPECMEM_PROJECT_PATH: projectPath
4945
5273
  },
@@ -4947,6 +5275,10 @@ async function extractSessions(projectPath, ui, embeddingResult = null) {
4947
5275
  stdio: ['ignore', sessEmbedLogFd, sessEmbedLogFd]
4948
5276
  });
4949
5277
 
5278
+ // CRITICAL FIX: Track spawned PID so killExistingEmbeddingServer() won't kill it
5279
+ spawnedEmbeddingPid = embeddingProcess.pid;
5280
+ initLog(`[EMBED] Session extraction spawned embedding server with PID ${spawnedEmbeddingPid} - tracking to prevent self-kill`);
5281
+
4950
5282
  // error handler BEFORE unref - prevents silent spawn failures
4951
5283
  embeddingProcess.on('error', (err) => {
4952
5284
  ui.setSubStatus('Embedding spawn error: ' + err.message);
@@ -4955,18 +5287,11 @@ async function extractSessions(projectPath, ui, embeddingResult = null) {
4955
5287
  fs.closeSync(sessEmbedLogFd);
4956
5288
  embeddingProcess.unref();
4957
5289
 
4958
- // Wait for socket to appear (up to 30s)
5290
+ // DYNAMIC WAIT: Poll for readiness with adaptive intervals (max 5min)
4959
5291
  ui.setSubStatus('Waiting for embedding server to start...');
4960
- for (let i = 0; i < 60; i++) {
4961
- await new Promise(r => setTimeout(r, 500));
4962
- if (fs.existsSync(projectSocketPath)) {
4963
- socketPath = projectSocketPath;
4964
- ui.setSubStatus('✓ Embedding server started!');
4965
- break;
4966
- }
4967
- if (i % 10 === 0) {
4968
- ui.setSubStatus(`Waiting for embedding server... (${i/2}s)`);
4969
- }
5292
+ const sessReady = await waitForEmbeddingReady(projectSocketPath, { ui, label: 'Session embedding server' });
5293
+ if (sessReady) {
5294
+ socketPath = projectSocketPath;
4970
5295
  }
4971
5296
 
4972
5297
  if (!socketPath) {
@@ -5186,12 +5511,15 @@ async function extractSessions(projectPath, ui, embeddingResult = null) {
5186
5511
  await pool.query('SELECT 1');
5187
5512
 
5188
5513
  // CRITICAL FIX: Create and set search_path to project schema for proper isolation
5189
- const schemaName = 'specmem_' + path.basename(projectPath).toLowerCase().replace(/[^a-z0-9]/g, '');
5514
+ const schemaName = 'specmem_' + path.basename(projectPath).toLowerCase().replace(/[^a-z0-9_]/g, '_').replace(/_+/g, '_').replace(/^_|_$/g, '');
5190
5515
 
5191
5516
  // Create schema if it doesn't exist
5192
5517
  await pool.query(`CREATE SCHEMA IF NOT EXISTS ${schemaName}`);
5193
5518
 
5194
- // Set search_path for this connection
5519
+ // Set search_path for ALL pool connections (not just the current one)
5520
+ pool.on('connect', (client) => {
5521
+ client.query(`SET search_path TO ${schemaName}, public`).catch(() => {});
5522
+ });
5195
5523
  await pool.query(`SET search_path TO ${schemaName}, public`);
5196
5524
  ui.setSubStatus(`Connected (schema: ${schemaName})`);
5197
5525
 
@@ -5995,26 +6323,44 @@ async function runAutoSetup(projectPath) {
5995
6323
  // SpecMem hooks take priority for same event types + matchers
5996
6324
  const mergedSettings = { ...existingSettings };
5997
6325
 
5998
- // Fix hardcoded /root paths in srcSettings for non-root users
6326
+ // Fix hardcoded paths in srcSettings for actual install environment
5999
6327
  const homeDir = os.homedir();
6328
+ const pkgRoot = path.resolve(__dirname, '..');
6000
6329
  let srcSettingsStr = JSON.stringify(srcSettings);
6001
6330
  if (homeDir !== '/root') {
6002
6331
  srcSettingsStr = srcSettingsStr.replace(/\/root\//g, homeDir + '/');
6003
6332
  srcSettingsStr = srcSettingsStr.replace(/"\/root"/g, '"' + homeDir + '"');
6004
6333
  }
6334
+ // Fix SPECMEM_PKG to point to actual install location (not dev /specmem)
6335
+ srcSettingsStr = srcSettingsStr.replace(/"SPECMEM_PKG":\s*"\/specmem"/g, `"SPECMEM_PKG": "${pkgRoot}"`);
6336
+ // Fix SPECMEM_HOME to use actual home directory
6337
+ srcSettingsStr = srcSettingsStr.replace(/"SPECMEM_HOME":\s*"\/root\/.specmem"/g, `"SPECMEM_HOME": "${path.join(homeDir, '.specmem')}"`);
6005
6338
  const fixedSrcSettings = JSON.parse(srcSettingsStr);
6006
6339
 
6007
6340
  if (fixedSrcSettings.hooks) {
6008
6341
  mergedSettings.hooks = mergeHooksDeep(existingSettings.hooks || {}, fixedSrcSettings.hooks);
6009
6342
  }
6010
6343
 
6011
- // Helper function defined inline for merge logic
6344
/**
 * Decide whether a single hook entry was installed by specmem.
 *
 * An entry counts as a specmem hook when its `command` string contains
 * "specmem" or the name of one of the hook scripts listed below.
 *
 * @param {{command?: string}|null|undefined} hookEntry - One hook entry taken
 *   from a hook group's `hooks` array.
 * @returns {boolean} `true` when the command references a specmem hook;
 *   `false` otherwise, including when the entry is missing or its `command`
 *   is absent or not a string (such entries are treated as user-owned).
 */
function isSpecmemHookCmd(hookEntry) {
  // The marker list lives inside the function on purpose: this declaration is
  // hoisted and can be invoked (via mergeHooksDeep) before the statements
  // around it have executed, so it must not rely on a `const` declared in the
  // enclosing scope.
  const specmemMarkers = [
    'specmem',
    'team-comms-enforcer',
    'agent-loading-hook',
    'agent-output-interceptor',
    'task-progress-hook',
    'subagent-loading-hook',
    'use-code-pointers',
    'post-write-memory-hook',
    'bullshit-radar',
    'input-aware-improver',
    'smart-context-hook',
  ];

  // Malformed entries (null, missing or non-string command) must not throw:
  // a single bad entry in the user's settings would otherwise abort the merge.
  const cmd = hookEntry == null ? undefined : hookEntry.command;
  if (typeof cmd !== 'string') {
    return false;
  }

  return specmemMarkers.some((marker) => cmd.includes(marker));
}
6354
+
6355
+ // Deep merge hooks: specmem hooks take priority per-matcher, but user's
6356
+ // custom (non-specmem) hooks within the same matcher are preserved.
6357
+ // On re-init, old specmem hooks are cleaned up and replaced with new ones.
6012
6358
  function mergeHooksDeep(existingHooks, specmemHooks) {
6013
6359
  const merged = {};
6014
6360
 
6015
- // Copy all existing event types first
6361
+ // Copy all existing event types (deep clone to avoid mutations)
6016
6362
  for (const eventType of Object.keys(existingHooks)) {
6017
- merged[eventType] = [...existingHooks[eventType]];
6363
+ merged[eventType] = JSON.parse(JSON.stringify(existingHooks[eventType]));
6018
6364
  }
6019
6365
 
6020
6366
  // Process each specmem event type
@@ -6022,44 +6368,61 @@ async function runAutoSetup(projectPath) {
6022
6368
  const specmemGroups = specmemHooks[eventType];
6023
6369
 
6024
6370
  if (!merged[eventType]) {
6025
- // No existing hooks for this event type, use specmem's
6026
6371
  merged[eventType] = specmemGroups;
6027
6372
  continue;
6028
6373
  }
6029
6374
 
6030
- // Build set of specmem matchers (undefined = catch-all)
6031
- const specmemMatchers = new Map();
6375
+ // Build specmem's desired state: one entry per matcher
6376
+ const specmemByMatcher = new Map();
6032
6377
  for (const group of specmemGroups) {
6033
- const matcherKey = group.matcher || '__CATCHALL__';
6034
- specmemMatchers.set(matcherKey, group);
6378
+ const key = group.matcher || '__CATCHALL__';
6379
+ if (!specmemByMatcher.has(key)) {
6380
+ specmemByMatcher.set(key, { ...group, hooks: [...(group.hooks || [])] });
6381
+ } else {
6382
+ // Consolidate duplicate matchers from source
6383
+ specmemByMatcher.get(key).hooks.push(...(group.hooks || []));
6384
+ }
6035
6385
  }
6036
6386
 
6037
- // Filter existing groups:
6038
- // - Remove groups with same matcher as specmem (specmem takes priority)
6039
- // - Remove groups that contain specmem hook commands (avoid duplicates)
6040
- const preservedGroups = merged[eventType].filter(group => {
6041
- const matcherKey = group.matcher || '__CATCHALL__';
6042
-
6043
- // If specmem has a hook for this matcher, remove existing
6044
- if (specmemMatchers.has(matcherKey)) {
6045
- return false;
6387
+ // Extract user's custom (non-specmem) hooks per matcher from existing config
6388
+ const userHooksByMatcher = new Map();
6389
+ for (const group of merged[eventType]) {
6390
+ const key = group.matcher || '__CATCHALL__';
6391
+ const userHooks = (group.hooks || []).filter(h => !isSpecmemHookCmd(h));
6392
+ if (userHooks.length > 0) {
6393
+ if (!userHooksByMatcher.has(key)) {
6394
+ userHooksByMatcher.set(key, []);
6395
+ }
6396
+ userHooksByMatcher.get(key).push(...userHooks);
6046
6397
  }
6398
+ }
6047
6399
 
6048
- // Check if this is a specmem hook (to avoid duplicates on re-init)
6049
- const groupStr = JSON.stringify(group);
6050
- if (groupStr.includes('specmem-') ||
6051
- groupStr.includes('/specmem/') ||
6052
- groupStr.includes('team-comms-enforcer') ||
6053
- groupStr.includes('smart-context-hook') ||
6054
- groupStr.includes('agent-loading-hook')) {
6055
- return false;
6056
- }
6400
+ // Build final result for this event type
6401
+ const result = [];
6402
+ const handledMatchers = new Set();
6057
6403
 
6058
- return true; // Preserve user's custom hooks
6059
- });
6404
+ // First: all specmem matchers (with user's custom hooks merged in)
6405
+ for (const [key, group] of specmemByMatcher) {
6406
+ const userHooks = userHooksByMatcher.get(key) || [];
6407
+ result.push({ ...group, hooks: [...group.hooks, ...userHooks] });
6408
+ handledMatchers.add(key);
6409
+ }
6410
+
6411
+ // Then: existing matchers that specmem doesn't touch
6412
+ for (const group of merged[eventType]) {
6413
+ const key = group.matcher || '__CATCHALL__';
6414
+ if (handledMatchers.has(key)) continue;
6415
+ handledMatchers.add(key);
6416
+
6417
+ // Clean orphaned specmem hooks from non-specmem matchers
6418
+ const cleanHooks = (group.hooks || []).filter(h => !isSpecmemHookCmd(h));
6419
+ if (cleanHooks.length > 0) {
6420
+ result.push({ ...group, hooks: cleanHooks });
6421
+ }
6422
+ // If 100% specmem hooks and specmem no longer uses this matcher, drop it
6423
+ }
6060
6424
 
6061
- // Merge: user's preserved hooks first, then specmem hooks
6062
- merged[eventType] = [...preservedGroups, ...specmemGroups];
6425
+ merged[eventType] = result;
6063
6426
  }
6064
6427
 
6065
6428
  return merged;