npm - specmem-hardwicksoftware - Versions diffs - 3.7.29 → 3.7.31 - Mend

specmem-hardwicksoftware 3.7.29 → 3.7.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/bootstrap.cjs +19 -0
package/claude-hooks/settings.json +99 -0
package/claude-hooks/specmem-search-enforcer.cjs +229 -0
package/claude-hooks/specmem-search-tracker.cjs +71 -0
package/dist/config.js +11 -16
package/dist/db/connectionPoolGoBrrr.js +3 -3
package/dist/index.js +21 -4
package/dist/mcp/compactionProxy.js +52 -17
package/dist/mcp/embeddingServerManager.js +15 -1
package/dist/mcp/mcpProtocolHandler.js +22 -4
package/dist/mcp/specMemServer.js +16 -3
package/dist/mcp/toolRegistry.js +19 -21
package/dist/tools/goofy/checkSyncStatus.js +14 -7
package/dist/watcher/fileWatcher.js +57 -20
package/dist/watcher/syncChecker.js +11 -7
package/package.json +1 -1
package/scripts/global-postinstall.cjs +7 -2
package/scripts/specmem-init.cjs +91 -111
package/specmem/model-config.json +26 -6
package/specmem/supervisord.conf +1 -1
package/specmem/user-config.json +12 -0

package/bootstrap.cjs CHANGED Viewed

@@ -4919,6 +4919,25 @@ async function autoInstallThisMf() {
       // Non-fatal - MCP server will retry
     }
+    // Acquire socket lock so statusbar/health checks can detect us
+    const projectPath_uf = getProjectPath();
+    const lockAcquired_uf = tryAcquireSocketLock(projectPath_uf);
+    if (lockAcquired_uf) {
+      writeProjectPidFile(projectPath_uf, process.pid);
+      writeInstanceState(projectPath_uf, {
+        pid: process.pid,
+        projectPath: projectPath_uf,
+        projectHash: hashProjectPath(projectPath_uf),
+        startTime: new Date().toISOString(),
+        status: 'running',
+        bootstrapVersion: '1.0.0',
+        mode: 'ultra-fast'
+      });
+      startupLog('Ultra-fast path: socket lock acquired, PID file written');
+    } else {
+      startupLog('Ultra-fast path: could not acquire socket lock (non-fatal)');
+    }
     // Start server BEFORE any other operations
     // The server handles its own deferred initialization
     // CRITICAL: startServer() is now async and imports the ES module directly

package/claude-hooks/settings.json CHANGED Viewed

@@ -55,6 +55,14 @@
       {
         "matcher": "Read",
         "hooks": [
+          {
+            "type": "command",
+            "command": "node /root/.claude/hooks/specmem-search-enforcer.cjs",
+            "timeout": 2,
+            "env": {
+              "SPECMEM_PROJECT_PATH": "${cwd}"
+            }
+          },
           {
             "type": "command",
             "command": "node /root/.claude/hooks/team-comms-enforcer.cjs",
@@ -84,6 +92,14 @@
       {
         "matcher": "Edit",
         "hooks": [
+          {
+            "type": "command",
+            "command": "node /root/.claude/hooks/specmem-search-enforcer.cjs",
+            "timeout": 2,
+            "env": {
+              "SPECMEM_PROJECT_PATH": "${cwd}"
+            }
+          },
           {
             "type": "command",
             "command": "node /root/.claude/hooks/team-comms-enforcer.cjs",
@@ -97,6 +113,11 @@
       {
         "matcher": "Write",
         "hooks": [
+          {
+            "type": "command",
+            "command": "node /root/.claude/hooks/specmem-search-enforcer.cjs",
+            "timeout": 2
+          },
           {
             "type": "command",
             "command": "node /root/.claude/hooks/team-comms-enforcer.cjs",
@@ -110,6 +131,11 @@
       {
         "matcher": "Grep",
         "hooks": [
+          {
+            "type": "command",
+            "command": "node /root/.claude/hooks/specmem-search-enforcer.cjs",
+            "timeout": 2
+          },
           {
             "type": "command",
             "command": "node /root/.claude/hooks/team-comms-enforcer.cjs",
@@ -147,6 +173,11 @@
       {
         "matcher": "Glob",
         "hooks": [
+          {
+            "type": "command",
+            "command": "node /root/.claude/hooks/specmem-search-enforcer.cjs",
+            "timeout": 2
+          },
           {
             "type": "command",
             "command": "node /root/.claude/hooks/team-comms-enforcer.cjs",
@@ -184,6 +215,14 @@
       {
         "matcher": "Bash",
         "hooks": [
+          {
+            "type": "command",
+            "command": "node /root/.claude/hooks/specmem-search-enforcer.cjs",
+            "timeout": 2,
+            "env": {
+              "SPECMEM_PROJECT_PATH": "${cwd}"
+            }
+          },
           {
             "type": "command",
             "command": "node /root/.claude/hooks/team-comms-enforcer.cjs",
@@ -306,6 +345,66 @@
       }
     ],
     "PostToolUse": [
+      {
+        "matcher": "Grep",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node /root/.claude/hooks/specmem-search-tracker.cjs",
+            "timeout": 5
+          }
+        ]
+      },
+      {
+        "matcher": "Glob",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node /root/.claude/hooks/specmem-search-tracker.cjs",
+            "timeout": 5
+          }
+        ]
+      },
+      {
+        "matcher": "Read",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node /root/.claude/hooks/specmem-search-tracker.cjs",
+            "timeout": 5
+          }
+        ]
+      },
+      {
+        "matcher": "mcp__specmem__find_memory",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node /root/.claude/hooks/specmem-search-tracker.cjs",
+            "timeout": 5
+          }
+        ]
+      },
+      {
+        "matcher": "mcp__specmem__find_code_pointers",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node /root/.claude/hooks/specmem-search-tracker.cjs",
+            "timeout": 5
+          }
+        ]
+      },
+      {
+        "matcher": "mcp__specmem__drill_down",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node /root/.claude/hooks/specmem-search-tracker.cjs",
+            "timeout": 5
+          }
+        ]
+      },
       {
         "matcher": "Task",
         "hooks": [

package/claude-hooks/specmem-search-enforcer.cjs ADDED Viewed

@@ -0,0 +1,229 @@
+#!/usr/bin/env node
+/**
+ * SPECMEM SEARCH ENFORCER - PreToolUse Hook
+ * ==========================================
+ *
+ * HARD BLOCKS agents that skip SpecMem semantic search.
+ *
+ * Rules:
+ * 1. Agents CANNOT do ANYTHING until they've called find_memory or find_code_pointers at least once
+ * 2. Every 3 searches (Grep/Glob/Read/Bash), agents MUST call find_code_pointers again
+ * 3. After find_code_pointers, agents MUST drill_down before continuing
+ * 4. 2nd search in a cycle: WARNING injected
+ * 5. 3rd search in a cycle: HARD BLOCK (deny)
+ * 6. Tool calls and other non-search tools DO NOT reset the counter
+ * 7. Main session (non-agent) is never blocked: every tool call is allowed and no output is emitted
+ *
+ * State: /tmp/specmem-search-enforcer-{session}.json
+ */
+const fs = require('fs');
+const path = require('path');
+// --- Agent detection (inline, no require chain issues): true when any environment marker below is truthy ---
+function isAgent() { // NOTE(review): specmem-search-tracker.cjs checks a different marker set (CLAUDE_AGENT === '1', AGENT_ID, WORKTREE_PATH, ...) — confirm the two are meant to differ
+  const markers = [
+    process.env.CLAUDE_AGENT === 'true', // only the literal string 'true' counts
+    process.env.CLAUDE_AGENT_TYPE, // any non-empty value
+    process.env.TASK_ID, // any non-empty value
+    (process.env.CLAUDE_WORKTREE || '').length > 0, // set and non-empty
+    (process.env.CLAUDE_SESSION_ID || '').includes('task-'), // session id contains the substring 'task-'
+  ];
+  return markers.some(Boolean); // a single truthy marker is enough
+}
+// --- Config: tool-name groups and thresholds used by main() ---
+const SEARCH_TOOLS = ['Grep', 'Glob', 'Read', 'Bash']; // calls to these count toward the search cycle
+const WRITE_TOOLS = ['Edit', 'Write']; // subject to the deny rules but never counted
+const ALL_BLOCKED_TOOLS = [...SEARCH_TOOLS, ...WRITE_TOOLS]; // everything the deny rules (1 and 3) apply to
+const SPECMEM_SEARCH_TOOLS = [ // any of these sets hasUsedSpecmemSearch
+  'mcp__specmem__find_memory',
+  'mcp__specmem__find_code_pointers',
+  'mcp__specmem__smart_search',
+];
+const SPECMEM_DRILLDOWN_TOOLS = [ // any of these clears pendingDrilldown
+  'mcp__specmem__drill_down',
+  'mcp__specmem__get_memory',
+  'mcp__specmem__get_memory_by_id',
+  'mcp__specmem__getMemoryFull',
+];
+const SPECMEM_CODE_POINTER_TOOLS = [ // subset of SPECMEM_SEARCH_TOOLS that also resets the counter and sets pendingDrilldown
+  'mcp__specmem__find_code_pointers',
+];
+const SEARCH_CYCLE_LIMIT = 3; // block on 3rd search (counter >= 3 is denied)
+const WARN_AT = 2; // warn on 2nd search (counter >= 2 is allowed with a warning)
+// --- State management: one JSON file per session under /tmp ---
+function getStateFile() { // returns the state file path for the current session
+  const sessionId = process.env.CLAUDE_SESSION_ID || process.env.TASK_ID || 'default'; // with neither variable set, all callers share the 'default' file
+  const sanitized = sessionId.replace(/[^a-zA-Z0-9_-]/g, '_'); // keep only [a-zA-Z0-9_-] so the id cannot add path separators
+  return `/tmp/specmem-search-enforcer-${sanitized}.json`;
+}
+function getState() { // loads the persisted state; falls back to freshState() when the file is missing, unreadable, or expired
+  try {
+    const f = getStateFile();
+    if (fs.existsSync(f)) {
+      const data = JSON.parse(fs.readFileSync(f, 'utf-8'));
+      // Expire after 30 min: an older timestamp yields a fresh state (the stale file stays on disk until it is overwritten)
+      if (data.timestamp && (Date.now() - data.timestamp > 30 * 60 * 1000)) {
+        return freshState();
+      }
+      return data; // returned exactly as parsed; no shape validation is done here
+    }
+  } catch (e) {
+    try { fs.unlinkSync(getStateFile()); } catch (_) {} // read/parse failed: best-effort delete of the bad file
+  }
+  return freshState();
+}
+function freshState() { // returns a new state object with every rule at its starting value
+  return {
+    hasUsedSpecmemSearch: false, // set once any SPECMEM_SEARCH_TOOLS call is seen (rule 1)
+    searchesSinceLastCodePointers: 0, // incremented per SEARCH_TOOLS call, reset by find_code_pointers
+    pendingDrilldown: false, // set by find_code_pointers, cleared by a SPECMEM_DRILLDOWN_TOOLS call (rule 3)
+    lastCodePointersQuery: null, // query text echoed in the rule-3 deny message
+    timestamp: Date.now(), // compared against the 30-minute expiry in getState()
+  };
+}
+function saveState(state) { // stamps `state` with the current time (mutating it) and writes it to the session state file
+  try {
+    state.timestamp = Date.now(); // every save restarts the 30-minute expiry window
+    fs.writeFileSync(getStateFile(), JSON.stringify(state, null, 2));
+  } catch (e) { /* silent: a failed write is deliberately ignored */ }
+}
+// --- stdin reader with timeout: resolves (never rejects) with whatever text was read before end, error, or timeout ---
+function readStdinWithTimeout(timeoutMs = 5000) { // NOTE(review): settings.json registers this hook with "timeout": 2 — if that value is in seconds, this 5000 ms timer can never fire first; confirm
+  return new Promise((resolve) => {
+    let input = '';
+    const timer = setTimeout(() => {
+      process.stdin.destroy(); // stop reading
+      resolve(input); // hand back the partial input collected so far
+    }, timeoutMs);
+    process.stdin.setEncoding('utf8');
+    process.stdin.on('data', (chunk) => { input += chunk; });
+    process.stdin.on('end', () => { clearTimeout(timer); resolve(input); });
+    process.stdin.on('error', () => { clearTimeout(timer); resolve(input); }); // errors are treated like end-of-input
+  });
+}
+// --- Main: read the hook payload (JSON) from stdin, update the session state, and print a decision when a rule fires; always exits 0 ---
+async function main() {
+  const inputData = await readStdinWithTimeout(5000);
+  try {
+    const hookData = JSON.parse(inputData); // empty or invalid input throws and is handled by the catch below (allow)
+    const toolName = hookData.tool_name || '';
+    const toolInput = hookData.tool_input || {};
+    const state = getState();
+    // --- SpecMem search tool used (find_memory, find_code_pointers, smart_search): record it and allow. NOTE(review): the PreToolUse entries added to settings.json in this release match only Read/Edit/Write/Grep/Glob/Bash — confirm an mcp__specmem__* matcher also runs this hook, otherwise this branch and the drilldown branch never execute ---
+    if (SPECMEM_SEARCH_TOOLS.includes(toolName)) {
+      state.hasUsedSpecmemSearch = true;
+      // find_code_pointers resets the search counter AND sets drilldown pending
+      if (SPECMEM_CODE_POINTER_TOOLS.includes(toolName)) {
+        state.searchesSinceLastCodePointers = 0;
+        state.pendingDrilldown = true;
+        state.lastCodePointersQuery = toolInput.query || '(unknown)'; // echoed later in the rule-3 deny message
+      }
+      saveState(state);
+      process.exit(0); // allow
+    }
+    // --- Drilldown tool used (drill_down, get_memory, etc): clear the pending flag and allow ---
+    if (SPECMEM_DRILLDOWN_TOOLS.includes(toolName)) {
+      state.pendingDrilldown = false;
+      saveState(state);
+      process.exit(0); // allow
+    }
+    // --- Non-agent (main session): always allowed; nothing is printed and nothing is counted ---
+    if (!isAgent()) {
+      process.exit(0); // allow everything for main session
+    }
+    // === AGENT ENFORCEMENT BELOW ===
+    // --- Rule 1: Agent hasn't used ANY specmem search yet -> HARD BLOCK everything in ALL_BLOCKED_TOOLS ---
+    if (!state.hasUsedSpecmemSearch && ALL_BLOCKED_TOOLS.includes(toolName)) {
+      const output = {
+        hookSpecificOutput: {
+          hookEventName: 'PreToolUse',
+          permissionDecision: 'deny',
+          permissionDecisionReason: `BLOCKED: You MUST call find_memory or find_code_pointers BEFORE using ${toolName}. No Read/Write/Grep/Glob/Edit/Bash allowed until you search SpecMem first. Run: mcp__specmem__find_code_pointers({query: "your task description"})`
+        }
+      };
+      console.log(JSON.stringify(output));
+      process.exit(0); // the deny is carried by the printed JSON, not by the exit code
+    }
+    // --- Rule 3: Pending drilldown after find_code_pointers -> BLOCK until drilled ---
+    if (state.pendingDrilldown && ALL_BLOCKED_TOOLS.includes(toolName)) {
+      const output = {
+        hookSpecificOutput: {
+          hookEventName: 'PreToolUse',
+          permissionDecision: 'deny',
+          permissionDecisionReason: `BLOCKED: You ran find_code_pointers("${state.lastCodePointersQuery}") but haven't drilled down into the results yet. You MUST call drill_down({drilldownID: N}) or get_memory({id: "ID"}) before using ${toolName}. Drill into the results first!`
+        }
+      };
+      console.log(JSON.stringify(output));
+      process.exit(0);
+    }
+    // --- Count searches for cycle enforcement (the counter is saved before the decision, so a denied call still counts) ---
+    if (SEARCH_TOOLS.includes(toolName)) {
+      state.searchesSinceLastCodePointers++;
+      saveState(state);
+      // Rule 5: 3rd search -> HARD BLOCK (and every later search until find_code_pointers resets the counter)
+      if (state.searchesSinceLastCodePointers >= SEARCH_CYCLE_LIMIT) {
+        const output = {
+          hookSpecificOutput: {
+            hookEventName: 'PreToolUse',
+            permissionDecision: 'deny',
+            permissionDecisionReason: `BLOCKED: You've done ${state.searchesSinceLastCodePointers} searches without calling find_code_pointers. Every 3 searches you MUST call mcp__specmem__find_code_pointers to refresh your semantic context. Do it now before continuing.`
+          }
+        };
+        console.log(JSON.stringify(output));
+        process.exit(0);
+      }
+      // Rule 4: 2nd search -> WARNING (the call is still allowed)
+      if (state.searchesSinceLastCodePointers >= WARN_AT) {
+        const output = {
+          hookSpecificOutput: {
+            hookEventName: 'PreToolUse',
+            permissionDecision: 'allow',
+            permissionDecisionReason: `WARNING: ${state.searchesSinceLastCodePointers}/${SEARCH_CYCLE_LIMIT} searches used. You MUST call find_code_pointers before your next search or you'll be blocked. Consider running it now.`,
+            additionalContext: `\u26a0\ufe0f SEARCH LIMIT WARNING: ${state.searchesSinceLastCodePointers}/${SEARCH_CYCLE_LIMIT} searches since last find_code_pointers. Next search WILL BE BLOCKED. Run mcp__specmem__find_code_pointers now.`
+          }
+        };
+        console.log(JSON.stringify(output));
+        process.exit(0);
+      }
+    }
+    // --- Write tools do NOT change the search counter; they only re-save the state, which refreshes its timestamp ---
+    if (WRITE_TOOLS.includes(toolName)) {
+      // Don't count writes toward search limit, but they're allowed if we passed the checks above
+      saveState(state);
+    }
+    // Allow everything else (Task, ToolSearch, MCP tools, etc)
+    process.exit(0);
+  } catch (error) {
+    // Parse failure = allow (don't break the session)
+    process.exit(0);
+  }
+}
+main().catch(() => process.exit(0)); // any unexpected rejection also ends as exit 0 (allow)

package/claude-hooks/specmem-search-tracker.cjs ADDED Viewed

@@ -0,0 +1,71 @@
+#!/usr/bin/env node
+/**
+ * SPECMEM SEARCH TRACKER - PostToolUse Hook
+ * ===========================================
+ *
+ * Tracks when agents use SpecMem semantic tools (find_memory, find_code_pointers, drill_down)
+ * and resets the search counter so enforcer unblocks.
+ *
+ * Also tracks Grep/Glob calls to increment search counter.
+ *
+ * AGENTS ONLY - main session skipped.
+ */
+const fs = require('fs');
+const path = require('path');
+// Agent detection: true when any of the environment checks below matches
+function isAgent() { // NOTE(review): specmem-search-enforcer.cjs uses a different marker set (CLAUDE_AGENT_TYPE, CLAUDE_WORKTREE, 'task-' in CLAUDE_SESSION_ID) — confirm the two are meant to differ
+  const e = process.env;
+  if (e.CLAUDE_AGENT === '1' || e.CLAUDE_AGENT === 'true') return true; // explicit flag
+  if (e.TASK_ID || e.AGENT_ID || e.WORKTREE_PATH) return true; // any one of these set and non-empty
+  if (e.CLAUDE_CODE_ENTRYPOINT === 'task') return true;
+  const ppid = e.CLAUDE_PARENT_PID || e.PARENT_PID;
+  if (ppid && ppid !== '1' && ppid !== String(process.pid)) return true; // a parent pid variable that is neither '1' nor this process's own pid
+  return false;
+}
+function main() { // updates the per-session tracker state for one tool call and always prints an 'allow' decision
+  if (!isAgent()) {
+    console.log(JSON.stringify({ permissionDecision: 'allow' })); // main session: nothing is tracked
+    return;
+  }
+  const toolName = process.env.TOOL_NAME || ''; // NOTE(review): read from the environment, whereas the enforcer reads tool_name from stdin JSON — confirm the hook runner sets TOOL_NAME; if it is empty none of the patterns below match
+  const sessionId = process.env.SESSION_ID || process.env.CLAUDE_SESSION_ID || process.ppid?.toString() || 'unknown'; // NOTE(review): used in the path below without sanitizing, unlike the enforcer's getStateFile()
+  const stateDir = '/tmp/specmem-enforcer';
+  const stateFile = path.join(stateDir, `${sessionId}.json`); // NOTE(review): the enforcer reads /tmp/specmem-search-enforcer-{session}.json, a different file — confirm something actually consumes this one
+  try { fs.mkdirSync(stateDir, { recursive: true }); } catch {} // best-effort; failure is ignored
+  let state = { searchCount: 0, specmemUsed: false, drilldownRequired: false, drilldownDone: false }; // defaults when no state file is readable
+  try { state = JSON.parse(fs.readFileSync(stateFile, 'utf8')); } catch {} // missing or invalid file keeps the defaults
+  const isSpecmemSearch = /find_memory|find_code_pointers/i.test(toolName); // substring match, case-insensitive
+  const isDrilldown = /drill_down/i.test(toolName); // substring match, case-insensitive
+  const isSearchTool = /^(Grep|Glob)$/i.test(toolName); // whole-name match; Read is not counted here
+  if (isSpecmemSearch) {
+    state.specmemUsed = true;
+    state.searchCount = 0; // Reset search counter
+    // find_code_pointers requires drill_down after
+    if (/find_code_pointers/i.test(toolName)) {
+      state.drilldownRequired = true;
+      state.drilldownDone = false;
+    }
+  }
+  if (isDrilldown) {
+    state.drilldownDone = true;
+    state.drilldownRequired = false;
+  }
+  if (isSearchTool) {
+    state.searchCount = (state.searchCount || 0) + 1; // tolerates a state file that lacks searchCount
+  }
+  try { fs.writeFileSync(stateFile, JSON.stringify(state)); } catch {} // best-effort; a failed write is ignored
+  console.log(JSON.stringify({ permissionDecision: 'allow' })); // this hook never denies
+}
+main(); // runs synchronously at load

package/dist/config.js CHANGED Viewed

@@ -837,24 +837,19 @@ export function loadConfig() {
     const parsedUrl = parseDatabaseUrl();
     // Priority: DATABASE_URL > ENV VAR > .specmemrc > default
     // Per-project isolation still applies if DATABASE_URL not set
-    // Container mode: use unix socket dir as host for postgres connection
-    // When SPECMEM_CONTAINER_MODE is set, or container run dir has postgres socket
-    // FIX: Try both {projectPath}/specmem/run and {projectPath}/run to handle
-    // the case where projectPath IS the specmem dir (avoids specmem/specmem/run)
+    // Container mode: postgres via unix socket in specmem/run/ (bind-mounted from container /data/run)
+    // Socket appears after container starts PG — dir must exist, socket arrives when PG is ready
     let containerRunDir = path.join(projectPath, 'specmem', 'run');
-    const containerSocketExists = (() => {
-        try {
-            if (fs.existsSync(path.join(containerRunDir, '.s.PGSQL.5432'))) return true;
-            // Fallback: projectPath might BE the specmem dir
-            const altRunDir = path.join(projectPath, 'run');
-            if (fs.existsSync(path.join(altRunDir, '.s.PGSQL.5432'))) {
-                containerRunDir = altRunDir;
-                return true;
-            }
-            return false;
-        } catch { return false; }
-    })();
+    // Also check projectPath/run in case projectPath IS the specmem dir
+    if (!fs.existsSync(containerRunDir) && fs.existsSync(path.join(projectPath, 'run'))) {
+        containerRunDir = path.join(projectPath, 'run');
+    }
+    const containerSocketExists = fs.existsSync(path.join(containerRunDir, '.s.PGSQL.5432'));
     const isContainerMode = process.env['SPECMEM_CONTAINER_MODE'] === 'true' || containerSocketExists;
+    if (isContainerMode) {
+        // Ensure socket directory exists on host — container bind-mounts here
+        try { fs.mkdirSync(containerRunDir, { recursive: true }); } catch (e) { /* may already exist */ }
+    }
     const defaultDbHost = isContainerMode ? containerRunDir : 'localhost';
     const dbHost = parsedUrl?.host || process.env['SPECMEM_DB_HOST'] || getRcValue(rc, 'database.host', defaultDbHost);
     const dbPort = parsedUrl?.port || projectDbPort;

package/dist/db/connectionPoolGoBrrr.js CHANGED Viewed

@@ -17,10 +17,10 @@ types.setTypeParser(20, (val) => {
     return Number.isSafeInteger(n) ? n : BigInt(val);
 }); // bigint - safe for values > 2^53
 const DEFAULT_POOL_SETTINGS = {
-    maxConnections: 20, // safe default - each project creates its own pool, 100 would exhaust PG max_connections
-    minConnections: 5, // keep some warm connections fr
+    maxConnections: 6, // tuned for 4-core 8GB laptop - 20 was exhausting PG under concurrent tool calls
+    minConnections: 2, // keep a couple warm, don't hog connections
     idleTimeoutMs: 30000, // 30 sec timeout on idle connections
-    connectionTimeoutMs: 30000, // 30 sec to establish connection
+    connectionTimeoutMs: 10000, // 10 sec to establish connection - fail fast under load
     statementTimeoutMs: 30000, // 30 sec statement timeout
     queryTimeoutMs: 60000, // 1 min query timeout for thicc queries
     healthCheckIntervalMs: 30000, // health check every 30 sec

package/dist/index.js CHANGED Viewed

@@ -1608,11 +1608,28 @@ class LocalEmbeddingProvider {
      * Runs in background to not block embedding requests
      */
     tryRestartContainer() {
-        // Container mode: brain container manages embedding server — do NOT interfere.
-        // Self-healing attempts override sandboxSocketPath to wrong path (embeddings.sock vs embed.sock)
-        // and try to start conflicting Docker containers, causing CPU/RAM waste and socket confusion.
+        // Container mode: restart the brain container instead of spawning a new process
         if (process.env.SPECMEM_CONTAINER_MODE === 'true') {
-            logger.debug('container mode active — brain manages embedding server, skipping self-heal restart');
+            const now = Date.now();
+            if (now - this.lastRestartAttempt < LocalEmbeddingProvider.RESTART_COOLDOWN_MS) {
+                logger.debug('container restart cooldown active, skipping');
+                return;
+            }
+            this.lastRestartAttempt = now;
+            try {
+                const { getContainerManager } = require('./container/containerManager.js');
+                const projectPath = process.env.SPECMEM_PROJECT_PATH || process.cwd();
+                const cm = getContainerManager(projectPath);
+                logger.info({ projectPath }, '[LocalEmbeddingProvider] Restarting brain container...');
+                cm.start().then(() => {
+                    logger.info('[LocalEmbeddingProvider] Brain container restarted');
+                    this.restartAttempts = 0;
+                }).catch(err => {
+                    logger.error({ error: err?.message }, '[LocalEmbeddingProvider] Brain container restart failed');
+                });
+            } catch (err) {
+                logger.error({ error: err?.message }, '[LocalEmbeddingProvider] Failed to get container manager');
+            }
             return;
         }
         const now = Date.now();

package/dist/mcp/compactionProxy.js CHANGED Viewed

@@ -1254,25 +1254,57 @@ async function handleRequest(req, res) {
     pushEvent('info', `POST /v1/messages model=${body.model || '?'} msgs=${messageCount} size=${(originalSize / 1024).toFixed(0)}KB`);
+    const isCompaction = isCompactionRequest(body);
+    const isPassthrough = !isCompaction && (dontCompress || messageCount <= liveConfig.PRESERVE_RECENT_MESSAGES);
+    let sysPromptModified = false;
     // === SYSTEM PROMPT COMPRESSION ===
+    // Always compress system prompt if not dontCompress — cache makes repeat calls free.
+    // Cache-miss: fire-and-forget on passthrough (don't block forwarding), await on compaction/live paths.
     if (!dontCompress && body.system) {
-      try {
-        const sysResult = await compressSystemPrompt(body.system);
-        if (sysResult.charsSaved > 0) {
-          body.system = sysResult.system;
-          stats.sysPromptCharsSaved += sysResult.charsSaved;
+      // Build hash to check cache without calling async function
+      const _sysKey = typeof body.system === 'string' ? body.system
+        : Array.isArray(body.system) ? body.system.map(b => typeof b === 'string' ? b : (b?.text || '')).join('')
+        : JSON.stringify(body.system);
+      const _sysHash = require('crypto').createHash('md5').update(_sysKey).digest('hex');
+      const _sysCached = _sysPromptCache.get(_sysHash);
+      if (_sysCached) {
+        // Cache hit — zero latency, always apply
+        if (_sysCached.charsSaved > 0) {
+          body.system = _sysCached.system;
+          sysPromptModified = true;
+          stats.sysPromptCharsSaved += _sysCached.charsSaved;
           stats.sysPromptCompressed++;
-          stats.tokensStripped += Math.floor(sysResult.charsSaved / 4);
-          stats.bytesStripped += sysResult.charsSaved;
-          log('compress', `SYSPROMPT: ${sysResult.charsSaved} chars saved`);
-          pushEvent('compress', `System prompt: -${sysResult.charsSaved} chars`);
+          stats.tokensStripped += Math.floor(_sysCached.charsSaved / 4);
+          stats.bytesStripped += _sysCached.charsSaved;
+          log('compress', `SYSPROMPT (cache hit): ${_sysCached.charsSaved} chars saved`);
+          pushEvent('compress', `System prompt (cached): -${_sysCached.charsSaved} chars`);
+        }
+      } else if (isPassthrough) {
+        // Cache miss + passthrough: fire-and-forget on new thread — populates cache for next request
+        compressSystemPrompt(body.system).catch(() => {});
+      } else {
+        // Cache miss + compaction/live: must await (need compressed body)
+        try {
+          const sysResult = await compressSystemPrompt(body.system);
+          if (sysResult.charsSaved > 0) {
+            body.system = sysResult.system;
+            sysPromptModified = true;
+            stats.sysPromptCharsSaved += sysResult.charsSaved;
+            stats.sysPromptCompressed++;
+            stats.tokensStripped += Math.floor(sysResult.charsSaved / 4);
+            stats.bytesStripped += sysResult.charsSaved;
+            log('compress', `SYSPROMPT: ${sysResult.charsSaved} chars saved`);
+            pushEvent('compress', `System prompt: -${sysResult.charsSaved} chars`);
+          }
+        } catch (e) {
+          log('warn', `System prompt compression failed: ${e.message}`);
         }
-      } catch (e) {
-        log('warn', `System prompt compression failed: ${e.message}`);
       }
     }
-    if (isCompactionRequest(body)) {
+    if (isCompaction) {
       // === COMPACTION DETECTED — strip tool bodies ===
       stats.compactionRequests++;
       stats.lastCompaction = new Date().toISOString();
@@ -1284,7 +1316,7 @@ async function handleRequest(req, res) {
       const { strippedMessages, strippingStats } = stripMessages(body.messages);
       body.messages = strippedMessages;
-      // Also apply steno+MT compression on compaction requests
+      // Run steno+MT compression in parallel (independent of strip)
       if (!dontCompress) {
         const { messages: compressed, blocksCompressed, charsCompressed, verifiedCount = 0, stenoOnlyCount = 0, tmHits: hits = 0, samples: compSamples = [] } = await compressMessagesLive(body.messages);
         body.messages = compressed;
@@ -1294,7 +1326,6 @@ async function handleRequest(req, res) {
         stats.zhRejected += stenoOnlyCount;
         stats.stenoOnly += (blocksCompressed - verifiedCount - stenoOnlyCount);
         stats.tmHits += hits;
-        // Store translation samples for preview
         if (compSamples.length > 0) stats._lastSamples = compSamples;
         if (blocksCompressed > 0) {
           pushEvent('compress', `${blocksCompressed} blocks, ${charsCompressed} chars (${verifiedCount} zh, ${stenoOnlyCount} steno, ${hits} TM)`);
@@ -1318,11 +1349,15 @@ async function handleRequest(req, res) {
       return;
     }
-    // === NON-COMPACTION — strip old tool_results + live MT compression ===
-    if (dontCompress || messageCount <= liveConfig.PRESERVE_RECENT_MESSAGES) {
+    // === NON-COMPACTION — passthrough if below threshold ===
+    if (isPassthrough) {
       stats.passthrough++;
       pushEvent('pass', `msgs=${messageCount} (below threshold ${liveConfig.PRESERVE_RECENT_MESSAGES})`);
-      forwardRequest(req, res, rawBody);
+      // Use modified body if sys prompt was compressed (cache hit), else rawBody
+      const passthroughBody = sysPromptModified
+        ? Buffer.from(JSON.stringify(body), 'utf8')
+        : rawBody;
+      forwardRequest(req, res, passthroughBody);
       return;
     }