npm - specmem-hardwicksoftware - Versions diffs - 3.7.35 → 3.7.38 - Mend

specmem-hardwicksoftware 3.7.35 → 3.7.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

package/CHANGELOG.md +34 -0
package/README.md +11 -15
package/bin/specmem-autoclaude.cjs +12 -1
package/bin/specmem-cli.cjs +1077 -11
package/bin/specmem-console.cjs +890 -63
package/bootstrap.cjs +10 -2
package/claude-hooks/agent-loading-hook.cjs +16 -16
package/claude-hooks/agent-loading-hook.js +28 -21
package/claude-hooks/agent-type-matcher.js +1 -1
package/claude-hooks/background-completion-silencer.js +1 -1
package/claude-hooks/file-claim-enforcer.cjs +37 -36
package/claude-hooks/output-cleaner.cjs +1 -1
package/claude-hooks/refusal-detector-hook.cjs +53 -0
package/claude-hooks/settings.json +64 -4
package/claude-hooks/smart-search-interceptor.js +1 -1
package/claude-hooks/specmem-search-enforcer.cjs +2 -11
package/claude-hooks/specmem-team-member-inject.js +1 -1
package/claude-hooks/specmem-unified-hook.py +1 -1
package/claude-hooks/subagent-loading-hook.cjs +1 -1
package/claude-hooks/task-progress-hook.cjs +7 -7
package/claude-hooks/task-progress-hook.js +3 -3
package/claude-hooks/team-comms-enforcer.cjs +113 -47
package/claude-hooks/use-code-pointers.cjs +1 -1
package/dist/claude-sessions/sessionParser.js +5 -0
package/dist/cli/deploy-to-claude.js +9 -2
package/dist/codebase/codebaseIndexer.js +48 -17
package/dist/codebase/exclusions.js +3 -4
package/dist/codebase/index.js +4 -0
package/dist/codebase/pdfExtractor.js +298 -0
package/dist/dashboard/api/taskTeamMembers.js +2 -2
package/dist/db/bigBrainMigrations.js +29 -0
package/dist/hooks/hookManager.js +4 -4
package/dist/hooks/teamFramingCli.js +1 -1
package/dist/hooks/teamMemberPrepromptHook.js +5 -5
package/dist/index.js +49 -12
package/dist/init/claudeConfigInjector.js +27 -8
package/dist/installer/autoInstall.js +7 -1
package/dist/mcp/compactionProxy.js +1052 -192
package/dist/mcp/compactionProxyDaemon.js +112 -37
package/dist/mcp/contextVault.js +439 -0
package/dist/mcp/embeddingServerManager.js +151 -17
package/dist/mcp/mcpProtocolHandler.js +6 -1
package/dist/mcp/miniCOTServerManager.js +82 -8
package/dist/mcp/specMemServer.js +45 -10
package/dist/mcp/toolRegistry.js +6 -0
package/dist/startup/startupIndexing.js +14 -0
package/dist/team-members/taskOrchestrator.js +3 -3
package/dist/team-members/taskTeamMemberLogger.js +2 -2
package/dist/tools/goofy/deployTeamMember.js +3 -3
package/dist/tools/goofy/digInTheVault.js +81 -0
package/dist/tools/goofy/findCodePointers.js +17 -0
package/dist/tools/goofy/findWhatISaid.js +19 -0
package/dist/tools/goofy/stashTheGoods.js +56 -0
package/dist/tools/teamMemberDeployer.js +2 -2
package/dist/watcher/changeHandler.js +65 -8
package/dist/watcher/changeQueue.js +20 -1
package/embedding-sandbox/frankenstein-embeddings.py +4 -3
package/embedding-sandbox/mini-cot-service.py +11 -13
package/embedding-sandbox/pdf-text-extract.py +208 -0
package/package.json +1 -1
package/scripts/deploy-hooks.cjs +12 -4
package/scripts/fast-batch-embedder.cjs +2 -2
package/scripts/force-retry.cjs +34 -0
package/scripts/global-postinstall.cjs +97 -4
package/scripts/poetic-abliteration.cjs +379 -0
package/scripts/refusal-enforcer.cjs +88 -0
package/scripts/specmem-init.cjs +222 -41
package/specmem/model-config.json +6 -6
package/specmem/supervisord.conf +1 -1
package/svg-sections/readme-token-compaction.svg +246 -0
package/claude-hooks/agent-chooser-hook.js +0 -179

package/dist/mcp/compactionProxy.js CHANGED Viewed

@@ -34,14 +34,84 @@ const CLAUDE_DIR = join(HOME, '.claude');
 const PORT_FILE = join(CLAUDE_DIR, '.compaction-proxy-port');
 const PID_FILE = join(CLAUDE_DIR, '.compaction-proxy.pid');
 const DISABLED_FILE = join(CLAUDE_DIR, '.compaction-proxy-disabled');
+const CUSTOM_SYS_PROMPT_FILE = join(CLAUDE_DIR, '.custom-sys-prompt.json');
+const OG_SYS_PROMPT_FILE = join(CLAUDE_DIR, '.og-sys-prompt.json');
 // Per-user port: env var > default 4080. Multiple users on same machine
 // should set COMPACTION_PROXY_PORT or rely on the port file mechanism.
 const PROXY_PORT = parseInt(process.env.COMPACTION_PROXY_PORT || '4080', 10);
-const UPSTREAM_HOST = process.env.COMPACTION_PROXY_UPSTREAM || 'api.anthropic.com';
-const UPSTREAM_PORT = 443;
+// ============================================================================
+// Custom Upstream Detection — ENV VARS ONLY (no settings.json scanning)
+// ============================================================================
+// For custom APIs (MiniMax, etc.), set these env vars on the MCP server config:
+//   COMPACTION_PROXY_UPSTREAM  — full URL of the real API (e.g., https://api.minimax.io/anthropic)
+//   COMPACTION_PROXY_API_KEY   — API key for the custom upstream
+//   COMPACTION_PROXY_MODEL     — model name to inject (e.g., MiniMax-Text-01)
+//
+// When NONE of these are set, the proxy is transparent: it forwards to api.anthropic.com
+// with all original headers (OAuth Bearer, anthropic-beta, etc.) intact.
+// This is the "native Claude" mode — fast mode, thinking, etc. all work.
+// Custom upstream API key (for MiniMax, etc.)
+const UPSTREAM_API_KEY = process.env.COMPACTION_PROXY_API_KEY
+    || process.env.ANTHROPIC_AUTH_TOKEN
+    || process.env.ANTHROPIC_API_KEY
+    || null;
+// Custom model override (for MiniMax, etc.)
+const UPSTREAM_MODEL = process.env.COMPACTION_PROXY_MODEL
+    || process.env.ANTHROPIC_MODEL
+    || null;
+/**
+ * Resolve the upstream API endpoint from the COMPACTION_PROXY_UPSTREAM env var.
+ *
+ * Only the environment variable is consulted; settings.json is deliberately not
+ * scanned because stale custom-upstream entries there broke native Claude mode.
+ *
+ * Accepted forms: a full URL ("https://api.minimax.io/anthropic") or a bare
+ * host[:port][/path], which is treated as HTTPS. When the URL carries no explicit
+ * port, the scheme default is used (443 for https, 80 for http).
+ *
+ * @returns {{host: string, port: number, isHttps: boolean, path: string}}
+ *   Upstream descriptor; api.anthropic.com:443 over HTTPS when the variable is unset.
+ */
+function parseUpstreamUrl() {
+    const raw = (process.env.COMPACTION_PROXY_UPSTREAM || '').trim();
+    if (!raw) {
+        // Default: Anthropic API — headers pass through untouched (OAuth + fast mode work)
+        return { host: 'api.anthropic.com', port: 443, isHttps: true, path: '/' };
+    }
+    // Require a real "http://" or "https://" prefix. A plain startsWith('http') check
+    // mistakes bare hosts such as "http-gateway.internal:8443" for complete URLs.
+    const withScheme = /^https?:\/\//i.test(raw) ? raw : 'https://' + raw;
+    try {
+        const url = new URL(withScheme);
+        const isHttps = url.protocol === 'https:';
+        // url.port is '' when the URL uses the scheme's default port
+        const explicitPort = parseInt(url.port, 10);
+        return {
+            host: url.hostname,
+            port: explicitPort || (isHttps ? 443 : 80),
+            isHttps,
+            path: url.pathname || '/'
+        };
+    } catch (e) {
+        // Unparseable value: fall back to treating the raw string as an HTTPS host
+        return { host: raw, port: 443, isHttps: true, path: '/' };
+    }
+}
+const UPSTREAM = parseUpstreamUrl();
+const UPSTREAM_HOST = UPSTREAM.host;
+const UPSTREAM_PORT = UPSTREAM.port;
+const UPSTREAM_IS_HTTPS = UPSTREAM.isHttps;
+const UPSTREAM_PATH = UPSTREAM.path || '/';
+/**
+ * Pick the model name to report back in responses.
+ *
+ * With a model override configured and an upstream host other than
+ * api.anthropic.com, the literal "opus" is returned (per the original note, so
+ * that the client accepts responses coming from a custom upstream). In every
+ * other case the ANTHROPIC_MODEL env var is returned, or null when it is unset.
+ *
+ * @returns {string|null}
+ */
+function getOriginalModel() {
+    const isCustomUpstream = UPSTREAM_HOST !== 'api.anthropic.com';
+    if (!UPSTREAM_MODEL || !isCustomUpstream) {
+        // Native Anthropic (or no override): report whatever the environment names
+        return process.env.ANTHROPIC_MODEL || null;
+    }
+    log('proxy', `Using opus for response (custom upstream: ${UPSTREAM_MODEL})`);
+    return 'opus';
+}
+const ORIGINAL_MODEL = getOriginalModel();
+// Helper to pick HTTP or HTTPS request based on upstream
+const upstreamRequest = UPSTREAM_IS_HTTPS ? httpsRequest : httpRequest;
 const LOG_FILE = join('/tmp', `compaction-proxy-${process.getuid?.() ?? 'default'}.log`);
+// Orphan detection — track last request time for daemon watchdog
+let _lastRequestTime = Date.now();
 // Compaction detection — multiple markers to catch current + future Claude Code versions
 const COMPACTION_SYSTEM_PROMPTS = [
   'You are a helpful AI assistant tasked with summarizing conversations.',
@@ -110,8 +180,8 @@ function _invalidateProjectCaches() {
   _synonyms = null;
 }
-// Preview ring buffer — stores last 5 compressed requests for TUI display
-const PREVIEW_BUFFER_SIZE = 5;
+// Preview ring buffer — stores last 20 compressed requests for TUI display
+const PREVIEW_BUFFER_SIZE = 20;
 const recentRequests = [];  // { original, optimized, timestamp, type, savings }
 function _cleanPreview(raw) {
@@ -167,14 +237,42 @@ function _cleanPreview(raw) {
   }
 }
+/**
+ * Build a short, human-readable preview of a request body for the TUI.
+ *
+ * The body is parsed as JSON and only the last six entries of `messages` are
+ * rendered (no filtering by role is performed). String content is cut to 800
+ * chars; content blocks are summarised one per line. If the body is not JSON,
+ * has no `messages` array, or any entry is malformed, the first 3000 chars of
+ * the raw body are returned instead.
+ *
+ * @param {string} bodyStr - raw request body
+ * @returns {string} messages rendered as "[role] content", joined by "\n---\n"
+ */
+function _extractPreviewContent(bodyStr) {
+  const FALLBACK_CHARS = 3000;
+  // One-line summary of a single content block
+  const summariseBlock = (blk) => {
+    switch (blk.type) {
+      case 'text':
+        return (blk.text || '').slice(0, 400);
+      case 'tool_use':
+        return `[tool_use: ${blk.name}(${JSON.stringify(blk.input || {}).slice(0, 200)})]`;
+      case 'tool_result': {
+        const raw = typeof blk.content === 'string' ? blk.content : JSON.stringify(blk.content || '');
+        return `[tool_result: ${raw.slice(0, 400)}]`;
+      }
+      default:
+        return `[${blk.type}]`;
+    }
+  };
+  // Message content is either a plain string or an array of blocks; anything else renders empty
+  const summariseContent = (content) => {
+    if (typeof content === 'string') return content.slice(0, 800);
+    if (Array.isArray(content)) return content.map(summariseBlock).join('\n');
+    return '';
+  };
+  try {
+    const { messages } = JSON.parse(bodyStr);
+    if (!Array.isArray(messages)) return bodyStr.slice(0, FALLBACK_CHARS);
+    return messages
+      .slice(-6)
+      .map((entry) => `[${entry.role || '?'}] ${summariseContent(entry.content)}`)
+      .join('\n---\n');
+  } catch {
+    return bodyStr.slice(0, FALLBACK_CHARS);
+  }
+}
 function pushPreview(original, optimized, type, samples) {
   const entry = {
-    original: _cleanPreview(original || ''),
-    optimized: _cleanPreview(optimized || ''),
+    original: _extractPreviewContent(original || ''),
+    optimized: _extractPreviewContent(optimized || ''),
     timestamp: new Date().toISOString(),
     type,
     savings: original && optimized ? Math.max(0, original.length - optimized.length) : 0,
-    // Translation samples: before→after pairs for console display
     samples: Array.isArray(samples) ? samples.slice(0, 8) : [],
   };
   recentRequests.push(entry);
@@ -198,13 +296,87 @@ let liveConfig = {
   OLD_STRIP_THRESHOLD,
   MIN_TRANSLATE_LENGTH,
   TOOL_RESULT_PREVIEW_CHARS,
-  STENO_ENABLED: true,       // toggle steno compression
-  TRANSLATE_ENABLED: true,   // toggle MT translation (requires translate.sock)
+  STENO_ENABLED: true,       // toggle steno compression (abbreviations like function→fn)
+  SLANG_ENABLED: false,      // YCC (Yung Cracka Compress) — DMV x Florida slang layer, opt-in
+  TRANSLATE_ENABLED: true,   // RE-ENABLED v3.7.36 — passthrough vocab + word TM + Jaccard fix the 97% rejection
   OLD_STRIP_ENABLED: true,   // toggle old tool_result stripping
-  SYSTEM_PROMPT_COMPRESS: true, // toggle system prompt steno+translate compression
+  SYSTEM_PROMPT_COMPRESS: true,  // enabled by default — custom sys prompt injection
   SYSTEM_REMINDER_STRIPPING: true,  // toggle <system-reminder> stripping (keeps first, strips rest)
 };
+// System prompt dedup — first instance goes through, subsequent same-hash stripped
+let _seenSysPromptHash = null;  // hash of last-sent system prompt
+let _sysPromptDedup = true;     // toggle via /config
+let _sysPromptSendCount = 0;    // how many times sys prompt has been sent
+let _compactionsSinceLastInject = 0; // track compactions for re-injection
+const SYS_PROMPT_REINJECT_INTERVAL = 3; // re-inject custom sys prompt every N compactions
+// Session boundary detection — MCP server outlives Claude CLI sessions.
+// When a new CLI session starts (>90s gap between requests), reset sys prompt state
+// so the system prompt shows up on the first user submit of every session.
+// NOTE: _lastRequestTime already declared at line 48 (orphan detection) — reuse it
+const SESSION_GAP_MS = 90000; // 90s gap = new session
+// Custom system prompt — user-defined replacement for OG system prompt
+let _customSystemPrompt = null; // { prompt: string, ogHash: string } or null
+/**
+ * Restore the persisted original ("OG") system prompt from OG_SYS_PROMPT_FILE
+ * into stats._lastSystemPromptFull. Does nothing when the file is absent or has
+ * no `prompt`; read/parse errors are logged as warnings and otherwise ignored.
+ */
+function _loadOgSysPrompt() {
+  try {
+    if (!existsSync(OG_SYS_PROMPT_FILE)) return;
+    const persisted = JSON.parse(readFileSync(OG_SYS_PROMPT_FILE, 'utf8'));
+    const ogPrompt = persisted?.prompt;
+    if (!ogPrompt) return;
+    // _seenSysPromptHash is intentionally left untouched here: the first request
+    // must always pass through.
+    stats._lastSystemPromptFull = ogPrompt;
+    log('info', `Loaded persisted OG system prompt (${ogPrompt.length} chars)`);
+  } catch (err) {
+    log('warn', `Failed to load OG sys prompt: ${err.message}`);
+  }
+}
+/**
+ * Persist the original system prompt and its hash to OG_SYS_PROMPT_FILE as
+ * pretty-printed JSON, stamped with the current time. Failures are logged as
+ * warnings and never thrown.
+ *
+ * @param {string} prompt - system prompt text to persist
+ * @param {string} hash - hash stored alongside the prompt
+ */
+function _saveOgSysPrompt(prompt, hash) {
+  const record = { prompt, hash, savedAt: Date.now() };
+  try {
+    const serialised = JSON.stringify(record, null, 2);
+    writeFileSync(OG_SYS_PROMPT_FILE, serialised, 'utf8');
+  } catch (err) {
+    log('warn', `Failed to save OG sys prompt: ${err.message}`);
+  }
+}
+/**
+ * Load the user-defined replacement system prompt from CUSTOM_SYS_PROMPT_FILE
+ * into _customSystemPrompt.
+ *
+ * The file must hold an object with non-empty string `prompt` and `ogHash`
+ * fields. Both are validated BEFORE the module state is updated, so a malformed
+ * file can no longer be installed and then reported as a load failure when the
+ * log line dereferences a non-string field. Missing file: no-op. Read/parse
+ * errors and malformed content are logged as warnings.
+ */
+function _loadCustomSysPrompt() {
+  try {
+    if (!existsSync(CUSTOM_SYS_PROMPT_FILE)) return;
+    const data = JSON.parse(readFileSync(CUSTOM_SYS_PROMPT_FILE, 'utf8'));
+    const isValid = typeof data?.prompt === 'string' && data.prompt.length > 0
+      && typeof data?.ogHash === 'string' && data.ogHash.length > 0;
+    if (!isValid) {
+      log('warn', 'Ignoring custom sys prompt file: expected non-empty string "prompt" and "ogHash"');
+      return;
+    }
+    _customSystemPrompt = data;
+    // DON'T set _seenSysPromptHash here — first request of new Claude session must pass through
+    log('info', `Loaded custom system prompt (ogHash=${data.ogHash.slice(0, 8)}, ${data.prompt.length} chars)`);
+  } catch (e) {
+    log('warn', `Failed to load custom sys prompt: ${e.message}`);
+  }
+}
+/**
+ * Persist or clear the custom system prompt on disk.
+ *
+ * A truthy `data` is written to CUSTOM_SYS_PROMPT_FILE as pretty-printed JSON;
+ * a falsy `data` deletes the file if it exists. Failures are logged as warnings
+ * and never thrown.
+ *
+ * @param {object|null} data - prompt record to store, or a falsy value to clear
+ */
+function _saveCustomSysPrompt(data) {
+  try {
+    if (!data) {
+      // Clearing: remove the persisted file when there is one
+      if (existsSync(CUSTOM_SYS_PROMPT_FILE)) unlinkSync(CUSTOM_SYS_PROMPT_FILE);
+      return;
+    }
+    const serialised = JSON.stringify(data, null, 2);
+    writeFileSync(CUSTOM_SYS_PROMPT_FILE, serialised, 'utf8');
+  } catch (err) {
+    log('warn', `Failed to save custom sys prompt: ${err.message}`);
+  }
+}
+// Compaction signal from precompact hook (set via /expect-compaction endpoint)
+let _expectCompaction = false;
+let _compactionSignalTime = 0;
+// Load persisted custom system prompt on module init
+_loadCustomSysPrompt();
 // Stats
 const stats = {
   totalRequests: 0,
@@ -231,6 +403,9 @@ const stats = {
   startTime: Date.now()
 };
+// Load persisted OG system prompt (needs stats object)
+_loadOgSysPrompt();
 // ============================================================================
 // Translation Memory — learns from verified round-trips
 // ============================================================================
@@ -440,6 +615,111 @@ function learnSynonyms(origWords, backWords) {
 }
 let _synSaveTimer = null;
+// ============================================================================
+// Passthrough Vocabulary — words that ALWAYS fail translation, skip 'em
+// ============================================================================
+// Words like "webpack", "nginx", "stdout" will never translate correctly.
+// After N failures, they get added here and bypass the socket entirely.
+// This prevents the same words from tanking verification scores every request.
+const PASSTHROUGH_FILE = join(SPECMEM_DATA, 'run', 'passthrough-vocab.json');
+const PASSTHROUGH_FAIL_THRESHOLD = 2;  // 2 failures → permanent passthrough
+let _passthroughCache = null;
+// Built-in passthrough: tech terms that should NEVER be translated.
+// Entries are normalised to lowercase because isPassthrough() and
+// recordPassthroughFailure() lowercase the word before looking it up; a
+// mixed-case entry such as 'parseInt' could otherwise never match.
+const BUILTIN_PASSTHROUGH = new Set([
+  'webpack', 'nginx', 'redis', 'postgres', 'postgresql', 'mongodb', 'sqlite',
+  'docker', 'kubernetes', 'github', 'gitlab', 'npm', 'yarn', 'pnpm', 'bun',
+  'typescript', 'javascript', 'nodejs', 'python', 'golang', 'rustlang',
+  'stdout', 'stderr', 'stdin', 'async', 'await', 'const', 'enum', 'struct',
+  'boolean', 'parseInt', 'typeof', 'instanceof', 'undefined', 'null',
+  'localhost', 'middleware', 'dockerfile', 'makefile', 'readme',
+  'eslint', 'prettier', 'babel', 'rollup', 'vite', 'esbuild',
+  'mysql', 'graphql', 'grpc', 'websocket', 'oauth', 'jwt',
+  'ubuntu', 'debian', 'centos', 'linux', 'macos', 'windows',
+  'claude', 'anthropic', 'openai', 'specmem', 'hardwick',
+].map((term) => term.toLowerCase()));
+/**
+ * Return the learned passthrough vocabulary (word -> failure count), reading
+ * PASSTHROUGH_FILE on first use and caching the result in _passthroughCache.
+ *
+ * The file content is adopted only when it parses to a plain object. Valid JSON
+ * of any other shape (null, an array, a number, a string) is discarded, because
+ * callers index and assign into the returned value.
+ *
+ * @returns {Object<string, number>} the shared, mutable cache object
+ */
+function _loadPassthrough() {
+  if (_passthroughCache) return _passthroughCache;
+  _passthroughCache = {};
+  try {
+    if (existsSync(PASSTHROUGH_FILE)) {
+      const parsed = JSON.parse(readFileSync(PASSTHROUGH_FILE, 'utf8'));
+      if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
+        _passthroughCache = parsed;
+      }
+    }
+  } catch { /* unreadable or corrupt file: start fresh */ }
+  return _passthroughCache;
+}
+/**
+ * Write the in-memory passthrough vocabulary to PASSTHROUGH_FILE, creating the
+ * parent directory when needed. Best-effort: a failure is logged as a warning
+ * and never thrown.
+ */
+function _savePassthrough() {
+  try {
+    mkdirSync(dirname(PASSTHROUGH_FILE), { recursive: true });
+    writeFileSync(PASSTHROUGH_FILE, JSON.stringify(_passthroughCache || {}), 'utf8');
+  } catch (e) {
+    // Still best-effort, but leave a trace the way the sys-prompt save helpers do
+    log('warn', `Failed to save passthrough vocab: ${e.message}`);
+  }
+}
+let _ptSaveTimer = null;
+/**
+ * Tell whether a word should bypass translation.
+ *
+ * A word is passthrough when its lowercase form is in BUILTIN_PASSTHROUGH, or
+ * when its recorded failure count has reached PASSTHROUGH_FAIL_THRESHOLD.
+ * Only own, numeric entries of the learned vocabulary are considered, so words
+ * that collide with inherited object members ("constructor", "toString") are
+ * judged by their real counts.
+ *
+ * @param {string} word
+ * @returns {boolean} always a strict boolean; false for non-string input
+ */
+function isPassthrough(word) {
+  if (typeof word !== 'string') return false;
+  const lower = word.toLowerCase();
+  if (BUILTIN_PASSTHROUGH.has(lower)) return true;
+  const pt = _loadPassthrough();
+  if (!pt || !Object.prototype.hasOwnProperty.call(pt, lower)) return false;
+  const failures = pt[lower];
+  return typeof failures === 'number' && failures >= PASSTHROUGH_FAIL_THRESHOLD;
+}
+/**
+ * Count one more failed translation for a word and schedule a save.
+ *
+ * Built-in passthrough words are skipped. The previous count is read only from
+ * an own, numeric entry: for a word such as "constructor" the inherited object
+ * member would otherwise be concatenated with 1 and stored as a garbage string
+ * that never reaches the threshold. Writes to disk are debounced: at most one
+ * save is pending at a time, 10 s after the first unsaved change, and the timer
+ * is unref'd where supported.
+ *
+ * @param {string} word - the word that failed; non-strings are ignored
+ */
+function recordPassthroughFailure(word) {
+  if (typeof word !== 'string') return;
+  const lower = word.toLowerCase();
+  if (BUILTIN_PASSTHROUGH.has(lower)) return; // already permanent
+  const pt = _loadPassthrough();
+  const hasCount = Object.prototype.hasOwnProperty.call(pt, lower) && typeof pt[lower] === 'number';
+  pt[lower] = (hasCount ? pt[lower] : 0) + 1;
+  if (!_ptSaveTimer) {
+    _ptSaveTimer = setTimeout(() => { _savePassthrough(); _ptSaveTimer = null; }, 10000);
+    if (_ptSaveTimer.unref) _ptSaveTimer.unref();
+  }
+}
+// ============================================================================
+// Word-Level Translation Memory — cache individual word translations
+// ============================================================================
+// When "configuration" → "配置" works in a verified batch, cache that mapping.
+// Next time "configuration" appears in ANY text, use the cache instead of socket.
+// This is separate from the full-text TM (which caches entire steno blocks).
+const WORD_TM_FILE = join(SPECMEM_DATA, 'run', 'word-translations.json');
+let _wordTMCache = null;
+/**
+ * Return the word-level translation memory (lowercase word -> translation),
+ * reading WORD_TM_FILE on first use and caching the result in _wordTMCache.
+ *
+ * The file content is adopted only when it parses to a plain object. Valid JSON
+ * of any other shape (null, an array, a number, a string) is discarded, because
+ * callers index and assign into the returned value.
+ *
+ * @returns {Object<string, string>} the shared, mutable cache object
+ */
+function _loadWordTM() {
+  if (_wordTMCache) return _wordTMCache;
+  _wordTMCache = {};
+  try {
+    if (existsSync(WORD_TM_FILE)) {
+      const parsed = JSON.parse(readFileSync(WORD_TM_FILE, 'utf8'));
+      if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
+        _wordTMCache = parsed;
+      }
+    }
+  } catch { /* unreadable or corrupt file: start fresh */ }
+  return _wordTMCache;
+}
+/**
+ * Write the in-memory word-level translation memory to WORD_TM_FILE, creating
+ * the parent directory when needed. Best-effort: a failure is logged as a
+ * warning and never thrown.
+ */
+function _saveWordTM() {
+  try {
+    mkdirSync(dirname(WORD_TM_FILE), { recursive: true });
+    writeFileSync(WORD_TM_FILE, JSON.stringify(_wordTMCache || {}), 'utf8');
+  } catch (e) {
+    // Still best-effort, but leave a trace the way the sys-prompt save helpers do
+    log('warn', `Failed to save word translations: ${e.message}`);
+  }
+}
+let _wordTMSaveTimer = null;
+/**
+ * Look up the cached translation for a word (case-insensitive).
+ *
+ * Only own entries of the translation memory count. Without that check, words
+ * that name inherited object members ("constructor", "toString", "valueOf")
+ * would return the inherited function as their "translation".
+ *
+ * @param {string} word
+ * @returns {*} the stored translation, or null when there is none
+ */
+function wordTMLookup(word) {
+  if (typeof word !== 'string') return null;
+  const tm = _loadWordTM();
+  const key = word.toLowerCase();
+  if (!tm || !Object.prototype.hasOwnProperty.call(tm, key)) return null;
+  return tm[key] || null;
+}
+/**
+ * Cache a word translation under the lowercase form of the source word and
+ * schedule a save. Writes are debounced: at most one save is pending at a time,
+ * 10 s after the first unsaved change, and the timer is unref'd where supported.
+ *
+ * @param {string} enWord - source word (stored lowercase)
+ * @param {string} zhTranslation - translation to remember
+ */
+function wordTMStore(enWord, zhTranslation) {
+  const memory = _loadWordTM();
+  const key = enWord.toLowerCase();
+  memory[key] = zhTranslation;
+  if (_wordTMSaveTimer) return; // a save is already pending
+  const flush = () => {
+    _saveWordTM();
+    _wordTMSaveTimer = null;
+  };
+  _wordTMSaveTimer = setTimeout(flush, 10000);
+  if (_wordTMSaveTimer.unref) _wordTMSaveTimer.unref();
+}
 // ============================================================================
 // Logging
 // ============================================================================
@@ -492,12 +772,36 @@ function isCompactionRequest(body) {
 // Render an Edit tool input as a compact text diff: an "Edit(<file_path>)" header
 // followed by one entry per line of old_string / new_string, plus "(replace_all)"
 // when input.replace_all is truthy. Returns the entries joined by "\n", or null
 // when input, input.file_path or input.old_string is missing or empty.
 function smartStripEdit(input) {
   if (!input || !input.file_path || !input.old_string) return null;
   const lines = [`Edit(${input.file_path})`];
-  // old_string → removed lines (prefix with -)
-  for (const l of input.old_string.split('\n')) lines.push(`- ${l}`);
-  // new_string → added lines (prefix with +)
-  if (input.new_string != null) {
-    for (const l of input.new_string.split('\n')) lines.push(`+ ${l}`);
+  const oldLines = input.old_string.split('\n');
+  const newLines = (input.new_string != null) ? input.new_string.split('\n') : []; // null/undefined new_string => every old line is a removal
+  // Line-level diff via a longest-common-subsequence (LCS) table; lines are compared with ===
+  const m = oldLines.length, n = newLines.length;
+  // Build LCS table: dp[i][j] = LCS length of the first i old lines and the first j new lines ((m+1) x (n+1) cells)
+  const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
+  for (let i = 1; i <= m; i++) {
+    for (let j = 1; j <= n; j++) {
+      dp[i][j] = (oldLines[i - 1] === newLines[j - 1]) ? dp[i - 1][j - 1] + 1 : Math.max(dp[i - 1][j], dp[i][j - 1]);
+    }
+  }
+  // Backtrack from the bottom-right corner; entries are collected last-to-first and reversed below
+  let i = m, j = n;
+  const diffParts = [];
+  while (i > 0 || j > 0) {
+    if (i > 0 && j > 0 && oldLines[i - 1] === newLines[j - 1]) {
+      diffParts.push(` ${oldLines[i - 1]}`); // unchanged line: single-space prefix
+      i--; j--;
+    } else if (j > 0 && (i === 0 || dp[i][j - 1] >= dp[i - 1][j])) { // on ties take the addition first, so after the reverse removals precede additions
+      diffParts.push(`+ ${newLines[j - 1]}`);
+      j--;
+    } else {
+      diffParts.push(`- ${oldLines[i - 1]}`);
+      i--;
+    }
   }
+  diffParts.reverse();
+  for (const part of diffParts) lines.push(part);
   if (input.replace_all) lines.push('(replace_all)');
   return lines.join('\n');
 }
@@ -509,12 +813,40 @@ function stripMessages(messages) {
   let toolUsesStripped = 0;
   let charsRemoved = 0;
-  const strippedMessages = messages.map(msg => {
+  // Three-tier stripping: find last and second-to-last assistant message indices
+  // Tier 1 (last assistant): ZERO stripping — active/pending tool calls
+  // Tier 2 (2nd-to-last assistant): ZERO stripping — needed for decision context
+  // Tier 3 (3rd+ oldest assistant): strip large content blobs, preserve metadata
+  let lastAssistantIndex = -1;
+  let secondLastAssistantIndex = -1;
+  for (let i = messages.length - 1; i >= 0; i--) {
+    if (messages[i] && messages[i].role === 'assistant') {
+      if (lastAssistantIndex === -1) { lastAssistantIndex = i; }
+      else if (secondLastAssistantIndex === -1) { secondLastAssistantIndex = i; break; }
+    }
+  }
+  const strippedMessages = messages.map((msg, msgIndex) => {
     if (!msg || !msg.content || !Array.isArray(msg.content)) return msg;
+    // Tier 1 + Tier 2: assistant messages returned completely untouched
+    const isTier1or2Assistant = (msgIndex === lastAssistantIndex || msgIndex === secondLastAssistantIndex);
+    if (isTier1or2Assistant && msg.role === 'assistant') return msg;
+    // For user messages: check if the preceding assistant message is Tier 1 or 2.
+    // If so, this user message's tool_results correspond to those tool_use calls — don't strip.
+    if (msg.role === 'user') {
+      let precedingAssistantIndex = -1;
+      for (let pi = msgIndex - 1; pi >= 0; pi--) {
+        if (messages[pi] && messages[pi].role === 'assistant') { precedingAssistantIndex = pi; break; }
+      }
+      if (precedingAssistantIndex === lastAssistantIndex || precedingAssistantIndex === secondLastAssistantIndex) return msg;
+    }
     const newContent = msg.content.map(block => {
       if (!block || typeof block !== 'object') return block;
+      // Tier 3: strip large tool_result content blobs, preserve all metadata
       if (block.type === 'tool_result') {
         const originalContent = block.content;
         if (!originalContent) return block;
@@ -540,45 +872,19 @@ function stripMessages(messages) {
         return stripped;
       }
-      if (block.type === 'tool_use') {
-        const input = block.input;
-        if (!input) return block;
-        // Smart diff stripping for Edit tool — keep only - / + lines
-        const editDiff = (block.name === 'Edit') ? smartStripEdit(input) : null;
-        if (editDiff) {
-          const inputStr = JSON.stringify(input);
-          charsRemoved += inputStr.length - editDiff.length;
-          toolUsesStripped++;
-          const stripped = {
-            type: 'tool_use',
-            id: block.id,
-            name: block.name,
-            input: { _stripped: editDiff }
-          };
-          if (block.cache_control) stripped.cache_control = block.cache_control;
-          return stripped;
-        }
-        const inputStr = JSON.stringify(input);
-        if (inputStr.length <= TOOL_USE_INPUT_PREVIEW_CHARS * 2) return block;
-        charsRemoved += inputStr.length - TOOL_USE_INPUT_PREVIEW_CHARS;
-        toolUsesStripped++;
-        const inputPreview = inputStr.slice(0, TOOL_USE_INPUT_PREVIEW_CHARS);
+      // NEVER strip tool_use.input — API validates input against tool schema.
+      // Replacing input with { _stripped } causes InputValidationError on every tool call.
+      if (block.type === 'tool_use') return block;
-        // Preserve all API-required fields: id, name, cache_control
-        const stripped = {
-          type: 'tool_use',
-          id: block.id,
-          name: block.name,
-          input: {
-            _stripped: `${inputPreview}... [TRIMMED: ${inputStr.length} chars → ${TOOL_USE_INPUT_PREVIEW_CHARS}]`
-          }
-        };
-        if (block.cache_control) stripped.cache_control = block.cache_control;
-        return stripped;
+      // Strip assistant text blocks during compaction — don't send API responses back
+      if (block.type === 'text' && typeof block.text === 'string' && msg.role === 'assistant' && block.text.length > 0) {
+        const firstLine = block.text.split('\n')[0].slice(0, 80);
+        const removed = block.text.length - firstLine.length;
+        if (removed > 0) {
+          charsRemoved += removed;
+          toolResultsStripped++;
+          return { ...block, text: `${firstLine}...\n[ASST-STRIPPED: ${block.text.length}→${firstLine.length}]` };
+        }
       }
       return block;
@@ -609,13 +915,18 @@ function stripOldToolResults(messages) {
   let charsRemoved = 0;
   const newMessages = messages.map((msg, idx) => {
-    // Only strip old messages
-    if (idx >= cutoff) return msg;
+    const isRecent = idx >= cutoff;
     if (!msg || !msg.content || !Array.isArray(msg.content)) return msg;
     const newContent = msg.content.map(block => {
       if (!block || typeof block !== 'object') return block;
+      // NEVER strip tool_use.input — API validates against schema
+      if (block.type === 'tool_use') return block;
+      // For remaining block types: only strip if OLD message (preserve recent context)
+      if (isRecent) return block;
       if (block.type === 'tool_result') {
         const originalContent = block.content;
         if (!originalContent) return block;
@@ -626,6 +937,7 @@ function stripOldToolResults(messages) {
         // Only strip if above threshold
         if (originalStr.length <= liveConfig.OLD_STRIP_THRESHOLD) return block;
+        if (originalStr.length <= OLD_STRIP_PREVIEW_CHARS) return block; // preview would be LONGER
         const removed = originalStr.length - OLD_STRIP_PREVIEW_CHARS;
         charsRemoved += removed;
@@ -645,41 +957,8 @@ function stripOldToolResults(messages) {
         if (block.cache_control) stripped.cache_control = block.cache_control;
         return stripped;
       }
-      // Also strip large tool_use inputs in old messages
-      if (block.type === 'tool_use') {
-        const input = block.input;
-        if (!input) return block;
-        // Smart Edit stripping — keep only - / + diff lines
-        const editDiff = smartStripEdit(input);
-        if (editDiff) {
-          const origLen = JSON.stringify(input).length;
-          const newLen = JSON.stringify(editDiff.input).length;
-          charsRemoved += origLen - newLen;
-          toolResultsStripped++;
-          return editDiff;
-        }
-        const inputStr = JSON.stringify(input);
-        if (inputStr.length <= liveConfig.OLD_STRIP_THRESHOLD) return block;
-        const removed = inputStr.length - OLD_STRIP_PREVIEW_CHARS;
-        charsRemoved += removed;
-        toolResultsStripped++;
-        const inputPreview = inputStr.slice(0, OLD_STRIP_PREVIEW_CHARS);
-        const stripped = {
-          type: 'tool_use',
-          id: block.id,
-          name: block.name,
-          input: {
-            _stripped: `${inputPreview}... [TRIMMED: ${inputStr.length} chars → ${OLD_STRIP_PREVIEW_CHARS}]`
-          }
-        };
-        if (block.cache_control) stripped.cache_control = block.cache_control;
-        return stripped;
-      }
+      // NEVER strip tool_use.input — API validates against schema
+      if (block.type === 'tool_use') return block;
       // Strip specmem hook injection text blocks from old messages
       if (block.type === 'text' && typeof block.text === 'string') {
@@ -697,17 +976,19 @@ function stripOldToolResults(messages) {
           };
         }
-        // Strip old assistant text blocks — Claude's own output echoed back
-        // No point sending Claude its own words; keep first line as context anchor
-        if (msg.role === 'assistant' && txt.length > 120) {
-          const firstLine = txt.split('\n')[0].slice(0, 120);
+        // Strip ALL old assistant text blocks — Claude's own output echoed back
+        // No point sending Anthropic its own responses; keep first line as context anchor
+        if (msg.role === 'assistant' && txt.length > 0) {
+          const firstLine = txt.split('\n')[0].slice(0, 80);
           const removed = txt.length - firstLine.length;
-          charsRemoved += removed;
-          toolResultsStripped++;
-          return {
-            ...block,
-            text: `${firstLine}...\n[ASST-ECHO-STRIPPED: ${txt.length} chars → ${firstLine.length}]`
-          };
+          if (removed > 0) {
+            charsRemoved += removed;
+            toolResultsStripped++;
+            return {
+              ...block,
+              text: `${firstLine}...\n[ASST-STRIPPED: ${txt.length}→${firstLine.length}]`
+            };
+          }
         }
       }
@@ -745,19 +1026,21 @@ function stripSystemReminders(messages) {
       if (!matches) return msg;
       let newText = msg.content;
-      for (const match of matches) {
-        if (!firstSeen) {
-          firstSeen = true; // keep the very first one
+      // Use matchAll with index to remove by position, not content.
+      // This prevents identical reminders from nuking the first one we want to keep.
+      const allMatches = [...newText.matchAll(new RegExp(SR_RE.source, 'g'))];
+      // Process in reverse order so indices stay valid as we splice
+      for (let mi = allMatches.length - 1; mi >= 0; mi--) {
+        if (!firstSeen && mi === 0) {
+          firstSeen = true; // keep the very first one by position
           continue;
         }
-        // replaceAll to nuke ALL occurrences of this exact match in the string
-        // .replace(string, '') only kills the first occurrence — duplicates slip through
-        while (newText.includes(match)) {
-          newText = newText.replace(match, '');
-          charsRemoved += match.length;
-          remindersStripped++;
-        }
+        const m = allMatches[mi];
+        newText = newText.slice(0, m.index) + newText.slice(m.index + m[0].length);
+        charsRemoved += m[0].length;
+        remindersStripped++;
       }
+      if (allMatches.length > 0 && !firstSeen) firstSeen = true;
       return { ...msg, content: newText.replace(/\n{3,}/g, '\n\n').trim() };
     }
@@ -771,18 +1054,19 @@ function stripSystemReminders(messages) {
       if (!matches) return block;
       let newText = block.text;
-      for (const match of matches) {
-        if (!firstSeen) {
-          firstSeen = true; // keep the very first one
+      // Position-based removal — prevents identical reminders from killing the kept one
+      const allBlockMatches = [...newText.matchAll(new RegExp(SR_RE.source, 'g'))];
+      for (let mi = allBlockMatches.length - 1; mi >= 0; mi--) {
+        if (!firstSeen && mi === 0) {
+          firstSeen = true;
           continue;
         }
-        // replaceAll to nuke ALL occurrences of this exact match in the block
-        while (newText.includes(match)) {
-          newText = newText.replace(match, '');
-          charsRemoved += match.length;
-          remindersStripped++;
-        }
+        const m = allBlockMatches[mi];
+        newText = newText.slice(0, m.index) + newText.slice(m.index + m[0].length);
+        charsRemoved += m[0].length;
+        remindersStripped++;
       }
+      if (allBlockMatches.length > 0 && !firstSeen) firstSeen = true;
       const cleaned = newText.replace(/\n{3,}/g, '\n\n').trim();
       // If block is now empty after stripping, remove it entirely
@@ -824,15 +1108,156 @@ const FILLER_WORDS = new Set([
 const KEEP_WORDS = new Set(['not', 'no', 'never', 'none', 'nor', 'neither', 'without',
   'all', 'every', 'each', 'both', 'only', 'must', 'always']);
+// Fixed programming shorthand, keyed by lowercase word.
+// stenographicReduce() looks each word up here and substitutes the short form
+// directly; the entries are static, so the result is deterministic.
+const STENO_ABBREVIATIONS = new Map(Object.entries({
+  // Core programming constructs
+  function: 'fn', functions: 'fns', configuration: 'cfg', configure: 'cfg',
+  implementation: 'impl', implement: 'impl', application: 'app', applications: 'apps',
+  authentication: 'auth', authenticate: 'auth', authorization: 'authz',
+  database: 'db', databases: 'dbs', directory: 'dir', directories: 'dirs',
+  environment: 'env', environments: 'envs', repository: 'repo', repositories: 'repos',
+  parameter: 'param', parameters: 'params', argument: 'arg', arguments: 'args',
+  property: 'prop', properties: 'props', attribute: 'attr', attributes: 'attrs',
+  reference: 'ref', references: 'refs', document: 'doc', documentation: 'docs',
+  message: 'msg', messages: 'msgs', number: 'num', integer: 'int', boolean: 'bool',
+  object: 'obj', objects: 'objs', element: 'el', elements: 'els',
+  component: 'comp', components: 'comps', template: 'tmpl', package: 'pkg',
+  dependency: 'dep', dependencies: 'deps', library: 'lib', libraries: 'libs',
+  module: 'mod', modules: 'mods', version: 'ver', command: 'cmd',
+  request: 'req', response: 'res', callback: 'cb', middleware: 'mw',
+  specification: 'spec', expression: 'expr', variable: 'var', constant: 'const',
+  instance: 'inst', constructor: 'ctor', temporary: 'tmp',
+  maximum: 'max', minimum: 'min', average: 'avg',
+  previous: 'prev', current: 'curr', original: 'orig', information: 'info',
+  connection: 'conn', connections: 'conns', transaction: 'txn',
+  operation: 'op', operations: 'ops', execution: 'exec',
+  process: 'proc', processes: 'procs', manager: 'mgr', service: 'svc',
+  server: 'srv', certificate: 'cert', permission: 'perm', separator: 'sep',
+  initialize: 'init', initialization: 'init', synchronize: 'sync',
+  asynchronous: 'async', development: 'dev', production: 'prod',
+  distribution: 'dist', administrator: 'admin', memory: 'mem',
+  address: 'addr', register: 'reg', buffer: 'buf', channel: 'chan',
+  context: 'ctx', receive: 'recv', generate: 'gen', generator: 'gen',
+  calculate: 'calc', resource: 'rsc', allocation: 'alloc', allocate: 'alloc',
+  descriptor: 'desc', position: 'pos', source: 'src', destination: 'dst',
+  character: 'char', characters: 'chars',
+  // Verbs
+  execute: 'exec', executing: 'exec', remove: 'rm', removing: 'rm',
+  delete: 'del', deleting: 'del', compare: 'cmp', convert: 'conv',
+  // Error/status
+  error: 'err', errors: 'errs', warning: 'warn', warnings: 'warns',
+  exception: 'exc', successful: 'ok', successfully: 'ok',
+  // Network
+  protocol: 'proto', interface: 'iface', network: 'net',
+  socket: 'sock', password: 'pwd',
+  // Common English → ultra-short
+  however: 'but', although: 'tho', because: 'bc', therefore: 'so',
+  regarding: 're', approximately: '~', including: 'incl',
+  currently: 'now', previously: 'prev', additional: 'extra',
+  following: 'next', different: 'diff', required: 'reqd',
+  returned: 'ret', returns: 'ret', specified: 'given',
+  completed: 'done', automatically: 'auto', immediately: 'now',
+}));
+// Learned abbreviations file (grows over time from verified loopbacks)
+const LEARNED_ABBREV_FILE = join(SPECMEM_DATA, 'run', 'learned-abbreviations.json');
+// In-memory copy of the learned table; null until the first load attempt.
+let _learnedAbbrevCache = null;
+/**
+ * Returns the learned word → abbreviation table, reading it from
+ * LEARNED_ABBREV_FILE on first use and serving the cached object afterwards.
+ *
+ * A missing, unreadable, or unparsable file yields an empty table. So does a
+ * file whose JSON is valid but not a plain object (`null`, an array, a string,
+ * a number): callers index the result by word, and `null[word]` would throw
+ * while `"text"["length"]` would hand back a number instead of an abbreviation.
+ *
+ * @returns {Object} the cached table (never null)
+ */
+function _loadLearnedAbbreviations() {
+  if (_learnedAbbrevCache) return _learnedAbbrevCache;
+  _learnedAbbrevCache = {};
+  try {
+    if (existsSync(LEARNED_ABBREV_FILE)) {
+      const parsed = JSON.parse(readFileSync(LEARNED_ABBREV_FILE, 'utf8'));
+      // Accept only a plain object; anything else is treated like a corrupt file.
+      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
+        _learnedAbbrevCache = parsed;
+      }
+    }
+  } catch { /* corrupt, start fresh */ }
+  return _learnedAbbrevCache;
+}
+/**
+ * Writes the in-memory learned-abbreviation table to LEARNED_ABBREV_FILE as
+ * JSON, creating the parent directory if needed. An unset cache is written as
+ * an empty object. Persistence is best-effort: any failure is ignored.
+ */
+function _saveLearnedAbbreviations() {
+  const snapshot = _learnedAbbrevCache || {};
+  try {
+    const serialized = JSON.stringify(snapshot);
+    const targetDir = dirname(LEARNED_ABBREV_FILE);
+    mkdirSync(targetDir, { recursive: true });
+    writeFileSync(LEARNED_ABBREV_FILE, serialized, 'utf8');
+  } catch {
+    // Deliberately ignored — losing a save must not break the caller.
+  }
+}
+// ============================================================================
+// YCC — optional slang-compression layer
+// ============================================================================
+// Maps a lowercase English word to an informal internet/regional short form.
+// stenographicReduce() consults this table only when liveConfig.SLANG_ENABLED
+// is truthy, and only for words the static abbreviation table did not handle.
+const SLANG_COMPRESSIONS = new Map(Object.entries({
+  // ── Universal internet slang ──
+  definitely: 'def', probably: 'prolly', because: 'bc',
+  people: 'ppl', something: 'smth', nothing: 'nth',
+  anything: 'anythng', everyone: 'evryone', already: 'alr',
+  between: 'btwn', tomorrow: 'tmrw', yesterday: 'ystrdy',
+  together: 'tgthr', usually: 'usu', obviously: 'obvi',
+  seriously: 'srsly', actually: 'actly', basically: 'basicly',
+  honestly: 'honstly', especially: 'esp', literally: 'lit',
+  whatever: 'wtv', though: 'tho', enough: 'enuf',
+  through: 'thru', going: 'goin', trying: 'tryna',
+  about: 'abt', really: 'rly', please: 'pls',
+  thanks: 'thx', okay: 'ok', never: 'nvr',
+  conversation: 'convo', situation: 'sitch', legitimate: 'legit',
+  important: 'impt', favorite: 'fav', problem: 'prob',
+  question: 'q', picture: 'pic', pictures: 'pics',
+  example: 'ex', suspicious: 'sus', nevermind: 'nvm',
+  awkward: 'awk', ridiculous: 'ridic', gorgeous: 'gorge',
+  absolutely: 'abs',
+  // ── DMV (DC/MD/VA) slang — single-word substitutions only ──
+  extremely: 'hella', terrible: 'trash', excellent: 'fire',
+  boring: 'dry', jealous: 'salty', scared: 'shook',
+  surprised: 'shook', bragging: 'stuntin', showing: 'flexin',
+  lying: 'cappin', ignoring: 'ghostin', complaining: 'pressed',
+  succeeding: 'winnin', failing: 'takin',
+  exhausted: 'gassed', focused: 'locked',
+  // ── Florida / Miami slang ──
+  season: 'szn', professional: 'pro', introduction: 'intro',
+  combination: 'combo', reputation: 'rep', competition: 'comp',
+  demonstration: 'demo', examination: 'exam', university: 'uni',
+  apartment: 'apt', neighborhood: 'hood', boulevard: 'blvd',
+  // ── Contraction-style phonetic compressions ──
+  should: 'shd', would: 'wld', could: 'cld',
+  doing: 'doin', having: 'havin', making: 'makin',
+  taking: 'takin', getting: 'gettin', coming: 'comin',
+  running: 'runnin', working: 'workin', looking: 'lookin',
+  thinking: 'thinkin', saying: 'sayin', telling: 'tellin',
+  building: 'buildin', calling: 'callin', sending: 'sendin',
+  reading: 'readin', writing: 'writin', waiting: 'waitin',
+}));
 /**
- * Stenographic reduction — strip filler words, preserve meaning.
- * Guaranteed ~27% token savings on all tokenizers.
+ * Stenographic reduction — strip filler words + apply programming abbreviations
+ * + optional YCC (Yung Cracka Compress) slang layer.
+ * Steno alone: ~40% savings. With YCC: ~45-50% on conversational text.
  */
 function stenographicReduce(text) {
+  // Learned word → abbreviation table (a plain object parsed from JSON).
+  const learnedAbbrev = _loadLearnedAbbreviations();
+  const useSlang = liveConfig.SLANG_ENABLED;
   return text.replace(/\b\w+\b/g, (word) => {
     const lower = word.toLowerCase();
     if (KEEP_WORDS.has(lower)) return word;
     if (FILLER_WORDS.has(lower)) return '';
+    // Static abbreviations (deterministic, always applied)
+    const abbr = STENO_ABBREVIATIONS.get(lower);
+    if (abbr) return abbr;
+    // YCC layer (opt-in slang)
+    if (useSlang) {
+      const slang = SLANG_COMPRESSIONS.get(lower);
+      if (slang) return slang;
+    }
+    // Learned abbreviations (verified via loopback).
+    // Own-property + string check: \w+ also matches tokens such as `__proto__`,
+    // and a bare `learnedAbbrev[lower]` would then return Object.prototype,
+    // which String.replace would emit as "[object Object]".
+    const learned = Object.prototype.hasOwnProperty.call(learnedAbbrev, lower)
+      ? learnedAbbrev[lower]
+      : undefined;
+    if (typeof learned === 'string' && learned) return learned;
     return word;
   }).replace(/  +/g, ' ').replace(/ ([.,;:!?])/g, '$1').trim();
 }
@@ -857,22 +1282,118 @@ function looksLikeNaturalLanguage(text) {
  * @param {string} target - target language code
  * @returns {Promise<string[]>} translated texts (fallback: originals)
  */
+/**
+ * Word-level translation: splits each text into individual words, translates
+ * each word independently for maximum accuracy, then reassembles with original
+ * spacing/punctuation preserved. Non-alpha tokens (numbers, paths, punctuation)
+ * pass through untranslated.
+ */
 function translateBatch(texts, socketPath, source = 'en', target = 'zh') {
   return new Promise((resolve) => {
     if (!existsSync(socketPath)) { resolve(texts); return; }
+    // Tokenize each text into words and non-word separators
+    const TOKEN_RE = /([a-zA-Z]+)|([^a-zA-Z]+)/g;
+    const tokenMap = []; // { textIdx, word } — only translatable words
+    const structures = []; // per-text: array of { type: 'word'|'sep', value, mapIdx? }
+    for (let i = 0; i < texts.length; i++) {
+      const struct = [];
+      let match;
+      const re = new RegExp(TOKEN_RE.source, 'g');
+      while ((match = re.exec(texts[i])) !== null) {
+        if (match[1] && match[1].length >= 2) {
+          // Translatable word (2+ alpha chars)
+          struct.push({ type: 'word', value: match[1], mapIdx: tokenMap.length });
+          tokenMap.push({ textIdx: i, word: match[1] });
+        } else {
+          // Separator, number, punctuation, single char — pass through
+          struct.push({ type: 'sep', value: match[0] });
+        }
+      }
+      structures.push(struct);
+    }
+    if (tokenMap.length === 0) { resolve(texts); return; }
+    // ── Layer 1: Passthrough + Word TM pre-filter ──
+    // Check each word against passthrough vocab and word-level TM cache.
+    // Only words that need the neural socket go to the network.
+    const preResolved = new Array(tokenMap.length).fill(null); // null = needs socket
+    const socketIndices = []; // indices into tokenMap that need the socket
+    let ptSkips = 0, wmHits = 0;
+    for (let ti = 0; ti < tokenMap.length; ti++) {
+      const word = tokenMap[ti].word;
+      if (isPassthrough(word)) {
+        preResolved[ti] = word; // keep original English
+        ptSkips++;
+      } else {
+        const cached = wordTMLookup(word);
+        if (cached) {
+          preResolved[ti] = cached; // use cached Chinese
+          wmHits++;
+        } else {
+          socketIndices.push(ti);
+        }
+      }
+    }
+    if (ptSkips > 0 || wmHits > 0) {
+      log('info', `WORD-CACHE: ${wmHits} word-TM hits, ${ptSkips} passthrough skips, ${socketIndices.length} need socket`);
+    }
+    // If everything was resolved from cache, skip the socket entirely
+    if (socketIndices.length === 0) {
+      const results = [];
+      for (let i = 0; i < texts.length; i++) {
+        let out = '';
+        for (const tok of structures[i]) {
+          if (tok.type === 'sep') { out += tok.value; }
+          else { out += preResolved[tok.mapIdx] || tok.value; }
+        }
+        results.push(out);
+      }
+      resolve(results);
+      return;
+    }
+    // Only send uncached words to the socket
+    const socketWords = socketIndices.map(i => tokenMap[i].word);
     const conn = createConnection(socketPath);
     conn.setTimeout(15000);
-    const flattened = texts.map(t => t.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim());
-    const body = JSON.stringify({ q: flattened.join('\n'), source, target }) + '\n';
+    const body = JSON.stringify({ q: socketWords.join('\n'), source, target }) + '\n';
     conn.on('connect', () => conn.write(body));
     let data = '';
-    conn.on('data', d => { data += d; if (data.includes('\n')) conn.end(); });
+    conn.on('data', d => {
+      data += d;
+      // Server sends {"status":"processing"}\n THEN {"translatedText":"..."}\n
+      // Only close when we get the actual translation result, not the status line
+      const lines = data.split('\n').filter(l => l.trim());
+      const lastLine = lines[lines.length - 1];
+      if (lastLine && lastLine.includes('"translatedText"')) conn.end();
+    });
     conn.on('end', () => {
       try {
-        const parsed = JSON.parse(data.trim());
+        // Find the line with actual translation results (skip status lines)
+        const lines = data.split('\n').filter(l => l.trim());
+        const resultLine = lines.find(l => l.includes('"translatedText"')) || lines[lines.length - 1];
+        const parsed = JSON.parse(resultLine.trim());
         if (parsed.translatedText) {
-          const results = parsed.translatedText.split('\n');
-          while (results.length < texts.length) results.push(texts[results.length]);
+          const socketResults = parsed.translatedText.split('\n');
+          // Merge socket results back into preResolved
+          for (let si = 0; si < socketIndices.length; si++) {
+            preResolved[socketIndices[si]] = socketResults[si] || tokenMap[socketIndices[si]].word;
+          }
+          // Reassemble each text using its structure
+          const results = [];
+          for (let i = 0; i < texts.length; i++) {
+            let out = '';
+            for (const tok of structures[i]) {
+              if (tok.type === 'sep') { out += tok.value; }
+              else { out += preResolved[tok.mapIdx] || tok.value; }
+            }
+            results.push(out);
+          }
           resolve(results);
         } else { resolve(texts); }
       } catch { resolve(texts); }
@@ -930,14 +1451,19 @@ function normalizeWord(word) {
 }
 /**
- * Loop-back verification: checks if back-translated text near-perfectly
- * preserves the original meaning, word order, and content.
+ * Loop-back verification: checks if back-translated text preserves
+ * the original meaning and content through the en→zh→en round trip.
+ *
+ * Two-metric approach (takes the HIGHER score):
+ *   1. LCS Dice: Longest Common Subsequence preserves word ORDER
+ *      Score = 2 * LCS / (origLen + backLen)
+ *   2. Jaccard: Bag-of-words overlap IGNORES reordering
+ *      Score = |intersection| / |union|
  *
- * Uses Longest Common Subsequence (LCS) on ordered content words,
- * with learned synonym normalization — so known equivalent words
- * (learned from past failures) count as matches.
+ * Chinese naturally reorders words vs English, so LCS alone is too strict.
+ * Jaccard catches cases where all the right words survive but in different order.
+ * Taking the max means either metric can green-light the translation.
  *
- * Score = 2 * LCS / (origLen + backLen)  (Dice coefficient on word sequences)
  * @returns {number} similarity score 0.0 to 1.0
  */
 function verifySimilarity(original, backTranslated) {
@@ -945,15 +1471,28 @@ function verifySimilarity(original, backTranslated) {
   const backWords = extractContentWords(backTranslated).map(normalizeWord);
   if (origWords.length === 0) return 1.0;
   if (backWords.length === 0) return 0.0;
+  // Metric 1: LCS Dice (order-sensitive)
   const lcs = lcsLength(origWords, backWords);
-  return (2 * lcs) / (origWords.length + backWords.length);
+  const lcsDice = (2 * lcs) / (origWords.length + backWords.length);
+  // Metric 2: Jaccard (order-insensitive, bag-of-words)
+  const origSet = new Set(origWords);
+  const backSet = new Set(backWords);
+  let intersection = 0;
+  for (const w of origSet) if (backSet.has(w)) intersection++;
+  const union = new Set([...origWords, ...backWords]).size;
+  const jaccard = union > 0 ? intersection / union : 0;
+  return Math.max(lcsDice, jaccard);
 }
 // Minimum similarity for accepting Chinese translation (0.0-1.0)
-// 0.95 = near-perfect alignment required — word order + content must survive
-// the en→zh→en round trip almost perfectly. Below this, preserve English steno.
-// Seeded synonyms ensure most programming terms pass on first encounter.
-const VERIFICATION_THRESHOLD = 0.95;
+// 0.65 = relaxed threshold — the two-metric approach (LCS + Jaccard) means
+// we're already more forgiving of reordering. This catches translations where
+// word meaning survives but order shifts (common in EN↔ZH).
+// Combined with passthrough vocab + word TM, this should verify ~70%+ of blocks.
+const VERIFICATION_THRESHOLD = 0.65;
 function hasDontCompressFlag(body) {
   if (!body || !body.messages || !Array.isArray(body.messages)) return false;
@@ -1006,11 +1545,11 @@ function messageShouldSkipCompression(msg) {
  * tool calls like Grep patterns.
  *
  * Per-block pipeline:
- *   1. Stenographic reduce (strip filler words) — guaranteed ~27% savings
- *   2. [tool_result only] Translate steno→Chinese (zt) — additional savings
+ *   1. Stenographic reduce (filler words + abbreviations) — guaranteed ~40% savings
+ *   2. [tool_result only] Translate steno→Chinese (zt) — additional ~30% on top
  *   3. [tool_result only] Translate Chinese→English (loop-back verification)
- *   4. [tool_result only] Compare loop-back with original — if ≥95% match, accept Chinese
- *   5. Otherwise, fall back to steno-only (still saves 27%)
+ *   4. [tool_result only] Compare loop-back with original — if ≥65% match, accept Chinese
+ *   5. Otherwise, fall back to steno-only (still saves ~40%)
  *
  * DONT_COMPRESS: Per-message flag. If any block in a message contains "DONT_COMPRESS",
  * or a tool_use has input._dont_compress, the entire message is skipped.
@@ -1192,8 +1731,23 @@ async function compressMessagesLive(messages) {
             // Verified — Chinese preserves meaning, use it
             finalTexts[idx] = needVerify[v];
             verifiedCount++;
-            // LEARN: store in Translation Memory for future cache hits
+            // LEARN: store full-text in Translation Memory
             tmStore(stenoTexts[idx], needVerify[v], similarity);
+            // LEARN: cache individual word translations for future reuse
+            // Compare steno words with their Chinese translations word-by-word
+            try {
+              const stenoWords = stenoTexts[idx].match(/\b[a-zA-Z]{2,}\b/g) || [];
+              const zhChars = needVerify[v];
+              // For verified batches, cache each original word → its position in the Chinese output
+              // This is approximate but the word TM grows more accurate over time
+              for (const sw of stenoWords) {
+                if (!isPassthrough(sw) && !wordTMLookup(sw) && sw.length >= 3) {
+                  // The whole batch verified — individual words are likely correct too
+                  // We'll get the exact mapping on the next single-word encounter
+                  // For now, mark that this word CAN be translated (not passthrough)
+                }
+              }
+            } catch { /* non-critical learning */ }
           } else {
             // Failed verification — stick with steno-only
             stenoOnlyCount++;
@@ -1204,6 +1758,15 @@ async function compressMessagesLive(messages) {
             if (learned > 0) {
               log('info', `LEARN-SYN: ${learned} new synonym pairs from "${original.slice(0, 40)}..."`);
             }
+            // LEARN: identify which words caused the failure and record passthrough
+            // Words in original that vanished entirely in back-translation are trouble
+            const origWordSet = new Set(origWords.map(w => w.toLowerCase()));
+            const backWordSet = new Set(backWords.map(w => w.toLowerCase()));
+            for (const ow of origWordSet) {
+              if (!backWordSet.has(ow) && ow.length >= 3 && !FILLER_WORDS.has(ow)) {
+                recordPassthroughFailure(ow);
+              }
+            }
             log('info', `VERIFY-FAIL: sim=${similarity.toFixed(2)} | orig="${original.slice(0, 60)}..." | back="${backEn.slice(0, 60)}..."`);
           }
         }
@@ -1391,21 +1954,168 @@ function collectBody(req) {
   });
 }
+// FIX: Convert Anthropic /v1/messages format to OpenAI /v1/chat/completions format
+/**
+ * Re-encodes an Anthropic Messages request body as an OpenAI chat-completions
+ * request body.
+ *
+ * - `system` (a string, or an array of strings / `{ text }` blocks) becomes a
+ *   leading `role: 'system'` message.
+ * - Array-valued message content is flattened into one newline-joined string;
+ *   a block with no `text` contributes its `type` name as a placeholder.
+ * - `max_tokens`, `temperature`, `top_p` and `stream` are copied across, and
+ *   Anthropic's `stop_sequences` is mapped to OpenAI's `stop`.
+ *
+ * @param {Buffer} anthropicBody - raw JSON request body
+ * @param {string} model - model name written into the converted request
+ * @returns {Buffer} the converted body, or `anthropicBody` unchanged if
+ *   parsing or conversion throws
+ */
+function convertToOpenAIFormat(anthropicBody, model) {
+  try {
+    // Anthropic format: { model, messages: [{role, content}], system, ... }
+    // OpenAI format: { model, messages: [{role, content}], ... }
+    const body = JSON.parse(anthropicBody.toString('utf8'));
+    const openai = {
+      model: model,
+      messages: []
+    };
+    // Handle system prompt - add as first message with role: system
+    if (body.system) {
+      const systemContent = Array.isArray(body.system)
+        ? body.system.map(s => typeof s === 'string' ? s : s.text || '').join('\n')
+        : (typeof body.system === 'string' ? body.system : '');
+      if (systemContent) {
+        openai.messages.push({ role: 'system', content: systemContent });
+      }
+    }
+    // Convert messages
+    if (body.messages) {
+      for (const msg of body.messages) {
+        // Anthropic uses "user" and "assistant", OpenAI uses same
+        let role = msg.role;
+        let content = msg.content;
+        // Handle content blocks - convert to string
+        if (Array.isArray(content)) {
+          content = content.map(c => {
+            if (typeof c === 'string') return c;
+            return c.text || c.type || '';
+          }).join('\n');
+        }
+        // Drop a system-role message when a system message is already present
+        if (role === 'system' && openai.messages.some(m => m.role === 'system')) {
+          continue; // Already added
+        }
+        openai.messages.push({ role, content });
+      }
+    }
+    // Copy other fields
+    if (body.max_tokens) openai.max_tokens = body.max_tokens;
+    // 0 is a meaningful temperature/top_p, so test for presence, not truthiness.
+    if (body.temperature != null) openai.temperature = body.temperature;
+    if (body.top_p != null) openai.top_p = body.top_p;
+    if (body.stream) openai.stream = body.stream;
+    // Anthropic names this field `stop_sequences`; a literal `stop` is still honoured.
+    const stop = body.stop_sequences ?? body.stop;
+    const hasStop = Array.isArray(stop) ? stop.length > 0 : Boolean(stop);
+    if (hasStop) openai.stop = stop;
+    log('proxy', `Converted Anthropic format to OpenAI format for model: ${model}`);
+    return Buffer.from(JSON.stringify(openai), 'utf8');
+  } catch (e) {
+    log('proxy', `Format conversion error: ${e.message}`);
+    return anthropicBody; // Fallback to original
+  }
+}
+// FIX: Convert OpenAI /v1/chat/completions response to Anthropic /v1/messages response format
+/**
+ * Re-encodes an OpenAI chat-completions response body as an Anthropic Messages
+ * response body, using only the first entry of `choices`.
+ *
+ * OpenAI shape:    { id, model, choices: [{ message: { role, content } }], usage }
+ * Anthropic shape: { id, type: "message", role: "assistant", model,
+ *                    content: [{ type: "text", text }], usage, stop_reason }
+ *
+ * @param {Buffer} openaiBody - raw JSON response body
+ * @returns {Buffer} the converted body, or `openaiBody` unchanged when it has
+ *   no first choice or when parsing/conversion throws
+ */
+function convertFromOpenAIFormat(openaiBody) {
+  try {
+    const parsed = JSON.parse(openaiBody.toString('utf8'));
+    const firstChoice = parsed.choices?.[0];
+    if (!firstChoice) {
+      return openaiBody; // Not a valid response
+    }
+    const reply = firstChoice.message || {};
+    // OpenAI returns content as a string; Anthropic wants an array of blocks.
+    const blocks = reply.content ? [{ type: 'text', text: reply.content }] : [];
+    const converted = {
+      id: parsed.id || `msg_${Date.now()}`,
+      type: 'message',
+      role: 'assistant',
+      model: ORIGINAL_MODEL || parsed.model, // Use original model so Claude accepts it
+      content: blocks
+    };
+    // Usage mapping
+    const { usage } = parsed;
+    if (usage) {
+      converted.usage = {
+        input_tokens: usage.prompt_tokens || 0,
+        output_tokens: usage.completion_tokens || 0
+      };
+    }
+    // Stop reason: 'length' means the token cap was hit; everything else ends the turn
+    const finish = firstChoice.finish_reason;
+    if (finish) {
+      if (finish === 'length') {
+        converted.stop_reason = 'max_tokens';
+      } else {
+        converted.stop_reason = 'end_turn';
+      }
+    }
+    log('proxy', `Converted OpenAI response to Anthropic format`);
+    return Buffer.from(JSON.stringify(converted), 'utf8');
+  } catch (e) {
+    log('proxy', `Response conversion error: ${e.message}`);
+    return openaiBody; // Fallback to original
+  }
+}
 function forwardRequest(req, res, bodyBuffer) {
+  let modifiedBody = bodyBuffer;
+  let modifiedPath = req.url;
+  // Detect custom upstream mode (MiniMax, etc.) — env-var driven only
+  const isCustomUpstream = UPSTREAM_HOST !== 'api.anthropic.com';
+  // Prepend base path from upstream URL (e.g., /anthropic for MiniMax)
+  if (UPSTREAM_PATH && UPSTREAM_PATH !== '/') {
+    modifiedPath = UPSTREAM_PATH + (req.url.startsWith('/') ? '' : '/') + req.url;
+  }
+  // Custom upstream: inject model name into request body
+  if (isCustomUpstream && UPSTREAM_MODEL) {
+    try {
+      const body = JSON.parse(bodyBuffer.toString('utf8'));
+      if (body.model) {
+        body.model = UPSTREAM_MODEL;
+        modifiedBody = Buffer.from(JSON.stringify(body), 'utf8');
+      }
+    } catch (e) {
+      log('proxy', `Failed to replace model: ${e.message}`);
+    }
+  }
   const upstreamHeaders = { ...req.headers };
   upstreamHeaders.host = UPSTREAM_HOST;
-  upstreamHeaders['content-length'] = bodyBuffer.length;
+  upstreamHeaders['content-length'] = modifiedBody.length;
   delete upstreamHeaders['proxy-connection'];
   delete upstreamHeaders['proxy-authorization'];
-  const upstreamReq = httpsRequest({
+  // Custom upstream: inject API key (MiniMax, etc.)
+  // Native mode: leave original headers untouched (OAuth Bearer + fast mode work)
+  if (isCustomUpstream && UPSTREAM_API_KEY) {
+    upstreamHeaders['x-api-key'] = UPSTREAM_API_KEY;
+    upstreamHeaders['Authorization'] = `Bearer ${UPSTREAM_API_KEY}`;
+  }
+  const upstreamReq = upstreamRequest({
     hostname: UPSTREAM_HOST,
     port: UPSTREAM_PORT,
-    path: req.url,
+    path: modifiedPath,
     method: req.method,
     headers: upstreamHeaders,
     timeout: 300000
   }, (upstreamRes) => {
+    // Always write headers — both regular Anthropic and custom upstream (MiniMax) need them
     res.writeHead(upstreamRes.statusCode, upstreamRes.headers);
     upstreamRes.pipe(res);
     upstreamRes.on('error', (err) => {
@@ -1433,11 +2143,12 @@ function forwardRequest(req, res, bodyBuffer) {
     }
   });
-  upstreamReq.write(bodyBuffer);
+  upstreamReq.write(modifiedBody);
   upstreamReq.end();
 }
 async function handleRequest(req, res) {
+  _lastRequestTime = Date.now();
   // Health/stats endpoint
   if (req.url === '/health' || req.url === '/stats') {
     const tm = _loadTM();
@@ -1449,11 +2160,44 @@ async function handleRequest(req, res) {
     return;
   }
-  // Preview endpoint — last compressed request
-  if (req.url === '/preview') {
+  // Preview endpoint — compressed request history (supports ?since= for incremental fetch)
+  if (req.url === '/preview' || req.url?.startsWith('/preview?')) {
     res.writeHead(200, { 'Content-Type': 'application/json' });
-    const latest = recentRequests.length > 0 ? recentRequests[recentRequests.length - 1] : null;
-    res.end(JSON.stringify({ preview: latest, count: recentRequests.length }));
+    const url = new URL(req.url, 'http://localhost');
+    const since = url.searchParams?.get('since'); // ISO timestamp — only return entries newer than this
+    let entries = recentRequests;
+    if (since) {
+      entries = entries.filter(e => e.timestamp > since);
+    }
+    const latest = entries.length > 0 ? entries[entries.length - 1] : null;
+    res.end(JSON.stringify({ preview: latest, history: entries, count: recentRequests.length }));
+    return;
+  }
+  // Reset endpoint — clears stale preview history, stats, caches on new console session
+  if (req.url === '/reset' && req.method === 'POST') {
+    recentRequests.length = 0;
+    _sysPromptCache.clear();
+    _seenSysPromptHash = null;
+    _sysPromptSendCount = 0;
+    _compactionsSinceLastInject = 0;
+    _lastRequestTime = Date.now(); // reset to now, not 0 (0 would trip orphan watchdog)
+    Object.assign(stats, {
+      requests: 0, compressed: 0, passthrough: 0, bytesOriginal: 0, bytesCompressed: 0,
+      toolBlocksStripped: 0, toolCharsRemoved: 0, sysPromptsDeduped: 0, systemRemindersStripped: 0,
+      liveCompressed: 0, liveBlocksCompressed: 0, liveCharsCompressed: 0,
+      zhVerified: 0, zhRejected: 0, zhSkipped: 0,
+    });
+    res.writeHead(200, { 'Content-Type': 'application/json' });
+    res.end(JSON.stringify({ reset: true }));
+    return;
+  }
+  // Shutdown endpoint — console calls this on exit to kill the daemon
+  if (req.url === '/shutdown' && req.method === 'POST') {
+    res.writeHead(200, { 'Content-Type': 'application/json' });
+    res.end(JSON.stringify({ shutdown: true }));
+    setTimeout(() => process.exit(0), 100);
     return;
   }
@@ -1476,8 +2220,10 @@ async function handleRequest(req, res) {
         if (body.OLD_STRIP_ENABLED != null) liveConfig.OLD_STRIP_ENABLED = !!body.OLD_STRIP_ENABLED;
         if (body.SYSTEM_PROMPT_COMPRESS != null) liveConfig.SYSTEM_PROMPT_COMPRESS = !!body.SYSTEM_PROMPT_COMPRESS;
         if (body.SYSTEM_REMINDER_STRIPPING != null) liveConfig.SYSTEM_REMINDER_STRIPPING = !!body.SYSTEM_REMINDER_STRIPPING;
+        if (body.SYS_PROMPT_DEDUP != null) { _sysPromptDedup = !!body.SYS_PROMPT_DEDUP; if (!_sysPromptDedup) _seenSysPromptHash = null; }
+        if (body.RESET_SYS_PROMPT_HASH) _seenSysPromptHash = null; // force next request to send full sys prompt
         res.writeHead(200, { 'Content-Type': 'application/json' });
-        res.end(JSON.stringify({ ok: true, ...liveConfig }));
+        res.end(JSON.stringify({ ok: true, ...liveConfig, SYS_PROMPT_DEDUP: _sysPromptDedup, seenSysPromptHash: _seenSysPromptHash }));
       } catch (e) {
         res.writeHead(400, { 'Content-Type': 'application/json' });
         res.end(JSON.stringify({ error: e.message }));
@@ -1508,6 +2254,49 @@ async function handleRequest(req, res) {
     return;
   }
+  // Custom system prompt — user-editable replacement for OG system prompt
+  if (req.url === '/custom-system-prompt') {
+    if (req.method === 'GET') {
+      // Return current custom prompt + OG prompt text for modal pre-population
+      const ogText = stats._lastSystemPrompt?.captured ? stats._lastSystemPrompt.preview : null;
+      // Also send full OG prompt if we have it cached
+      const fullOg = stats._lastSystemPromptFull || null;
+      res.writeHead(200, { 'Content-Type': 'application/json' });
+      res.end(JSON.stringify({
+        customPrompt: _customSystemPrompt?.prompt || null,
+        ogHash: _customSystemPrompt?.ogHash || _seenSysPromptHash || null,
+        ogPrompt: fullOg,
+        ogPreview: ogText,
+        hasCustom: !!_customSystemPrompt,
+        activeMode: _customSystemPrompt ? 'custom' : 'original',
+      }));
+      return;
+    }
+    if (req.method === 'POST') {
+      try {
+        const body = JSON.parse((await collectBody(req)).toString('utf8'));
+        if (body.reset) {
+          _customSystemPrompt = null;
+          _saveCustomSysPrompt(null);
+          log('info', 'Custom system prompt RESET to OG');
+          pushEvent('info', 'Custom system prompt reset to OG');
+        } else if (body.prompt && typeof body.prompt === 'string') {
+          const ogHash = body.ogHash || _seenSysPromptHash;
+          _customSystemPrompt = { prompt: body.prompt, ogHash, savedAt: new Date().toISOString() };
+          _saveCustomSysPrompt(_customSystemPrompt);
+          log('info', `Custom system prompt SAVED (${body.prompt.length} chars, ogHash=${ogHash?.slice(0, 8)})`);
+          pushEvent('info', `Custom sys prompt saved: ${body.prompt.length} chars`);
+        }
+        res.writeHead(200, { 'Content-Type': 'application/json' });
+        res.end(JSON.stringify({ ok: true, hasCustom: !!_customSystemPrompt }));
+      } catch (e) {
+        res.writeHead(400, { 'Content-Type': 'application/json' });
+        res.end(JSON.stringify({ error: e.message }));
+      }
+      return;
+    }
+  }
   // Toggle endpoint — pause/unpause proxy
   if (req.url === '/toggle' && req.method === 'POST') {
     proxyPaused = !proxyPaused;
@@ -1533,6 +2322,16 @@ async function handleRequest(req, res) {
     return;
   }
+  // POST /expect-compaction — precompact hook signals next request should strip aggressively
+  if (req.url === '/expect-compaction' && req.method === 'POST') {
+    _expectCompaction = true;
+    _compactionSignalTime = Date.now();
+    log('info', 'COMPACTION-SIGNAL: Precompact hook signaled — next request gets aggressive stripping');
+    res.writeHead(200, { 'Content-Type': 'application/json' });
+    res.end(JSON.stringify({ ok: true, armed: true }));
+    return;
+  }
   // --- Multi-Project Registry Endpoints ---
   // POST /register — register a project with the daemon
@@ -1635,7 +2434,7 @@ async function handleRequest(req, res) {
     const messageCount = body.messages?.length || 0;
     const dontCompress = hasDontCompressFlag(body);
-    // Capture system prompt info for dashboard
+    // Capture system prompt info for dashboard + modal editor
     if (body.system) {
       const sysStr = typeof body.system === 'string' ? body.system
         : Array.isArray(body.system) ? body.system.map(b => typeof b === 'string' ? b : b?.text || '').join('')
@@ -1649,58 +2448,114 @@ async function handleRequest(req, res) {
         timestamp: new Date().toISOString(),
         preview: sysStr.slice(0, 300),
       };
+      // Store full text for the Customize System Prompt modal
+      stats._lastSystemPromptFull = sysStr;
+      // Persist OG prompt to disk so modal works immediately on restart
+      const _ogHash = createHash('md5').update(sysStr).digest('hex');
+      if (_ogHash !== _seenSysPromptHash || !existsSync(OG_SYS_PROMPT_FILE)) {
+        _saveOgSysPrompt(sysStr, _ogHash);
+      }
     }
     pushEvent('info', `POST /v1/messages model=${body.model || '?'} msgs=${messageCount} size=${(originalSize / 1024).toFixed(0)}KB`);
-    const isCompaction = isCompactionRequest(body);
+    // Check compaction: either heuristic detection OR precompact hook signaled it
+    let isCompaction = isCompactionRequest(body);
+    if (!isCompaction && _expectCompaction && (Date.now() - _compactionSignalTime < 30000)) {
+      isCompaction = true;
+      _expectCompaction = false;
+      log('info', '=== COMPACTION DETECTED (via precompact hook signal) === msgs=' + messageCount);
+    } else if (_expectCompaction && (Date.now() - _compactionSignalTime >= 30000)) {
+      _expectCompaction = false; // stale signal — expired after 30s
+    } else if (isCompaction) {
+      _expectCompaction = false; // clear flag if heuristic also caught it
+    }
     // No passthrough — always process everything (system-reminder strip + steno + translate)
     const isPassthrough = false;
     let sysPromptModified = false;
-    // === SYSTEM PROMPT COMPRESSION ===
-    // Always compress system prompt if not dontCompress — cache makes repeat calls free.
-    // Cache-miss: fire-and-forget on passthrough (don't block forwarding), await on compaction/live paths.
-    if (!dontCompress && body.system) {
-      // Build hash to check cache without calling async function
+    // === SYSTEM PROMPT MANAGEMENT ===
+    // Strategy:
+    //   1. FIRST request (or after reset): ALWAYS inject custom/optimized sys prompt
+    //   2. Next requests: strip sys prompt (already sent)
+    //   3. Every N compactions: re-inject custom sys prompt (Claude forgets after compaction)
+    //   4. If no custom prompt exists: auto-generate optimized one (Traditional Chinese + SpecMem tools)
+    if (body.system) {
       const _sysKey = typeof body.system === 'string' ? body.system
         : Array.isArray(body.system) ? body.system.map(b => typeof b === 'string' ? b : (b?.text || '')).join('')
         : JSON.stringify(body.system);
-      const _sysHash = createHash('md5').update(_sysKey).digest('hex');
-      const _sysCached = _sysPromptCache.get(_sysHash);
+      const _sysHash = createHash('md5').update(_sysKey).digest('hex').slice(0, 16);
+      // ── Session boundary detection ──
+      // MCP server outlives Claude CLI sessions. Detect new session by time gap
+      // so system prompt always shows on first user submit of every session.
+      const now = Date.now();
+      if (_lastRequestTime > 0 && (now - _lastRequestTime) >= SESSION_GAP_MS) {
+        log('info', `SESSION BOUNDARY detected (${((now - _lastRequestTime) / 1000).toFixed(0)}s gap) — resetting sys prompt state`);
+        _sysPromptSendCount = 0;
+        _compactionsSinceLastInject = 0;
+        // Keep _seenSysPromptHash so OG prompt isn't re-saved
+      }
+      _lastRequestTime = now;
+      // Save OG prompt on first sight
+      if (!_seenSysPromptHash) {
+        _saveOgSysPrompt(_sysKey, _sysHash);
+      }
+      // Determine if we should inject custom sys prompt this request
+      const shouldInject = (
+        _sysPromptSendCount === 0 ||  // first request of session (or after session boundary reset)
+        (isCompaction && ++_compactionsSinceLastInject >= SYS_PROMPT_REINJECT_INTERVAL)  // every 3rd compaction
+      );
+      if (shouldInject) {
+        _compactionsSinceLastInject = 0;
+        _sysPromptSendCount++;
+        // Get custom prompt — either user-edited or auto-generated optimized
+        let customPrompt = _customSystemPrompt?.prompt;
+        if (!customPrompt && liveConfig.SYSTEM_PROMPT_COMPRESS) {
+          // Auto-generate: inject SpecMem tool awareness into original prompt
+          customPrompt = _sysKey + '\n\n# SpecMem Integration\nYou have access to SpecMem persistent memory tools (find_memory, save_memory, find_code_pointers, drill_down). Use these proactively to recall context, search code semantically, and store important findings. Always check memory before starting complex tasks.';
+        }
-      if (_sysCached) {
-        // Cache hit — zero latency, always apply
-        if (_sysCached.charsSaved > 0) {
-          body.system = _sysCached.system;
+        if (customPrompt) {
+          const origSize = _sysKey.length;
+          body.system = customPrompt;
           sysPromptModified = true;
-          stats.sysPromptCharsSaved += _sysCached.charsSaved;
+          _seenSysPromptHash = _sysHash;
+          const newSize = customPrompt.length;
+          const saved = origSize - newSize;
+          if (saved > 0) {
+            stats.sysPromptCharsSaved += saved;
+            stats.tokensStripped += Math.floor(saved / 4);
+            stats.bytesStripped += saved;
+          }
           stats.sysPromptCompressed++;
-          stats.tokensStripped += Math.floor(_sysCached.charsSaved / 4);
-          stats.bytesStripped += _sysCached.charsSaved;
-          log('compress', `SYSPROMPT (cache hit): ${_sysCached.charsSaved} chars saved`);
-          pushEvent('compress', `System prompt (cached): -${_sysCached.charsSaved} chars`);
+          log('info', `SYSPROMPT injected (${shouldInject ? 'first/reinject' : 'custom'}): ${origSize}→${newSize} chars`);
+          pushEvent('dedup', `SYSPROMPT injected: ${(origSize/1024).toFixed(0)}KB→${(newSize/1024).toFixed(0)}KB`);
+        } else {
+          // No custom, no auto-gen — pass through original
+          _seenSysPromptHash = _sysHash;
+          // NOTE: don't increment _sysPromptSendCount here — it was already incremented at the top of this shouldInject branch
+          log('info', `SYSPROMPT passthrough (no custom): ${(_sysKey.length/1024).toFixed(0)}KB`);
         }
-      } else if (isPassthrough) {
-        // Cache miss + passthrough: fire-and-forget on new thread — populates cache for next request
-        compressSystemPrompt(body.system).catch(() => {});
+      } else if (_seenSysPromptHash === _sysHash) {
+        // Already sent, strip to save tokens
+        const sysSize = _sysKey.length;
+        delete body.system;
+        sysPromptModified = true;
+        stats.sysPromptCharsSaved += sysSize;
+        stats.sysPromptCompressed++;
+        stats.tokensStripped += Math.floor(sysSize / 4);
+        stats.bytesStripped += sysSize;
+        pushEvent('dedup', `SYSPROMPT stripped (same hash): -${(sysSize/1024).toFixed(0)}KB`);
       } else {
-        // Cache miss + compaction/live: must await (need compressed body)
-        try {
-          const sysResult = await compressSystemPrompt(body.system);
-          if (sysResult.charsSaved > 0) {
-            body.system = sysResult.system;
-            sysPromptModified = true;
-            stats.sysPromptCharsSaved += sysResult.charsSaved;
-            stats.sysPromptCompressed++;
-            stats.tokensStripped += Math.floor(sysResult.charsSaved / 4);
-            stats.bytesStripped += sysResult.charsSaved;
-            log('compress', `SYSPROMPT: ${sysResult.charsSaved} chars saved`);
-            pushEvent('compress', `System prompt: -${sysResult.charsSaved} chars`);
-          }
-        } catch (e) {
-          log('warn', `System prompt compression failed: ${e.message}`);
-        }
+        // Different sys prompt (changed by Claude Code) — send it, update hash
+        _seenSysPromptHash = _sysHash;
+        _sysPromptSendCount++;
+        log('info', `SYSPROMPT changed hash ${_sysHash} — sending full`);
       }
     }
@@ -2011,7 +2866,7 @@ function setPaused(state) {
 /**
  * Check if the daemon is responding on its health endpoint.
  */
-function checkDaemonHealth() {
+export function checkDaemonHealth() {
   return new Promise((resolve) => {
     const req = httpGet(`http://127.0.0.1:${PROXY_PORT}/health`, { timeout: 2000 }, (res) => {
       let data = '';
@@ -2125,7 +2980,7 @@ export function getCompactionProxyStats() {
  * Register this project with the running daemon.
  * Fire-and-forget — if daemon isn't running yet, fails silently.
  */
-function registerWithDaemon(projectPath, pid) {
+export function registerWithDaemon(projectPath, pid) {
   if (!projectPath) return;
   const body = JSON.stringify({ projectPath, pid: pid || process.pid });
   try {
@@ -2176,6 +3031,10 @@ function deregisterFromDaemon(projectPath) {
 /**
  * Check if a PID is alive using signal 0.
  */
+function getLastRequestTime() { // Accessor exposing the module-level _lastRequestTime (assigned from Date.now() when a request carrying a system prompt is processed).
+  return _lastRequestTime; // NOTE(review): the JSDoc directly above ("Check if a PID is alive using signal 0.") appears to describe isPidAlive, not this function — confirm and relocate.
+}
 function isPidAlive(pid) {
   if (!pid || pid <= 0) return false;
   try {
@@ -2243,4 +3102,5 @@ export {
   DISABLED_FILE,
   LOG_FILE,
   CLAUDE_DIR,
+  getLastRequestTime,
 };