context-mode 1.0.103 → 1.0.104

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.openclaw-plugin/openclaw.plugin.json +1 -1
  4. package/.openclaw-plugin/package.json +1 -1
  5. package/README.md +66 -5
  6. package/bin/statusline.mjs +321 -0
  7. package/build/adapters/antigravity/index.d.ts +6 -0
  8. package/build/adapters/antigravity/index.js +10 -0
  9. package/build/adapters/base.d.ts +23 -0
  10. package/build/adapters/base.js +29 -0
  11. package/build/adapters/codex/index.d.ts +10 -0
  12. package/build/adapters/codex/index.js +22 -4
  13. package/build/adapters/cursor/index.d.ts +7 -0
  14. package/build/adapters/cursor/index.js +11 -0
  15. package/build/adapters/detect.d.ts +12 -1
  16. package/build/adapters/detect.js +69 -7
  17. package/build/adapters/gemini-cli/index.d.ts +8 -1
  18. package/build/adapters/gemini-cli/index.js +19 -7
  19. package/build/adapters/jetbrains-copilot/index.d.ts +7 -0
  20. package/build/adapters/jetbrains-copilot/index.js +12 -0
  21. package/build/adapters/kiro/index.d.ts +8 -0
  22. package/build/adapters/kiro/index.js +12 -0
  23. package/build/adapters/openclaw/index.d.ts +17 -0
  24. package/build/adapters/openclaw/index.js +29 -4
  25. package/build/adapters/opencode/index.d.ts +8 -0
  26. package/build/adapters/opencode/index.js +18 -6
  27. package/build/adapters/qwen-code/index.d.ts +1 -0
  28. package/build/adapters/qwen-code/index.js +3 -0
  29. package/build/adapters/types.d.ts +33 -0
  30. package/build/adapters/vscode-copilot/index.d.ts +6 -0
  31. package/build/adapters/vscode-copilot/index.js +10 -0
  32. package/build/adapters/zed/index.d.ts +1 -0
  33. package/build/adapters/zed/index.js +3 -0
  34. package/build/cli.d.ts +15 -0
  35. package/build/cli.js +62 -16
  36. package/build/concurrency/runPool.d.ts +36 -0
  37. package/build/concurrency/runPool.js +51 -0
  38. package/build/executor.d.ts +11 -1
  39. package/build/executor.js +59 -16
  40. package/build/fetch-cache.d.ts +13 -0
  41. package/build/fetch-cache.js +15 -0
  42. package/build/lifecycle.d.ts +6 -2
  43. package/build/lifecycle.js +29 -2
  44. package/build/opencode-plugin.d.ts +6 -0
  45. package/build/opencode-plugin.js +60 -1
  46. package/build/routing-block.d.ts +8 -0
  47. package/build/routing-block.js +86 -0
  48. package/build/runtime.d.ts +1 -0
  49. package/build/runtime.js +54 -3
  50. package/build/search/auto-memory.d.ts +23 -10
  51. package/build/search/auto-memory.js +64 -26
  52. package/build/search/unified.d.ts +3 -0
  53. package/build/search/unified.js +2 -2
  54. package/build/server.d.ts +42 -0
  55. package/build/server.js +693 -164
  56. package/build/session/analytics.d.ts +49 -1
  57. package/build/session/analytics.js +278 -16
  58. package/build/session/db.d.ts +39 -8
  59. package/build/session/db.js +170 -19
  60. package/build/session/extract.js +124 -2
  61. package/build/tool-naming.d.ts +4 -0
  62. package/build/tool-naming.js +24 -0
  63. package/cli.bundle.mjs +201 -159
  64. package/configs/antigravity/GEMINI.md +11 -0
  65. package/configs/claude-code/CLAUDE.md +11 -0
  66. package/configs/codex/AGENTS.md +11 -0
  67. package/configs/cursor/context-mode.mdc +11 -0
  68. package/configs/gemini-cli/GEMINI.md +11 -0
  69. package/configs/jetbrains-copilot/copilot-instructions.md +3 -0
  70. package/configs/kilo/AGENTS.md +11 -0
  71. package/configs/kiro/KIRO.md +11 -0
  72. package/configs/openclaw/AGENTS.md +11 -0
  73. package/configs/opencode/AGENTS.md +11 -0
  74. package/configs/pi/AGENTS.md +11 -0
  75. package/configs/qwen-code/QWEN.md +11 -0
  76. package/configs/vscode-copilot/copilot-instructions.md +3 -0
  77. package/configs/zed/AGENTS.md +11 -0
  78. package/hooks/auto-injection.mjs +36 -10
  79. package/hooks/cache-heal-utils.mjs +231 -0
  80. package/hooks/codex/sessionstart.mjs +7 -4
  81. package/hooks/core/routing.mjs +5 -0
  82. package/hooks/cursor/sessionstart.mjs +7 -4
  83. package/hooks/formatters/claude-code.mjs +20 -0
  84. package/hooks/gemini-cli/sessionstart.mjs +7 -2
  85. package/hooks/jetbrains-copilot/sessionstart.mjs +7 -2
  86. package/hooks/normalize-hooks.mjs +184 -0
  87. package/hooks/session-db.bundle.mjs +33 -14
  88. package/hooks/session-extract.bundle.mjs +2 -2
  89. package/hooks/session-helpers.mjs +68 -20
  90. package/hooks/session-loaders.mjs +8 -2
  91. package/hooks/sessionstart.mjs +8 -2
  92. package/hooks/vscode-copilot/sessionstart.mjs +7 -2
  93. package/openclaw.plugin.json +1 -1
  94. package/package.json +2 -1
  95. package/server.bundle.mjs +164 -125
  96. package/skills/ctx-insight/SKILL.md +1 -1
  97. package/start.mjs +63 -3
package/build/server.js CHANGED
@@ -3,15 +3,17 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
  import { createRequire } from "node:module";
  import { createHash } from "node:crypto";
- import { existsSync, unlinkSync, readdirSync, readFileSync, writeFileSync, rmSync, mkdirSync, cpSync, statSync, symlinkSync, lstatSync } from "node:fs";
+ import { existsSync, unlinkSync, readdirSync, readFileSync, writeFileSync, renameSync, rmSync, mkdirSync, cpSync, statSync, symlinkSync, lstatSync } from "node:fs";
  import { execSync } from "node:child_process";
- import { join, dirname, resolve, sep } from "node:path";
+ import { join, dirname, resolve, sep, isAbsolute } from "node:path";
  import { fileURLToPath } from "node:url";
- import { homedir, tmpdir } from "node:os";
+ import { homedir, tmpdir, cpus } from "node:os";
  import { request as httpsRequest } from "node:https";
  import { z } from "zod";
  import { PolyglotExecutor } from "./executor.js";
+ import { runPool } from "./concurrency/runPool.js";
  import { ContentStore, cleanupStaleDBs, cleanupStaleContentDBs } from "./store.js";
+ import { composeFetchCacheKey } from "./fetch-cache.js";
  import { readBashPolicies, evaluateCommandDenyOnly, extractShellCommands, readToolDenyPatterns, evaluateFilePath, } from "./security.js";
  import { detectRuntimes, getRuntimeSummary, getAvailableLanguages, hasBunRuntime, } from "./runtime.js";
  import { classifyNonZeroExit } from "./exit-classify.js";
@@ -19,8 +21,9 @@ import { startLifecycleGuard } from "./lifecycle.js";
  import { getWorktreeSuffix, SessionDB } from "./session/db.js";
  import { searchAllSources } from "./search/unified.js";
  import { buildNodeCommand } from "./adapters/types.js";
+ import { detectPlatform, getSessionDirSegments } from "./adapters/detect.js";
  import { loadDatabase } from "./db-base.js";
- import { AnalyticsEngine, formatReport } from "./session/analytics.js";
+ import { AnalyticsEngine, formatReport, getLifetimeStats, OPUS_INPUT_PRICE_PER_TOKEN } from "./session/analytics.js";
  const __pkg_dir = dirname(fileURLToPath(import.meta.url));
  const VERSION = (() => {
  for (const rel of ["../package.json", "./package.json"]) {
@@ -57,7 +60,7 @@ server.server.setRequestHandler(ListResourcesRequestSchema, async () => ({ resou
  server.server.setRequestHandler(ListResourceTemplatesRequestSchema, async () => ({ resourceTemplates: [] }));
  const executor = new PolyglotExecutor({
  runtimes,
- projectRoot: process.env.CLAUDE_PROJECT_DIR,
+ projectRoot: () => getProjectDir(),
  });
  // ─────────────────────────────────────────────────────────
  // FS read tracking preload for ctx_batch_execute
@@ -109,6 +112,20 @@ let _insightChild = null;
  function getSessionDir() {
  if (_detectedAdapter)
  return _detectedAdapter.getSessionDir();
+ // Pre-detection path (race window before MCP `initialize` completes):
+ // call detectPlatform() (sync, env-var-based) and look up segments via
+ // getSessionDirSegments() (sync map, no adapter instantiation). This keeps
+ // non-Claude platforms from spilling sessions into ~/.claude/.
+ try {
+ const signal = detectPlatform();
+ const segments = getSessionDirSegments(signal.platform);
+ if (segments) {
+ const dir = join(homedir(), ...segments, "context-mode", "sessions");
+ mkdirSync(dir, { recursive: true });
+ return dir;
+ }
+ }
+ catch { /* fall through to .claude fallback */ }
  const dir = join(homedir(), ".claude", "context-mode", "sessions");
  mkdirSync(dir, { recursive: true });
  return dir;
@@ -130,9 +147,18 @@ function getProjectDir() {
  || process.env.VSCODE_CWD
  || process.env.OPENCODE_PROJECT_DIR
  || process.env.PI_PROJECT_DIR
+ || process.env.IDEA_INITIAL_DIRECTORY
  || process.env.CONTEXT_MODE_PROJECT_DIR
  || process.cwd();
  }
+ /**
+ * Resolve a possibly-relative path against the project directory (full env cascade),
+ * not the MCP server's process.cwd(). MCP server is spawned by the host and its cwd
+ * is unrelated to where the user is working.
+ */
+ function resolveProjectPath(filePath) {
+ return isAbsolute(filePath) ? filePath : resolve(getProjectDir(), filePath);
+ }
  /**
  * Consistent project dir hashing across all DB paths.
  * Normalizes Windows backslashes before hashing so the same project
@@ -322,10 +348,120 @@ function trackResponse(toolName, response) {
  sessionStats.calls[toolName] = (sessionStats.calls[toolName] || 0) + 1;
  sessionStats.bytesReturned[toolName] =
  (sessionStats.bytesReturned[toolName] || 0) + bytes;
+ // Persist a sidecar JSON snapshot for the statusline — read at ~3-5 Hz by
+ // bin/statusline.mjs (and any external dashboard) so they don't have to
+ // open the SQLite database. Throttled inside persistStats() (500ms) so
+ // it's safe to call on every response. The b392c2f concurrency refactor
+ // dropped the SessionDB tool-call counter (`persistToolCallCounter`); we
+ // keep persistStats here because the statusline depends on it.
+ persistStats();
  return response;
  }
  function trackIndexed(bytes) {
  sessionStats.bytesIndexed += bytes;
+ persistStats();
+ }
+ // ─────────────────────────────────────────────────────────
+ // Stats persistence — written after every tool call so
+ // external readers (status line scripts, dashboards, hooks)
+ // can see real-time savings without spawning an MCP client.
+ // ─────────────────────────────────────────────────────────
+ const STATS_PERSIST_THROTTLE_MS = 500;
+ // Schema version for the persisted stats payload (~/.claude/context-mode/sessions/stats-*.json).
+ // Bump when a field is added/renamed/removed. Statusline reads `schemaVersion ?? 0` and warns when
+ // it sees a future schema, so legacy bundles degrade gracefully on upgrade rather than silently
+ // rendering missing fields (PR #401 architect review P1.3).
+ // v2: added tokens_saved_lifetime + dollars_saved_lifetime.
+ const STATS_SCHEMA_VERSION = 2;
+ // OPUS_INPUT_PRICE_PER_TOKEN intentionally NOT defined here — single source in
+ // src/session/analytics.ts re-exported above. (P1.1 — pricing constant dedup,
+ // PR #401 architect + ops 2-vote convergence.)
+ const LIFETIME_REFRESH_MS = 30_000;
+ // Matches the conversion factor in src/session/analytics.ts renderBottomLine:
+ // ~1KB per session event ÷ 4 bytes/token = 256 tokens/event.
+ const TOKENS_PER_EVENT = 256;
+ let _lastStatsPersist = 0;
+ let _lifetimeCache;
+ /**
+ * Resolve the per-session stats file path.
+ *
+ * The session id mirrors the Claude Code adapter contract
+ * (`pid-<parent pid>`), so a status line script can derive
+ * the same id from `$PPID` without coupling to MCP.
+ */
+ function getStatsFilePath() {
+ const sessionId = process.env.CLAUDE_SESSION_ID || `pid-${process.ppid}`;
+ return join(getSessionDir(), `stats-${sessionId}.json`);
+ }
+ function persistStats() {
+ const now = Date.now();
+ if (now - _lastStatsPersist < STATS_PERSIST_THROTTLE_MS)
+ return;
+ _lastStatsPersist = now;
+ try {
+ const totalReturned = Object.values(sessionStats.bytesReturned).reduce((a, b) => a + b, 0);
+ const totalCalls = Object.values(sessionStats.calls).reduce((a, b) => a + b, 0);
+ const keptOut = sessionStats.bytesIndexed +
+ sessionStats.bytesSandboxed +
+ sessionStats.cacheBytesSaved;
+ const totalProcessed = keptOut + totalReturned;
+ const reductionPct = totalProcessed > 0
+ ? Math.round((1 - totalReturned / totalProcessed) * 100)
+ : 0;
+ const tokensSaved = Math.round(keptOut / 4);
+ // Lifetime savings — cached separately because getLifetimeStats() scans
+ // disk (per-project SessionDBs + auto-memory dirs) and is too expensive
+ // for the 500ms persist throttle. Refresh every 30s; the statusline
+ // doesn't need second-by-second lifetime accuracy.
+ let lifetimeTokens = _lifetimeCache?.tokens ?? 0;
+ if (!_lifetimeCache || now - _lifetimeCache.computedAt > LIFETIME_REFRESH_MS) {
+ try {
+ const life = getLifetimeStats({ sessionsDir: getSessionDir() });
+ lifetimeTokens = (life?.totalEvents ?? 0) * TOKENS_PER_EVENT;
+ _lifetimeCache = { tokens: lifetimeTokens, computedAt: now };
+ }
+ catch {
+ // best-effort — keep stale cache or 0
+ }
+ }
+ const payload = {
+ schemaVersion: STATS_SCHEMA_VERSION,
+ version: VERSION,
+ updated_at: now,
+ session_start: sessionStats.sessionStart,
+ uptime_ms: now - sessionStats.sessionStart,
+ total_calls: totalCalls,
+ bytes_returned: totalReturned,
+ bytes_indexed: sessionStats.bytesIndexed,
+ bytes_sandboxed: sessionStats.bytesSandboxed,
+ cache_hits: sessionStats.cacheHits,
+ cache_bytes_saved: sessionStats.cacheBytesSaved,
+ kept_out: keptOut,
+ total_processed: totalProcessed,
+ reduction_pct: reductionPct,
+ tokens_saved: tokensSaved,
+ // statusline-facing $ values — pre-computed at Opus input rate so the
+ // statusline doesn't have to know pricing. Lets us evolve pricing in
+ // one place without touching consumers.
+ dollars_saved_session: +(tokensSaved * OPUS_INPUT_PRICE_PER_TOKEN).toFixed(2),
+ tokens_saved_lifetime: lifetimeTokens,
+ dollars_saved_lifetime: +(lifetimeTokens * OPUS_INPUT_PRICE_PER_TOKEN).toFixed(2),
+ by_tool: Object.fromEntries(Object.keys({ ...sessionStats.calls, ...sessionStats.bytesReturned }).map((t) => [
+ t,
+ {
+ calls: sessionStats.calls[t] || 0,
+ bytes: sessionStats.bytesReturned[t] || 0,
+ },
+ ])),
+ };
+ const filePath = getStatsFilePath();
+ const tmpPath = `${filePath}.tmp`;
+ writeFileSync(tmpPath, JSON.stringify(payload));
+ renameSync(tmpPath, filePath);
+ }
+ catch {
+ // best-effort — never break tool calls because of stats persistence
+ }
  }
  // ==============================================================================
  // Security: server-side deny firewall
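The sidecar snapshot written by persistStats() is meant to be read without opening SQLite. Below is a minimal, illustrative sketch of an external reader, assuming the default ~/.claude sessions directory and the pid-<parent pid> session-id contract documented in this hunk; the shipped consumer is bin/statusline.mjs, which is not shown in this diff.

```js
// Illustrative reader for the stats sidecar (not the shipped bin/statusline.mjs).
// Assumes the default ~/.claude/context-mode/sessions directory; adapter-specific
// platforms write under their own config dir instead.
import { readFileSync } from "node:fs";
import { join } from "node:path";
import { homedir } from "node:os";

const sessionId = process.env.CLAUDE_SESSION_ID || `pid-${process.ppid}`;
const file = join(homedir(), ".claude", "context-mode", "sessions", `stats-${sessionId}.json`);

try {
  const stats = JSON.parse(readFileSync(file, "utf-8"));
  // Warn rather than crash on a future schema, mirroring the STATS_SCHEMA_VERSION note above.
  if ((stats.schemaVersion ?? 0) > 2) {
    console.error("context-mode: stats schema is newer than this reader; some fields may be missing");
  }
  console.log(
    `ctx ${stats.reduction_pct}% kept out | ~${stats.tokens_saved} tok ` +
    `($${stats.dollars_saved_session}) | lifetime $${stats.dollars_saved_lifetime}`
  );
} catch {
  // No stats file yet: the server has not handled a tool call this session.
}
```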
@@ -387,7 +523,7 @@ function checkNonShellDenyPolicy(code, language, toolName) {
  */
  function checkFilePathDenyPolicy(filePath, toolName) {
  try {
- const projectDir = process.env.CLAUDE_PROJECT_DIR ?? process.cwd();
+ const projectDir = getProjectDir();
  const denyGlobs = readToolDenyPatterns("Read", projectDir);
  const result = evaluateFilePath(filePath, denyGlobs, process.platform === "win32", projectDir);
  if (result.denied) {
@@ -539,6 +675,94 @@ export function formatBatchQueryResults(store, queries, source, maxOutput = 80 *
  sections.push(`\n> **Tip:** Results are scoped to this batch only. To search across all indexed sources, use \`ctx_search(queries: [...])\`.`);
  return sections;
  }
+ function formatCommandOutput(label, raw, onFsBytes) {
+ let output = raw || "(no output)";
+ const fsMatches = output.matchAll(/__CM_FS__:(\d+)/g);
+ let cmdFsBytes = 0;
+ for (const m of fsMatches)
+ cmdFsBytes += parseInt(m[1]);
+ if (cmdFsBytes > 0) {
+ onFsBytes?.(cmdFsBytes);
+ output = output.replace(/__CM_FS__:\d+\n?/g, "");
+ }
+ return `# ${label}\n\n${output}\n`;
+ }
+ /**
+ * Execute batch commands. concurrency=1 preserves the legacy serial path
+ * (shared timeout budget + cascading skip-on-timeout). concurrency>1 runs
+ * commands concurrently with at most N in flight; each command receives the
+ * full timeout, output is collated by input index, and per-command timeouts
+ * record `(timed out)` blocks without skipping siblings.
+ */
+ export async function runBatchCommands(commands, opts, executor) {
+ const { timeout, concurrency, nodeOptsPrefix, onFsBytes } = opts;
+ if (concurrency <= 1) {
+ // Serial path — shared timeout budget, cascading skip on timeout.
+ const outputs = [];
+ const startTime = Date.now();
+ let timedOut = false;
+ for (let i = 0; i < commands.length; i++) {
+ const cmd = commands[i];
+ const elapsed = Date.now() - startTime;
+ const remaining = timeout - elapsed;
+ if (remaining <= 0) {
+ outputs.push(`# ${cmd.label}\n\n(skipped — batch timeout exceeded)\n`);
+ timedOut = true;
+ continue;
+ }
+ const result = await executor.execute({
+ language: "shell",
+ code: `${nodeOptsPrefix}${cmd.command} 2>&1`,
+ timeout: remaining,
+ });
+ outputs.push(formatCommandOutput(cmd.label, result.stdout, onFsBytes));
+ if (result.timedOut) {
+ timedOut = true;
+ for (let j = i + 1; j < commands.length; j++) {
+ outputs.push(`# ${commands[j].label}\n\n(skipped — batch timeout exceeded)\n`);
+ }
+ break;
+ }
+ }
+ return { outputs, timedOut };
+ }
+ // Parallel path — delegated to the shared runPool primitive.
+ // Each job returns { output, timedOut }; runPool handles in-flight cap,
+ // throw isolation (Promise.allSettled semantics), and order preservation.
+ const jobs = commands.map((cmd) => ({
+ run: async () => {
+ const result = await executor.execute({
+ language: "shell",
+ code: `${nodeOptsPrefix}${cmd.command} 2>&1`,
+ timeout,
+ });
+ // Always route partial stdout through formatCommandOutput so __CM_FS__
+ // markers are stripped + counted, even when the command timed out.
+ const formatted = formatCommandOutput(cmd.label, result.stdout, onFsBytes);
+ const output = result.timedOut
+ ? formatted.replace(/\n$/, "") + `\n(timed out after ${timeout}ms)\n`
+ : formatted;
+ return { output, timedOut: !!result.timedOut };
+ },
+ }));
+ const { settled } = await runPool(jobs, { concurrency });
+ const outputs = new Array(commands.length);
+ let timedOut = false;
+ for (let i = 0; i < settled.length; i++) {
+ const r = settled[i];
+ if (r.status === "fulfilled") {
+ outputs[i] = r.value.output;
+ if (r.value.timedOut)
+ timedOut = true;
+ }
+ else {
+ // Isolated executor throw (spawn EAGAIN, ENOMEM, EMFILE, …) — siblings keep running.
+ const message = r.reason instanceof Error ? r.reason.message : String(r.reason);
+ outputs[i] = `# ${commands[i].label}\n\n(executor error: ${message})\n`;
+ }
+ }
+ return { outputs, timedOut };
+ }
  // ─────────────────────────────────────────────────────────
  // Tool: execute
  // ─────────────────────────────────────────────────────────
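runBatchCommands delegates its parallel path to runPool, which ships in build/concurrency/runPool.js (+51 lines, not part of this hunk). The following is a hypothetical sketch of a pool that matches how the call sites in this diff use it (in-flight cap, allSettled-style results in input order, optional CPU cap); the real implementation may differ.

```js
// Hypothetical sketch of the runPool primitive, inferred from its call sites:
// at most `concurrency` jobs in flight, Promise.allSettled-style results kept
// in input order, optional cap at os.cpus().length. Not the shipped module.
import { cpus } from "node:os";

export async function runPool(jobs, { concurrency, capByCpuCount = false }) {
  const cap = capByCpuCount ? Math.max(1, cpus().length) : Infinity;
  const effectiveConcurrency = Math.min(concurrency, cap);
  const capped = effectiveConcurrency < concurrency;
  const settled = new Array(jobs.length);
  let next = 0;
  const worker = async () => {
    while (next < jobs.length) {
      const i = next++; // single-threaded event loop: no race on the counter
      try {
        settled[i] = { status: "fulfilled", value: await jobs[i].run() };
      } catch (reason) {
        settled[i] = { status: "rejected", reason };
      }
    }
  };
  const workers = Math.min(effectiveConcurrency, jobs.length);
  await Promise.all(Array.from({ length: workers }, () => worker()));
  return { settled, effectiveConcurrency, capped };
}
```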
@@ -1009,18 +1233,19 @@ server.registerTool("ctx_index", {
  });
  }
  try {
+ const resolvedPath = path ? resolveProjectPath(path) : undefined;
  // Track the raw bytes being indexed (content or file)
  if (content)
  trackIndexed(Buffer.byteLength(content));
- else if (path) {
+ else if (resolvedPath) {
  try {
  const fs = await import("fs");
- trackIndexed(fs.readFileSync(path).byteLength);
+ trackIndexed(fs.readFileSync(resolvedPath).byteLength);
  }
  catch { /* ignore — file read errors handled by store */ }
  }
  const store = getStore();
- const result = store.index({ content, path, source });
+ const result = store.index({ content, path: resolvedPath, source: source ?? resolvedPath });
  return trackResponse("ctx_index", {
  content: [
  {
@@ -1178,14 +1403,14 @@ server.registerTool("ctx_search", {
  if (sort === "timeline") {
  try {
  const sessionsDir = getSessionDir();
- const dbFile = join(sessionsDir, `${hashProjectDir()}.db`);
+ const dbFile = join(sessionsDir, `${hashProjectDir()}${getWorktreeSuffix()}.db`);
  if (existsSync(dbFile)) {
  timelineDB = new SessionDB({ dbPath: dbFile });
  }
  }
  catch { /* SessionDB unavailable — search ContentStore + auto-memory only */ }
  }
- const configDir = process.env.CLAUDE_CONFIG_DIR || join(homedir(), ".claude");
+ const configDir = _detectedAdapter?.getConfigDir() ?? (process.env.CLAUDE_CONFIG_DIR || join(homedir(), ".claude"));
  try {
  for (const q of queryList) {
  if (totalSize > MAX_TOTAL) {
@@ -1204,6 +1429,7 @@
  sessionDB: timelineDB,
  projectDir: getProjectDir(),
  configDir,
+ adapter: _detectedAdapter ?? undefined,
  });
  }
  else {
@@ -1342,54 +1568,178 @@ async function main() {
  main();
  `;
  }
- server.registerTool("ctx_fetch_and_index", {
- title: "Fetch & Index URL",
- description: "Fetches URL content, converts HTML to markdown, indexes into searchable knowledge base, " +
- "and returns a ~3KB preview. Full content stays in sandbox use ctx_search() for deeper lookups.\n\n" +
- "Better than WebFetch: preview is immediate, full content is searchable, raw HTML never enters context.\n\n" +
- "Content-type aware: HTML is converted to markdown, JSON is chunked by key paths, plain text is indexed directly.\n\n" +
- "When reporting results terse like caveman. Technical substance exact. Only fluff die. Pattern: [thing] [action] [reason]. [next step].",
- inputSchema: z.object({
- url: z.string().describe("The URL to fetch and index"),
- source: z
- .string()
- .optional()
- .describe("Label for the indexed content (e.g., 'React useEffect docs', 'Supabase Auth API')"),
- force: z
- .boolean()
- .optional()
- .describe("Skip cache and re-fetch even if content was recently indexed"),
- }),
- }, async ({ url, source, force }) => {
- // TTL cache: if source was indexed within 24h, return cached hint
+ // ─────────────────────────────────────────────────────────
+ // fetch_and_index helpers split into parallel-safe fetch and serial-only index
+ // ─────────────────────────────────────────────────────────
+ const FETCH_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
+ const FETCH_PREVIEW_LIMIT = 3072;
+ /**
+ * Pure fetch step TTL cache check + subprocess fetch. SAFE TO RUN IN PARALLEL.
+ * Performs zero SQLite writes (only reads source meta). Caller must funnel
+ * fetched results through `indexFetched` serially to avoid FTS5 WAL contention.
+ */
+ /**
+ * SSRF guard for ctx_fetch_and_index: validate URL scheme + resolve target IP +
+ * block link-local / IMDS / multicast / reserved IP ranges. Returns null if
+ * safe; returns a FetchOneResult fetch_error if blocked.
+ *
+ * Policy (PR #401 ops review, developer-friendly default):
+ *
+ * **HARD BLOCK** (no legitimate dev workflow):
+ * - file://, gopher://, javascript:, data: schemes (only http: and https:)
+ * - 169.254.0.0/16 link-local (INCLUDES 169.254.169.254 = AWS/GCP/Azure IMDS
+ * cloud credential endpoint — high-value target for indirect prompt injection)
+ * - IPv6 link-local fe80::/10
+ * - Multicast (224+ IPv4, ff00::/8 IPv6) and reserved (0.0.0.0/8) ranges
+ *
+ * **ALLOW by default** (legitimate developer use cases dominate):
+ * - localhost, 127.x.x.x, ::1 (local dev servers — Next.js, Vite, Postgres, …)
+ * - 10.x, 172.16-31.x, 192.168.x RFC1918 private (developer's internal network)
+ *
+ * **STRICT MODE** opt-in via env var: `CTX_FETCH_STRICT=1`
+ * - Blocks loopback + RFC1918 too
+ * - For hosted/CI environments where the runtime isn't the user's own machine
+ *
+ * DNS resolution is performed against the resolved IP (not just URL parse) so a
+ * hostname like `evil.com` pointing to 169.254.169.254 is rejected — defends
+ * against attacker-controlled DNS records and DNS rebinding.
+ */
+ async function ssrfGuard(rawUrl) {
+ let parsed;
+ try {
+ parsed = new URL(rawUrl);
+ }
+ catch {
+ return { kind: "fetch_error", url: rawUrl, error: "invalid URL", reason: "exit" };
+ }
+ // 1. Scheme allowlist — http and https only
+ if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
+ return {
+ kind: "fetch_error",
+ url: rawUrl,
+ error: `URL scheme "${parsed.protocol}" not allowed (only http: and https:)`,
+ reason: "exit",
+ };
+ }
+ const strict = process.env.CTX_FETCH_STRICT === "1";
+ // 2. DNS resolve + check IP ranges (hard-block + optional strict-mode block)
+ try {
+ const { lookup } = await import("node:dns/promises");
+ const records = await lookup(parsed.hostname, { all: true, verbatim: true });
+ for (const rec of records) {
+ const verdict = classifyIp(rec.address);
+ if (verdict === "block") {
+ return {
+ kind: "fetch_error",
+ url: rawUrl,
+ error: `URL "${parsed.hostname}" resolves to ${rec.address} — blocked (link-local / IMDS / multicast / reserved)`,
+ reason: "exit",
+ };
+ }
+ if (verdict === "private" && strict) {
+ return {
+ kind: "fetch_error",
+ url: rawUrl,
+ error: `URL "${parsed.hostname}" resolves to private IP ${rec.address} — blocked under CTX_FETCH_STRICT=1`,
+ reason: "exit",
+ };
+ }
+ }
+ }
+ catch (err) {
+ return {
+ kind: "fetch_error",
+ url: rawUrl,
+ error: `DNS lookup failed for "${parsed.hostname}": ${err instanceof Error ? err.message : String(err)}`,
+ reason: "exit",
+ };
+ }
+ return null; // safe to fetch
+ }
+ /**
+ * Classify an IP address.
+ * - "block": always blocked (link-local/IMDS/multicast/reserved/malformed)
+ * - "private": loopback or RFC1918 — allowed by default, blocked in strict mode
+ * - "public": safe to fetch
+ *
+ * Exported (via the function name) so SSRF tests can exercise the matcher directly.
+ */
+ export function classifyIp(ip) {
+ const lower = ip.toLowerCase();
+ // IPv6 takes priority — check for `:` first so IPv4-mapped addresses
+ // (`::ffff:127.0.0.1`) don't get incorrectly routed through the IPv4 parser.
+ if (lower.includes(":")) {
+ // IPv4-mapped IPv6 (`::ffff:127.0.0.1`) — recurse through IPv4 classifier
+ const v4MappedMatch = lower.match(/^::ffff:([\d.]+)$/);
+ if (v4MappedMatch)
+ return classifyIp(v4MappedMatch[1]);
+ // Hard-block
+ if (lower === "::")
+ return "block"; // unspecified
+ if (lower.startsWith("fe8") || lower.startsWith("fe9") ||
+ lower.startsWith("fea") || lower.startsWith("feb"))
+ return "block"; // fe80::/10 link-local
+ if (lower.startsWith("ff"))
+ return "block"; // ff00::/8 multicast
+ // Private (loopback + ULA)
+ if (lower === "::1")
+ return "private";
+ if (lower.startsWith("fc") || lower.startsWith("fd"))
+ return "private"; // fc00::/7 ULA
+ return "public";
+ }
+ // IPv4 (or non-IP string — malformed = block)
+ if (!ip.includes("."))
+ return "block"; // not an IP at all
+ const parts = ip.split(".").map((p) => parseInt(p, 10));
+ if (parts.length !== 4 || parts.some((p) => isNaN(p) || p < 0 || p > 255))
+ return "block";
+ const [a, b] = parts;
+ // Hard-block (no legitimate use)
+ if (a === 169 && b === 254)
+ return "block"; // link-local incl. 169.254.169.254 (IMDS)
+ if (a === 0)
+ return "block"; // 0.0.0.0/8 (current network)
+ if (a >= 224)
+ return "block"; // 224.0.0.0+ multicast/reserved
+ // Private (loopback + RFC1918) — allow by default
+ if (a === 127)
+ return "private"; // 127.0.0.0/8 loopback
+ if (a === 10)
+ return "private"; // 10.0.0.0/8
+ if (a === 172 && b >= 16 && b <= 31)
+ return "private"; // 172.16.0.0/12
+ if (a === 192 && b === 168)
+ return "private"; // 192.168.0.0/16
+ return "public";
+ }
+ async function fetchOneUrl(url, source, force) {
+ // SSRF guard — reject file://, javascript:, loopback, RFC1918, IMDS, link-local
+ // BEFORE any cache lookup or subprocess spawn. Even cached entries shouldn't
+ // serve a previously-poisoned source label.
+ const ssrfBlock = await ssrfGuard(url);
+ if (ssrfBlock)
+ return ssrfBlock;
  if (!force) {
  const store = getStore();
- const label = source ?? url;
- const meta = store.getSourceMeta(label);
+ // Cache key composes (source, url) so two distinct URLs sharing the same
+ // `source` label do not collide — they each get their own cache slot
+ // (commit 1f1243e regression test enforced).
+ const cacheKey = composeFetchCacheKey(source, url);
+ const meta = store.getSourceMeta(cacheKey);
  if (meta) {
  const indexedAt = new Date(meta.indexedAt + "Z"); // SQLite datetime is UTC without Z
  const ageMs = Date.now() - indexedAt.getTime();
- const TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
- if (ageMs < TTL_MS) {
+ if (ageMs < FETCH_TTL_MS) {
  const ageHours = Math.floor(ageMs / (60 * 60 * 1000));
  const ageMin = Math.floor(ageMs / (60 * 1000));
  const ageStr = ageHours > 0 ? `${ageHours}h ago` : ageMin > 0 ? `${ageMin}m ago` : "just now";
- // Track cache savings estimate ~1.6KB per chunk (average indexed content size)
- const estimatedBytes = meta.chunkCount * 1600;
- sessionStats.cacheHits++;
- sessionStats.cacheBytesSaved += estimatedBytes;
- return trackResponse("ctx_fetch_and_index", {
- content: [{
- type: "text",
- text: `Cached: **${meta.label}** — ${meta.chunkCount} sections, indexed ${ageStr} (fresh, TTL: 24h).\nTo refresh: call ctx_fetch_and_index again with \`force: true\`.\n\nYou MUST call ctx_search() to answer questions about this content — this cached response contains no content.\nUse: ctx_search(queries: [...], source: "${meta.label}")`,
- }],
- });
+ const estimatedBytes = meta.chunkCount * 1600; // ~1.6KB/chunk avg
+ return { kind: "cached", label: meta.label, chunkCount: meta.chunkCount, estimatedBytes, ageStr };
  }
- // Stale (>24h) — fall through to re-fetch silently
+ // Stale — fall through to re-fetch silently
  }
  }
- // Generate a unique temp file path for the subprocess to write fetched content.
- // This bypasses the executor's 100KB stdout truncation — content goes file→handler directly.
  const outputPath = join(tmpdir(), `ctx-fetch-${Date.now()}-${Math.random().toString(36).slice(2)}.dat`);
  try {
  const fetchCode = buildFetchCode(url, outputPath);
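The ranges documented in the classifyIp comment above translate into verdicts like the spot-checks below. These are illustrative only; the `./server.js` import path is an assumption, though the function is exported from build/server.js precisely so tests can exercise the matcher directly.

```js
// Illustrative spot-checks of the classifyIp policy table above (import path assumed).
import assert from "node:assert/strict";
import { classifyIp } from "./server.js";

assert.equal(classifyIp("169.254.169.254"), "block");        // cloud IMDS, always blocked
assert.equal(classifyIp("::ffff:169.254.169.254"), "block"); // IPv4-mapped form, same verdict
assert.equal(classifyIp("127.0.0.1"), "private");            // loopback: allowed unless CTX_FETCH_STRICT=1
assert.equal(classifyIp("10.20.30.40"), "private");          // RFC1918 private range
assert.equal(classifyIp("fd00::1"), "private");              // IPv6 ULA
assert.equal(classifyIp("93.184.216.34"), "public");         // ordinary public address
assert.equal(classifyIp("not-an-ip"), "block");              // malformed input fails closed
```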
@@ -1399,93 +1749,258 @@ server.registerTool("ctx_fetch_and_index", {
  timeout: 30_000,
  });
  if (result.exitCode !== 0) {
- return trackResponse("ctx_fetch_and_index", {
- content: [
- {
- type: "text",
- text: `Failed to fetch ${url}: ${result.stderr || result.stdout}`,
- },
- ],
- isError: true,
- });
+ return { kind: "fetch_error", url, error: result.stderr || result.stdout || "unknown error", reason: "exit" };
  }
- // Parse content-type marker from stdout (content is in the temp file)
- const store = getStore();
  const header = (result.stdout || "").trim();
- // Read full content from temp file
  let markdown;
  try {
  markdown = readFileSync(outputPath, "utf-8").trim();
  }
  catch {
- return trackResponse("ctx_fetch_and_index", {
- content: [
- {
- type: "text",
- text: `Fetched ${url} but could not read subprocess output`,
- },
- ],
- isError: true,
- });
+ return { kind: "fetch_error", url, error: "could not read subprocess output", reason: "read" };
  }
  if (markdown.length === 0) {
+ return { kind: "fetch_error", url, error: "empty content", reason: "empty" };
+ }
+ return { kind: "fetched", url, source, markdown, header };
+ }
+ catch (err) {
+ return {
+ kind: "fetch_error",
+ url,
+ error: err instanceof Error ? err.message : String(err),
+ reason: "throw",
+ };
+ }
+ finally {
+ try {
+ rmSync(outputPath);
+ }
+ catch { /* already gone */ }
+ }
+ }
+ /**
+ * Serial-only indexing step — single FTS5 write per call. Caller loops over
+ * fetched results and calls this one-at-a-time to avoid SQLite WAL contention
+ * (PRD finding E).
+ */
+ function indexFetched(f) {
+ const store = getStore();
+ // Storage label composed via composeFetchCacheKey so two URLs sharing a
+ // `source` label do not overwrite each other (commit 1f1243e). ctx_search()
+ // still finds both via LIKE-mode source filter on the `source` substring.
+ const storageLabel = composeFetchCacheKey(f.source, f.url);
+ let indexed;
+ if (f.header === "__CM_CT__:json") {
+ indexed = store.indexJSON(f.markdown, storageLabel);
+ }
+ else if (f.header === "__CM_CT__:text") {
+ indexed = store.indexPlainText(f.markdown, storageLabel);
+ }
+ else {
+ indexed = store.index({ content: f.markdown, source: storageLabel });
+ }
+ // Track AFTER the FTS5 write succeeds — failed indexes shouldn't inflate the counter.
+ trackIndexed(Buffer.byteLength(f.markdown));
+ const preview = f.markdown.length > FETCH_PREVIEW_LIMIT
+ ? f.markdown.slice(0, FETCH_PREVIEW_LIMIT) + "\n\n…[truncated — use ctx_search() for full content]"
+ : f.markdown;
+ return {
+ label: indexed.label,
+ totalChunks: indexed.totalChunks,
+ totalBytes: Buffer.byteLength(f.markdown),
+ preview,
+ };
+ }
+ server.registerTool("ctx_fetch_and_index", {
+ title: "Fetch & Index URL(s)",
+ description: "Fetches URL content, converts HTML to markdown, indexes into searchable knowledge base, " +
+ "and returns a ~3KB preview. Full content stays in sandbox — use ctx_search() for deeper lookups.\n\n" +
+ "Better than WebFetch: preview is immediate, full content is searchable, raw HTML never enters context.\n\n" +
+ "Content-type aware: HTML is converted to markdown, JSON is chunked by key paths, plain text is indexed directly.\n\n" +
+ "PARALLELIZE I/O: For multi-URL research (library evaluation, migration scans, doc comparisons), pass `requests: [{url, source}, ...]` with `concurrency: 4-8` — speeds up by 3-5x on real workloads.\n" +
+ " ✅ Use concurrency: 4-8 for: library docs sweep, multi-changelog scan, competitive pricing pages, multi-region docs, GitHub raw file pulls.\n" +
+ " ❌ Single URL → use the legacy {url, source} shape (concurrency irrelevant).\n" +
+ " Example: requests: [{url: 'https://react.dev/...', source: 'react'}, {url: 'https://vuejs.org/...', source: 'vue'}], concurrency: 5.\n" +
+ " Indexing is serial regardless of concurrency — fetches race, FTS5 writes don't (avoids SQLite WAL contention).\n\n" +
+ "When reporting results — terse like caveman. Technical substance exact. Only fluff die. Pattern: [thing] [action] [reason]. [next step].",
+ inputSchema: z.object({
+ url: z.string().optional().describe("Single URL to fetch and index (legacy single-shape)"),
+ source: z
+ .string()
+ .optional()
+ .describe("Label for the indexed content when using single `url` (e.g., 'React useEffect docs', 'Supabase Auth API'). For batch, put source in each requests entry."),
+ requests: z
+ .array(z.object({
+ url: z.string().describe("URL to fetch"),
+ source: z.string().optional().describe("Label for this URL's indexed content"),
+ }))
+ .min(1)
+ .optional()
+ .describe("Batch shape: array of {url, source?} entries. Use with concurrency>1 for parallel fetch. " +
+ "Each request indexed under its own source label. Output preserves input order."),
+ concurrency: z
+ .coerce.number()
+ .int()
+ .min(1)
+ .max(8)
+ .optional()
+ .default(1)
+ .describe("Max URLs to fetch in parallel (1-8, default: 1). " +
+ "Use 4-8 for I/O-bound multi-URL batches (library docs, changelogs, pricing pages). " +
+ "Capped by os.cpus().length on small machines (response notes when capped). " +
+ "Indexing is always serial regardless — only fetches race."),
+ force: z
+ .boolean()
+ .optional()
+ .describe("Skip cache and re-fetch even if content was recently indexed"),
+ }),
+ }, async ({ url, source, requests, concurrency, force }) => {
+ // Normalize input: legacy {url} or new {requests: [...]}.
+ // requests wins when both are provided (explicit batch intent).
+ const batch = requests
+ ? requests
+ : url
+ ? [{ url, source }]
+ : [];
+ if (batch.length === 0) {
+ return trackResponse("ctx_fetch_and_index", {
+ content: [{
+ type: "text",
+ text: "ctx_fetch_and_index requires either `url` (single) or `requests: [{url, source?}, ...]` (batch).",
+ }],
+ isError: true,
+ });
+ }
+ const isLegacySingle = !requests && batch.length === 1;
+ const requestedConcurrency = concurrency ?? 1;
+ // Parallel fetch via shared runPool primitive. capByCpuCount only for batch
+ // — single-URL doesn't need the cap (only one job, executor is one subprocess).
+ const jobs = batch.map((req) => ({
+ run: () => fetchOneUrl(req.url, req.source, force),
+ }));
+ const { settled, effectiveConcurrency, capped } = await runPool(jobs, {
+ concurrency: requestedConcurrency,
+ capByCpuCount: !isLegacySingle && requestedConcurrency > 1,
+ });
+ const finalized = [];
+ for (let i = 0; i < settled.length; i++) {
+ const r = settled[i];
+ if (r.status === "rejected") {
+ const message = r.reason instanceof Error ? r.reason.message : String(r.reason);
+ finalized.push({ kind: "job_error", url: batch[i].url, error: message });
+ continue;
+ }
+ const v = r.value;
+ if (v.kind === "cached") {
+ sessionStats.cacheHits++;
+ sessionStats.cacheBytesSaved += v.estimatedBytes;
+ finalized.push({ kind: "cached", label: v.label, chunkCount: v.chunkCount, ageStr: v.ageStr });
+ }
+ else if (v.kind === "fetch_error") {
+ finalized.push({ kind: "fetch_error", url: v.url, error: v.error, reason: v.reason });
+ }
+ else {
+ // Serial FTS5 write here — no parallel store.index calls.
+ finalized.push({ kind: "fetched", indexed: indexFetched(v) });
+ }
+ }
+ // Backward-compat single-URL response shape — preserve the EXACT original wording.
+ if (isLegacySingle) {
+ const r = finalized[0];
+ if (r.kind === "cached") {
  return trackResponse("ctx_fetch_and_index", {
- content: [
- {
+ content: [{
  type: "text",
- text: `Fetched ${url} but got empty content`,
- },
- ],
- isError: true,
+ text: `Cached: **${r.label}** — ${r.chunkCount} sections, indexed ${r.ageStr} (fresh, TTL: 24h).\nTo refresh: call ctx_fetch_and_index again with \`force: true\`.\n\nYou MUST call ctx_search() to answer questions about this content — this cached response contains no content.\nUse: ctx_search(queries: [...], source: "${r.label}")`,
+ }],
  });
  }
- trackIndexed(Buffer.byteLength(markdown));
- // Route to the appropriate indexing strategy based on Content-Type
- let indexed;
- if (header === "__CM_CT__:json") {
- indexed = store.indexJSON(markdown, source ?? url);
+ if (r.kind === "fetched") {
+ const totalKB = (r.indexed.totalBytes / 1024).toFixed(1);
+ const text = [
+ `Fetched and indexed **${r.indexed.totalChunks} sections** (${totalKB}KB) from: ${r.indexed.label}`,
+ `Full content indexed in sandbox — use ctx_search(queries: [...], source: "${r.indexed.label}") for specific lookups.`,
+ "",
+ "---",
+ "",
+ r.indexed.preview,
+ ].join("\n");
+ return trackResponse("ctx_fetch_and_index", {
+ content: [{ type: "text", text }],
+ });
  }
- else if (header === "__CM_CT__:text") {
- indexed = store.indexPlainText(markdown, source ?? url);
+ // fetch_error preserve original error wording per reason
+ if (r.kind === "fetch_error") {
+ const text = r.reason === "empty" ? `Fetched ${r.url} but got empty content`
+ : r.reason === "read" ? `Fetched ${r.url} but could not read subprocess output`
+ : r.reason === "exit" ? `Failed to fetch ${r.url}: ${r.error}`
+ : /* throw */ `Fetch error: ${r.error}`;
+ return trackResponse("ctx_fetch_and_index", {
+ content: [{ type: "text", text }],
+ isError: true,
+ });
  }
- else {
- // HTML (default) — content is already converted to markdown
- indexed = store.index({ content: markdown, source: source ?? url });
- }
- // Build preview — first ~3KB of markdown for immediate use
- const PREVIEW_LIMIT = 3072;
- const preview = markdown.length > PREVIEW_LIMIT
- ? markdown.slice(0, PREVIEW_LIMIT) + "\n\n…[truncated — use ctx_search() for full content]"
- : markdown;
- const totalKB = (Buffer.byteLength(markdown) / 1024).toFixed(1);
- const text = [
- `Fetched and indexed **${indexed.totalChunks} sections** (${totalKB}KB) from: ${indexed.label}`,
- `Full content indexed in sandbox — use ctx_search(queries: [...], source: "${indexed.label}") for specific lookups.`,
- "",
- "---",
- "",
- preview,
- ].join("\n");
- return trackResponse("ctx_fetch_and_index", {
- content: [{ type: "text", text }],
- });
- }
- catch (err) {
- const message = err instanceof Error ? err.message : String(err);
+ // job_error
  return trackResponse("ctx_fetch_and_index", {
- content: [
- { type: "text", text: `Fetch error: ${message}` },
- ],
+ content: [{ type: "text", text: `Fetch error: ${r.error}` }],
  isError: true,
  });
  }
- finally {
- // Clean up temp file
- try {
- rmSync(outputPath);
+ // Batch response — aggregated summary; isError only when EVERY URL failed.
+ // Per-URL preview capped tightly so a 8-URL batch doesn't undo the
+ // context-savings the tool exists to deliver (PRD review finding G1).
+ const FETCH_BATCH_PREVIEW_LIMIT = 384; // ~3KB total for 8-URL batches
+ const lines = [];
+ let totalSections = 0;
+ let totalBytes = 0;
+ let cachedCount = 0;
+ let fetchedCount = 0;
+ let errorCount = 0;
+ const snippets = [];
+ for (const r of finalized) {
+ if (r.kind === "cached") {
+ cachedCount++;
+ lines.push(`- [cache] ${r.label} — ${r.chunkCount} sections (${r.ageStr})`);
+ }
+ else if (r.kind === "fetched") {
+ fetchedCount++;
+ totalSections += r.indexed.totalChunks;
+ totalBytes += r.indexed.totalBytes;
+ const kb = (r.indexed.totalBytes / 1024).toFixed(1);
+ lines.push(`- [new] ${r.indexed.label} — ${r.indexed.totalChunks} sections (${kb}KB)`);
+ const snippet = r.indexed.preview.length > FETCH_BATCH_PREVIEW_LIMIT
+ ? r.indexed.preview.slice(0, FETCH_BATCH_PREVIEW_LIMIT).trimEnd() + "…"
+ : r.indexed.preview;
+ snippets.push(`### ${r.indexed.label}\n\n${snippet}`);
  }
- catch { /* already gone */ }
- }
+ else {
+ errorCount++;
+ lines.push(`- [err] ${r.url}: ${r.error}`);
+ }
+ }
+ const totalKB = (totalBytes / 1024).toFixed(1);
+ const cappedNote = capped
+ ? ` cap=${effectiveConcurrency}/${cpus().length}cpu`
+ : "";
+ // Caveman style — terse status line: counts + sections + size.
+ // Singular forms used at count=1 to avoid grammar drift ("1 errors" → "1 error").
+ const fmt = (n, sing, plur) => `${n} ${n === 1 ? sing : plur}`;
+ const headerLine = `fetched ${batch.length} c=${effectiveConcurrency}${cappedNote}. ` +
+ `ok=${fetchedCount} cache=${cachedCount} err=${errorCount}. ` +
+ `${fmt(totalSections, "section", "sections")} ${totalKB}KB.`;
+ const text = [
+ headerLine,
+ "",
+ ...lines,
+ "",
+ `ctx_search(queries: [...], source: "<label>") for full content.`,
+ ...(snippets.length > 0 ? ["", "---", "", ...snippets] : []),
+ ].join("\n");
+ return trackResponse("ctx_fetch_and_index", {
+ content: [{ type: "text", text }],
+ isError: errorCount === batch.length, // only mark error if every URL failed
+ });
  });
  // ─────────────────────────────────────────────────────────
  // Tool: batch_execute
@@ -1497,7 +2012,12 @@ server.registerTool("ctx_batch_execute", {
  "THIS IS THE PRIMARY TOOL. Use this instead of multiple ctx_execute() calls.\n\n" +
  "One ctx_batch_execute call replaces 30+ ctx_execute calls + 10+ ctx_search calls.\n" +
  "Provide all commands to run and all queries to search — everything happens in one round trip.\n\n" +
- "THINK IN CODE: When commands produce data you need to analyze, add processing commands that filter and summarize. Don't pull raw output into context — let the sandbox do the work.\n\n" +
+ "PARALLELIZE I/O: For I/O-bound batches (network calls, slow API queries, multi-URL fetches), ALWAYS pass concurrency: 4-8 speeds up by 3-5x on real workloads.\n" +
+ " ✅ Use concurrency: 4-8 for: gh API calls, curl/web fetches, multi-region cloud queries, multi-repo git reads, dig/DNS, docker inspect.\n" +
+ " ❌ Keep concurrency: 1 for: npm test, build, lint, image processing (CPU-bound), or commands sharing state (ports, lock files, same-repo writes).\n" +
+ " Example: [gh issue view 1, gh issue view 2, gh issue view 3] → concurrency: 3.\n" +
+ " Speedup depends on workload — applies to I/O wait, not CPU work.\n\n" +
+ "THINK IN CODE — NON-NEGOTIABLE: When commands produce data you need to analyze, count, filter, compare, or transform — add a processing command that runs JavaScript and console.log() ONLY the answer. NEVER pull raw output into context to reason over. Concurrency parallelizes the FETCH; THINK IN CODE owns the PROCESSING. One programmed analysis replaces ten read-and-reason rounds. Pure JavaScript, Node.js built-ins (fs, path, child_process), try/catch, null-safe.\n\n" +
  "When reporting results — terse like caveman. Technical substance exact. Only fluff die. Pattern: [thing] [action] [reason]. [next step].",
  inputSchema: z.object({
  commands: z.preprocess(coerceCommandsArray, z
@@ -1510,7 +2030,8 @@ server.registerTool("ctx_batch_execute", {
  .describe("Shell command to execute"),
  }))
  .min(1)
- .describe("Commands to execute as a batch. Each runs sequentially, output is labeled with the section header.")),
+ .describe("Commands to execute as a batch. Output is labeled with the section header. " +
+ "Default order is sequential; pass concurrency>1 to run in parallel (output stays in input order).")),
  queries: z.preprocess(coerceJsonArray, z
  .array(z.string())
  .min(1)
@@ -1521,9 +2042,21 @@ server.registerTool("ctx_batch_execute", {
  .coerce.number()
  .optional()
  .default(60000)
- .describe("Max execution time in ms (default: 60s)"),
+ .describe("Max execution time in ms (default: 60s). With concurrency=1, shared budget across commands; with concurrency>1, applied per-command."),
+ concurrency: z
+ .coerce.number()
+ .int()
+ .min(1)
+ .max(8)
+ .optional()
+ .default(1)
+ .describe("Max commands to run in parallel (1-8, default: 1). " +
+ "Use 4-8 for I/O-bound batches (network, gh, curl, multi-repo git reads). " +
+ "Keep at 1 for CPU-bound (npm test, build, lint) or stateful commands (ports, locks). " +
+ ">1 switches to per-command timeouts (no shared budget) and " +
+ "individual `(timed out)` blocks instead of cascading skip."),
  }),
- }, async ({ commands, queries, timeout }) => {
+ }, async ({ commands, queries, timeout, concurrency }) => {
  // Security: check each command against deny patterns
  for (const cmd of commands) {
  const denied = checkDenyPolicy(cmd.command, "batch_execute");
@@ -1531,51 +2064,18 @@ server.registerTool("ctx_batch_execute", {
  return denied;
  }
  try {
- // Execute each command individually so every command gets its own
- // output capture. Full stdout is preserved and indexed into FTS5.
- // (Issue #61, #197)
- const perCommandOutputs = [];
- const startTime = Date.now();
- let timedOut = false;
  // Inject NODE_OPTIONS for FS read tracking in spawned Node processes.
  // The executor denies NODE_OPTIONS in its env (security), so we set it
  // as an inline shell prefix. This only affects child `node` invocations.
  const nodeOptsPrefix = `NODE_OPTIONS="--require ${CM_FS_PRELOAD}" `;
- for (const cmd of commands) {
- const elapsed = Date.now() - startTime;
- const remaining = timeout - elapsed;
- if (remaining <= 0) {
- perCommandOutputs.push(`# ${cmd.label}\n\n(skipped — batch timeout exceeded)\n`);
- timedOut = true;
- continue;
- }
- const result = await executor.execute({
- language: "shell",
- code: `${nodeOptsPrefix}${cmd.command} 2>&1`,
- timeout: remaining,
- });
- let output = result.stdout || "(no output)";
- // Parse and strip __CM_FS__ markers emitted by the preload script.
- // Because 2>&1 merges stderr into stdout, markers appear in output.
- const fsMatches = output.matchAll(/__CM_FS__:(\d+)/g);
- let cmdFsBytes = 0;
- for (const m of fsMatches)
- cmdFsBytes += parseInt(m[1]);
- if (cmdFsBytes > 0) {
- sessionStats.bytesSandboxed += cmdFsBytes;
- output = output.replace(/__CM_FS__:\d+\n?/g, "");
- }
- perCommandOutputs.push(`# ${cmd.label}\n\n${output}\n`);
- if (result.timedOut) {
- timedOut = true;
- // Mark remaining commands as skipped
- const idx = commands.indexOf(cmd);
- for (let i = idx + 1; i < commands.length; i++) {
- perCommandOutputs.push(`# ${commands[i].label}\n\n(skipped — batch timeout exceeded)\n`);
- }
- break;
- }
- }
+ // Full stdout is preserved per-command and indexed into FTS5 (Issue #61, #197).
+ // Concurrency>1 switches to a worker pool with per-command timeouts.
+ const { outputs: perCommandOutputs, timedOut } = await runBatchCommands(commands, {
+ timeout,
+ concurrency,
+ nodeOptsPrefix,
+ onFsBytes: (bytes) => { sessionStats.bytesSandboxed += bytes; },
+ }, executor);
  const stdout = perCommandOutputs.join("\n");
  const totalBytes = Buffer.byteLength(stdout);
  const totalLines = stdout.split("\n").length;
@@ -1678,24 +2178,37 @@ server.registerTool("ctx_stats", {
  try {
  const engine = new AnalyticsEngine(sdb);
  const report = engine.queryAll(sessionStats);
- text = formatReport(report, VERSION, _latestVersion);
+ // MCP usage is read-only and cheap; only available when DB exists.
+ const mcpUsage = engine.getMcpToolUsage();
+ // Lifetime stats span every project's SessionDB + auto-memory dir
+ // (Bugs #3/#4); failures are absorbed inside getLifetimeStats so a
+ // corrupt sidecar can never break ctx_stats.
+ const lifetime = getLifetimeStats();
+ text = formatReport(report, VERSION, _latestVersion, { lifetime, mcpUsage });
  }
  finally {
  sdb.close();
  }
  }
  else {
- // No session DB — build a minimal report from runtime stats only
+ // No session DB — build a minimal report from runtime stats only.
+ // Lifetime still meaningful (other projects, auto-memory) so include it.
  const engine = new AnalyticsEngine(createMinimalDb());
  const report = engine.queryAll(sessionStats);
- text = formatReport(report, VERSION, _latestVersion);
+ const lifetime = getLifetimeStats();
+ text = formatReport(report, VERSION, _latestVersion, { lifetime });
  }
  }
  catch {
  // Session DB not available or incompatible — build minimal report from runtime stats
  const engine = new AnalyticsEngine(createMinimalDb());
  const report = engine.queryAll(sessionStats);
- text = formatReport(report, VERSION, _latestVersion);
+ let lifetime;
+ try {
+ lifetime = getLifetimeStats();
+ }
+ catch { /* never block ctx_stats */ }
+ text = formatReport(report, VERSION, _latestVersion, lifetime ? { lifetime } : undefined);
  }
  return trackResponse("ctx_stats", {
  content: [{ type: "text", text }],
@@ -1985,6 +2498,13 @@ server.registerTool("ctx_purge", {
  sessionStats.cacheBytesSaved = 0;
  sessionStats.sessionStart = Date.now();
  deleted.push("session stats");
+ // Also drop the persisted stats file so external readers see a fresh state
+ try {
+ const statsFile = getStatsFilePath();
+ if (existsSync(statsFile))
+ unlinkSync(statsFile);
+ }
+ catch { /* best effort */ }
  return trackResponse("ctx_purge", {
  content: [{
  type: "text",
@@ -2231,6 +2751,15 @@ async function main() {
  }
  };
  const gracefulShutdown = async () => {
+ // Final stats flush — bypass throttle so the last 0-500ms of
+ // bytes_indexed / bytes_returned aren't silently lost on SIGTERM/SIGINT
+ // (PR #401 grill-me review B1: persistStats early-returns inside throttle
+ // window; gracefulShutdown previously did NOT bypass).
+ try {
+ _lastStatsPersist = 0;
+ persistStats();
+ }
+ catch { /* best effort — never block shutdown */ }
  shutdown();
  process.exit(0);
  };