npm - @kage-core/kage-graph-mcp - Versions diffs - 1.1.3 → 1.1.5 - Mend

@kage-core/kage-graph-mcp 1.1.3 → 1.1.5

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (5)

package/README.md CHANGED Viewed

@@ -38,6 +38,7 @@ kage recall "how do I run tests" --project /path/to/repo
 kage recall "how do I run tests" --project /path/to/repo --explain --json
 kage quality --project /path/to/repo
 kage benchmark --project /path/to/repo
+kage benchmark --project /path/to/repo --compare --task "how do I run tests"
 kage viewer --project /path/to/repo
 kage daemon start --project /path/to/repo
 kage observe --project /path/to/repo --event '{"type":"command_result","session_id":"s1","command":"npm test","exit_code":0}'
@@ -125,6 +126,12 @@ and parser coverage, code graph counts, evidence coverage, approved vs pending
 memory, validation status, estimated tokens saved per recall, duplicate
 candidates, average memory quality, and a readiness score.
+Use `kage benchmark --compare --task "<task>" --project <repo>` or
+`kage_benchmark_compare` to compare the same task on the same repo with and
+without Kage. It estimates manual full-file rediscovery tokens/steps, compares
+them to compact Kage recall plus code graph context, and returns evidence plus
+caveats for honest marketing proof.
 Use `kage refresh --project <repo>` or the `kage_refresh` MCP tool after
 meaningful file changes. Refresh rebuilds indexes, code graph, memory graph,
 metrics, and stale-memory metadata. Memory is marked stale when status or
@@ -204,6 +211,7 @@ Local repo tools:
 - `kage_pr_check`
 - `kage_quality`
 - `kage_benchmark`
+- `kage_benchmark_compare`
 - `kage_setup_agent`
 - `kage_graph`
 - `kage_graph_visual`

package/dist/cli.js CHANGED Viewed

@@ -31,6 +31,7 @@ Usage:
   kage metrics --project <dir> [--json]
   kage quality --project <dir> [--json]
   kage benchmark --project <dir> [--json]
+  kage benchmark --project <dir> --compare --task <task> [--json]
   kage code-graph --project <dir> [--json]
   kage code-graph "<query>" --project <dir> [--json]
   kage graph --project <dir> [--json]
@@ -533,6 +534,46 @@ async function main() {
         return;
     }
     if (command === "benchmark") {
+        if (args.includes("--compare")) {
+            const result = (0, kernel_js_1.benchmarkTaskComparison)(projectArg(args), takeArg(args, "--task") ?? firstPositional(args) ?? "how do I run tests");
+            if (args.includes("--json")) {
+                console.log(JSON.stringify(result, null, 2));
+                return;
+            }
+            console.log(`Kage A/B Benchmark: ${result.project_dir}`);
+            console.log(`Task: ${result.task}`);
+            console.log("");
+            console.log("Without Kage:");
+            console.log(`  Files examined: ${result.baseline_without_kage.files_examined}`);
+            console.log(`  Full-file tokens: ${result.baseline_without_kage.full_file_tokens}`);
+            console.log(`  Steps: ${result.baseline_without_kage.steps}`);
+            console.log(`  Estimated time: ${result.baseline_without_kage.estimated_time_seconds}s`);
+            console.log("");
+            console.log("With Kage:");
+            console.log(`  Memory packets: ${result.with_kage.memory_packets_used}`);
+            console.log(`  Code facts: ${result.with_kage.code_files_returned + result.with_kage.code_symbols_returned + result.with_kage.code_routes_returned + result.with_kage.code_tests_returned}`);
+            console.log(`  Context tokens: ${result.with_kage.context_tokens}`);
+            console.log(`  Steps: ${result.with_kage.steps}`);
+            console.log(`  Estimated time: ${result.with_kage.estimated_time_seconds}s`);
+            console.log("");
+            console.log("Delta:");
+            console.log(`  Estimated tokens saved: ${result.delta.estimated_tokens_saved}`);
+            console.log(`  Context reduction: ${result.delta.context_reduction_percent}%`);
+            console.log(`  Rediscovery steps saved: ${result.delta.rediscovery_steps_saved}`);
+            console.log(`  Estimated time saved: ${result.delta.estimated_time_saved_seconds}s`);
+            console.log(`  Full-file reads avoided: ${result.delta.full_file_reads_avoided}`);
+            console.log(`  Recall hit: ${result.delta.recall_hit ? "yes" : "no"}`);
+            console.log(`  Code graph hit: ${result.delta.code_graph_hit ? "yes" : "no"}`);
+            console.log("");
+            console.log("Baseline files:");
+            for (const file of result.evidence.baseline_files.slice(0, 8))
+                console.log(`  - ${file.path} (${file.tokens} tokens): ${file.why}`);
+            console.log("");
+            console.log("Kage memory:");
+            for (const packet of result.evidence.kage_memory.slice(0, 5))
+                console.log(`  - ${packet.title} (${packet.type}, score ${packet.score})`);
+            return;
+        }
         const result = (0, kernel_js_1.benchmarkProject)(projectArg(args));
         if (args.includes("--json")) {
             console.log(JSON.stringify(result, null, 2));

package/dist/index.js CHANGED Viewed

@@ -209,6 +209,18 @@ function listTools() {
                 required: ["project_dir"],
             },
         },
+        {
+            name: "kage_benchmark_compare",
+            description: "Compare the same task on the same repo with and without Kage. Reports estimated baseline discovery tokens/steps versus Kage recall/code-graph context, with evidence and caveats.",
+            inputSchema: {
+                type: "object",
+                properties: {
+                    project_dir: { type: "string" },
+                    task: { type: "string" },
+                },
+                required: ["project_dir", "task"],
+            },
+        },
         {
             name: "kage_setup_agent",
             description: "Generate MCP/setup instructions for Codex, Claude Code, Cursor, Windsurf, Gemini CLI, OpenCode, Cline, Goose, Roo Code, Kilo Code, Claude Desktop, Aider, or generic MCP.",
@@ -661,6 +673,12 @@ async function callTool(name, args) {
             content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
         };
     }
+    if (name === "kage_benchmark_compare") {
+        const result = (0, kernel_js_1.benchmarkTaskComparison)(String(args?.project_dir ?? ""), String(args?.task ?? ""));
+        return {
+            content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        };
+    }
     if (name === "kage_setup_agent") {
         const result = (0, kernel_js_1.setupAgent)(String(args?.agent ?? ""), String(args?.project_dir ?? ""), { write: Boolean(args?.write) });
         return {

package/dist/kernel.js CHANGED Viewed

@@ -75,6 +75,7 @@ exports.graphMermaid = graphMermaid;
 exports.kageMetrics = kageMetrics;
 exports.qualityReport = qualityReport;
 exports.benchmarkProject = benchmarkProject;
+exports.benchmarkTaskComparison = benchmarkTaskComparison;
 exports.learn = learn;
 exports.capture = capture;
 exports.createPublicCandidate = createPublicCandidate;
@@ -180,11 +181,23 @@ Keep captures concise and future-facing. Do not store raw transcripts.
 ## End-Of-Task Proposal
-Before finishing a task that changed files, call \`kage_propose_from_diff\`.
+After meaningful file changes, call \`kage_refresh\` so indexes, code graph,
+memory graph, metrics, and stale-memory checks are current.
-This writes a branch review summary and a repo-local change-memory packet. It
-should capture what changed, why it matters, how to verify it, and what future
-agents should know. Git or PR review is the repo-level review boundary.
+Before finishing a task that changed files, call \`kage_pr_summarize\` or
+\`kage_propose_from_diff\`, then call \`kage_pr_check\`.
+\`kage_pr_summarize\` writes a branch review summary and a repo-local
+change-memory packet. \`kage_pr_check\` verifies validation, graph freshness,
+stale packets, and whether repo memory changed with the branch. If the check
+fails, explain the required actions instead of hiding the failure. Git or PR
+review is the repo-level review boundary.
+## Package Updates
+If the user asks to update Kage, run \`kage upgrade\`, then verify setup with
+\`kage setup verify-agent --agent <agent> --project <repo>\`. Tell the user to
+restart the agent when MCP tools need to reload.
 ## Feedback
@@ -210,7 +223,9 @@ For normal coding tasks:
 4. \`kage_graph\` for remembered decisions, bugs, workflows, and conventions
 5. Work on the task
 6. \`kage_learn\` for concrete learnings
-7. \`kage_propose_from_diff\` before the final response to create repo-local change memory
+7. \`kage_refresh\` after meaningful file changes
+8. \`kage_pr_summarize\` or \`kage_propose_from_diff\` before the final response to create repo-local change memory
+9. \`kage_pr_check\` before final handoff or merge readiness claims
 For quick factual questions, \`kage_recall\` alone is enough. For status or demo requests, call \`kage_metrics\`.
 ${AGENTS_POLICY_END}
@@ -3050,6 +3065,110 @@ function benchmarkProject(projectDir) {
         },
     };
 }
+function baselineDiscoveryFiles(projectDir, task) {
+    const terms = tokenize(task);
+    const graph = buildCodeGraph(projectDir);
+    const candidatePaths = unique([
+        "README.md",
+        "AGENTS.md",
+        "CLAUDE.md",
+        "package.json",
+        ...graph.files.map((file) => file.path),
+    ]).filter((path) => path && !shouldSkipRepoMemoryPath(path));
+    return candidatePaths
+        .map((path) => {
+        const absolute = (0, node_path_1.join)(projectDir, path);
+        if (!(0, node_fs_1.existsSync)(absolute))
+            return null;
+        const stats = (0, node_fs_1.statSync)(absolute);
+        if (!stats.isFile() || stats.size > 240_000)
+            return null;
+        const text = (0, node_fs_1.readFileSync)(absolute, "utf8");
+        const score = scoreText(terms, `${path}\n${text.slice(0, 8000)}`, [path]);
+        const alwaysUseful = ["README.md", "AGENTS.md", "CLAUDE.md", "package.json"].includes(path);
+        if (score <= 0 && !alwaysUseful)
+            return null;
+        return {
+            path,
+            tokens: Math.max(1, Math.ceil(stats.size / 4)),
+            why: score > 0 ? "task terms matched path or file content" : "standard repo orientation file",
+            score: score + (alwaysUseful ? 1 : 0),
+        };
+    })
+        .filter((entry) => Boolean(entry))
+        .sort((a, b) => b.score - a.score || b.tokens - a.tokens || a.path.localeCompare(b.path))
+        .slice(0, 10);
+}
+function benchmarkTaskComparison(projectDir, task) {
+    ensureMemoryDirs(projectDir);
+    const query = task.trim() || "how do I run tests";
+    const baselineFiles = baselineDiscoveryFiles(projectDir, query);
+    const baselineTokens = baselineFiles.reduce((sum, file) => sum + file.tokens, 0);
+    const recallResult = recall(projectDir, query, 5, true);
+    const codeResult = queryCodeGraph(projectDir, query, 10);
+    const kageContext = `${recallResult.context_block}\n\n${codeResult.context_block}`;
+    const kageTokens = estimateTokens(kageContext);
+    const codeFactLines = [
+        ...codeResult.routes.map((route) => `[route] ${route.method} ${route.path} in ${route.file_path}:${route.line}`),
+        ...codeResult.symbols.map((symbol) => `[symbol] ${symbol.kind} ${symbol.name} in ${symbol.path}:${symbol.line}`),
+        ...codeResult.tests.map((test) => `[test] ${test.title} in ${test.test_path}:${test.line}`),
+        ...codeResult.files.slice(0, 5).map((file) => `[file] ${file.path} (${file.kind}, ${file.language}, ${file.parser})`),
+    ];
+    const baselineSteps = Math.max(3, baselineFiles.length + 2);
+    const kageSteps = 3;
+    const tokensSaved = Math.max(0, baselineTokens - kageTokens);
+    const contextReduction = baselineTokens > 0 ? percent(tokensSaved, baselineTokens) : 0;
+    const timeSaved = Math.max(0, baselineSteps * 45 - kageSteps * 12);
+    return {
+        schema_version: 1,
+        project_dir: projectDir,
+        task: query,
+        generated_at: nowIso(),
+        baseline_without_kage: {
+            strategy: "manual_repo_discovery_estimate",
+            files_examined: baselineFiles.length,
+            full_file_tokens: baselineTokens,
+            steps: baselineSteps,
+            estimated_time_seconds: baselineSteps * 45,
+        },
+        with_kage: {
+            strategy: "recall_plus_code_graph",
+            recall_results: recallResult.results.length,
+            memory_packets_used: recallResult.results.length,
+            code_files_returned: codeResult.files.length,
+            code_symbols_returned: codeResult.symbols.length,
+            code_routes_returned: codeResult.routes.length,
+            code_tests_returned: codeResult.tests.length,
+            context_tokens: kageTokens,
+            steps: kageSteps,
+            estimated_time_seconds: kageSteps * 12,
+        },
+        delta: {
+            estimated_tokens_saved: tokensSaved,
+            context_reduction_percent: contextReduction,
+            rediscovery_steps_saved: Math.max(0, baselineSteps - kageSteps),
+            estimated_time_saved_seconds: timeSaved,
+            full_file_reads_avoided: Math.max(0, baselineFiles.length - codeResult.files.length),
+            recall_hit: recallResult.results.length > 0,
+            code_graph_hit: codeFactLines.length > 0,
+        },
+        evidence: {
+            baseline_files: baselineFiles.map(({ path, tokens, why }) => ({ path, tokens, why })),
+            kage_memory: recallResult.results.map((entry) => ({
+                id: entry.packet.id,
+                title: entry.packet.title,
+                type: entry.packet.type,
+                score: entry.score,
+            })),
+            kage_code_facts: codeFactLines.slice(0, 12),
+        },
+        caveats: [
+            "Baseline is a deterministic manual-discovery estimate, not a live human or agent timing trace.",
+            "Token savings estimate full-file reads avoided versus compact Kage recall/code-graph context.",
+            "Use this for relative proof on the same repo/task, not cross-repo absolute claims.",
+        ],
+    };
+}
 function kageMetricsShallow(projectDir) {
     const codeGraph = buildCodeGraph(projectDir);
     const knowledgeGraph = buildKnowledgeGraph(projectDir);
@@ -3414,22 +3533,42 @@ Before making code changes or answering implementation questions:
 3. Call kage_code_graph for file, symbol, route, test, or dependency questions.
 4. Call kage_graph for decisions, bugs, workflows, and conventions.
 When you learn something reusable: kage_learn.
-Before finishing a task that changed files: kage_propose_from_diff.
+After meaningful file changes: kage_refresh.
+Before finishing a task that changed files: kage_pr_summarize or kage_propose_from_diff, then kage_pr_check.
 If recalled memory helped: kage_feedback helpful. If wrong or stale: kage_feedback wrong or stale."
 fi
 KAGE_MSG="$POLICY" python3 -c "import json,os; print(json.dumps({'systemMessage': os.environ['KAGE_MSG']}))"
+`;
+        const stopHookScript = `#!/usr/bin/env bash
+# Kage Stop hook — best-effort repo memory refresh before Claude Code finishes.
+# Silent if Kage is not initialized in the current project or no git changes exist.
+set -euo pipefail
+PAYLOAD="$(cat || true)"
+CWD="$(printf "%s" "$PAYLOAD" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('cwd',''))" 2>/dev/null || echo "")"
+[[ -d "$CWD/.agent_memory" ]] || exit 0
+command -v kage >/dev/null 2>&1 || exit 0
+if git -C "$CWD" status --porcelain -uall >/dev/null 2>&1 && [[ -n "$(git -C "$CWD" status --porcelain -uall)" ]]; then
+  kage refresh --project "$CWD" --json >/dev/null 2>&1 || true
+  kage pr summarize --project "$CWD" --json >/dev/null 2>&1 || true
+fi
+exit 0
 `;
         const settingsPath = (0, node_path_1.join)(home, ".claude", "settings.json");
         const hookEntry = {
             hooks: {
                 SessionStart: [{ matcher: "", hooks: [{ type: "command", command: "bash ~/.claude/kage/hooks/session-start.sh", timeout: 5 }] }],
+                Stop: [{ matcher: "", hooks: [{ type: "command", command: "bash ~/.claude/kage/hooks/stop.sh", timeout: 20 }] }],
             },
         };
         setSnippet(path, JSON.stringify({ mcpServers: { kage: server } }, null, 2), [
             "Add the MCP server to ~/.claude.json, then restart Claude Code.",
             "alwaysLoad: true makes Kage tools immediately visible without requiring ToolSearch.",
-            `Also create ${hookDir}/session-start.sh with the hook script and add the SessionStart hook to ~/.claude/settings.json.`,
+            `Also create ${hookDir}/session-start.sh and ${hookDir}/stop.sh with the hook scripts and add SessionStart/Stop hooks to ~/.claude/settings.json.`,
             "Run `kage init --project <repo>` inside each repo to install the ambient memory policy.",
         ], true);
         if (options.write) {
@@ -3437,6 +3576,7 @@ KAGE_MSG="$POLICY" python3 -c "import json,os; print(json.dumps({'systemMessage'
             // Install the ambient session-start hook
             (0, node_fs_1.mkdirSync)(hookDir, { recursive: true });
             (0, node_fs_1.writeFileSync)((0, node_path_1.join)(hookDir, "session-start.sh"), hookScript, { mode: 0o755 });
+            (0, node_fs_1.writeFileSync)((0, node_path_1.join)(hookDir, "stop.sh"), stopHookScript, { mode: 0o755 });
             upsertJsonSettings(settingsPath, hookEntry);
             result.wrote = true;
         }
@@ -3862,11 +4002,25 @@ function distillSession(projectDir, sessionId) {
 function createDiffChangeMemory(projectDir, summary) {
     const branch = summary.branch ?? "detached";
     const head = summary.head ?? "unknown";
-    const fingerprint = (0, node_crypto_1.createHash)("sha256")
-        .update(`${branch}\n${head}\n${summary.changed_files.join("\n")}\n${summary.diff_stat}`)
-        .digest("hex")
-        .slice(0, 10);
     const title = `Change memory: ${branch}`;
+    // Remove any stale change-memory packets for this branch so propose_from_diff
+    // replaces rather than accumulates. The stable ID (branch-only, no fingerprint)
+    // makes writePacket idempotent going forward; this sweep handles packets that
+    // were written with the old fingerprint-based ID.
+    const stalePrefix = `workflow-${slugify(title)}-`;
+    const stableId = makePacketId(projectDir, "workflow", title);
+    const stableFileName = `${stalePrefix}${(0, node_crypto_1.createHash)("sha256").update(stableId).digest("hex").slice(0, 8)}.json`;
+    try {
+        const existing = (0, node_fs_1.readdirSync)(packetsDir(projectDir)).filter((name) => name.startsWith(stalePrefix) && name !== stableFileName);
+        for (const name of existing) {
+            const stale = (0, node_path_1.join)(packetsDir(projectDir), name);
+            const stalePacket = readJson(stale);
+            if (stalePacket?.type === "workflow" && stalePacket?.title === title) {
+                (0, node_fs_1.unlinkSync)(stale);
+            }
+        }
+    }
+    catch { /* non-fatal */ }
     const verifyCommands = npmScriptCommands(projectDir)
         .filter((command) => /(test|check|lint|build|type|verify)/i.test(command))
         .slice(0, 8);
@@ -3900,7 +4054,7 @@ function createDiffChangeMemory(projectDir, summary) {
     const now = nowIso();
     const packet = {
         schema_version: exports.PACKET_SCHEMA_VERSION,
-        id: makePacketId(projectDir, "workflow", title, fingerprint),
+        id: stableId,
         title,
         summary: `Repo-local context for ${summary.changed_files.length} changed repo path${summary.changed_files.length === 1 ? "" : "s"} on ${branch}.`,
         body,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@kage-core/kage-graph-mcp",
-  "version": "1.1.3",
+  "version": "1.1.5",
   "description": "Local-first repo memory, code graph, and recall MCP server for coding agents",
   "main": "dist/index.js",
   "files": [