npm - @kage-core/kage-graph-mcp - Versions diffs - 1.1.4 → 1.1.6 - Mend

@kage-core/kage-graph-mcp 1.1.4 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md CHANGED Viewed

@@ -38,6 +38,7 @@ kage recall "how do I run tests" --project /path/to/repo
 kage recall "how do I run tests" --project /path/to/repo --explain --json
 kage quality --project /path/to/repo
 kage benchmark --project /path/to/repo
+kage benchmark --project /path/to/repo --compare --task "how do I run tests"
 kage viewer --project /path/to/repo
 kage daemon start --project /path/to/repo
 kage observe --project /path/to/repo --event '{"type":"command_result","session_id":"s1","command":"npm test","exit_code":0}'
@@ -125,6 +126,12 @@ and parser coverage, code graph counts, evidence coverage, approved vs pending
 memory, validation status, estimated tokens saved per recall, duplicate
 candidates, average memory quality, and a readiness score.
+Use `kage benchmark --compare --task "<task>" --project <repo>` or
+`kage_benchmark_compare` to compare the same task on the same repo with and
+without Kage. It estimates manual full-file rediscovery tokens/steps, compares
+them to compact Kage recall plus code graph context, and returns evidence plus
+caveats for honest marketing proof.
 Use `kage refresh --project <repo>` or the `kage_refresh` MCP tool after
 meaningful file changes. Refresh rebuilds indexes, code graph, memory graph,
 metrics, and stale-memory metadata. Memory is marked stale when status or
@@ -151,6 +158,11 @@ ranking: text, graph, path/type/tag, freshness, quality, feedback, and a vector
 placeholder for future local or external embedding providers. Current fallback
 is deterministic text plus graph retrieval.
+`kage_context` is the primary MCP entrypoint for agents. It validates repo
+memory, recalls relevant packets, and returns code/knowledge graph context in
+one call. Agents should use it at task start instead of loading separate
+`kage_validate`, `kage_recall`, `kage_code_graph`, and `kage_graph` schemas.
 `kage daemon start` exposes the optional local REST runtime on
 `127.0.0.1:3111`:
@@ -196,6 +208,7 @@ confidence, and token-savings metrics connect.
 Local repo tools:
+- `kage_context`
 - `kage_recall`
 - `kage_code_graph`
 - `kage_metrics`
@@ -204,6 +217,7 @@ Local repo tools:
 - `kage_pr_check`
 - `kage_quality`
 - `kage_benchmark`
+- `kage_benchmark_compare`
 - `kage_setup_agent`
 - `kage_graph`
 - `kage_graph_visual`
@@ -280,14 +294,12 @@ Minimum policy:
 ```md
 Before code changes or repo-specific answers:
-1. Call `kage_validate`.
-2. Call `kage_recall` with the user task as the query.
-3. Call `kage_graph` with the user task as the query.
-4. Capture reusable learnings with `kage_learn` or `kage_capture`.
-5. After meaningful file changes, call `kage_refresh`.
-6. Before finishing changed-file tasks, call `kage_propose_from_diff` or `kage_pr_summarize`.
-7. Before merge, call `kage_pr_check`.
-8. Never publish or promote org/global memory automatically.
+1. Call `kage_context` with `project_dir` and the user task as `query`.
+2. Capture reusable learnings with `kage_learn` or `kage_capture`.
+3. After meaningful file changes, call `kage_refresh`.
+4. Before finishing changed-file tasks, call `kage_propose_from_diff` or `kage_pr_summarize`.
+5. Before merge, call `kage_pr_check`.
+6. Never publish or promote org/global memory automatically.
 ```
 Run `kage setup verify-agent --agent codex --project <repo>` after setup. The

package/dist/cli.js CHANGED Viewed

@@ -31,6 +31,7 @@ Usage:
   kage metrics --project <dir> [--json]
   kage quality --project <dir> [--json]
   kage benchmark --project <dir> [--json]
+  kage benchmark --project <dir> --compare --task <task> [--json]
   kage code-graph --project <dir> [--json]
   kage code-graph "<query>" --project <dir> [--json]
   kage graph --project <dir> [--json]
@@ -533,6 +534,46 @@ async function main() {
         return;
     }
     if (command === "benchmark") {
+        if (args.includes("--compare")) {
+            const result = (0, kernel_js_1.benchmarkTaskComparison)(projectArg(args), takeArg(args, "--task") ?? firstPositional(args) ?? "how do I run tests");
+            if (args.includes("--json")) {
+                console.log(JSON.stringify(result, null, 2));
+                return;
+            }
+            console.log(`Kage A/B Benchmark: ${result.project_dir}`);
+            console.log(`Task: ${result.task}`);
+            console.log("");
+            console.log("Without Kage:");
+            console.log(`  Files examined: ${result.baseline_without_kage.files_examined}`);
+            console.log(`  Full-file tokens: ${result.baseline_without_kage.full_file_tokens}`);
+            console.log(`  Steps: ${result.baseline_without_kage.steps}`);
+            console.log(`  Estimated time: ${result.baseline_without_kage.estimated_time_seconds}s`);
+            console.log("");
+            console.log("With Kage:");
+            console.log(`  Memory packets: ${result.with_kage.memory_packets_used}`);
+            console.log(`  Code facts: ${result.with_kage.code_files_returned + result.with_kage.code_symbols_returned + result.with_kage.code_routes_returned + result.with_kage.code_tests_returned}`);
+            console.log(`  Context tokens: ${result.with_kage.context_tokens}`);
+            console.log(`  Steps: ${result.with_kage.steps}`);
+            console.log(`  Estimated time: ${result.with_kage.estimated_time_seconds}s`);
+            console.log("");
+            console.log("Delta:");
+            console.log(`  Estimated tokens saved: ${result.delta.estimated_tokens_saved}`);
+            console.log(`  Context reduction: ${result.delta.context_reduction_percent}%`);
+            console.log(`  Rediscovery steps saved: ${result.delta.rediscovery_steps_saved}`);
+            console.log(`  Estimated time saved: ${result.delta.estimated_time_saved_seconds}s`);
+            console.log(`  Full-file reads avoided: ${result.delta.full_file_reads_avoided}`);
+            console.log(`  Recall hit: ${result.delta.recall_hit ? "yes" : "no"}`);
+            console.log(`  Code graph hit: ${result.delta.code_graph_hit ? "yes" : "no"}`);
+            console.log("");
+            console.log("Baseline files:");
+            for (const file of result.evidence.baseline_files.slice(0, 8))
+                console.log(`  - ${file.path} (${file.tokens} tokens): ${file.why}`);
+            console.log("");
+            console.log("Kage memory:");
+            for (const packet of result.evidence.kage_memory.slice(0, 5))
+                console.log(`  - ${packet.title} (${packet.type}, score ${packet.score})`);
+            return;
+        }
         const result = (0, kernel_js_1.benchmarkProject)(projectArg(args));
         if (args.includes("--json")) {
             console.log(JSON.stringify(result, null, 2));

package/dist/index.js CHANGED Viewed

@@ -54,9 +54,25 @@ function arrayArg(value) {
         return value.split(",").map((item) => item.trim()).filter(Boolean);
     return [];
 }
-const server = new index_js_1.Server({ name: "kage-graph", version: "1.1.0" }, { capabilities: { tools: {} } });
+const server = new index_js_1.Server({ name: "kage-graph", version: "1.1.6" }, { capabilities: { tools: {} } });
 function listTools() {
     return [
+        {
+            // Combined entry-point tool: validate + recall + code_graph + graph in one call.
+            // Agents should load this schema first (one ToolSearch) instead of loading four
+            // separate deferred schemas. Cuts session start from 4 schema loads to 1.
+            name: "kage_context",
+            description: "Primary kage entry point. Validates memory health, recalls relevant packets, and queries both the code graph and knowledge graph — all in one call. Call this at the start of every task instead of calling kage_validate, kage_recall, kage_code_graph, and kage_graph separately.",
+            inputSchema: {
+                type: "object",
+                properties: {
+                    project_dir: { type: "string", description: "Absolute path to the project root" },
+                    query: { type: "string", description: "The task or question — used for both memory recall and code graph search" },
+                    limit: { type: "number", description: "Max memory packets to return (default 5)" },
+                },
+                required: ["project_dir", "query"],
+            },
+        },
         {
             name: "kage_search",
             description: "Search the kage community knowledge graph for gotchas, patterns, configs, and architectural decisions across auth, database, payments, deployment, frontend, testing, and more. Returns node summaries ranked by relevance.",
@@ -209,6 +225,18 @@ function listTools() {
                 required: ["project_dir"],
             },
         },
+        {
+            name: "kage_benchmark_compare",
+            description: "Compare the same task on the same repo with and without Kage. Reports estimated baseline discovery tokens/steps versus Kage recall/code-graph context, with evidence and caveats.",
+            inputSchema: {
+                type: "object",
+                properties: {
+                    project_dir: { type: "string" },
+                    task: { type: "string" },
+                },
+                required: ["project_dir", "task"],
+            },
+        },
         {
             name: "kage_setup_agent",
             description: "Generate MCP/setup instructions for Codex, Claude Code, Cursor, Windsurf, Gemini CLI, OpenCode, Cline, Goose, Roo Code, Kilo Code, Claude Desktop, Aider, or generic MCP.",
@@ -596,6 +624,28 @@ async function callTool(name, args) {
             content: [{ type: "text", text: content }],
         };
     }
+    if (name === "kage_context") {
+        const projectDir = String(args?.project_dir ?? "");
+        const query = String(args?.query ?? "");
+        const limit = Number(args?.limit ?? 5);
+        // validate
+        const validation = (0, kernel_js_1.validateProject)(projectDir);
+        const validationText = validation.ok
+            ? "Memory healthy."
+            : `Warnings: ${validation.warnings.join("; ")}`;
+        // recall (memory + code graph + knowledge graph combined)
+        const recallResult = (0, kernel_js_1.recall)(projectDir, query, limit, false);
+        // graph facts on top of recall
+        const graphResult = (0, kernel_js_1.queryGraph)(projectDir, query, 5);
+        const sections = [
+            recallResult.context_block,
+            graphResult.context_block ? `\n## Graph Facts\n${graphResult.context_block}` : "",
+            `\n_${validationText}_`,
+        ].filter(Boolean).join("");
+        return {
+            content: [{ type: "text", text: sections }],
+        };
+    }
     if (name === "kage_recall") {
         const result = (0, kernel_js_1.recall)(String(args?.project_dir ?? ""), String(args?.query ?? ""), Number(args?.limit ?? 5), Boolean(args?.explain));
         return {
@@ -661,6 +711,12 @@ async function callTool(name, args) {
             content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
         };
     }
+    if (name === "kage_benchmark_compare") {
+        const result = (0, kernel_js_1.benchmarkTaskComparison)(String(args?.project_dir ?? ""), String(args?.task ?? ""));
+        return {
+            content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        };
+    }
     if (name === "kage_setup_agent") {
         const result = (0, kernel_js_1.setupAgent)(String(args?.agent ?? ""), String(args?.project_dir ?? ""), { write: Boolean(args?.write) });
         return {

package/dist/kernel.js CHANGED Viewed

@@ -75,6 +75,7 @@ exports.graphMermaid = graphMermaid;
 exports.kageMetrics = kageMetrics;
 exports.qualityReport = qualityReport;
 exports.benchmarkProject = benchmarkProject;
+exports.benchmarkTaskComparison = benchmarkTaskComparison;
 exports.learn = learn;
 exports.capture = capture;
 exports.createPublicCandidate = createPublicCandidate;
@@ -150,12 +151,11 @@ This repo uses Kage as an automatic memory harness for coding agents.
 Before making code changes, answering repo-specific implementation questions, debugging failures, or proposing architecture:
-1. Call \`kage_validate\` for this repo.
-2. Call \`kage_recall\` with the user's task as the query.
-3. Call \`kage_code_graph\` when the task mentions files, APIs, routes, symbols, tests, dependencies, or code flow.
-4. Call \`kage_graph\` with the user's task as the query when the task depends on decisions, bugs, workflows, commands, or conventions.
-5. Use returned memory only when it is relevant, source-backed, and not stale.
-6. Prefer repo memory over public/community memory when they conflict.
+1. Call \`kage_context\` with \`project_dir\` and the task as \`query\`.
+   This validates memory, recalls relevant packets, and queries both the code graph
+   and knowledge graph in one call — replacing the old four-step validate/recall/code_graph/graph sequence.
+2. Use returned memory only when it is relevant, source-backed, and not stale.
+3. Prefer repo memory over public/community memory when they conflict.
 Do this without waiting for the user to ask. Kage should feel like ambient repo memory, not a manual search command.
@@ -216,17 +216,13 @@ If recalled memory materially helped, call \`kage_feedback\` with \`helpful\`.
 For normal coding tasks:
-1. \`kage_validate\`
-2. \`kage_recall\`
-3. \`kage_code_graph\` for source flow, routes, symbols, tests, and dependencies
-4. \`kage_graph\` for remembered decisions, bugs, workflows, and conventions
-5. Work on the task
-6. \`kage_learn\` for concrete learnings
-7. \`kage_refresh\` after meaningful file changes
-8. \`kage_pr_summarize\` or \`kage_propose_from_diff\` before the final response to create repo-local change memory
-9. \`kage_pr_check\` before final handoff or merge readiness claims
+1. \`kage_context\` — validate + recall + code graph + knowledge graph in one call
+2. Work on the task
+3. \`kage_learn\` for concrete learnings
+4. \`kage_refresh\` after meaningful file changes
+5. \`kage_propose_from_diff\` before the final response to create repo-local change memory
-For quick factual questions, \`kage_recall\` alone is enough. For status or demo requests, call \`kage_metrics\`.
+For quick factual questions, \`kage_context\` alone is enough. For status or demo requests, call \`kage_metrics\`.
 ${AGENTS_POLICY_END}
 `;
 const STOPWORDS = new Set([
@@ -579,7 +575,9 @@ function evaluateMemoryQuality(projectDir, packet) {
         risks,
         duplicate_candidates: duplicates,
         stale_reasons: staleReasons,
-        estimated_tokens_saved: Math.max(40, estimateTokens(packet.body) * 2),
+        // Tokens an agent saves by reading this packet instead of the files it references.
+        // Approximated as the token size of the files it grounds to (or the packet body if no paths).
+        estimated_tokens_saved: Math.max(20, estimateTokens(packet.body)),
     };
 }
 function evaluateMemoryAdmission(projectDir, packet) {
@@ -2905,8 +2903,13 @@ function kageMetrics(projectDir) {
     const duplicatePairs = allPackets.reduce((sum, packet) => sum + duplicateCandidates(projectDir, packet).length, 0);
     const indexedSourceTokens = Math.ceil(sourceFiles.reduce((sum, file) => sum + file.size_bytes, 0) / 4);
     const memoryTokens = allPackets.reduce((sum, packet) => sum + estimateTokens(packetText(packet)), 0);
+    // Estimated size of a typical recall response: structured packet summaries + code graph
+    // slice, capped at ~1 800 tokens. This is what actually reaches the agent per recall call.
     const recallContextTokens = Math.max(250, Math.min(1800, codeGraph.symbols.length * 12 + codeGraph.routes.length * 10 + knowledgeGraph.edges.length * 14 + 180));
-    const tokensSaved = Math.max(0, indexedSourceTokens + memoryTokens - recallContextTokens);
+    // Honest saving: tokens an agent would spend reading all source files minus tokens a
+    // targeted recall costs. Only meaningful when an agent would otherwise read everything.
+    // memoryTokens is storage cost, not context sent — excluded from this calculation.
+    const tokensSaved = Math.max(0, indexedSourceTokens - recallContextTokens);
     const readinessScore = Math.max(0, Math.min(100, Math.round(coverage * 0.35 +
         percent(evidenceBackedEdges, knowledgeGraph.edges.length) * 0.25 +
         (approvedPackets > 0 ? 20 : 0) +
@@ -3064,6 +3067,110 @@ function benchmarkProject(projectDir) {
         },
     };
 }
+function baselineDiscoveryFiles(projectDir, task) { // Picks up to 10 repo files scored against `task`; benchmarkTaskComparison uses the result as its "without Kage" baseline.
+    const terms = tokenize(task); // presumably splits the task into search terms — confirm against tokenize()
+    const graph = buildCodeGraph(projectDir);
+    const candidatePaths = unique([ // four fixed orientation files plus every file path in the code graph
+        "README.md",
+        "AGENTS.md",
+        "CLAUDE.md",
+        "package.json",
+        ...graph.files.map((file) => file.path),
+    ]).filter((path) => path && !shouldSkipRepoMemoryPath(path)); // drops empty paths and anything shouldSkipRepoMemoryPath rejects
+    return candidatePaths
+        .map((path) => { // each candidate becomes a scored entry, or null when it is filtered out
+        const absolute = (0, node_path_1.join)(projectDir, path);
+        if (!(0, node_fs_1.existsSync)(absolute)) // candidate is not on disk (e.g. a repo without CLAUDE.md)
+            return null;
+        const stats = (0, node_fs_1.statSync)(absolute);
+        if (!stats.isFile() || stats.size > 240_000) // skip non-files and files larger than 240,000 bytes
+            return null;
+        const text = (0, node_fs_1.readFileSync)(absolute, "utf8"); // the whole file is read, but only its first 8000 characters are scored below
+        const score = scoreText(terms, `${path}\n${text.slice(0, 8000)}`, [path]); // scores the path together with the head of the file content
+        const alwaysUseful = ["README.md", "AGENTS.md", "CLAUDE.md", "package.json"].includes(path);
+        if (score <= 0 && !alwaysUseful) // orientation files are kept even when no task term matches
+            return null;
+        return {
+            path,
+            tokens: Math.max(1, Math.ceil(stats.size / 4)), // estimate: 4 bytes per token over the full file size, never below 1
+            why: score > 0 ? "task terms matched path or file content" : "standard repo orientation file",
+            score: score + (alwaysUseful ? 1 : 0), // orientation files get a +1 ranking bonus
+        };
+    })
+        .filter((entry) => Boolean(entry)) // remove the nulls produced by the guards above
+        .sort((a, b) => b.score - a.score || b.tokens - a.tokens || a.path.localeCompare(b.path)) // score desc, then tokens desc, then path ascending
+        .slice(0, 10); // return at most 10 entries of shape { path, tokens, why, score }
+}
+function benchmarkTaskComparison(projectDir, task) { // Builds an estimated with/without-Kage comparison report for one task on one repo; `task` must be a string because .trim() is called on it.
+    ensureMemoryDirs(projectDir); // NOTE(review): presumably creates the memory directories if missing — confirm side effects
+    const query = task.trim() || "how do I run tests"; // an empty or whitespace-only task falls back to the default query
+    const baselineFiles = baselineDiscoveryFiles(projectDir, query);
+    const baselineTokens = baselineFiles.reduce((sum, file) => sum + file.tokens, 0); // total estimated tokens across the baseline files
+    const recallResult = recall(projectDir, query, 5, true); // limit 5; the 4th argument looks like the explain flag — TODO confirm against recall()
+    const codeResult = queryCodeGraph(projectDir, query, 10);
+    const kageContext = `${recallResult.context_block}\n\n${codeResult.context_block}`; // both context blocks joined by a blank line
+    const kageTokens = estimateTokens(kageContext);
+    const codeFactLines = [ // one display line per route, symbol and test, plus the first 5 files
+        ...codeResult.routes.map((route) => `[route] ${route.method} ${route.path} in ${route.file_path}:${route.line}`),
+        ...codeResult.symbols.map((symbol) => `[symbol] ${symbol.kind} ${symbol.name} in ${symbol.path}:${symbol.line}`),
+        ...codeResult.tests.map((test) => `[test] ${test.title} in ${test.test_path}:${test.line}`),
+        ...codeResult.files.slice(0, 5).map((file) => `[file] ${file.path} (${file.kind}, ${file.language}, ${file.parser})`),
+    ];
+    const baselineSteps = Math.max(3, baselineFiles.length + 2); // one step per baseline file plus 2, never fewer than 3
+    const kageSteps = 3; // fixed constant, not measured
+    const tokensSaved = Math.max(0, baselineTokens - kageTokens); // clamped so a larger Kage context never reports negative savings
+    const contextReduction = baselineTokens > 0 ? percent(tokensSaved, baselineTokens) : 0; // reports 0 when there are no baseline tokens
+    const timeSaved = Math.max(0, baselineSteps * 45 - kageSteps * 12); // hard-coded 45 s per baseline step and 12 s per Kage step
+    return {
+        schema_version: 1,
+        project_dir: projectDir,
+        task: query, // the effective query, i.e. after trimming/defaulting
+        generated_at: nowIso(),
+        baseline_without_kage: {
+            strategy: "manual_repo_discovery_estimate",
+            files_examined: baselineFiles.length,
+            full_file_tokens: baselineTokens,
+            steps: baselineSteps,
+            estimated_time_seconds: baselineSteps * 45, // same 45 s/step constant as timeSaved
+        },
+        with_kage: {
+            strategy: "recall_plus_code_graph",
+            recall_results: recallResult.results.length,
+            memory_packets_used: recallResult.results.length, // same value as recall_results
+            code_files_returned: codeResult.files.length,
+            code_symbols_returned: codeResult.symbols.length,
+            code_routes_returned: codeResult.routes.length,
+            code_tests_returned: codeResult.tests.length,
+            context_tokens: kageTokens,
+            steps: kageSteps,
+            estimated_time_seconds: kageSteps * 12, // same 12 s/step constant as timeSaved
+        },
+        delta: {
+            estimated_tokens_saved: tokensSaved,
+            context_reduction_percent: contextReduction,
+            rediscovery_steps_saved: Math.max(0, baselineSteps - kageSteps),
+            estimated_time_saved_seconds: timeSaved,
+            full_file_reads_avoided: Math.max(0, baselineFiles.length - codeResult.files.length), // baseline file count minus code-graph files returned, clamped at 0
+            recall_hit: recallResult.results.length > 0,
+            code_graph_hit: codeFactLines.length > 0, // true when any route, symbol, test or file line was produced
+        },
+        evidence: {
+            baseline_files: baselineFiles.map(({ path, tokens, why }) => ({ path, tokens, why })), // the internal ranking score is not exposed
+            kage_memory: recallResult.results.map((entry) => ({
+                id: entry.packet.id,
+                title: entry.packet.title,
+                type: entry.packet.type,
+                score: entry.score,
+            })),
+            kage_code_facts: codeFactLines.slice(0, 12), // at most 12 fact lines are reported
+        },
+        caveats: [
+            "Baseline is a deterministic manual-discovery estimate, not a live human or agent timing trace.",
+            "Token savings estimate full-file reads avoided versus compact Kage recall/code-graph context.",
+            "Use this for relative proof on the same repo/task, not cross-repo absolute claims.",
+        ],
+    };
+}
 function kageMetricsShallow(projectDir) {
     const codeGraph = buildCodeGraph(projectDir);
     const knowledgeGraph = buildKnowledgeGraph(projectDir);
@@ -3108,7 +3215,7 @@ function kageMetricsShallow(projectDir) {
             estimated_indexed_source_tokens: indexedSourceTokens,
             estimated_memory_tokens: memoryTokens,
             estimated_recall_context_tokens: recallContextTokens,
-            estimated_tokens_saved_per_recall: Math.max(0, indexedSourceTokens + memoryTokens - recallContextTokens),
+            estimated_tokens_saved_per_recall: Math.max(0, indexedSourceTokens - recallContextTokens),
         },
         harness: {
             policy_installed: (0, node_fs_1.existsSync)((0, node_path_1.join)(projectDir, "AGENTS.md")),
@@ -3423,10 +3530,8 @@ fi
 if [[ -z "$POLICY" ]]; then
   POLICY="This repo uses Kage as an automatic memory harness for coding agents.
 Before making code changes or answering implementation questions:
-1. Call kage_validate for this repo.
-2. Call kage_recall with the user task as the query.
-3. Call kage_code_graph for file, symbol, route, test, or dependency questions.
-4. Call kage_graph for decisions, bugs, workflows, and conventions.
+1. Call kage_context with project_dir and the user task as query.
+2. Use returned memory only when it is relevant, source-backed, and not stale.
 When you learn something reusable: kage_learn.
 After meaningful file changes: kage_refresh.
 Before finishing a task that changed files: kage_pr_summarize or kage_propose_from_diff, then kage_pr_check.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@kage-core/kage-graph-mcp",
-  "version": "1.1.4",
+  "version": "1.1.6",
   "description": "Local-first repo memory, code graph, and recall MCP server for coding agents",
   "main": "dist/index.js",
   "files": [