npm - @xultrax-web/agent-memory-mcp - Versions diffs - 0.11.5 → 0.11.7 - Mend

@xultrax-web/agent-memory-mcp 0.11.5 → 0.11.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -1,15 +1,20 @@
 # agent-memory-mcp
-> Markdown memory for AI agents. Your data is just files.
+> Codify how you work. Every AI tool obeys.
 [![CI](https://github.com/xultrax-web/agent-memory-mcp/actions/workflows/ci.yml/badge.svg)](https://github.com/xultrax-web/agent-memory-mcp/actions/workflows/ci.yml)
 [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
 [![Node](https://img.shields.io/badge/node-%3E%3D20-brightgreen.svg)](https://nodejs.org)
 [![MCP](https://img.shields.io/badge/MCP-server-blueviolet)](https://modelcontextprotocol.io)
-**Markdown memory for AI agents.** Plain files in a directory you control — read them, edit them, grep them, commit them. Operator-grade storage primitives (atomic writes, file locking, soft-delete to `.trash/`, schema versioning, doctor command) wrap the files so nothing rots in the long tail.
+**Memory as constraint, not just recall.** Plain markdown files in a directory you control. Capture your rules + recipes + decisions + context once, applied everywhere — across sessions, across machines, across every AI tool you use.
-You can `cat` your memory. You can `grep` it. You can edit it in vim. You can commit it to git. You can move it between machines with `scp` or with the built-in `agent-memory sync` (git-backed). If the AI gets a memory wrong, you fix it in a text editor and save. No migration scripts. No vendor lock-in.
+The wedge:
+1. **Rules are first-class memories.** Tag with severity (hard / soft), scope, enforce_on category, regex patterns, last_verified date.
+2. **Companion files emit automatically** to `AGENTS.md` (Linux-Foundation universal standard), `CLAUDE.md` (Claude Code's 5-level hierarchy), `.cursor/rules/*.mdc` (Cursor MDC), and `.gemini/instructions.md` — your rules show up in every tool, every session, with no plugin needed.
+3. **`check_action` gates destructive operations.** Agent proposes an action, server matches against your rule store, and either issues a [Compliance Receipt](docs/compliance-receipt-protocol-1.0.md) (HMAC-signed bearer token bound to your rules) or returns a structured rejection naming the rule that blocked.
+4. **Plain files all the way down.** You can `cat` your memory, `grep` it, edit it in vim, commit it to git, sync it to another machine via the built-in `agent-memory sync`. If the AI gets it wrong, you fix it in a text editor and save. No migration scripts. No vendor lock-in. Reference implementation of the [Compliance Receipt Protocol 1.0](docs/compliance-receipt-protocol-1.0.md) — other MCP servers can adopt the same receipts and interoperate.
 ---

package/dist/index.js CHANGED Viewed

@@ -23,7 +23,7 @@
  */
 import { Server } from "@modelcontextprotocol/sdk/server/index.js";
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
-import { CallToolRequestSchema, GetPromptRequestSchema, ListPromptsRequestSchema, ListResourcesRequestSchema, ListToolsRequestSchema, ReadResourceRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
+import { CallToolRequestSchema, CreateMessageResultSchema, GetPromptRequestSchema, ListPromptsRequestSchema, ListResourcesRequestSchema, ListToolsRequestSchema, ReadResourceRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
 import Fuse from "fuse.js";
 import matter from "gray-matter";
 import { spawnSync } from "node:child_process";
@@ -1504,15 +1504,116 @@ export function checkActionAgainstRules(action, actionType) {
     }
     return { hard, soft, rules_evaluated: rules.length };
 }
-function toolCheckAction(args) {
+/**
+ * Tier-2 Sampling enrichment; this header covers both helpers below.
+ * clientSupportsSampling(): true only if the client declared `sampling`.
+ * runTier2Sampling(rule, action, actionType): null when the rule has no
+ * applies_when or the client lacks sampling. Otherwise it sends the
+ * rule + action via MCP sampling/createMessage (the client decides
+ * whether to forward, prompt the user, or refuse) and resolves to
+ * { rule, severity, reason } if the reply parses to violates === true
+ * (severity defaults to "soft"), else null. Failures also yield null.
+ */
+function clientSupportsSampling() {
+    // Per the original author's note, getClientCapabilities() yields
+    // undefined until the MCP initialize handshake completes, hence the
+    // optional chaining. Sampling is attempted only when the client
+    // declared `sampling`: that avoids a wasted round-trip and keeps
+    // harnesses that never answer sampling/createMessage from hanging.
+    try {
+        const caps = server.getClientCapabilities();
+        return !!caps?.sampling;
+    }
+    catch {
+        return false;
+    }
+}
+async function runTier2Sampling(rule, action, actionType) {
+    if (!rule.applies_when || rule.applies_when.length === 0)
+        return null;
+    if (!clientSupportsSampling())
+        return null;
+    const prompt = `You are evaluating whether a proposed action violates an operator rule.\n\n` +
+        `RULE:\n` +
+        `  name: ${rule.name}\n` +
+        `  description: ${rule.description}\n` +
+        `  severity: ${rule.severity ?? "soft"}\n` +
+        `  applies_when:\n` +
+        rule.applies_when.map((s) => `    - ${s}`).join("\n") +
+        `\n\nPROPOSED ACTION:\n` +
+        `  ${action}\n` +
+        `  (category: ${actionType})\n\n` +
+        `Does the proposed action match any of the "applies_when" conditions?\n` +
+        `Respond with strict JSON only, no commentary: {"violates": true|false, "reason": "..."}.\n` +
+        `If the action is ambiguous, answer false.`;
+    try {
+        const result = await server.request({
+            method: "sampling/createMessage",
+            params: {
+                messages: [{ role: "user", content: { type: "text", text: prompt } }],
+                systemPrompt: "You are a strict policy evaluator. Reply with JSON only.",
+                maxTokens: 200,
+                modelPreferences: { intelligencePriority: 0.8, speedPriority: 0.4 },
+            },
+        }, CreateMessageResultSchema);
+        const text = result.content.type === "text" ? result.content.text : "";
+        // Strip a leading ``` / ```json fence and a trailing ``` before parsing.
+        const cleaned = text.trim().replace(/^```(?:json)?\s*|\s*```$/g, "");
+        const parsed = JSON.parse(cleaned);
+        if (parsed.violates === true) {
+            return {
+                rule: rule.name,
+                severity: rule.severity ?? "soft",
+                reason: `Sampling judgment: ${parsed.reason ?? "applies_when matched"}`,
+            };
+        }
+        return null;
+    }
+    catch (err) {
+        // Reached when server.request rejects or JSON.parse / property
+        // access throws (e.g. sampling refused, reply is not JSON).
+        // Deliberately fail open to Tier-1 only: a check_action call is
+        // never blocked because Tier-2 could not run.
+        log("debug", "tier2_sampling_skipped", {
+            rule: rule.name,
+            error: err instanceof Error ? err.message : String(err),
+        });
+        return null;
+    }
+}
+async function toolCheckAction(args) {
     const action = String(args.action ?? "").trim();
     const actionType = String(args.action_type ?? "").trim();
     const sessionId = typeof args.session_id === "string" ? args.session_id.trim() : "";
+    // Tier-2 Sampling is opt-out: defaults to true on clients that support
+    // it; gracefully degrades on clients that don't. Set to false to skip
+    // the LLM round-trip entirely (e.g. for batched/script use).
+    const tier2Enabled = args.use_sampling !== false;
     if (!action)
         throw new Error("action is required (the proposed action description)");
     if (!actionType)
         throw new Error("action_type is required (e.g. 'deletions', 'commits', 'file_writes', 'chat_responses')");
     const { hard, soft, rules_evaluated } = checkActionAgainstRules(action, actionType);
+    // Tier-2: run Sampling for any rule with applies_when that DIDN'T
+    // already match deterministically. Rules already flagged in Tier-1
+    // don't need a Sampling round-trip (we know they violate).
+    if (tier2Enabled) {
+        const tier1HitRules = new Set([...hard.map((v) => v.rule), ...soft.map((v) => v.rule)]);
+        const rules = loadAllRules();
+        const tier2Candidates = rules.filter((r) => r.applies_when &&
+            r.applies_when.length > 0 &&
+            !tier1HitRules.has(r.name) &&
+            (!r.enforce_on || r.enforce_on.length === 0 || r.enforce_on.includes(actionType)));
+        for (const rule of tier2Candidates) {
+            const violation = await runTier2Sampling(rule, action, actionType);
+            if (violation) {
+                if (violation.severity === "hard")
+                    hard.push(violation);
+                else
+                    soft.push(violation);
+            }
+        }
+    }
     if (hard.length > 0) {
         const result = {
             approved: false,
@@ -2087,7 +2188,7 @@ function actionColor(action) {
 // -------------------------------------------------------------
 // Server wiring
 // -------------------------------------------------------------
-const server = new Server({ name: "agent-memory", version: "0.11.4" }, { capabilities: { tools: {}, resources: {}, prompts: {} } });
+const server = new Server({ name: "agent-memory", version: "0.11.7" }, { capabilities: { tools: {}, resources: {}, prompts: {} } });
 // -------------------------------------------------------------
 // Resource URI scheme
 // -------------------------------------------------------------
@@ -2611,7 +2712,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
                 "  - APPROVES: returns a short-lived Compliance Receipt (HMAC-signed, 60s default) the agent can pass to destructive tools (e.g. delete_memory) as proof of compliance.\n" +
                 "  - DENIES: returns structured hard_violations (severity:hard rules that block) and/or soft_warnings (severity:soft rules that warn but allow).\n\n" +
                 "Tier 1 (deterministic) matches the action against rule.matches regexes + rule.enforce_on category filter. Works on every MCP client.\n" +
-                "Tier 2 (Sampling-enriched LLM judgment on rule.applies_when) ships in v0.11.3.x for clients that support Sampling (Claude Desktop, VS Code Copilot).",
+                "Tier 2 (v0.11.7+) calls back to the client via MCP sampling/createMessage to judge rule.applies_when natural-language conditions. Auto-enabled on clients that declared the sampling capability; silently skipped on clients that didn't.",
             inputSchema: {
                 type: "object",
                 properties: {
@@ -2627,6 +2728,10 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
                         type: "string",
                         description: "Optional session identifier · binds the issued receipt to this session via a caveat.",
                     },
+                    use_sampling: {
+                        type: "boolean",
+                        description: "Opt out of Tier-2 Sampling enrichment (default true). Set false for batched/scripted use where the Sampling round-trip would add latency. CLI invocations default this to false automatically.",
+                    },
                 },
                 required: ["action", "action_type"],
             },
@@ -2721,7 +2826,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
                 result = toolEmitCompanions(args);
                 break;
             case "check_action":
-                result = toolCheckAction(args);
+                result = await toolCheckAction(args);
                 break;
             case "audit":
                 result = toolAudit(args);
@@ -3018,11 +3123,14 @@ async function cliMain(command, rest) {
                 if (!action || !actionType) {
                     throw new Error("Usage: agent-memory check-action '<action description>' --type <action_type> [--session <id>]");
                 }
-                process.stdout.write(toolCheckAction({
+                process.stdout.write((await toolCheckAction({
                     action,
                     action_type: actionType,
                     session_id: flags.session ? String(flags.session) : undefined,
-                }) + "\n");
+                    // CLI invocations don't have a Sampling-capable client attached,
+                    // so skip Tier 2 to avoid a timeout · keeps the CLI fast.
+                    use_sampling: false,
+                })) + "\n");
                 return 0;
             }
             case "audit": {

package/package.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "name": "@xultrax-web/agent-memory-mcp",
-  "version": "0.11.5",
+  "version": "0.11.7",
   "mcpName": "io.github.xultrax-web/agent-memory-mcp",
-  "description": "Markdown memory for AI agents. Plain files you can read, edit, grep, and commit. Operator-grade storage with atomic writes, file locking, tags, [[wiki-links]], find_related, git-backed multi-machine sync, and an Ink-based TUI.",
+  "description": "Codify how you work. Every AI tool obeys. Markdown rules + cross-tool companion files (AGENTS.md/CLAUDE.md/.cursor/rules/.gemini) + Compliance Receipts for protocol-level enforcement of destructive ops. Reference implementation of CRP 1.0. Works on every MCP client (no Sampling required).",
   "type": "module",
   "bin": {
     "agent-memory-mcp": "dist/index.js",