npm - knit-mcp - Versions diffs - 0.10.0 → 0.11.4 - Mend

knit-mcp 0.10.0 → 0.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/README.md +48 -5
package/dist/cache-7HSMIYDJ.js +20 -0
package/dist/{chunk-BW4JUY74.js → chunk-27TA2ZQZ.js} +16 -0
package/dist/{chunk-KLNUEE3O.js → chunk-7UFS67HP.js} +9 -1
package/dist/{chunk-2N67YTOQ.js → chunk-GATMQQK5.js} +4 -56
package/dist/{chunk-A7DIGJI4.js → chunk-HROSQ5MS.js} +211 -347
package/dist/chunk-I63UMEBF.js +330 -0
package/dist/{chunk-5NCSZYRJ.js → chunk-LV73YTVN.js} +5 -0
package/dist/{chunk-RD2CPNTQ.js → chunk-OINYMLOV.js} +11 -3
package/dist/chunk-POXT5OYN.js +66 -0
package/dist/{chunk-7PPC6IG6.js → chunk-ST4X7LZT.js} +60 -2
package/dist/{chunk-P47VGEXO.js → chunk-VB2TIR6L.js} +2 -2
package/dist/cli.js +29 -9
package/dist/doctor-4DN2P2JR.js +179 -0
package/dist/{export-4A7VE6VG.js → export-4BO6HCXP.js} +2 -2
package/dist/{install-agents-O2YVJLO6.js → install-agents-WDBQBWMN.js} +9 -7
package/dist/{instructions-4FI32YZU.js → instructions-JARSXQPO.js} +1 -1
package/dist/{integration-scanner-YU43QNVC.js → integration-scanner-LBD2PIZ3.js} +3 -3
package/dist/{refresh-W3I7QDDP.js → refresh-S62AZ3QA.js} +18 -6
package/dist/{status-KX4U7HYL.js → status-2SEITNIE.js} +4 -4
package/dist/{tools-AI6QKEOU.js → tools-ECHCPLCB.js} +964 -120
package/dist/update-check-GQVDVT2N.js +14 -0
package/package.json +2 -1
package/dist/cache-PH7DZ6Y4.js +0 -18

package/README.md CHANGED Viewed

@@ -3,8 +3,8 @@
   <a href="https://github.com/PDgit12/knit/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/PDgit12/knit/ci.yml?style=for-the-badge&label=CI&color=10b981" alt="CI" /></a>
   <img src="https://img.shields.io/badge/license-MIT-3b82f6?style=for-the-badge" alt="license" />
   <img src="https://img.shields.io/badge/node-%E2%89%A518-339933?style=for-the-badge&logo=node.js&logoColor=white" alt="node" />
-  <img src="https://img.shields.io/badge/tests-492%20passing-22c55e?style=for-the-badge" alt="tests" />
-  <img src="https://img.shields.io/badge/MCP%20tools-43-7c3aed?style=for-the-badge" alt="tools" />
+  <img src="https://img.shields.io/badge/tests-665%20passing-22c55e?style=for-the-badge" alt="tests" />
+  <img src="https://img.shields.io/badge/MCP%20tools-53-7c3aed?style=for-the-badge" alt="tools" />
 </p>
 <h1 align="center">🧶 knit</h1>
@@ -18,9 +18,10 @@
 <p align="center">
   <a href="#-quick-start">Quick start</a> ·
   <a href="#-what-knit-is">What it is</a> ·
-  <a href="#-whats-new-in-v090">v0.9</a> ·
-  <a href="#-43-mcp-tools">Tools</a> ·
-  <a href="#-how-its-different">Comparison</a>
+  <a href="#-whats-new-in-v0110">v0.11</a> ·
+  <a href="#-52-mcp-tools">Tools</a> ·
+  <a href="#-how-its-different">Comparison</a> ·
+  <a href="#-honest-comparison-vs-memory-libraries">vs mem0/Letta</a>
 </p>
 ---
@@ -410,10 +411,52 @@ Compounding
 ---
+## 🧭 Honest comparison vs memory libraries
+The mem0 / Letta / agentmemory comparison deserves a separate section because they're a different category — **memory-as-a-service libraries**, not MCP-native workflow layers. Reading their published benchmarks side-by-side:
+| | mem0 | Letta (MemGPT) | agentmemory | **Knit** |
+|--|---|---|---|---|
+| **Published benchmark** | LOCOMO: 67–92% LLM-as-Judge; ~90% token reduction (1.7K vs 26K per conversation) | No head-to-head token-reduction number; "Letta Leaderboard" benchmarks *LLMs* on agentic memory, not Letta | LongMemEval-S: **95.2% R@5** with BM25+RRF+graph; 86.2% BM25-only | **Not yet measured on a public benchmark.** Similar architecture to agentmemory (BM25 + RRF + graph); only a synthetic-harness number so far (see below). |
+| **Retrieval architecture** | Vector + graph (Mem0g variant) | OS-inspired tiered memory (core/recall/archival) | BM25 + local vectors + KG fused via RRF (k=60) | BM25 + RRF + graph-traversal (fused via RRF k=60). Per-project + cross-project diversity caps. |
+| **Install shape** | SDK integration; managed cloud or self-hosted | SDK integration; self-hosted server | Python library | **`npx knit-mcp setup` → MCP server, zero glue.** Works with Claude Code / Cursor / Codex / any MCP host. |
+| **Workflow primitive** | None — pure memory | Agent-managed memory operations | None — pure retrieval | **4-tier classifier + plan-mode + protocol guard + parallel team worktrees.** |
+| **Self-calibration** | No | No | No | **Per-project classifier calibration** (v0.11): user FP feedback shifts thresholds; classifier gets less wrong over time. |
+### What's honest about this
+**Knit's measured retrieval on a 50-question synthetic harness (v0.11.2):**
+| Metric | Knit (v0.11.2 synthetic) | agentmemory (LongMemEval-S, published) |
+|---|---|---|
+| Top-1 accuracy | **86.0%** | not published in that form |
+| Recall@5 | **96.0%** | **95.2%** |
+Run it yourself: `npm run bench`. Source: [`benchmarks/retrieval-synthetic.ts`](./benchmarks/retrieval-synthetic.ts).
+**These numbers are NOT apples-to-apples with agentmemory's.** Their benchmark is 1,500 questions from real long conversations; Knit's is 50 hand-authored questions on a 7KB synthetic corpus. The numbers are close because the architecture is similar (BM25 + RRF), not because we've proven parity at scale. **Real comparison requires running LongMemEval-S on Knit** — on the roadmap for v0.13.
+**Knit isn't trying to be a better mem0.** It's a different product:
+- **MCP-native + zero-glue install** — mem0/Letta require SDK integration; Knit drops into any MCP host (Claude Code, Cursor, Codex) with one command.
+- **Workflow primitive** — the 4-tier classifier + plan-mode + protocol guard + team worktrees is what makes Knit a *command layer*, not a memory library.
+- **Per-project classifier calibration** (v0.11 slice 4) — `knit_record_false_positive` with a direction tag shifts thresholds over time. Nobody else does this; nobody else needs to, because they're memory libraries, not workflow routers.
+- **Measurable cheapness** — `knit_compounding_metrics` + `knit_get_metrics_history` make the "cheaper over time" claim *chartable per project*. mem0 publishes aggregate dataset numbers; Knit ships per-user instrumentation.
+### What's deferred
+LongMemEval-S R@5/R@10 + LOCOMO LLM-as-Judge runs are on the roadmap (v0.13+). Until they're published, treat any cross-system token-savings comparison as architectural-claim-only.
+---
 ## 📜 Release history
 | Version | Headline |
 |---|---|
+| **v0.11.4** | Dogfood audit · ran a full audit of Knit's own codebase using its own `knit_spawn_team_worktree` primitive (4 parallel teams: Core Logic, Infrastructure, UI, Quality Assurance). Fixes: HIGH `engram refresh` no longer clobbers user-curated CLAUDE.md (now uses `spliceKnitBlock` like `cache.ts`); `saveSource`/`loadSource` validate `sourceId`; `appendGlobalLearning` propagates write failures; `redactSecrets` applied to `label`/`tags`/`domains` across all persistence boundaries; 100KB response ceiling on `knit_generate_test_cases`; full v0.11 tool surface now documented in `workflow-protocol.ts` generator (was frozen at the v0.4 surface). Plus: 16 key tools reclassified with `[PROTOCOL]`/`[REVIEW]`/`[MEMORY]`/`[GRAPH]` prefixes so the LLM picks the right tool reliably. 53 tools, 687 tests. |
+| **v0.11.3** | Propagation patch · `update_available` flag now surfaces in `knit_load_session` response (≈100% session reach vs. `knit_brain_status`'s low reach) + startup stderr nag on stale versions. Helps FUTURE upgrades land faster; doesn't retroactively reach v0.10.x users. 53 tools, 665 tests. |
+| **v0.11.2** | Pre-publish polish · chunk cap (2000) + `errorResponse` envelope across handlers + CLAUDE.md generator surfaces v0.11 tools · new `engram doctor` install health-check CLI · upgrade-path smoke test caught + fixed a data-loss bug in cache.ts (Case B was wiping user permissions on upgrade) · 11 real exploit-payload integration tests prove C1/C2/H1 fixes hold · `npm run bench` ships a synthetic retrieval harness (50 Q&A) measuring 86% top-1 / 96% R@5. 53 tools, 664 tests. |
+| **v0.11.1** | Audit-driven hardening · 3 CRITICAL (source_id path traversal, post-edit tsc shell injection, live calibration bug) + 10 HIGH fixes from a 5-agent audit, implemented in 3 parallel `knit_spawn_team_worktree` teams. HOOKS_VERSION 11 (auto-upgrades existing users). New `knit_delete_requirements` tool. Honest comparison vs mem0/Letta added. 53 tools, 636 tests. |
+| **v0.11.0** | Verify Layer + auto-config foundation · mandatory `knit_verify_claim` REVIEW gate · post-edit diff verify + universal `tsc` check · drift detector · self-healing classifier (per-project calibration) · `knit_index_requirements` + `knit_generate_test_cases` (BM25 over long specs) · `knit_get_fingerprint` + `knit_infer_domains` + `knit_compose_template` (zero-config CLAUDE.md). 52 tools, 625 tests. |
 | **v0.10.0** | Token-economics release · risk × scope × change_kind classifier split · `context_budget_remaining` graceful degradation · per-project diversity cap on cross-project search · 11 new compounding-metrics fields + weekly snapshot persistence + `knit_get_metrics_history`. Makes "Knit makes Claude cheaper" a chartable number from day 1. |
 | **v0.9.0** | Hook-level enforcement · citation rule · `knit_verify_claim` · auto-search in classify · `suggested_reads` · `knit_get_learning` · `knit_consolidate_learnings`. |
 | **v0.8.x** | Vectorless RAG (BM25 + RRF) · graph-traversal retriever · per-project instruction tailoring · `knit_compounding_metrics` · integration scanner. |

package/dist/cache-7HSMIYDJ.js ADDED Viewed

@@ -0,0 +1,20 @@
+import {
+  detectProjectRoot,
+  getBrain,
+  refreshBrain
+} from "./chunk-I63UMEBF.js";
+import "./chunk-HROSQ5MS.js";
+import "./chunk-GATMQQK5.js";
+import "./chunk-WKQHCLLO.js";
+import "./chunk-MOOVNMIN.js";
+import "./chunk-ST4X7LZT.js";
+import "./chunk-M3YZOJNW.js";
+import "./chunk-POXT5OYN.js";
+import "./chunk-VB2TIR6L.js";
+import "./chunk-7UFS67HP.js";
+import "./chunk-27TA2ZQZ.js";
+export {
+  detectProjectRoot,
+  getBrain,
+  refreshBrain
+};

package/dist/{chunk-BW4JUY74.js → chunk-27TA2ZQZ.js} RENAMED Viewed

@@ -93,6 +93,18 @@ function searchMarkerPath(rootPath) {
 function claimMarkerPath(rootPath) {
   return join2(projectDataDir(rootPath), ".claim-verified-current");
 }
+function turnEditLogPath(rootPath) {
+  return join2(projectDataDir(rootPath), ".turn-edits.jsonl");
+}
+function calibrationPath(rootPath) {
+  return join2(projectDataDir(rootPath), "calibration.json");
+}
+function requirementsDir(rootPath) {
+  return join2(projectDataDir(rootPath), "requirements");
+}
+function requirementSourcePath(rootPath, sourceId) {
+  return join2(requirementsDir(rootPath), `${sourceId}.json`);
+}
 function featuresConfigPath(rootPath) {
   return join2(projectDataDir(rootPath), "features.json");
 }
@@ -150,6 +162,10 @@ export {
   sessionMarkerPath,
   searchMarkerPath,
   claimMarkerPath,
+  turnEditLogPath,
+  calibrationPath,
+  requirementsDir,
+  requirementSourcePath,
   featuresConfigPath,
   integrationsConfigPath,
   metricsHistoryPath,

package/dist/{chunk-KLNUEE3O.js → chunk-7UFS67HP.js} RENAMED Viewed

@@ -53,7 +53,15 @@ function generateSessionStartup() {
 First action: call \`knit_load_session\`. One MCP call returns last sessions, handoff, learnings, false positives. If \`handoff.md\` exists at the repo root, resume that work first.
-Protocol Guard runs in \`warn\` mode by default \u2014 adjust with \`knit_set_protocol_strictness\`.`;
+Protocol Guard runs in \`warn\` mode by default \u2014 adjust with \`knit_set_protocol_strictness\`.
+## v0.11 tool surface (in addition to query/search/record)
+- **\`knit_verify_claim\`** \u2014 fact-check one claim against the knowledge graph before LEARN. Stop-hook enforces on standard/complex scope.
+- **\`knit_index_requirements\` + \`knit_generate_test_cases\` + \`knit_list_requirements\` + \`knit_delete_requirements\`** \u2014 long-form spec / RFC ingestion (200KB doc \u2192 relevant 5\u20137KB chunks per feature query).
+- **\`knit_get_fingerprint\` + \`knit_infer_domains\` + \`knit_compose_template\`** \u2014 auto-config primitives: detected stack \u2192 ranked domains \u2192 composed CLAUDE.md sections (preview only; you paste to accept).
+- **\`knit_get_calibration\` + tag your false-positives** (e.g. \`#complex-was-trivial\`) \u2014 the per-project self-healing classifier tunes thresholds after 3 same-direction FPs.
+- **\`knit_brain_status\`** surfaces calibration / requirements / fingerprint state so you can discover all of the above from one health check.`;
 }
 function generateProjectMap(knowledge) {
   const { summary } = knowledge;

package/dist/{chunk-2N67YTOQ.js → chunk-GATMQQK5.js} RENAMED Viewed

@@ -5,13 +5,13 @@ import {
   isBundledCore,
   knownAgents,
   rawAgentUrl
-} from "./chunk-7PPC6IG6.js";
+} from "./chunk-ST4X7LZT.js";
 import {
   agentsCacheFile,
   projectAgentFile,
   projectAgentsDir,
   sessionsJsonlPath
-} from "./chunk-BW4JUY74.js";
+} from "./chunk-27TA2ZQZ.js";
 // src/engine/install-agents.ts
 import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync2, writeFileSync as writeFileSync2 } from "fs";
@@ -62,7 +62,7 @@ async function fetchAgent(name, opts = {}) {
     throw new AgentFetchError(`Unknown agent: "${name}". Not in engram's registry.`);
   }
   const cachePath = agentsCacheFile(ref, cat, bare);
-  if (existsSync(cachePath)) {
+  if (!opts.refresh && existsSync(cachePath)) {
     return readFileSync(cachePath, "utf-8");
   }
   if (process.env.KNIT_OFFLINE === "1" || process.env.ENGRAM_OFFLINE === "1") {
@@ -250,7 +250,7 @@ async function installAgentsForProject(rootPath, config, knowledge, knowledgeBas
       }
     }
     try {
-      const baseMd = await fetchAgent(name, opts.refresh ? { ref: void 0 } : {});
+      const baseMd = await fetchAgent(name, opts.refresh ? { ref: void 0, refresh: true } : {});
       const relevant = knowledgeBase ? selectRelevantLearnings(knowledgeBase.entries, name) : [];
       const personalized = personalizeAgent(baseMd, {
         config,
@@ -285,55 +285,6 @@ function agentsNeededByProject(config) {
   return Array.from(names);
 }
-// src/mcp/update-check.ts
-var REGISTRY_DIST_TAGS_URL = "https://registry.npmjs.org/-/package/knit-mcp/dist-tags";
-var FETCH_TIMEOUT_MS = 2e3;
-var CACHE_TTL_MS = 60 * 60 * 1e3;
-var cachedLatest = null;
-var lastCheckedAt = 0;
-var inFlight = null;
-function getCachedLatestVersion() {
-  if (Date.now() - lastCheckedAt > CACHE_TTL_MS) {
-    prewarmLatestVersion();
-  }
-  return cachedLatest;
-}
-function prewarmLatestVersion() {
-  if (inFlight) return;
-  if (Date.now() - lastCheckedAt < CACHE_TTL_MS && cachedLatest !== null) return;
-  inFlight = doFetch().finally(() => {
-    inFlight = null;
-  });
-}
-async function doFetch() {
-  const controller = new AbortController();
-  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
-  try {
-    const res = await fetch(REGISTRY_DIST_TAGS_URL, { signal: controller.signal });
-    if (!res.ok) return;
-    const data = await res.json();
-    if (typeof data.latest === "string" && data.latest.length > 0) {
-      cachedLatest = data.latest;
-      lastCheckedAt = Date.now();
-    }
-  } catch {
-  } finally {
-    clearTimeout(timeout);
-  }
-}
-function isNewerVersion(latest, current) {
-  const parse = (v) => {
-    const stripped = v.replace(/[-+].*$/, "");
-    const parts = stripped.split(".").map((n) => parseInt(n, 10) || 0);
-    return [parts[0] ?? 0, parts[1] ?? 0, parts[2] ?? 0];
-  };
-  const [a1, a2, a3] = parse(latest);
-  const [b1, b2, b3] = parse(current);
-  if (a1 !== b1) return a1 > b1;
-  if (a2 !== b2) return a2 > b2;
-  return a3 > b3;
-}
 // src/engine/sessions.ts
 import { existsSync as existsSync3, mkdirSync as mkdirSync3, appendFileSync, readFileSync as readFileSync3, statSync, writeFileSync as writeFileSync3, renameSync } from "fs";
 import { dirname as dirname2 } from "path";
@@ -466,9 +417,6 @@ function parseLine(line) {
 export {
   installAgentsForProject,
-  getCachedLatestVersion,
-  prewarmLatestVersion,
-  isNewerVersion,
   appendSession,
   searchSessions,
   getRecentSessions,