npm - knit-mcp - Versions diffs - 0.16.1 → 0.22.0 - Mend

knit-mcp 0.16.1 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/README.md +180 -141
package/dist/cache-3QREKWAW.js +21 -0
package/dist/chunk-2GDNMY7N.js +57 -0
package/dist/{chunk-27TA2ZQZ.js → chunk-5EUQ2DCN.js} +12 -0
package/dist/{chunk-2FAS6CV4.js → chunk-5R5YKDNT.js} +895 -13
package/dist/{chunk-BBQSWT4H.js → chunk-6BQPXFRL.js} +40 -0
package/dist/{chunk-VB2TIR6L.js → chunk-DIU7RE5X.js} +2 -2
package/dist/{chunk-OINYMLOV.js → chunk-DXV5NAQ3.js} +10 -4
package/dist/{chunk-ZESAIRIL.js → chunk-ESTWQMZZ.js} +61 -6
package/dist/{tools-7VJRV64S.js → chunk-NT7S4F72.js} +591 -158
package/dist/chunk-T55DZTYS.js +70 -0
package/dist/{chunk-Q3GNWHEW.js → chunk-WPXK5IHO.js} +60 -9
package/dist/{chunk-YRLAWCYW.js → chunk-X4PHSVRB.js} +399 -1
package/dist/chunk-YINPCUVZ.js +198 -0
package/dist/cli.js +42 -15
package/dist/doctor-HKC7JQST.js +26 -0
package/dist/{export-4BO6HCXP.js → export-QKUVOV3O.js} +3 -2
package/dist/host-SZN2NCFM.js +18 -0
package/dist/{install-agents-2JYKFLU6.js → install-agents-3ZTV6EQW.js} +8 -11
package/dist/{instructions-4SLOUME2.js → instructions-N5VV4ESJ.js} +3 -1
package/dist/{integration-scanner-LBD2PIZ3.js → integration-scanner-5O6XSGGP.js} +2 -2
package/dist/prompts-3MSBEU5V.js +49 -0
package/dist/{refresh-4X4HMDMT.js → refresh-4FWFEZP3.js} +4 -6
package/dist/{setup-2YN36GWS.js → setup-GFU5HIA5.js} +144 -19
package/dist/{status-RPHO7QQO.js → status-J2Q4ACID.js} +4 -4
package/dist/tools-AVMVTHON.js +30 -0
package/dist/{ui-GN4JT4XR.js → ui-W2SAVL73.js} +166 -82
package/package.json +1 -1
package/webapp/dist/assets/index-DxyZTqwU.js +40 -0
package/webapp/dist/index.html +1 -1
package/dist/cache-7S5DFFQ6.js +0 -21
package/dist/chunk-FX3SVNHX.js +0 -364
package/dist/chunk-JE4BZQUD.js +0 -333
package/dist/chunk-OZCVBNHF.js +0 -120
package/dist/chunk-QM4U75VE.js +0 -475
package/dist/doctor-2ESSKFZE.js +0 -14
package/webapp/dist/assets/index-BvEqg_UZ.js +0 -40

package/README.md CHANGED Viewed

@@ -3,8 +3,6 @@
   <a href="https://github.com/PDgit12/knit/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/PDgit12/knit/ci.yml?style=for-the-badge&label=CI&color=10b981" alt="CI" /></a>
   <img src="https://img.shields.io/badge/license-MIT-3b82f6?style=for-the-badge" alt="license" />
   <img src="https://img.shields.io/badge/node-%E2%89%A518-339933?style=for-the-badge&logo=node.js&logoColor=white" alt="node" />
-  <img src="https://img.shields.io/badge/MCP%20tools-55-7c3aed?style=for-the-badge" alt="tools" />
-  <img src="https://img.shields.io/badge/agents-6-10b981?style=for-the-badge" alt="agents supported" />
   <img src="https://img.shields.io/badge/local--first-100%25-3b82f6?style=for-the-badge" alt="local-first" />
 </p>
@@ -20,14 +18,14 @@
   <a href="#-quick-start">Quick start</a> ·
   <a href="#-what-knit-is">What it is</a> ·
   <a href="#-how-search-works">How search works</a> ·
-  <a href="#-55-mcp-tools">Tools</a> ·
+  <a href="#-56-mcp-tools">Tools</a> ·
   <a href="#-the-dashboard">Dashboard</a> ·
-  <a href="#-how-its-different">vs mem0/Letta</a>
+  <a href="#-why-knit">Why Knit</a>
 </p>
 ---
-## 🧠 What knit is
+## 🧠 What Knit is
 Knit gives **any MCP-speaking coding agent** the right defaults automatically — because you can't predict how a user will phrase a request, and every agent (Claude Code, Cursor, Codex CLI, Cline, Continue, GitHub Copilot) ends up burning tokens re-discovering the same project facts. Knit does four jobs at once:
@@ -36,11 +34,11 @@ Knit gives **any MCP-speaking coding agent** the right defaults automatically
 | 🧠 **Memory** | Every project keeps a brain at `~/.knit/projects/<hash>/`. Sessions compound: learnings, false positives, session summaries, and a static-analysis import graph are all queryable next session. Cross-project pool at `~/.knit/global/`. |
 | 🪶 **Tokens** | `CLAUDE.md` is ~2 KB (project facts only). Protocol depth is fetched on demand via `knit_get_workflow(phase)`. Per-cache-hit savings ≈ 15K tokens (calibrated from instrumented RESEARCH phases — override via env). Reuse-ratio + ROI surfaced in the dashboard. |
 | 🛠️ **Workflow** | A 4-tier classification (Inquiry / Trivial / Standard / Complex) with phase-triggered plan mode, quality-gated `LEARN`, and team-scoped git worktrees so parallel agents don't step on each other. |
-| 📊 **Dashboard** | New in v0.13. `knit ui` opens a local-first analytics dashboard at `http://127.0.0.1:7421` — bento layout, brain savings, per-project ROI, **force-directed brain graph**, real-time sync via SSE. See [Dashboard](#-the-dashboard). |
+| 📊 **Dashboard** | `knit` opens the brain — a local-first dashboard at `http://127.0.0.1:7421`: bento layout, brain savings, per-project ROI, **force-directed brain graph**, real-time sync via SSE. See [Dashboard](#-the-dashboard). |
 **Local-first** invariant: zero cloud calls in memory/retrieval/classification. Dashboard binds to `127.0.0.1` only, with Host/Origin validation + CSP headers. Your brain stays on your machine.
-It's a **single product**, not four. Every design choice has to win on memory + tokens + workflow + analytics together.
+One product: every design choice wins on memory, tokens, workflow, and analytics together.
 ---
@@ -48,12 +46,28 @@ It's a **single product**, not four. Every design choice has to win on memory +
 ```bash
 npm install -g knit-mcp
-knit setup       # adds Knit MCP to your agent's config (Claude Code / Cursor / Codex / etc.)
-knit ui          # opens the brain dashboard at http://127.0.0.1:7421 (optional but recommended)
+knit setup       # one-time: register Knit with your agents (Claude Code / Cursor / Codex / …)
+knit             # open the brain — the dashboard at http://127.0.0.1:7421
 ```
+Two commands: `knit setup` for one-time agent registration, then `knit` to open the brain. Agents communicate with the MCP server over stdio; that process is launched by the host, not invoked manually.
 **No per-project setup.** Open your MCP-speaking agent in any project — the first MCP tool call auto-initializes the brain, hooks, and per-project CLAUDE.md block.
+### First prompt — onboard your project
+Once Knit is connected, open your project in your agent and paste this once. Fill in the brackets, or just describe the project in your own words — the agent does the rest:
+> You have the Knit MCP connected. Call `knit_load_session`, then call `knit_onboard` with:
+> - **project_description** — what this project is
+> - **intent** — what I'm building right now
+> - **strictness** — `off` | `warn` | `block` (how strictly to enforce the workflow)
+> - **focus_domains** — comma-separated areas (e.g. `api, billing`)
+>
+> Then summarize what you configured and call `knit_classify_task` for my first task.
+Knit persists these preferences and surfaces your project intent at the start of every session. It's a plain MCP tool, so the same prompt works on **any** host — Claude Code, Cursor, Codex, Cline, Continue, Copilot — new session or resumed.
 ### Adoption per agent
 v0.14: a single `knit setup` detects **every** installed MCP-speaking agent on
@@ -78,8 +92,6 @@ per-agent manual setup, no copy-pasted JSON.
 > **Supported shells:** macOS, Linux, WSL, Git Bash, PowerShell. Windows `cmd.exe` is not supported as the hook-runner shell — use PowerShell (default in modern Windows Terminal) or Git Bash.
-> **Supported shells:** macOS, Linux, WSL, Git Bash, PowerShell. Windows `cmd.exe` is not supported as the hook-runner shell — use PowerShell (default in modern Windows Terminal) or Git Bash.
 ### Quiet mode
 Knit ships **Protocol Guard in `warn` mode by default** — hooks print reminders, they never block. Fully silent:
@@ -120,68 +132,101 @@ Knit writes nowhere else on your machine.
 ---
+## 🎬 A real session
+A new TypeScript project, from install to a compounding brain:
+1. **Install + register.** `npm i -g knit-mcp && knit setup` — Knit registers with every MCP-speaking agent on the machine.
+2. **Onboard.** Open the project in your agent and paste the onboarding prompt. The agent calls `knit_onboard` — *"Project: a billing API. Intent: add Stripe webhooks. strictness: warn. focus: api, webhooks."* Knit persists those preferences and records the intent.
+3. **Ask for the feature.** The agent calls `knit_classify_task` → e.g. *complex, high-risk* → plan mode. It pulls context with `knit_build_context` (ripple effects), `knit_search_learnings` (anything learned before), and `knit_query_dependents` on the files it will touch.
+4. **Build + verify.** It implements, runs `knit_verify_claim` to check its claims against the knowledge graph, and `knit_record_learning` to save what was non-obvious.
+5. **Compound.** Next session, `knit_load_session` surfaces your intent plus that learning — the brain is already sharper. Run **`knit`** to see it: the dashboard shows the project, its knowledge index, learnings, and token ROI building over time. Hit **Refresh** to re-index or **Export brain** to write an Obsidian vault.
+Every step is local, deterministic, and works on any MCP host.
 ## 🔍 How search works
 Knit's retrieval is **BM25 + Reciprocal Rank Fusion** over your learnings,
-session summaries, and the cross-project pool, with two cheap-but-honest
-lexical-bridging layers stacked on top: **2-gram fallback** for typos and
-rare compounds, and **curated coding-domain synonym expansion** for the
-most common semantic-gap pairs. No vector embeddings, no remote inference,
-no API calls.
-**Why this design choice (not an oversight):**
-- **Deterministic.** Same query → same ranking, every time. No model
-  drift, no upgrade-day surprises.
-- **Fast.** Sub-millisecond on corpora ≤ 1K entries (your typical
-  project memory). No cold start, no model load.
-- **Local-first.** Zero network calls. Your memory never leaves the
-  machine.
-- **Auditable.** You can explain every hit by looking at term overlap
-  + the synonym dictionary (50 pairs, hand-curated). No "the model
-  said so."
-- **Honest at the boundary.** The bench has documented misses where
-  even synonym expansion can't bridge the gap — we ship those visible,
-  not hidden.
-**What it does well.** Exact term match, identifier search
-(`knit_classify_task`), rare-term emphasis (e.g. `PIPE_BUF`), multi-word
-ranking, tag filtering, cross-project diversification (max 2 per
-project), branch diversification on sessions (max 2 per branch). **Typo
-recovery via 2-gram fallback** (`knit_clasify` → `knit_classify_task`).
-**Synonym recovery via curated dictionary** (`hook` ↔ `webhook`,
-`schema` ↔ `migration`, `auth` ↔ `authentication`, `cache` ↔ `memo`,
-`deploy` ↔ `ship` ↔ `release`, etc. — see
-[`src/engine/retrieval/synonyms.ts`](src/engine/retrieval/synonyms.ts)
-for the full ~50-pair dictionary). Synonym matches scored at 0.4× of a
-direct BM25 hit so genuine matches always rank higher.
-**What it still cannot do.** Multi-word paraphrase ("how do schema
-changes ship" with no shared terms). Deep abstraction-level bridging
-("data consistency" → "atomic temp+rename"). Question intent
-("what's the right pattern for X"). Negation. Cross-entry synthesis
-("based on the auth lessons, what should I do for OAuth"). These need
-either embeddings (model dependency + bundle weight, breaks local-first
-unless run locally via ONNX) or an LLM call layer (Knit-as-retrieval
-becomes Knit-as-agent, different identity). v0.20+ candidate: hybrid
-retrieval (BM25 + local embeddings via RRF) — opt-in, bench-gated.
-**The practical implication.** Search with words close to how you
-recorded the learning, OR words that have a synonym pair in the
-dictionary. If you write a learning about *webhook signatures*, you
-can now search either *webhook signatures* OR *hook signatures* —
-the dictionary bridges those. For genuinely different vocabulary that
-isn't in the synonym table, use `knit_search_global_learnings` to widen
-the corpus, or call `knit_search_sessions` to pull from past narrative
-summaries that may use more terms.
-**Bench numbers (v0.16):** synthetic 88.0% top-1 / **100% recall@5**,
-learnings (real-prose) 86.7% top-1 / 96.7% recall@5. Both default ON;
-opt-out via `enableNgramFallback: false` + `enableSynonyms: false` for
-a strict lexical-only baseline.
+session summaries, and the cross-project pool, with two lexical-bridging
+layers on top: a **2-gram fallback** for typos and rare compounds, and
+**curated coding-domain synonym expansion** for common semantic-gap pairs.
+No vector embeddings, no remote inference, no API calls.
+The design is deliberate:
+- **Deterministic** — same query, same ranking, every time. No model drift.
+- **Fast** — sub-millisecond on typical project corpora (≤ 1K entries). No cold start.
+- **Local-first** — zero network calls; your memory never leaves the machine.
+- **Auditable** — every hit is explainable from term overlap plus the ~50-pair synonym dictionary.
+**Capabilities.** Exact term + identifier match (`knit_classify_task`),
+rare-term emphasis (`PIPE_BUF`), multi-word ranking, tag filtering,
+cross-project diversification (max 2 per project), branch diversification on
+sessions (max 2 per branch), typo recovery via 2-gram fallback
+(`knit_clasify` → `knit_classify_task`), and synonym recovery (`hook` ↔
+`webhook`, `schema` ↔ `migration`, `auth` ↔ `authentication`, `cache` ↔
+`memo`, `deploy` ↔ `ship` ↔ `release`, … — see
+[`src/engine/retrieval/synonyms.ts`](src/engine/retrieval/synonyms.ts) for the
+full ~50-pair dictionary). Synonym matches score at 0.4× a direct hit, so exact
+matches always rank higher.
+**Benchmarks.** Synthetic 88.0% top-1 / **100% recall@5**; real-prose learnings
+86.7% top-1 / 96.7% recall@5. Both layers default on; set
+`enableNgramFallback: false` + `enableSynonyms: false` for a strict
+lexical-only baseline.
+**Roadmap.** A hybrid retriever (BM25 + local embeddings, fused via RRF) for
+paraphrase and abstraction-bridging is a v0.21+ candidate — opt-in,
+bench-gated, and local-first.
 ---
+## ✨ What's new in v0.22.0
+- **Host composition.** Knit detects the host at the MCP handshake (`clientInfo`) and composes with its native orchestration on complex tasks: Claude Code → dynamic workflow, Cursor → parallel worktree agents, Codex → subagents, Copilot/VS Code → agent mode + `/mcp.knit.*`. **Suggest-only hosts stay suggest-only** — Knit suggests its own worktree primitive and never fakes an auto-trigger.
+- **Stale-index fix.** `knit_query_*` and `knit_verify_claim` no longer return a confident *false* answer when a file changed after the index was built — `getBrain` auto-refreshes on source drift, and `verify_claim` **self-heals** (rebuilds + re-verifies in one call).
+- **Full tool-use.** `knit_classify_task` returns an ordered, signal-gated `tool_plan` so the diverse tool surface actually gets used instead of collapsing to 1–2 tools.
+- **Per-host adherence hooks** for Cursor / Codex / Copilot (merged with existing config; non-Claude hooks marked `_knitUnverified` — confirm in-host).
+- **Token optimization.** Hierarchical retrieval everywhere (headline + `id` + preview; full lesson via `knit_get_learning`), `token_mode: lean`, and a handshake-instructions rewrite that's **~22% leaner** while adding the new clauses.
+- **56 tools** (Tier-1 37). Shipped after a six-dimension audit and a real-life stdio end-to-end run across five `clientInfo` values.
+## ✨ What's new in v0.21.0
+- **Onboarding (`knit_onboard`).** Paste the README prompt after connecting Knit, describe your project + how you want Knit to behave, and the agent persists your preferences (strictness, features, focus domains) and records the project intent — surfaced every session, on any MCP host.
+- **Dashboard actions.** The dashboard can now **Refresh** (re-index a project) and **Export all projects** (Obsidian vault), in addition to viewing. Actions run as child processes (non-blocking) and stay loopback-bound + Host/Origin-gated.
+- **56 tools** (Tier-1 37). Shipped after a second six-dimension audit (0 critical) and a real-life end-to-end run.
+## ✨ What's new in v0.20.0
+v0.20 makes Knit a **fully-ready, dashboard-first brain** — a consolidated
+release (internal phases v0.17–v0.20) shipped after a six-dimension deep-clean
+audit (0 critical findings).
+- **Brain freshness layer.** One shared primitive governs staleness across every
+  store, so the brain never serves data it can't vouch for: handoffs auto-clear
+  once resolved or stale, idle classifier signals decay, old cross-project
+  learnings drop from search, and a learning that names a now-deleted file is
+  flagged. Freshness drives prune/clear/flag only — never the bench-gated
+  retrieval ranking.
+- **Tool count you can explain.** `knit doctor` and `knit_list_features` print
+  the live active count *with the reason* (e.g. `46 of 56 = 37 always-on + 9
+  teams [≥3 domains] · …`), so a number that legitimately varies by project
+  shape stops looking like a bug. A drift test pins the docs to the registry.
+- **Stays on-protocol mid-session.** A throttled, escalating reminder rides the
+  MCP tool response when an agent drifts (e.g. records work before classifying)
+  — reaching every MCP host, not just Claude Code. Silence with
+  `knit_set_protocol_strictness({ level: "off" })`.
+- **Dashboard-first.** Run **`knit`** to open the brain; the agent/stdio path is
+  unchanged. The dashboard gains a Knowledge-index view and a `knit doctor`
+  webapp health check. (v0.21 adds Refresh + Export actions to the dashboard;
+  `knit setup` remains CLI-only.)
+- **Composes with your setup.** Scans Claude Code Skills
+  (`.claude/skills/<name>/SKILL.md`) alongside slash commands; positioning leads
+  with the integrated brain rather than competitor comparisons.
+Security/hygiene from the audit: the command/Skill scanner now guards size and
+rejects symlinks before reading (no OOM, no arbitrary-file reads into the brain).
 ## ✨ What's new in v0.16.0
 v0.16 is the **semantic-lite release**. Two retrieval improvements that
@@ -268,6 +313,7 @@ return `{ status: 'protocol_required', next_action: '...' }` instead of
 proceeding — the agent reads the response, follows the breadcrumb, retries.
 This is the universality answer: same enforcement, transport layer instead
 of host layer. Default strictness stays `warn` so existing flows are unchanged.
+(v0.20 extends this with mid-session re-surfacing — see *What's new in v0.20.0* above.)
 ### ⚡ Agent-native slash-command auto-detection
@@ -325,7 +371,7 @@ A single command opens a local-first analytics surface at `http://127.0.0.1:7421
 **Real-time sync via SSE.** The server watches `~/.knit/` via `fs.watch`; any agent recording a learning anywhere updates the open dashboard within ~250ms. No polling.
-### 🔐 Security hardening (real, not theater)
+### 🔐 Security hardening
 The dashboard is a localhost HTTP server, which has real attack surface. v0.13 closes it:
@@ -363,30 +409,30 @@ The dashboard works regardless of which agent you use — it reads the brain fro
 | `knit_classify_task` response | ~500 tok | **~150 tok** | 70% |
 | `knit_load_session` response | ~3–5 KB | **~1.5 KB** | ~60% |
-Each surface gets a `healthy | warn | over-budget` verdict from `knit_brain_status.token_budget`. **Drift is a regression test, not a vibes claim.**
+Each surface gets a `healthy | warn | over-budget` verdict from `knit_brain_status.token_budget`, enforced by a regression test.
 ---
 ## 📊 The dashboard
-Run `knit ui` to open the local analytics surface. **Single command**, no other CLI needed for normal operation:
+Run **`knit`** to open the brain (the local analytics surface); `knit ui` is an explicit alias:
 ```bash
-knit ui
+knit
 # Knit Dashboard — http://127.0.0.1:7421
 # Reading from: /Users/<you>/.knit
 # Press Ctrl-C to stop.
-# (automatically opens your default browser)
+# (opens your default browser; visit the URL above if it does not)
 ```
 | Feature | What you see |
 |---|---|
 | **Bento home** | Big "Net tokens saved" hero card (dark), live recent activity (green "live" dot when SSE connected), memory hit-rate gauge, top projects by ROI as color-blocked cards |
 | **Brain graph** | Force-directed visualization of one project's learnings. Nodes sized by access count, colored by domain. Edges by Jaccard similarity over tags + domains. Click any node → side panel with the full lesson. Threshold slider live-recomputes the graph. |
-| **Per-project deep dive** | Hero card with verdict tone (cold/warming/compounding/strong), retrieval signals, classifications-by-tier breakdown, top domains heatmap, searchable learnings list |
-| **Health** | Install diagnostics — Node version, Knit version, ~/.knit permissions, MCP registration in `~/.claude.json` |
+| **Per-project deep dive** | Hero card with verdict tone (cold/warming/compounding/strong), retrieval signals, classifications-by-tier breakdown, top domains heatmap, searchable learnings list, Knowledge index, and **Refresh** (re-index this project) + **Export all projects** (Obsidian vault) actions |
+| **Health** | Install diagnostics — Node version, Knit version, ~/.knit permissions, per-agent MCP registration |
-**API endpoints** (all read-only, all 127.0.0.1 only):
+**API endpoints** (127.0.0.1 only, Host/Origin-gated):
 - `GET /api/version` — runtime version + update check + security metadata
 - `GET /api/brain/summary` — global counts
@@ -394,20 +440,28 @@ knit ui
 - `GET /api/projects` — project list
 - `GET /api/projects/:id/learnings` — full learning entries
 - `GET /api/projects/:id/metrics` — compounding ROI for one project
+- `GET /api/projects/:id/knowledge` — knowledge-index summary
 - `GET /api/projects/:id/graph` — force-directed node + edge data (Jaccard threshold tunable)
 - `GET /api/global/learnings` — cross-project pool
 - `GET /api/doctor` — install diagnostics
 - `GET /api/events` — Server-Sent Events stream for real-time sync
+- `POST /api/projects/:id/refresh` — re-index a project (source path from its meta; spawned as a child process)
+- `POST /api/export` — export all projects to a fixed `~/.knit/exports/` vault
 ---
-## 🛠️ 55 MCP Tools
+## 🛠️ 56 MCP Tools
-> **49 active by default** at first handshake. The remaining 6 are tier-gated:
-> teams (9 tools, auto-on when ≥3 domains detected), subagents (1 tool, auto-on
-> when `.claude/agents/` exists), and admin (3 tools, opt-in via
-> `knit_enable_feature("admin")`). Call `knit_list_features` to see what's
-> available and how to enable.
+> **37 always-on, up to 19 conditional, 56 total.** The active count varies by
+> project shape, so it isn't one fixed number — it's `37` plus whichever
+> conditional groups your project triggers: teams (9 tools, auto-on when ≥3
+> domains detected), diagnostics (6 tools, on during your first session),
+> subagents (1 tool, auto-on when `.claude/agents/` exists), and admin (3 tools,
+> opt-in via `knit_enable_feature("admin")`). That's why one machine shows 46
+> and another 44 — it reflects each project's shape. Run `knit doctor` (or call
+> `knit_list_features`) for your project's **live count and the reason for it**.
+> The groups below cover the main tools; `knit_list_features` is the
+> authoritative live list.
 <details open>
 <summary><strong>🕸️ Knowledge graph</strong> <em>(Tier 1, ~5ms)</em></summary>
@@ -453,8 +507,9 @@ knit ui
 | `knit_get_workflow` | Fetch protocol depth for one phase on demand. Sections: `overview, tier, phases, research, ideate, plan, execute, optimize, review, tdd, learn, handoff, ship, tools`. |
 | `knit_get_suggestions` | Adaptive warnings from past patterns in given domains. |
 | `knit_reflect` | Detect patterns across recorded learnings (per-project + global pool). Useful with ≥3 entries. |
-| `knit_setup_project` | Describe a non-code project (legal, marketing, research) to bootstrap domain teams. |
-| `knit_prune_sessions` | Prune `sessions.jsonl` by age (default 90 days). Atomic rewrite. |
+| `knit_onboard` | **v0.21.** One-time onboarding: captures the project description and how the user wants Knit to behave, persists preferences (strictness, features, focus domains), and records the project intent. |
+| `knit_scan_agent_commands` | Scan each MCP host's slash-command + skill directories; surface user-defined commands so Knit composes with them. |
+| `knit_suggest_command` | Per-phase lookup against scanned commands; returns the agent-native command to invoke. |
 </details>
@@ -477,7 +532,7 @@ Runtime enforcement of the Knit protocol via PreToolUse and SessionStart hooks.
 |---|---|
 | `knit_brain_status` | Brain health + **token-budget** verdicts per surface + `update_available` notification + integrations summary. |
 | `knit_list_features` | Surfaces hidden tools and tells you how to enable them. The escape hatch. |
-| `knit_enable_feature` | Flip on a Tier-2/3 feature (`teams`, `subagents`, `admin`). Emits `notifications/tools/list_changed` — new tools appear without a Claude Code restart. |
+| `knit_enable_feature` | Flip on a Tier-2/3 feature (`teams`, `subagents`, `admin`). Emits `notifications/tools/list_changed` — new tools appear without an agent restart. |
 | `knit_disable_feature` | Symmetric to enable. |
 | `knit_scan_integrations` | Re-detect existing workflow frameworks (Ruflo, gstack, CodeTour, Conductor, other MCP servers, custom CLAUDE.md sections). |
 | `knit_compounding_metrics` | Quantifies *"Knit gets cheaper over time"* — sessions, cache hits, reuse-ratio %, estimated tokens saved. Verdict: `cold \| warming \| compounding \| strong`. |
@@ -515,7 +570,9 @@ Runtime enforcement of the Knit protocol via PreToolUse and SessionStart hooks.
 | Tool | What it does |
 |---|---|
-| `knit_setup_project` | Bootstrap domain teams for a non-code project. One-time. |
+| `knit_setup_project` | Bootstrap domain teams for a non-code project (legal, marketing, research). One-time. |
+| `knit_prune_sessions` | Prune `sessions.jsonl` by age (default 90 days). Atomic rewrite. Auto-prune handles this normally. |
+| `knit_reset_calibration` | Wipe per-project classifier calibration. Discards accumulated tuning. |
 </details>
@@ -626,7 +683,7 @@ knit install-agents --refresh    # re-fetch from network even if cached
   "token_budget": {
     "budgets": {
       "claude_md":            { "bytes": 2048,  "target_bytes": 6500,  "verdict": "healthy" },
-      "tool_registry":        { "bytes": 8400,  "target_bytes": 8500,  "verdict": "healthy", "active_tool_count": 31, "total_tool_count": 43 },
+      "tool_registry":        { "bytes": 8400,  "target_bytes": 8500,  "verdict": "healthy", "active_tool_count": 46, "total_tool_count": 56 },
       "instructions":         { "bytes": 2200,  "target_bytes": 2500,  "verdict": "healthy" },
       "per_session_overhead": { "bytes": 12648, "target_bytes": 17500, "verdict": "healthy" }
     },
@@ -641,7 +698,7 @@ knit install-agents --refresh    # re-fetch from network even if cached
   "update_available": {
     "current": "0.8.0",
     "latest":  "0.9.0",
-    "upgrade": "Restart Claude Code to spawn a fresh MCP — npx will auto-fetch the new version."
+    "upgrade": "Restart your agent to spawn a fresh MCP — npx will auto-fetch the new version."
   }
 }
 ```
@@ -652,14 +709,19 @@ Pair with `knit_compounding_metrics` for the value side of the ledger (sessions,
 ## 💻 CLI
+The surface is dashboard-first: `knit` opens the brain, `knit setup` performs
+one-time agent registration. The remaining commands are operational tooling for
+scripting and CI; their views are progressively moving into the dashboard.
 ```bash
-knit setup            # one time: detects all 6 MCP-speaking agents and registers Knit in each
-knit doctor           # install health check: version, MCP registration per agent, knowledgebase
-knit ui               # launch the local Knit dashboard (http://127.0.0.1:7421)
-knit status           # text snapshot: sessions, learnings, hit rate, knowledge health
-knit refresh          # force rebuild knowledge brain
-knit install-agents   # install VoltAgent subagents into <project>/.claude/agents/
-knit export <fmt>     # export learnings (current targets: obsidian)
+knit                  # open the brain (the dashboard at http://127.0.0.1:7421)
+knit setup            # one-time: detect installed MCP-speaking agents and register Knit in each
+knit doctor           # install health check: version, per-agent MCP registration, webapp bundle, knowledgebase
+knit ui               # explicit alias for the dashboard (same as bare `knit`)
+knit status           # terminal snapshot: sessions, learnings, hit rate, knowledge-index health
+knit refresh          # rebuild the knowledge index from source
+knit install-agents   # install subagent definitions into <project>/.claude/agents/
+knit export <fmt>     # export learnings (supported targets: obsidian)
 ```
 Example `knit status`:
@@ -677,7 +739,7 @@ Knowledge Base
 Token budget (v0.16)
   CLAUDE.md:           2.0 KB  → healthy
-  Tool registry:       ~13 KB  → warn (49 active / 55 total)
+  Tool registry:       ~13 KB  → warn (46 active / 56 total)
   Instructions:        ~4 KB   → healthy
   Per-session total:   ~20 KB  → healthy
@@ -689,58 +751,32 @@ Compounding
 ---
-## 🆚 How it's different
-|  | gstack (skills) | ECC (agents) | Ruflo (orchestration) | **Knit** |
-|--|---|---|---|---|
-| **Bet** | Slash-command flows | Agent rules | 100+ agents in swarms | **One disciplined agent, compounding memory** |
-| **Setup** | Install skills per-project | Manual `.claude/` setup | `npx ruflo init` (heavy) | **`npx knit-mcp setup` (light)** |
-| **Memory** | jsonl files in-tree | Memory directory | Vector DB + 4-tier consolidation | **Local, searchable, vectorless BM25 + graph fusion + 2-gram fallback + 50-pair synonym dictionary** |
-| **Token cost** | Skills loaded into context | Rules loaded into context | 314 tools advertised | **~2 KB CLAUDE.md, tier-gated registry, budget guardrail** |
-| **Parallel work** | None | None | Multi-agent swarms + federation | **Team-scoped git worktrees** |
-| **Cloud dependency** | None | None | Cognitum.One (cloud backbone) | **None — fully local** |
-| **Self-measurement** | None | None | Cost-tracker plugin | **`knit_brain_status.token_budget` + `knit_compounding_metrics`** |
-| **Anti-hallucination** | None | None | None advertised | **`knit_verify_claim` + citation rule + pre/post import validation** |
-| **Non-code projects** | No | No | Limited | **Description-driven via `knit_setup_project`** |
+## 🧠 Why Knit
-**The bet:** Ruflo for agent quantity (swarms, federation, plugins). Knit for **agent quality** (memory, classification, token discipline, hallucination defense). Different markets. The integration scanner detects Ruflo when installed and tailors instructions to defer routing to it — Knit operates as the memory + classification substrate underneath.
+Knit is a **project brain your agent plugs into** — a live code knowledge graph wired into ranked memory and a task classifier that routes work by impact. The pieces aren't sold separately; the value is the integration:
----
-## 🧭 Honest comparison vs memory libraries
+- **Graph-grounded recall** — memory ranked by what your change *structurally* touches (dependents, fanout), not just keyword overlap.
+- **Impact classifier** — every task is sized (Inquiry → Trivial → Standard → Complex) and complex work auto-enters plan mode. The brain decides *how carefully* to handle a change, not just what to recall.
+- **Self-calibrating** — `knit_record_false_positive` shifts the classifier's thresholds per project; it gets less wrong over time.
+- **Token accounting** — `knit_compounding_metrics` makes "cheaper over time" chartable per project.
+- **Parallel team worktrees** — multi-domain work fans out into isolated git worktrees so agents don't collide.
+- **Brain integrity** — a freshness layer keeps every datum trustworthy: stale handoffs auto-clear, idle classifier signals decay, deleted-file references get flagged.
+- **Fully local, zero-glue** — `npx knit-mcp setup` and it's a brain every MCP host (Claude Code, Cursor, Codex, Cline, Continue, Copilot) shares. No cloud, no SDK wiring.
-The mem0 / Letta / agentmemory comparison deserves a separate section because they're a different category — **memory-as-a-service libraries**, not MCP-native workflow layers. Reading their published benchmarks side-by-side:
+**"Why use Knit if my agent already has memory?"** Your agent's memory *stores notes*; Knit *decides* — it ranks recall by what your change structurally touches, classifies each task to set the right workflow depth, and tracks the cost over time. Graph-grounded routing, not a markdown notepad.
-| | mem0 | Letta (MemGPT) | agentmemory | **Knit** |
-|--|---|---|---|---|
-| **Published benchmark** | LOCOMO: 67–92% LLM-as-Judge; ~90% token reduction (1.7K vs 26K per conversation) | No head-to-head token-reduction number; "Letta Leaderboard" benchmarks *LLMs* on agentic memory, not Letta | LongMemEval-S: **95.2% R@5** with BM25+RRF+graph; 86.2% BM25-only | **Not yet measured.** Same architecture as agentmemory; no published number. |
-| **Retrieval architecture** | Vector + graph (Mem0g variant) | OS-inspired tiered memory (core/recall/archival) | BM25 + local vectors + KG fused via RRF (k=60) | BM25 + RRF + graph-traversal (fused via RRF k=60). Per-project + cross-project diversity caps. |
-| **Install shape** | SDK integration; managed cloud or self-hosted | SDK integration; self-hosted server | Python library | **`npx knit-mcp setup` → MCP server, zero glue.** Works with Claude Code / Cursor / Codex / any MCP host. |
-| **Workflow primitive** | None — pure memory | Agent-managed memory operations | None — pure retrieval | **4-tier classifier + plan-mode + protocol guard + parallel team worktrees.** |
-| **Self-calibration** | No | No | No | **Per-project classifier calibration** (v0.11): user FP feedback shifts thresholds; classifier gets less wrong over time. |
+Knit also **composes with** whatever else you run: `knit_scan_integrations` detects existing workflow frameworks and slash commands and defers to them where they fit — Knit stays the memory + classification brain underneath.
-### What's honest about this
+### Retrieval benchmarks
-**Knit's measured retrieval on a 50-question synthetic harness (v0.11.2):**
+Knit's retrieval is BM25 + reciprocal-rank fusion + graph traversal — **vectorless, deterministic, auditable**, no embedding model or cloud call. In-repo regression gates:
-| Metric | Knit (v0.11.2 synthetic) | agentmemory (LongMemEval-S, published) |
-|---|---|---|
-| Top-1 accuracy | **86.0%** | not published in that form |
-| Recall@5 | **96.0%** | **95.2%** |
-Run it yourself: `npm run bench`. Source: [`benchmarks/retrieval-synthetic.ts`](./benchmarks/retrieval-synthetic.ts).
-**These numbers are NOT apples-to-apples with agentmemory's.** Their benchmark is 1,500 questions from real long conversations; Knit's is 50 hand-authored questions on a 7KB synthetic corpus. The numbers are close because the architecture is similar (BM25 + RRF), not because we've proven parity at scale. **Real comparison requires running LongMemEval-S on Knit** — on the roadmap (a v0.20+ candidate alongside hybrid BM25 + local embeddings retrieval).
-**Knit isn't trying to be a better mem0.** It's a different product:
-- **MCP-native + zero-glue install** — mem0/Letta require SDK integration; Knit drops into any MCP host (Claude Code, Cursor, Codex) with one command.
-- **Workflow primitive** — the 4-tier classifier + plan-mode + protocol guard + team worktrees is what makes Knit a *command layer*, not a memory library.
-- **Per-project classifier calibration** (v0.11 slice 4) — `knit_record_false_positive` with a direction tag shifts thresholds over time. Nobody else does this; nobody else needs to, because they're memory libraries, not workflow routers.
-- **Measurable cheapness** — `knit_compounding_metrics` + `knit_get_metrics_history` make the "cheaper over time" claim *chartable per project*. mem0 publishes aggregate dataset numbers; Knit ships per-user instrumentation.
-### What's deferred
+| Harness | Top-1 | Recall@5 | Run it |
+|---|---|---|---|
+| 50-question synthetic | **88%** | **100%** | `npm run bench` |
+| 30-question narrative prose | **86.7%** | **96.7%** | `npm run bench:learnings` |
-LongMemEval-S R@5/R@10 + LOCOMO LLM-as-Judge runs are on the roadmap (v0.13+). Until they're published, treat any cross-system token-savings comparison as architectural-claim-only.
+These are focused in-repo regression gates that block a merge if retrieval degrades. A run on a standard long-memory benchmark and a hybrid BM25 + local-embeddings retriever are v0.21+ candidates.
 ---
@@ -748,6 +784,9 @@ LongMemEval-S R@5/R@10 + LOCOMO LLM-as-Judge runs are on the roadmap (v0.13+). U
 | Version | Headline |
 |---|---|
+| **v0.22.0** | **Host composition + full tool-use + stale-index fix.** Knit detects the host at the handshake and composes with its native orchestration (Claude Code dynamic workflows, Cursor worktree agents, Codex subagents, Copilot/VS Code `/mcp.knit.*`); suggest-only hosts stay suggest-only. `getBrain` auto-refreshes a stale index and `knit_verify_claim` self-heals, so `query_*`/`verify` never return a confident false answer. `knit_classify_task` returns a signal-gated `tool_plan`; per-host adherence hooks (Cursor/Codex/Copilot, unverified-in-host flagged); `orchestration`/`token_mode` onboarding prefs; hierarchical retrieval + a ~22% leaner handshake. Shipped after a six-dimension audit + a real-life stdio E2E across five `clientInfo` values (incl. a live mid-session no-staleness run). 56 tools, 935 tests. |
+| **v0.21.0** | **Onboarding + dashboard actions.** `knit_onboard` captures the project + how the user wants Knit (preferences persisted, intent surfaced every session, host-agnostic). The dashboard gains **Refresh** + **Export all projects** actions (non-blocking child processes, Host/Origin-gated). New `GET /api/projects/:id/knowledge` + a `knit doctor` webapp check. Shipped after a second six-dimension audit (0 critical) + a real-life E2E. 56 tools. |
+| **v0.20.0** | **Brain integrity + clarity + dashboard-first.** A freshness layer keeps every datum trustworthy (handoffs auto-clear, idle classifier signals decay, deleted-file references get flagged). `knit doctor`/`knit_list_features` explain the live tool count. Mid-session protocol re-surfacing keeps agents on-protocol across every MCP host. **`knit`** opens the brain dashboard; a read-only Knowledge-index view + Skills composition land. Removed competitor comparisons for intrinsic positioning. Shipped after a six-dimension deep-clean audit (0 critical). 55 tools, 855 tests. |
 | **v0.16.0** | **Semantic-lite retrieval.** Curated coding-domain synonym dictionary (~50 pairs) closes the most common BM25 lexical gaps (`hook` ↔ `webhook`, `schema` ↔ `migration`, etc.) without an embedding model. 2-gram fallback for typos default ON after bench verification. Synthetic bench 88% top-1 / **100% recall@5** (was 96%); learnings 86.7% top-1 / 96.7% recall@5. Plus a FIFO-safe `O_NONBLOCK` fix to `handleIndexRequirements`. 55 tools, 818 tests. |
 | **v0.15.0** | **Deep-clean audit release.** Six-dimension second audit + atomic-write helper applied to 9+ sites including `~/.claude.json` (a torn write there used to brick Claude Code). SHA256 sidecars on agent-fetcher cache writes detect tampering and re-fetch. `qs` CVE pinned via `npm overrides` → 0 vulns. Opt-in BM25 2-gram fallback for typos. `pruneLearningsByAge` + schema-validated `readLearnings`. Webapp DoctorView shows per-agent rows. Update notice surfaces in MCP `instructions` field for all 6 agents. 55 tools, 805+ tests. |
 | **v0.14.1** | **Ship-readiness audit + atomicity hardening.** First six-dimension audit + 14 P1 fixes: `writeFileAtomic` helper across 9+ persistence paths; `handleSetupProject` redaction gap closed; `record_learning` substring dedup matches the description claim; soft-gate documented in instructions field; pre-publish leak gate. 55 tools. |
@@ -757,7 +796,7 @@ LongMemEval-S R@5/R@10 + LOCOMO LLM-as-Judge runs are on the roadmap (v0.13+). U
 | **v0.11.4** | Dogfood audit · ran a full audit of Knit's own codebase using its own `knit_spawn_team_worktree` primitive (4 parallel teams: Core Logic, Infrastructure, UI, Quality Assurance). Fixes: HIGH `engram refresh` no longer clobbers user-curated CLAUDE.md (now uses `spliceKnitBlock` like `cache.ts`); `saveSource`/`loadSource` validate `sourceId`; `appendGlobalLearning` propagates write failures; `redactSecrets` applied to `label`/`tags`/`domains` across all persistence boundaries; 100KB response ceiling on `knit_generate_test_cases`; full v0.11 tool surface now documented in `workflow-protocol.ts` generator (was frozen at the v0.4 surface). Plus: 16 key tools reclassified with `[PROTOCOL]`/`[REVIEW]`/`[MEMORY]`/`[GRAPH]` prefixes so the LLM picks the right tool reliably. 53 tools, 687 tests. |
 | **v0.11.3** | Propagation patch · `update_available` flag now surfaces in `knit_load_session` response (≈100% session reach vs. brain_status' low reach) + startup stderr nag on stale versions. Helps FUTURE upgrades land faster; doesn't retroactively reach v0.10.x users. 53 tools, 665 tests. |
 | **v0.11.2** | Pre-publish polish · chunk cap (2000) + `errorResponse` envelope across handlers + CLAUDE.md generator surfaces v0.11 tools · new `engram doctor` install health-check CLI · upgrade-path smoke test caught + fixed a data-loss bug in cache.ts (Case B was wiping user permissions on upgrade) · 11 real exploit-payload integration tests prove C1/C2/H1 fixes hold · `npm run bench` ships a synthetic retrieval harness (50 Q&A) measuring 86% top-1 / 96% R@5. 53 tools, 664 tests. |
-| **v0.11.1** | Audit-driven hardening · 3 CRITICAL (source_id path traversal, post-edit tsc shell injection, live calibration bug) + 10 HIGH fixes from a 5-agent audit, implemented in 3 parallel `knit_spawn_team_worktree` teams. HOOKS_VERSION 11 (auto-upgrades existing users). New `knit_delete_requirements` tool. Honest comparison vs mem0/Letta added. 53 tools, 636 tests. |
+| **v0.11.1** | Audit-driven hardening · 3 CRITICAL (source_id path traversal, post-edit tsc shell injection, live calibration bug) + 10 HIGH fixes from a 5-agent audit, implemented in 3 parallel `knit_spawn_team_worktree` teams. HOOKS_VERSION 11 (auto-upgrades existing users). New `knit_delete_requirements` tool. 53 tools, 636 tests. |
 | **v0.11.0** | Verify Layer + auto-config foundation · mandatory `knit_verify_claim` REVIEW gate · post-edit diff verify + universal `tsc` check · drift detector · self-healing classifier (per-project calibration) · `knit_index_requirements` + `knit_generate_test_cases` (BM25 over long specs) · `knit_get_fingerprint` + `knit_infer_domains` + `knit_compose_template` (zero-config CLAUDE.md). 52 tools, 625 tests. |
 | **v0.10.0** | Token-economics release · risk × scope × change_kind classifier split · `context_budget_remaining` graceful degradation · per-project diversity cap on cross-project search · 11 new compounding-metrics fields + weekly snapshot persistence + `knit_get_metrics_history`. Makes "Knit makes Claude cheaper" a chartable number from day 1. |
 | **v0.9.0** | Hook-level enforcement · citation rule · `knit_verify_claim` · auto-search in classify · `suggested_reads` · `knit_get_learning` · `knit_consolidate_learnings`. |
@@ -800,7 +839,7 @@ npm run build      # compile CLI + MCP server + webapp
 ```
 knit (npm package)
 ├── dist/cli.js                 # CLI: setup, doctor, ui, status, refresh, install-agents, export
-└── dist/mcp/server.js          # MCP server: 55 tools (tier-gated), auto-init
+└── dist/mcp/server.js          # MCP server: 56 tools (tier-gated), auto-init
 per-project, in ~/.knit/projects/<hash>/
 ├── knowledge.json              # import graph + exports + test map

package/dist/cache-3QREKWAW.js ADDED Viewed

@@ -0,0 +1,21 @@
+import {
+  detectProjectRoot,
+  getBrain,
+  refreshBrain,
+  resetStalenessThrottle
+} from "./chunk-5R5YKDNT.js";
+import "./chunk-X4PHSVRB.js";
+import "./chunk-6BQPXFRL.js";
+import "./chunk-2GDNMY7N.js";
+import "./chunk-WKQHCLLO.js";
+import "./chunk-V54QPQ6K.js";
+import "./chunk-POXT5OYN.js";
+import "./chunk-DIU7RE5X.js";
+import "./chunk-7UFS67HP.js";
+import "./chunk-5EUQ2DCN.js";
+export {
+  detectProjectRoot,
+  getBrain,
+  refreshBrain,
+  resetStalenessThrottle
+};

package/dist/chunk-2GDNMY7N.js ADDED Viewed

@@ -0,0 +1,57 @@
+// src/engine/freshness.ts
+import { existsSync } from "fs";
+import { isAbsolute, resolve } from "path";
+var MS_PER_DAY = 864e5;
+var FRESHNESS = {
+  /** A handoff older than this is considered abandoned, not in-flight. */
+  HANDOFF_TTL_DAYS: 14,
+  /** Global cross-project learnings older than this drop out of search. */
+  GLOBAL_LEARNING_TTL_DAYS: 365,
+  /** Classifier FP counters idle longer than this decay (stale tuning signal). */
+  CALIBRATION_DECAY_DAYS: 120,
+  /** Project session entries older than this are pruned. */
+  SESSION_TTL_DAYS: 90,
+  /** Project learnings older than this are pruned (FPs + accessed entries kept). */
+  LEARNING_TTL_DAYS: 180,
+  /** Re-run throttle: at most one age-prune sweep per project per this window. */
+  PRUNE_THROTTLE_DAYS: 1
+};
+function ageDays(iso, nowMs = Date.now()) {
+  if (typeof iso !== "string" || iso.length === 0) return null;
+  const t = Date.parse(iso);
+  if (!Number.isFinite(t)) return null;
+  return (nowMs - t) / MS_PER_DAY;
+}
+function isStale(iso, maxAgeDays, nowMs = Date.now()) {
+  const age = ageDays(iso, nowMs);
+  if (age === null) return false;
+  return age > maxAgeDays;
+}
+function resolveRef(rootPath, ref) {
+  if (typeof ref !== "string" || ref.trim().length === 0) return null;
+  const r = ref.trim();
+  return isAbsolute(r) ? r : resolve(rootPath, r);
+}
+function sourceExists(rootPath, ref) {
+  const abs = resolveRef(rootPath, ref);
+  return abs !== null && existsSync(abs);
+}
+function extractFileRefs(text) {
+  if (typeof text !== "string" || text.length === 0) return [];
+  const re = /\b([\w.-]+\/[\w./-]+\.[a-z0-9]{1,5})\b/gi;
+  const out = /* @__PURE__ */ new Set();
+  let m;
+  while ((m = re.exec(text)) !== null) {
+    out.add(m[1]);
+  }
+  return [...out];
+}
+export {
+  FRESHNESS,
+  ageDays,
+  isStale,
+  resolveRef,
+  sourceExists,
+  extractFileRefs
+};