npm - prism-mcp-server - Versions diffs - 10.0.0 → 11.0.0 - Mend

prism-mcp-server 10.0.0 → 11.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/README.md +121 -74
package/dist/cli.js +2 -2
package/dist/config.js +87 -0
package/dist/scholar/webScholar.js +142 -160
package/dist/storage/index.js +1 -1
package/dist/storage/sqlite.js +8 -1
package/dist/storage/supabase.js +36 -6
package/dist/storage/supabaseMigrations.js +33 -1
package/dist/tools/compactionHandler.js +135 -32
package/dist/tools/definitions.js +29 -0
package/dist/tools/handlers.js +1 -1
package/dist/tools/taskRouterHandler.js +63 -1
package/dist/utils/googleSearchApi.js +40 -0
package/dist/utils/hrr.js +193 -0
package/dist/utils/localLlm.js +145 -0
package/package.json +2 -2

package/README.md CHANGED Viewed

@@ -25,23 +25,67 @@ https://github.com/dcostenco/prism-mcp/raw/main/docs/prism_mcp_demo.mp4
 ## 📖 Table of Contents
 - [Why Prism?](#why-prism)
-- [Quick Start](#-quick-start)
-- [The Magic Moment](#-the-magic-moment)
-- [Setup Guides](#-setup-guides)
-- [Universal Import: Bring Your History](#-universal-import-bring-your-history)
-- [What Makes Prism Different](#-what-makes-prism-different)
-- [Cognitive Architecture (v7.8)](#-cognitive-architecture-v78)
-- [Data Privacy & Egress](#-data-privacy-egress)
-- [Use Cases](#-use-cases)
-- [What's New](#-whats-new)
-- [How Prism Compares](#-how-prism-compares)
-- [CLI Reference](#-cli-reference)
-- [Tool Reference](#-tool-reference)
+- [Quick Start](#quick-start)
+- [The Magic Moment](#the-magic-moment)
+- [Setup Guides](#setup-guides)
+- [Universal Import: Bring Your History](#universal-import-bring-your-history)
+- [What Makes Prism Different](#what-makes-prism-different)
+- [Cognitive Architecture (v7.8)](#cognitive-architecture-v78)
+- [Data Privacy & Egress](#data-privacy-egress)
+- [Use Cases](#use-cases)
+- [What's New](#whats-new)
+- [How Prism Compares](#how-prism-compares)
+- [CLI Reference](#cli-reference)
+- [Tool Reference](#tool-reference)
 - [Environment Variables](#environment-variables)
 - [Architecture](#architecture)
-- [Scientific Foundation](#-scientific-foundation)
-- [Milestones & Roadmap](#-milestones-roadmap)
-- [Troubleshooting FAQ](#-troubleshooting-faq)
+- [Scientific Foundation](#scientific-foundation)
+- [Milestones & Roadmap](#milestones-roadmap)
+- [Troubleshooting FAQ](#troubleshooting-faq)
+---
+## 🔬 <a name="deep-research-intelligence"></a>v11.0 Deep Research Intelligence (Auto-Scholar)
+Prism v11.0 transforms your AI agent from a "Coder" into a "Clinical Scientist." It features a **Tavily-Enhanced Multi-Provider Discovery Pipeline** that grounds Gemini 2.5 Flash's thinking in real-world empirical data.
+### 🥊 The Global Benchmarks: Prism v11 vs. Standard RAG
+| Feature | **Standard AI Memory (Mem0/Zep)** | **Prism v11.0 (Elite Architecture)** |
+| :--- | :--- | :--- |
+| **Search Complexity** | $O(N)$ or $O(\log N)$ (Scales with data) | **$O(1)$ Zero-Search (Constant time via HRR)** |
+| **Discovery Logic** | General Web Search (Snippets) | **Parallel Academic Discovery (PubMed, ERIC, S2)** |
+| **Reasoning Model** | Flat List (Simple Similarity) | **ACT-R Spreading Activation (Causal Graph)** |
+| **Privacy Mode** | Cloud-First (SaaS) | **Local-First (HIPAA-Hardened / Air-Gapped)** |
+| **Intelligence Floor** | Generic GPT-4 Advice | **Data-Driven Clinical Evidence (62% CI Warnings)** |
+### 🔍 Supported Discovery Engines & Databases
+1.  **Tavily AI** (Elite): Primary discovery engine for AI-native deep crawling and PDF/Abstract extraction.
+2.  **PubMed (NCBI)** (Clinical): The world's largest biomedical database for clinical citations.
+3.  **ERIC (Education Research)** (Behavioral): The definitive database for ABA and pediatric interventions.
+4.  **Semantic Scholar** (Academic): AI-powered research tool providing "TLDR" summaries of 200M+ papers.
+5.  **DuckDuckGo Lite** (Fallback): Privacy-focused web discovery for general context.
+---
+### 🏥 Flagship Implementation: [Synalux](https://synalux.ai)
+**Synalux** is a high-compliance, local-first Practice Management System for ABA and Pediatrics. It is the flagship implementation of the Prism v11.0 engine, utilizing **Zero-Search Retrieval** and **Parallel Academic Discovery** to provide clinicians with real-time, evidence-based reasoning.
+---
+<details>
+<summary><strong>See Live Samples (Simplified Terms)</strong></summary>
+#### Topic: Helping a child with tactile focus
+*   **Without Deep Research**: "I recommend using sensory toys and maintaining a calm environment to help the child focus during tasks."
+*   **With Deep Research (v11.0)**: "Recent clinical studies indicate that high-frequency sensory input can actually *decrease* focus in 40% of pediatric cases. I recommend a low-frequency, high-pressure 'weighted' approach which showed a 3.5x improvement in sustained attention during clinical trials."
+#### Topic: Behavior extinction vs. reinforcement
+*   **Without Deep Research**: "Extinction is a common way to stop a behavior. You should also reinforce good behaviors at the same time."
+*   **With Deep Research (v11.0)**: "Research shows that using extinction alone leads to an 'extinction burst' (a temporary spike in the bad behavior) in 62% of cases. However, combining it with an alternative reinforcement strategy (DRA) reduces this risk to under 20%."
+</details>
 ---
@@ -55,15 +99,15 @@ Every time you start a new conversation with an AI coding assistant, it starts f
 Prism has three pillars:
-1. **🧠 Cognitive Memory** — Memories are ranked like a human brain: recently and frequently accessed context surfaces first, while stale context fades naturally via ACT-R activation decay. Raw experience consolidates into semantic principles through Hebbian learning. The result is retrieval quality that no flat vector search can match. *(See [Cognitive Architecture](#-cognitive-architecture-v78) and [Scientific Foundation](#-scientific-foundation).)*
+1. **🧠 Cognitive Memory ($O(1)$ Zero-Search)** — Prism uses **Holographic Reduced Representations (HRR)** to eliminate "searching" entirely. Memories are unbound mathematically from a superposition vector in constant time ($O(1)$), regardless of library size. Re-ranking is powered by the **ACT-R** model, mimicking biological recency and frequency.
-2. **🔗 Multi-Hop Reasoning** — When your agent searches for "Error X", Prism doesn't just find logs mentioning "Error X". Spreading activation traverses the causal graph and brings back "Workaround Y", which is connected to "Architecture Decision Z" — a literal train of thought. *(See [Cognitive Architecture](#-cognitive-architecture-v78).)*
+2. **🔗 Multi-Hop Causal Reasoning** — Prism doesn't just find "similar" things. Spreading activation traverses the causal graph and brings back context connected to your current problem through logical "trains of thought."
-3. **🏭 Autonomous Execution (Dark Factory)** — When you're ready, Prism can run coding tasks end-to-end with a fail-closed pipeline where an adversarial evaluator catches bugs the generator missed — before you ever see the PR. *(See [Dark Factory](#-dark-factory-adversarial-autonomous-pipelines).)*
+3. **🏭 Autonomous Execution (Dark Factory)** — When you're ready, Prism can run coding tasks end-to-end with a fail-closed pipeline where an adversarial evaluator catches bugs the generator missed — before you ever see the PR. *(See [Dark Factory](#dark-factory-adversarial-autonomous-pipelines).)*
 ---
-## 🚀 Quick Start
+## <a name="quick-start"></a>🚀 Quick Start
 ### Prerequisites
@@ -138,7 +182,7 @@ Then open `http://localhost:3001` instead.
 ---
-## ✨ The Magic Moment
+## <a name="the-magic-moment"></a>✨ The Magic Moment
 > **Session 1** (Monday evening):
 > ```
@@ -159,7 +203,7 @@ Then open `http://localhost:3001` instead.
 ---
-## 📖 Setup Guides
+## <a name="setup-guides"></a>📖 Setup Guides
 <details>
 <summary><strong>Claude Desktop</strong></summary>
@@ -438,11 +482,11 @@ Prism can be deployed natively to cloud platforms like [Render](https://render.c
 > ```
 > At the start of every conversation, call session_load_context with project "my-project" before doing any work.
 > ```
-> Claude Code users can use the `.clauderules` auto-load hook shown in the [Setup Guides](#-setup-guides). Prism also has a **server-side fallback** (v5.2.1+) that auto-pushes context after 10 seconds if no load is detected.
+> Claude Code users can use the `.clauderules` auto-load hook shown in the [Setup Guides](#setup-guides). Prism also has a **server-side fallback** (v5.2.1+) that auto-pushes context after 10 seconds if no load is detected.
 ---
-## 📥 Universal Import: Bring Your History
+## <a name="universal-import-bring-your-history"></a>📥 Universal Import: Bring Your History
 Switching to Prism? Don't leave months of AI session history behind. Prism can **ingest historical sessions from Claude Code, Gemini, and OpenAI** and give your Mind Palace an instant head start — no manual re-entry required.
@@ -475,7 +519,7 @@ npx -y prism-mcp-server universal-import --format gemini --path ./gemini_history
 ---
-## ✨ What Makes Prism Different
+## <a name="what-makes-prism-different"></a>✨ What Makes Prism Different
 ### 🧠 Your Agent Learns From Mistakes
@@ -629,7 +673,7 @@ Vector math measures *semantic similarity*, not *sentiment*. If an agent searche
 ---
-## 🧠 Cognitive Architecture (v7.8)
+## <a name="cognitive-architecture-v78"></a>🧠 Cognitive Architecture (v7.8)
 > *Prism v7.8 is our biggest leap forward yet. We have moved beyond flat vector search and implemented a true Cognitive Architecture inspired by human brain mechanics. With the new ACT-R Spreading Activation Engine, Episodic-to-Semantic memory consolidation, and Uncertainty-Aware Rejection Gates, Prism doesn't just store logs anymore — it forms principles, follows causal trains of thought, and possesses the self-awareness to know when it lacks information.*
@@ -694,7 +738,7 @@ Standard RAG (Retrieval-Augmented Generation) is now a commodity. Everyone has v
 ---
-## 🔒 Data Privacy & Egress
+## <a name="data-privacy-egress"></a>🔒 Data Privacy & Egress
 **Where is my data stored?**
@@ -725,7 +769,7 @@ Prism will recreate the directory with empty databases on next startup.
 ---
-## 🎯 Use Cases
+## <a name="use-cases"></a>🎯 Use Cases
 - **Long-running feature work** — Save state at end of day, restore full context next morning. No re-explaining.
 - **Multi-agent collaboration** — Dev, QA, and PM agents share real-time context without stepping on each other's memory.
@@ -859,37 +903,26 @@ The Generator strips the `console.log`, resubmits, and the next `EVALUATE` retur
 ---
-## 🆕 What's New
-> **Current release: v9.12.0 — Memory Security Hardening (Stored Prompt Injection Prevention)**
-- 🔒 **v9.12.0 — Memory Security Hardening:** Prevents **stored prompt injection** — the AI equivalent of stored XSS. New `sanitizeMemoryInput()` strips 8 categories of dangerous XML tags (`<system>`, `<instruction>`, `<prism_memory>`, etc.) from all text fields on every save. Context output now wrapped in `<prism_memory context="historical">` boundary tags across all 3 output paths (MCP tool, prompt, resource) so LLMs treat loaded memory as data, not instructions. Boundary tag spoofing blocked. 30 new security tests covering real-world attack scenarios (cross-session poisoning, Hivemind multi-agent hijacking). 311 total tests, 0 regressions. → [Changelog](CHANGELOG.md#9120---2026-04-15--memory-security-hardening-stored-prompt-injection-prevention)
-- 🧠 **v9.5.0 — Adversarial Behavioral Hardening:** Intent Classification Engine with 84 tests, 24 forbidden openers, XML Anti-Tag system, `<user_input>` isolation, IF/ELSE conflict resolution. 282 total tests.
-- 🧠 **v9.4.7 — ABA Precision Protocol:** Foundational behavioral engine injected into every `session_load_context` output. 5 rules: (1) Observable measurable goals with IOA≥80%, (2) Precise step-by-step execution with stop-fix-verify, (3) No reinforcement of wrong patterns, (4) Help first before redirecting, (5) Fix bugs without asking permission. Consolidates 4 previous skills (`fix-without-asking`, `command_verification`, `critical_resolution_memory`, removed contradictory `ask-first`) into 1 unified protocol. 83-test behavioral verification suite with edge cases. Split-brain false-warning fix. → [Changelog](CHANGELOG.md#947---2026-04-15--aba-precision-protocol-foundational-behavioral-engine)
-- 🕵️ **v9.4.6 — Stealth Browser Automation:** New `browse.py` HIPAA-hardened CLI for local Playwright-based browser automation with 6-layer anti-detection (playwright-stealth v2.0.3, deep JS fingerprint evasion, behavioral mimicry, Chromium anti-automation flags, network header fixing, persistent profiles). **100% pass rate on bot.sannysoft.com** (50+ tests). Features: FileVault enforcement, `chmod 600` audit log, PHI sanitization, ephemeral `/tmp` screenshots (APFS CoW workaround), UA↔WebGL consistency validation, 10-min REPL idle timeout, structured JSON output, Google Docs keyboard automation (`gdoc-read`/`gdoc-type`/`gdoc-find`). → [Changelog](CHANGELOG.md#946---2026-04-14--stealth-browser-automation-tool-browsepy)
-- 🔒 **v9.4.5 — Command Injection Fix & Dep Reduction:** `isOrphanProcess()` in `lifecycle.ts` interpolated a file-sourced PID into `execSync`. Fixed with `execFileSync` (no shell). Removed 2 unused runtime deps (25 → 23). Closes [#53](https://github.com/dcostenco/prism-mcp/issues/53).
-- 🔧 **v9.4.3 — ESM Bundling Fix:** Bundled dist had inlined OpenTelemetry CJS `require("async_hooks")` into ESM chunks, causing `Dynamic require of "async_hooks" is not supported` at runtime. Rebuilt with `tsc`. Affects CLI, session save/load, and MCP server startup.
-- 🔒 **v9.4.2 — Shell Injection Fix:** Deep code review found shell injection in `getGitDrift()` — `oldSha` was interpolated into `execSync` template string. Fixed with SHA format validation + `execFileSync` (no shell). Defense-in-depth.
-- 🔒 **v9.4.1 — Security Hardening & Bidirectional Sync:** Two-pass adversarial audit found 18 vulnerabilities (4C/5H/9M) — 17 fixed. Critical: fail-closed rate limiter, path traversal guards, error sanitization. High: plan name alignment (revenue fix), CORS allowlist, settings injection prevention. New: bidirectional `prism sync push` CLI command pushes local SQLite → Supabase, JWT enrichment eliminates N+1 DB queries, concurrency counter guaranteed via `try/finally`, 10MB request body limits.
-- 🎯 **v9.3.0 — TurboQuant ResidualNorm Tiebreaker:** Configurable ranking optimization for Tier-2 search. When compressed cosine scores are within ε of each other, prefers the candidate with lower `residualNorm` (more trustworthy compressed representation). `PRISM_TURBOQUANT_TIEBREAKER_EPSILON=0.005` gives +2pp R@1, +1pp R@5. Empirically validated at N=5K with A/B test. 1066 tests, 0 regressions. Inspired by [@m13v's suggestion](https://github.com/xiaowu0162/LongMemEval/issues/31).
-- 🔒 **v9.2.7 — Security Hardening:** Typed `PrototypePollutionError` class (replaces generic `Error` in `sanitizeForMerge()` — enables catch-site discrimination and forensic logging with `offendingKey`), explicit null-byte path injection guard in `SafetyController.validateActionsInScope()` (C-string truncation attack vector), and corrected CRDT merge semantics documentation (Remove-Wins-from-Either, not Add-Wins). 1055 tests, 0 regressions.
-- 🪟 **v9.2.6 — Windows CI Timeout Fix:** CLI integration tests timed out on Windows + Node 22.x GitHub Actions runners. Added `{ timeout: 30_000 }` to the describe block. 6 new residual distribution tests validating TurboQuant's QJL correction stability (zero R@5 delta between P50 and P95 residual vectors at d=128, 2K corpus).
-- 🔧 **v9.2.5 — Reconciliation Credential Probe Fix:** `supabaseReady` guard only resolved credentials when `requestedBackend === "supabase"`, causing reconciliation to silently skip. Added second credential probe for local + reconciliation path. Fixed Supabase schema mismatch on `key_context` column.
-- 🔄 **v9.2.4 — Cross-Backend Reconciliation:** Automatic two-layer sync from Supabase → SQLite on startup. When Claude Desktop writes handoffs and ledger entries to Supabase, Antigravity (local SQLite) now automatically detects stale data and pulls newer handoffs + the 20 most recent ledger entries. 5-second timeout prevents startup freeze. Targeted ID lookups (not full table scans) keep it safe for large databases. 13 tests including malformed JSON resilience, multi-role dedup, and timeout handling.
-- 🔧 **v9.2.3 — Code Review Hardening:** 10x faster split-brain detection (lightweight direct queries replace full `StorageBackend` construction), variable shadowing fix in CLI, resource leak fix in SQLite alternate client.
-- 🚨 **v9.2.2 — Critical: Split-Brain Detection & Prevention:** When multiple MCP clients use different storage backends (e.g., Claude Desktop → Supabase, Antigravity → SQLite), session state could silently diverge, causing agents to act on stale TODOs and outdated context. **New: `--storage` flag** on `prism load` CLI lets callers explicitly select which backend to read from. **New: Split-Brain Drift Detection** in `session_load_context` — compares active and alternate backend versions at load time and warns prominently when they diverge. Session loader script updated to respect `PRISM_STORAGE` environment variable.
-- 💻 **v9.2.1 — CLI Full Feature Parity:** `prism load` text mode now delegates to the real `session_load_context` handler, giving CLI-only users the same enriched output as MCP clients: morning briefings, reality drift detection, SDM intuitive recall, visual memory index, role-scoped skill injection, behavioral warnings, importance scores, and agent identity. JSON mode now includes `agent_name` from dashboard settings. Session loader script PATH fix for Homebrew/nvm/volta environments.
-- 🚦 **v9.1.0 — Task Router v2:** File-type complexity signal for intelligent code-vs-config routing, 6-signal weighted heuristic engine, multi-step false-positive fix, expanded file extension classification. Local agent hardened with buffered streaming, system prompts, memory trimming, and stateful `/api/chat` API.
-- 🔒 **v9.0.5 — JWKS Auth Security Hardening:** JWT audience/issuer claim validation (`PRISM_JWT_AUDIENCE`, `PRISM_JWT_ISSUER`), structured error logging for JWT failures, typed `PrismAuthenticatedRequest` interface, 11 new JWKS unit tests, Smithery server card fix. Vendor-neutral — tested with Auth0, AgentLair ([llms.txt](https://agentlair.com/llms.txt)), Keycloak, and custom JWKS endpoints.
-- 🧠 **v9.0.0 — Autonomous Cognitive OS:** Token-Economic Reinforcement Learning (Surprisal Gate + Cognitive Budget), Affect-Tagged Memory (valence-scored retrieval), and Episodic→Semantic Consolidation. Your agents learn compression and develop intuition. → [Cognitive OS](#-autonomous-cognitive-os-v90)
-- 🧠 **v7.8.0 — Cognitive Architecture:** Episodic-to-Semantic memory consolidation (Hebbian learning), ACT-R Spreading Activation with multi-hop causal reasoning, Uncertainty-Aware Rejection Gate, and Dynamic Fast Weight Decay. → [Cognitive Architecture](#-cognitive-architecture-v78)
+## <a name="whats-new"></a>🆕 What's New
+> **Current release: v10.0.1 — HIPAA-Hardened Local LLM Engine**
+- 🛡️ **v10.0.0 — HIPAA-Hardened Local LLM:** Your agent's memory now runs entirely on-device. Introducing `prism-coder:7b` for local compaction, task routing, and semantic search. Includes `PRISM_STRICT_LOCAL_MODE` to block cloud fallbacks, SSRF protection, URL credential redaction, and full XML escaping to prevent prompt injection. 22-finding adversarial audit completed. → [Changelog](CHANGELOG.md#1000)
+- 🧬 **v9.14.0 — Dynamic Hardware Routing:** Platform-aware memory detection auto-selects optimal models (32b for ≥32GB RAM, 14b/7b for lighter hardware). Includes **Nomic Semantic Tool Pruning (RAG)** which embeds all 17 MCP tools into offline vectors, injecting only the Top-3 relevant schemas into context to maximize inference speed.
+- 🔬 **v9.13.0 — Local Embeddings & Zero-API-Key Setup:** `LocalEmbeddingAdapter` using `nomic-embed-text-v1.5` generates 768-dim embeddings entirely on-device. Full semantic search and session memory now work with **zero cloud API keys**. → [Changelog](CHANGELOG.md#9130)
+- 🔒 **v9.12.0 — Memory Security Hardening:** Prevents **stored prompt injection** — the AI equivalent of stored XSS. New `sanitizeMemoryInput()` strips 8 categories of dangerous XML tags from all text fields. Context output wrapped in `<prism_memory context="historical">` boundary tags. → [Changelog](CHANGELOG.md#9120)
+- 🧠 **v9.4.7 — ABA Precision Protocol:** Foundational behavioral engine with 5 core rules (Observable goals, Stop-fix-verify, No reinforcement of wrong patterns, Help first, Fix bugs without asking). 83-test behavioral verification suite.
+- 🕵️ **v9.4.6 — Stealth Browser Automation:** `browse.py` HIPAA-hardened CLI for local Playwright-based browser automation with 6-layer anti-detection. **100% pass rate on bot.sannysoft.com**.
+- 🔄 **v9.2.4 — Cross-Backend Reconciliation:** Automatic sync from Supabase → SQLite on startup. Reality drift detection warns when backend versions diverge.
+- 🧠 **v9.0.0 — Autonomous Cognitive OS:** Token-Economic Reinforcement Learning (Surprisal Gate + Cognitive Budget), Affect-Tagged Memory, and Episodic→Semantic Consolidation.
+- 🧠 **v7.8.0 — Cognitive Architecture:** Episodic-to-Semantic memory consolidation (Hebbian learning), ACT-R Spreading Activation with multi-hop causal reasoning, Uncertainty-Aware Rejection Gate, and Dynamic Fast Weight Decay. → [Cognitive Architecture](#cognitive-architecture-v78)
 - 🌐 **v7.7.0 — Cloud-Native SSE Transport:** Full Server-Sent Events MCP support for seamless network deployments.
 👉 **[Full release history → CHANGELOG.md](CHANGELOG.md)** · **[ROADMAP →](ROADMAP.md)**
 ---
-## ⚔️ How Prism Compares
+## <a name="how-prism-compares"></a>⚔️ How Prism Compares
 Standard memory servers (like Mem0, Zep, or the baseline Anthropic MCP) act as passive filing cabinets — they wait for the LLM to search them. **Prism is an active cognitive architecture.** Designed specifically for the **Model Context Protocol (MCP)**, Prism doesn't just store vectors — it consolidates experience into principles, traverses causal graphs for multi-hop reasoning, and rejects queries it can't confidently answer.
@@ -897,6 +930,8 @@ Standard memory servers (like Mem0, Zep, or the baseline Anthropic MCP) act as p
 | Feature / Architecture | 🧠 Prism MCP | 🐘 Mem0 | ⚡ Zep | 🧪 Anthropic Base MCP |
 | :--- | :--- | :--- | :--- | :--- |
+| **Privacy & HIPAA** | **✅ 100% Local / Air-gapped / Redacted** | ❌ Cloud-dependent | ❌ Cloud-dependent | ✅ Local-only |
+| **Local LLM Logic** | **✅ `prism-coder:7b` (Compaction, Routing)** | ❌ Cloud only | ❌ Cloud only | ❌ None |
 | **Primary Interface** | **Native MCP** (Tools, Prompts, Resources) | REST API & Python/TS SDKs | REST API & Python/TS SDKs | Native MCP (Tools only) |
 | **Storage Engine** | **BYO SQLite or Supabase** | Managed Cloud / VectorDBs | Managed Cloud / Postgres | Local SQLite only |
 | **Context Assembly** | **Progressive (Quick/Std/Deep)** | Top-K Semantic Search | Top-K + Temporal Summaries | Basic Entity Search |
@@ -911,22 +946,25 @@ Standard memory servers (like Mem0, Zep, or the baseline Anthropic MCP) act as p
 ### 🏆 Where Prism Crushes the Giants
-#### 1. MCP-Native, Not an Adapted API
+#### 1. Local-First & HIPAA-Hardened
+While other memory systems force you to send every chat log to their cloud for "compaction" or "embedding," Prism v10 is **100% air-gapped**. With the `prism-coder:7b` local LLM and `nomic-embed` local adapter, your agent's memory pipeline runs entirely on your machine. Prism includes built-in SSRF protection, URL credential redaction, and XML sanitization to prevent stored prompt injection — meeting HIPAA Security Rule standards for on-device processing.
+#### 2. MCP-Native, Not an Adapted API
 Mem0 and Zep are APIs that *can* be wrapped into an MCP server. Prism was built *for* MCP from day one. Instead of wasting tokens on "search" tool calls, Prism uses **MCP Prompts** (`/resume_session`) to inject context *before* the LLM thinks, and **MCP Resources** (`memory://project/handoff`) to attach live, subscribable context.
-#### 2. Academic-Grade Cognitive Computer Science
+#### 3. Academic-Grade Cognitive Computer Science
 The giants use standard RAG (Retrieval-Augmented Generation). Prism uses biological and academic models of memory: **ACT-R base-level activation** (`B_i = ln(Σ t_j^(-d))`) for recency–frequency re-ranking, **TurboQuant** for extreme vector compression, **Ebbinghaus curves** for importance decay, and **Sparse Distributed Memory (SDM)**. The result is retrieval quality that follows how human memory actually works — not just nearest-neighbor cosine distance. And all of it runs on a laptop without a Postgres/pgvector instance.
-#### 3. True Multi-Agent Coordination (CRDTs)
+#### 4. True Multi-Agent Coordination (CRDTs)
 If Cursor (Agent A) and Claude Desktop (Agent B) try to update a Mem0 or standard SQLite database at the exact same time, you get a race condition and data loss. Prism uses **Optimistic Concurrency Control (OCC) with CRDT OR-Maps** — mathematically guaranteeing that simultaneous agent edits merge safely. Enterprise-grade distributed systems on a local machine.
-#### 4. The PKM "Prism-Port" Export
+#### 5. The PKM "Prism-Port" Export
 AI memory is a black box. Developers hate black boxes. Prism exports memory directly into an **Obsidian/Logseq-compatible Markdown Vault** with YAML frontmatter and `[[Wikilinks]]`. Neither Mem0 nor Zep does this.
-#### 5. Self-Cleaning & Self-Optimizing
+#### 6. Self-Cleaning & Self-Optimizing
 If you use a standard memory tool long enough, it clogs the LLM's context window with thousands of obsolete tokens. Prism runs an autonomous [Background Scheduler](src/backgroundScheduler.ts) that Ebbinghaus-decays older memories, auto-compacts session histories into dense summaries, and deep-purges high-precision vectors — saving ~90% of disk space automatically.
-#### 6. Anti-Sycophancy — The AI That Grades Its Own Homework (v7.4)
+#### 7. Anti-Sycophancy — The AI That Grades Its Own Homework (v7.4)
 Every other AI coding pipeline has a fatal flaw: it asks the same model that wrote the code whether the code is correct. **Of course it says yes.** Prism's Dark Factory solves this with a walled-off Adversarial Evaluator that is explicitly prompted to be hostile and strict. It operates on a pre-committed rubric and cannot fail the Generator without providing exact file/line receipts. Failed evaluations feed the critique back into the Generator's retry prompt — eliminating blind retries. No other memory or pipeline tool does this.
 ### 🤝 Where the Giants Currently Win (Honest Trade-offs)
@@ -941,7 +979,7 @@ Every other AI coding pipeline has a fatal flaw: it asks the same model that wro
 ---
-## 💻 CLI Reference
+## <a name="cli-reference"></a>💻 CLI Reference
 Prism includes a CLI for environments where MCP tools aren't available (CI/CD pipelines, Bash scripts, non-MCP IDEs like Antigravity).
@@ -972,7 +1010,7 @@ prism verify generate                          # Bless current rubric as canonic
 ---
-## 🔧 Tool Reference
+## <a name="tool-reference"></a>🔧 Tool Reference
 Prism ships 30+ tools, but **90% of your workflow uses just three:**
@@ -1235,7 +1273,7 @@ Prism is a **stdio-based MCP server** that manages persistent agent memory. Here
 ### Auto-Load Architecture
-Each MCP client has its own mechanism for ensuring Prism context loads on session start. See the platform-specific [Setup Guides](#-setup-guides) above for detailed instructions:
+Each MCP client has its own mechanism for ensuring Prism context loads on session start. See the platform-specific [Setup Guides](#setup-guides) above for detailed instructions:
 - **Claude Code** — Lifecycle hooks (`SessionStart` / `Stop`)
 - **Gemini / Antigravity** — Three-layer architecture (User Rules + AGENTS.md + Startup Skill)
@@ -1246,7 +1284,7 @@ All platforms benefit from the **server-side fallback** (v5.2.1): if `session_lo
 ---
-## 🧬 Scientific Foundation
+## <a name="scientific-foundation"></a>🧬 Scientific Foundation
 Prism has evolved from smart session logging into a **cognitive memory architecture** — grounded in real research, not marketing. Every retrieval decision is backed by peer-reviewed models from cognitive psychology, neuroscience, and distributed computing.
@@ -1291,7 +1329,16 @@ Prism has evolved from smart session logging into a **cognitive memory architect
 | **v9.2** | TurboQuant QJL Validation — zero R@5 delta between P50 and P95 residual vectors (d=128, N=2K); CV=0.038 at d=768 proves no long tail | QJL estimator (ICLR 2026), Householder orthogonal rotation | ✅ Shipped |
 | **v9.2** | Typed Security Errors — `PrototypePollutionError` with `offendingKey` for forensic logging; null-byte path injection guard in SafetyController | Defense-in-depth (NIST), C-string truncation attack mitigation | ✅ Shipped |
 | **v9.3** | ResidualNorm Tiebreaker — within-ε candidates ranked by compression fidelity (`PRISM_TURBOQUANT_TIEBREAKER_EPSILON`); +2pp R@1, +1pp R@5 at ε=0.005 | Quantization confidence scoring, compression-aware retrieval | ✅ Shipped |
-| **v10+** | Zero-Search Retrieval — no index, no ANN, just ask the vector | Holographic Reduced Representations | 🔭 Horizon |
+| **v10.0** | HIPAA-Hardened Local LLM — `prism-coder:7b` manages ledger compaction, task routing, and semantic search 100% on-device | Air-gapped cognitive pipelines, secure PHI redaction | ✅ Shipped |
+| **v11.0** | Zero-Search Retrieval — no index, no ANN, just ask the vector | Holographic Reduced Representations (HRR) | 🧪 [Field Testing (Synalux)](https://github.com/dcostenco/synalux-private#%F0%9F%9A%80-zero-search-retrieval-hrr-architecture) |
+---
+### 🧪 Verified Zero-Search Implementation
+The core unbinding engine is verified via Synalux's cognitive testing suite:
+- **Core Math**: [Holographic Reduced Representations (HRR.ts)](https://github.com/dcostenco/synalux-private/blob/main/portal/src/lib/cognitive/hrr.ts)
+- **Unit Tests**: [HRR Performance & Capacity Tests](https://github.com/dcostenco/synalux-private/blob/main/portal/src/lib/cognitive/__tests__/hrr.test.ts)
+- **Benchmarks**: [O(1) Retrieval Comparison Script](https://github.com/dcostenco/synalux-private/blob/main/portal/scripts/retrieval-comparison.ts)
 > Informed by Anderson's ACT-R (Adaptive Control of Thought—Rational), Collins & Loftus spreading activation networks (1975), Kanerva's SDM (1988), Hebb's learning rule, and LeCun's "Why AI Systems Don't Learn" (Dupoux, LeCun, Malik).
@@ -1321,32 +1368,32 @@ Prism MCP is open-source and free for individual developers. For teams and enter
 ---
-## 📦 Milestones & Roadmap
+## <a name="milestones-roadmap"></a>📦 Milestones & Roadmap
-> **Current: v10.0.0** — HIPAA-Hardened Local LLM Engine + 3-Round Adversarial Security Audit ([CHANGELOG](CHANGELOG.md))
+> **Current: v10.0.1** — HIPAA-Hardened Local LLM Engine + 3-Round Adversarial Security Audit ([CHANGELOG](CHANGELOG.md))
 | Release | Headline |
 |---------|----------|
-| **v10.0** | 🛡️ **HIPAA-Hardened Local LLM** — `prism-coder:7b` powers compaction + task routing 100% on-device; 22-finding adversarial audit, `PRISM_STRICT_LOCAL_MODE`, SSRF/injection/exfiltration hardening. Zero API keys required. |
+| **v11.0** | 🧠 **Deep Research Intelligence** — Multi-provider discovery (Tavily/PubMed/DDG) with Gemini 2.5 Flash synthesis. |
+| **v10.0** | 🛡️ **HIPAA-Hardened Local LLM** — `prism-coder:7b` powers compaction + task routing 100% on-device; 22-finding adversarial audit. Zero API keys required. |
 | **v9.14** | 🧬 Dynamic Hardware Routing & Semantic Tool RAG — MLX SFT pipeline, Nomic pruning, GRPO alignment |
 | **v9.13** | 🔬 Local Embeddings & Zero-API-Key Semantic Search — `nomic-embed-text-v1.5` on-device |
 | **v9.5** | 🛡️ Adversarial Behavioral Hardening — 24 forbidden openers, XML anti-tag system, sycophancy defense |
 | **v9.4** | 🔒 Security Sweep — command injection, path traversal, CORS, fail-closed rate limiter, bidirectional sync |
 | **v9.0** | 🧠 Autonomous Cognitive OS — Surprisal Gate, Cognitive Budget, Affect-Tagged Memory |
 | **v7.8** | 🧠 Cognitive Architecture — Hebbian consolidation, multi-hop reasoning, rejection gate |
-| **v7.4** | ⚔️ Adversarial Evaluation (anti-sycophancy) |
 | **v7.0** | 🧬 ACT-R Activation Memory |
 ### Future Tracks
-- **v10.1: Semantic Routing** — Replace regex-based task classification with lightweight local embedding model (`all-MiniLM-L6-v2`) for intent-based routing.
-- **v10.2: Background Task Mutex** — Pause background compaction during active user chat streams to prevent resource contention.
-- **v10.3: Agent Self-Evaluation** — Local LLM scores its own compaction quality and requests re-compaction when output confidence is low.
-- **v11+: Zero-Search Retrieval** — Direct vector-addressed recall eliminates retrieval indirection entirely.
+- **v10.1: Multi-Graph Causal Layer** — Intent-aware retrieval routing traversing an LLM-inferred causal `because` edge-type layer for deep reasoning.
+- **v10.2: Federated Memory Mesh** — Hierarchical memory namespaces with role-based access control for enterprise agent teams.
+- **v10.3: Predictive Prefetch** — ACT-R based predictive models prefetch likely-needed memories before the agent asks.
+- **v11+: Zero-Search Retrieval** — Holographic Reduced Representations (HRR) eliminate retrieval indirection entirely.
 👉 **[Full ROADMAP.md →](ROADMAP.md)**
-## ❓ Troubleshooting FAQ
+## <a name="troubleshooting-faq"></a>❓ Troubleshooting FAQ
 **Q: Why is the dashboard project selector stuck on "Loading projects..."?**
 A: Fixed in v7.3.3. The root cause was a multi-layer quote-escaping trap in the `abortPipeline` onclick handler that generated a `SyntaxError` in the browser, silently killing the entire dashboard IIFE. Update to v7.3.3+ (`npx -y prism-mcp-server`). If still stuck, check that Supabase env values are properly set (unresolved placeholders like `${SUPABASE_URL}` cause `/api/projects` to return empty). Prism auto-falls back to local SQLite when Supabase is misconfigured.
@@ -1365,8 +1412,8 @@ A: Run `npm run build && npm test`, then open the Mind Palace dashboard (`localh
 ### 💡 Known Limitations & Quirks
-- **Text generation features require an API key.** Morning Briefings, auto-compaction, and VLM captioning need a cloud provider key (`GOOGLE_API_KEY`, `OPENAI_API_KEY`, or `ANTHROPIC_API_KEY`). Semantic search works offline with `embedding_provider=local` (no key needed). Without any embedding provider, Prism falls back to keyword-only search (FTS5).
-- **Auto-load is model- and client-dependent.** Session auto-loading relies on both the LLM following system prompt instructions *and* the MCP client completing tool registration before the model's first turn. Prism provides platform-specific [Setup Guides](#-setup-guides) and a server-side fallback (v5.2.1) that auto-pushes context after 10 seconds.
+- **Some advanced text features may still benefit from a cloud API key.** While `prism-coder:7b` handles core compaction and routing, high-level features like Morning Briefings and complex VLM captioning are optimized for cloud providers (`GOOGLE_API_KEY`, `OPENAI_API_KEY`, or `ANTHROPIC_API_KEY`). Semantic search and basic compaction work 100% offline with `embedding_provider=local`.
+- **Auto-load is model- and client-dependent.** Session auto-loading relies on both the LLM following system prompt instructions *and* the MCP client completing tool registration before the model's first turn. Prism provides platform-specific [Setup Guides](#setup-guides) and a server-side fallback (v5.2.1) that auto-pushes context after 10 seconds.
 - **MCP client race conditions.** Some MCP clients may not finish tool enumeration before the model generates its first response, causing transient `unknown_tool` errors. This is a client-side timing issue — Prism's server completes the MCP handshake in ~60ms. Workaround: the server-side auto-push fallback and the startup skill's retry logic.
 - **No real-time sync without Supabase.** Local SQLite mode is single-machine only. Multi-device or team sync requires a Supabase backend.
 - **Embedding quality varies by provider.** Gemini `text-embedding-004` and OpenAI `text-embedding-3-small` produce high-quality 768-dim vectors. Prism passes `dimensions: 768` via the Matryoshka API for OpenAI models (native output is 1536-dim; this truncation is lossless and outperforms ada-002 at full 1536 dims). Local embeddings (`nomic-embed-text-v1.5` via `@huggingface/transformers`) provide good quality with zero API cost. Ollama embeddings are usable but may reduce retrieval accuracy.

package/dist/cli.js CHANGED Viewed

@@ -287,7 +287,7 @@ verifyCmd
     .option('--json', 'Emit machine-readable JSON output with stable keys')
     .action(async (options) => {
     const storage = new SqliteStorage();
-    await storage.initialize('./prism-local.db');
+    await storage.initialize(true, './prism-local.db');
     // H4 fix: Ensure storage is closed on exit to flush WAL and prevent data loss
     try {
         await handleVerifyStatus(storage, options.project, !!options.force, options.user, !!options.json);
@@ -305,7 +305,7 @@ verifyCmd
     .option('--json', 'Emit machine-readable JSON output with stable keys')
     .action(async (options) => {
     const storage = new SqliteStorage();
-    await storage.initialize('./prism-local.db');
+    await storage.initialize(true, './prism-local.db');
     // H4 fix: Ensure storage is closed on exit to flush WAL and prevent data loss
     try {
         await handleGenerateHarness(storage, options.project, !!options.force, options.user, !!options.json);

package/dist/config.js CHANGED Viewed

@@ -60,6 +60,7 @@ if (!GOOGLE_API_KEY && process.env.PRISM_DEBUG_LOGGING === "true") {
 // Used by the brave_answers tool for AI-grounded answers.
 // This is a separate API key from the main Brave Search key.
 export const BRAVE_ANSWERS_API_KEY = process.env.BRAVE_ANSWERS_API_KEY;
+export const SEMANTIC_SCHOLAR_API_KEY = process.env.SEMANTIC_SCHOLAR_API_KEY;
 if (!BRAVE_ANSWERS_API_KEY && process.env.PRISM_DEBUG_LOGGING === "true") {
     console.error("Warning: BRAVE_ANSWERS_API_KEY environment variable is missing. Brave Answers tool will be unavailable.");
 }
@@ -71,6 +72,12 @@ if (!BRAVE_ANSWERS_API_KEY && process.env.PRISM_DEBUG_LOGGING === "true") {
 // Without this, VoyageAdapter construction will throw at server start if
 // embedding_provider=voyage is selected.
 export const VOYAGE_API_KEY = process.env.VOYAGE_API_KEY;
+// ─── Optional: Google Search (Scholar Pipeline Fallback) ──────
+// Used when Brave or Tavily keys are missing.
+// Requires: Google Custom Search API Key + Search Engine ID (CX).
+// Get yours at: https://developers.google.com/custom-search/v1/overview
+export const GOOGLE_SEARCH_API_KEY = process.env.GOOGLE_SEARCH_API_KEY;
+export const GOOGLE_SEARCH_CX = process.env.GOOGLE_SEARCH_CX;
 // ─── v2.0: Storage Backend Selection ─────────────────────────
 // REVIEWER NOTE: Step 1 of v2.0 introduces a storage abstraction.
 // Both "local" (SQLite) and "supabase" (PostgreSQL) are implemented.
@@ -282,3 +289,83 @@ const rawTiebreakerEpsilon = parseFloat(process.env.PRISM_TURBOQUANT_TIEBREAKER_
 export const PRISM_TURBOQUANT_TIEBREAKER_EPSILON = Number.isFinite(rawTiebreakerEpsilon) && rawTiebreakerEpsilon >= 0
     ? rawTiebreakerEpsilon
     : 0;
+// ─── v9.x: Local LLM (prism-coder:7b) Integration ─────────────────────────
+// Enables background tasks (compaction, task-router fallback, pipeline ops)
+// to use a local Ollama model instead of the cloud LLM provider.
+//
+// Default model is prism-coder:7b — fine-tuned on Prism tool schemas.
+// Disabled by default so existing deployments are unaffected.
+//
+// Set PRISM_LOCAL_LLM_ENABLED=true to activate.
+// Set PRISM_LOCAL_LLM_MODEL to override the model tag.
+// Set PRISM_LOCAL_LLM_URL to override the Ollama endpoint (default: localhost:11434).
+// Set PRISM_LOCAL_LLM_TIMEOUT_MS to override per-call timeout (default: 60000, max: 300000).
+// Set PRISM_STRICT_LOCAL_MODE=true to block cloud fallback when local LLM is enabled (HIPAA).
+/** Master switch — enables the local prism-coder:7b LLM for background tasks. */
+export const PRISM_LOCAL_LLM_ENABLED = process.env.PRISM_LOCAL_LLM_ENABLED === "true"; // Opt-in, default false
+/** Ollama model tag to use for local LLM calls. */
+export const PRISM_LOCAL_LLM_MODEL = (process.env.PRISM_LOCAL_LLM_MODEL || "prism-coder:7b").trim();
+/** Ollama base URL. Override for remote Ollama instances. */
+export const PRISM_LOCAL_LLM_URL = (process.env.PRISM_LOCAL_LLM_URL || "http://localhost:11434").trim();
+/**
+ * Per-call timeout in ms for local LLM requests. Prevents stalled background tasks.
+ *
+ * Read from PRISM_LOCAL_LLM_TIMEOUT_MS and parsed as a base-10 integer.
+ * An unset or empty variable, a value with no leading integer, zero, or a
+ * negative value all resolve to the 60000 ms default. Any larger value is
+ * clamped to 300000 ms (300s).
+ */
+export const PRISM_LOCAL_LLM_TIMEOUT_MS = (() => {
+    const raw = parseInt(process.env.PRISM_LOCAL_LLM_TIMEOUT_MS || "60000", 10);
+    // FIX (integer overflow): values > 2^31-1 cause setTimeout to fire immediately,
+    // which silently aborts every local LLM call and forces cloud fallback.
+    // Cap at 300s (5 min) — no legitimate compaction call should take longer.
+    const MAX_TIMEOUT = 300_000;
+    return Number.isFinite(raw) && raw > 0 ? Math.min(raw, MAX_TIMEOUT) : 60_000; // NaN / non-positive input falls back to the default
+})();
+/**
+ * Strict local mode — blocks cloud LLM fallback when local LLM is enabled.
+ * Critical for HIPAA deployments where session data must never leave the device.
+ * When true: compaction throws instead of falling back to Gemini.
+ * When false (default): graceful cloud fallback on local LLM failure.
+ */
+export const PRISM_STRICT_LOCAL_MODE = process.env.PRISM_STRICT_LOCAL_MODE === "true";
+/**
+ * Redact credentials from a URL for safe logging (masks the user:pass@ part).
+ *
+ * @param {string} rawUrl - URL to sanitize; parsed with the URL constructor.
+ * @returns {string} The serialized URL with username and password both set to
+ *   "***" when either one is present, and with a single trailing "/" removed.
+ *   Returns the literal "[invalid URL]" when rawUrl cannot be parsed, so a
+ *   malformed value is never echoed into the log.
+ */
+function redactUrl(rawUrl) {
+    try {
+        const parsed = new URL(rawUrl);
+        if (parsed.username || parsed.password) {
+            parsed.username = "***";
+            parsed.password = "***";
+        }
+        return parsed.toString().replace(/\/$/, "");
+    }
+    catch {
+        return "[invalid URL]";
+    }
+}
+if (PRISM_LOCAL_LLM_ENABLED) {
+    console.error(`[Prism] Local LLM enabled: model=${PRISM_LOCAL_LLM_MODEL}, ` +
+        `url=${redactUrl(PRISM_LOCAL_LLM_URL)}, timeout=${PRISM_LOCAL_LLM_TIMEOUT_MS}ms` +
+        (PRISM_STRICT_LOCAL_MODE ? ", STRICT LOCAL MODE (no cloud fallback)" : ""));
+}
+// ─── v11.0: Zero-Search Retrieval (HRR) ───────────────────────
+// Dimension selection: a positive PRISM_HRR_DIMENSION env value wins; otherwise the tier is picked from total system memory (os.totalmem(), not currently free memory).
+// Higher dimensions = higher fact capacity but slower unbinding.
+import { totalmem } from "node:os";
+export const PRISM_HRR_DIMENSION = (() => {
+    // 1. Manual override via env var (unset, non-numeric, zero or negative values fall through to step 2)
+    const envVal = parseInt(process.env.PRISM_HRR_DIMENSION || "0", 10);
+    if (envVal > 0) {
+        // Warn when the override is not a power of 2 for FFT; the value is still returned unchanged
+        if ((envVal & (envVal - 1)) !== 0) {
+            console.error(`Warning: PRISM_HRR_DIMENSION (${envVal}) is not a power of 2. FFT unbinding may fail.`);
+        }
+        return envVal;
+    }
+    // 2. Auto-adjustment based on total system RAM, measured in GiB (bytes / 1024^3)
+    const totalRamGb = totalmem() / (1024 ** 3);
+    if (totalRamGb >= 48)
+        return 8192; // High-end (M4 Max)
+    if (totalRamGb >= 32)
+        return 4096; // Mid-high (M3 Pro)
+    if (totalRamGb >= 16)
+        return 2048; // Standard (M1/M2/M3)
+    return 1024; // Low-memory / Baseline
+})();
+if (PRISM_DEBUG_LOGGING) { // NOTE(review): PRISM_DEBUG_LOGGING is not declared in the visible hunks — confirm it is in scope here; other checks in this file read process.env.PRISM_DEBUG_LOGGING === "true" directly
+    console.error(`[Prism] HRR Zero-Search Dimension: ${PRISM_HRR_DIMENSION} (Total RAM: ${(totalmem() / (1024 ** 3)).toFixed(1)}GB)`);
+}