@strvmarv/total-recall 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ {
2
+ "name": "total-recall",
3
+ "description": "Multi-tiered memory and knowledge base with semantic search, auto-compaction, and built-in evaluation. Works across Claude Code, Copilot CLI, OpenCode, Cline, and Cursor.",
4
+ "version": "0.1.0",
5
+ "author": {
6
+ "name": "strvmarv"
7
+ },
8
+ "skills": "./skills/",
9
+ "agents": "./agents/",
10
+ "hooks": "./hooks/hooks.json",
11
+ "mcpServers": {
12
+ "total-recall": {
13
+ "command": "node",
14
+ "args": ["dist/index.js"],
15
+ "env": {}
16
+ }
17
+ }
18
+ }
@@ -0,0 +1,12 @@
1
+ {
2
+ "name": "total-recall",
3
+ "description": "Multi-tiered memory and knowledge base for TUI coding assistants",
4
+ "version": "0.1.0",
5
+ "skills": "./skills/",
6
+ "mcpServers": {
7
+ "total-recall": {
8
+ "command": "node",
9
+ "args": ["dist/index.js"]
10
+ }
11
+ }
12
+ }
@@ -0,0 +1,13 @@
1
+ {
2
+ "name": "total-recall",
3
+ "description": "Multi-tiered memory and knowledge base for TUI coding assistants",
4
+ "version": "0.1.0",
5
+ "skills": "./skills/",
6
+ "hooks": "./hooks/hooks-cursor.json",
7
+ "mcpServers": {
8
+ "total-recall": {
9
+ "command": "node",
10
+ "args": ["dist/index.js"]
11
+ }
12
+ }
13
+ }
@@ -0,0 +1,24 @@
1
+ # total-recall for OpenCode
2
+
3
+ ## Installation
4
+
5
+ 1. Add the MCP server to your OpenCode config:
6
+
7
+ ```json
8
+ {
9
+ "mcpServers": {
10
+ "total-recall": {
11
+ "command": "node",
12
+ "args": ["/path/to/total-recall/dist/index.js"]
13
+ }
14
+ }
15
+ }
16
+ ```
17
+
18
+ 2. Copy the skills directory to your OpenCode plugins:
19
+
20
+ ```bash
21
+ cp -r skills/ ~/.opencode/plugins/total-recall/skills/
22
+ ```
23
+
24
+ 3. Restart OpenCode. total-recall will auto-initialize on first session.
@@ -0,0 +1,295 @@
1
+ # Contributing to total-recall
2
+
3
+ ---
4
+
5
+ ## Getting Started
6
+
7
+ ```bash
8
+ git clone https://github.com/strvmarv/total-recall.git
9
+ cd total-recall
10
+ npm install
11
+ npm run build
12
+ ```
13
+
14
+ Run tests to verify your environment:
15
+
16
+ ```bash
17
+ npm test
18
+ ```
19
+
20
+ Run the type checker:
21
+
22
+ ```bash
23
+ npm run typecheck
24
+ ```
25
+
26
+ ---
27
+
28
+ ## Adding a New Host Tool Importer
29
+
30
+ Host importers detect a specific tool's presence, scan its memory files, and migrate them into total-recall on first run.
31
+
32
+ ### 1. Implement the `HostImporter` interface
33
+
34
+ The interface is defined in `src/importers/importer.ts`:
35
+
36
+ ```typescript
37
+ export interface HostImporter {
38
+ name: string;
39
+ detect(): boolean;
40
+ scan(): { memoryFiles: number; knowledgeFiles: number; sessionFiles: number };
41
+ importMemories(db: Database.Database, embed: EmbedFn, project?: string): ImportResult;
42
+ importKnowledge(db: Database.Database, embed: EmbedFn): ImportResult;
43
+ }
44
+ ```
45
+
46
+ Create `src/importers/my-tool.ts`:
47
+
48
+ ```typescript
49
+ import { existsSync, readdirSync, readFileSync } from "node:fs";
50
+ import { join } from "node:path";
51
+ import { homedir } from "node:os";
52
+ import { createHash } from "node:crypto";
53
+ import type Database from "better-sqlite3";
54
+ import type { HostImporter, ImportResult } from "./importer.js";
55
+ import { insertEntry } from "../db/entries.js";
56
+ import { insertEmbedding } from "../search/vector-search.js";
57
+
58
+ type EmbedFn = (text: string) => Float32Array;
59
+
60
+ // Where my-tool stores its memories
61
+ const MY_TOOL_DIR = join(homedir(), ".my-tool");
62
+
63
+ export const myToolImporter: HostImporter = {
64
+ name: "my-tool",
65
+
66
+ detect() {
67
+ return existsSync(MY_TOOL_DIR);
68
+ },
69
+
70
+ scan() {
71
+ if (!existsSync(MY_TOOL_DIR)) {
72
+ return { memoryFiles: 0, knowledgeFiles: 0, sessionFiles: 0 };
73
+ }
74
+ const files = readdirSync(MY_TOOL_DIR).filter((f) => f.endsWith(".md"));
75
+ return { memoryFiles: files.length, knowledgeFiles: 0, sessionFiles: 0 };
76
+ },
77
+
78
+ importMemories(db, embed, project) {
79
+ if (!existsSync(MY_TOOL_DIR)) return { imported: 0, skipped: 0, errors: [] };
80
+
81
+ const result: ImportResult = { imported: 0, skipped: 0, errors: [] };
82
+
83
+ const files = readdirSync(MY_TOOL_DIR).filter((f) => f.endsWith(".md"));
84
+ for (const file of files) {
85
+ const filePath = join(MY_TOOL_DIR, file);
86
+ try {
87
+ const raw = readFileSync(filePath, "utf-8").trim();
88
+ if (!raw) { result.skipped++; continue; }
89
+
90
+ const hash = createHash("sha256").update(raw).digest("hex");
91
+
92
+ // Check if already imported (use import_log table)
93
+ const existing = db
94
+ .prepare("SELECT id FROM import_log WHERE content_hash = ?")
95
+ .get(hash) as { id: string } | undefined;
96
+ if (existing) { result.skipped++; continue; }
97
+
98
+ const id = crypto.randomUUID();
99
+ const embedding = embed(raw);
100
+
101
+ insertEntry(db, {
102
+ id,
103
+ content: raw,
104
+ tier: "warm",
105
+ content_type: "note",
106
+ source_tool: "my-tool",
107
+ source_path: filePath,
108
+ project: project ?? null,
109
+ });
110
+ insertEmbedding(db, id, embedding);
111
+
112
+ // Log the import to avoid duplicates on next run
113
+ db.prepare(
114
+ "INSERT INTO import_log (id, source_tool, source_path, content_hash, entry_id, tier, content_type) VALUES (?, ?, ?, ?, ?, ?, ?)"
115
+ ).run(crypto.randomUUID(), "my-tool", filePath, hash, id, "warm", "note");
116
+
117
+ result.imported++;
118
+ } catch (err) {
119
+ result.errors.push(`${file}: ${String(err)}`);
120
+ }
121
+ }
122
+
123
+ return result;
124
+ },
125
+
126
+ importKnowledge(_db, _embed) {
127
+ // my-tool has no separate knowledge files
128
+ return { imported: 0, skipped: 0, errors: [] };
129
+ },
130
+ };
131
+ ```
132
+
133
+ ### 2. Register the importer
134
+
135
+ Add it to the importer list in `src/index.ts` (or wherever importers are initialized at startup).
136
+
137
+ ### 3. Add tests
138
+
139
+ Create `src/importers/my-tool.test.ts` mirroring the existing `claude-code.test.ts` structure.
140
+
141
+ ---
142
+
143
+ ## Adding a New Content Type
144
+
145
+ Content types classify what kind of information a memory or knowledge chunk contains (e.g., `note`, `code`, `doc`, `decision`).
146
+
147
+ ### 1. Add the type to the schema
148
+
149
+ In `src/db/schema.ts`, find the `content_type` column definition and add your new type to the CHECK constraint or lookup table:
150
+
151
+ ```sql
152
+ -- In the entries table or a lookup table:
153
+ content_type TEXT CHECK(content_type IN ('note', 'code', 'doc', 'decision', 'my-new-type'))
154
+ ```
155
+
156
+ ### 2. Add the type to the TypeScript union
157
+
158
+ In `src/types.ts`, extend the `ContentType` union:
159
+
160
+ ```typescript
161
+ export type ContentType = "note" | "code" | "doc" | "decision" | "my-new-type";
162
+ ```
163
+
164
+ ### 3. Assign a relevance weight (optional)
165
+
166
+ If your content type should be scored differently during retrieval, add a weight entry in the search relevance config (see `src/search/`).
167
+
168
+ ---
169
+
170
+ ## Adding a New Chunking Parser
171
+
172
+ The chunker in `src/ingestion/chunker.ts` dispatches to per-format parsers based on file extension. To add support for a new format:
173
+
174
+ ### 1. Implement the parser
175
+
176
+ Create `src/ingestion/my-format-parser.ts`. Your parser must return `Chunk[]`:
177
+
178
+ ```typescript
179
+ import type { Chunk } from "./chunker.js";
180
+
181
+ export interface ParseOptions {
182
+ maxTokens: number;
183
+ overlapTokens?: number;
184
+ }
185
+
186
+ export function parseMyFormat(content: string, opts: ParseOptions): Chunk[] {
187
+ const chunks: Chunk[] = [];
188
+
189
+ // Split your format into logical units.
190
+ // Each chunk needs: content, startLine, endLine.
191
+ // Optionally: headingPath (for outline-like formats), name/kind (for code-like formats).
192
+
193
+ let lineNumber = 1;
194
+ for (const section of splitIntoSections(content)) {
195
+ chunks.push({
196
+ content: section.text,
197
+ startLine: lineNumber,
198
+ endLine: lineNumber + section.text.split("\n").length - 1,
199
+ });
200
+ lineNumber += section.text.split("\n").length + 1;
201
+ }
202
+
203
+ return chunks;
204
+ }
205
+
206
+ function splitIntoSections(content: string) {
207
+ // Your format-specific splitting logic here
208
+ return content.split(/\n---\n/).map((text) => ({ text }));
209
+ }
210
+ ```
211
+
212
+ ### 2. Register the parser in `chunker.ts`
213
+
214
+ In `src/ingestion/chunker.ts`, add your file extensions and import:
215
+
216
+ ```typescript
217
+ import { parseMyFormat } from "./my-format-parser.js";
218
+
219
+ const MY_FORMAT_EXTENSIONS = new Set([".myext", ".myfmt"]);
220
+ ```
221
+
222
+ Then add a branch in the `chunkFile` function before the fallback:
223
+
224
+ ```typescript
225
+ export function chunkFile(content, filePath, opts) {
226
+ // ...existing Markdown and code branches...
227
+
228
+ if (MY_FORMAT_EXTENSIONS.has(ext)) {
229
+ return parseMyFormat(content, opts);
230
+ }
231
+
232
+ // Fallback: paragraph-based splitting
233
+ return splitByParagraphs(content, opts.maxTokens);
234
+ }
235
+ ```
236
+
237
+ ### 3. Add tests
238
+
239
+ Create `src/ingestion/my-format-parser.test.ts`. Test at minimum: empty input, single section, multiple sections, sections exceeding `maxTokens`.
240
+
241
+ ---
242
+
243
+ ## Running Tests
244
+
245
+ Run all tests once:
246
+
247
+ ```bash
248
+ npm test
249
+ # or
250
+ npx vitest run
251
+ ```
252
+
253
+ Run in watch mode during development:
254
+
255
+ ```bash
256
+ npm run test:watch
257
+ # or
258
+ npx vitest
259
+ ```
260
+
261
+ Run a specific test file:
262
+
263
+ ```bash
264
+ npx vitest run src/importers/my-tool.test.ts
265
+ ```
266
+
267
+ ---
268
+
269
+ ## Running Benchmarks
270
+
271
+ Once the MCP server is running and connected to your coding assistant, use the eval commands:
272
+
273
+ ```
274
+ /memory eval # Live retrieval metrics for current session
275
+ /memory eval --benchmark # Run synthetic benchmark suite
276
+ /memory eval --snapshot baseline # Save current config as a named baseline
277
+ /memory eval --compare baseline # Compare current config against saved baseline
278
+ /memory eval --grow # Add real query misses to benchmark suite
279
+ ```
280
+
281
+ A PR that changes retrieval logic, scoring, or compaction thresholds must include a `--benchmark` run showing no regression against the `baseline` snapshot.
282
+
283
+ ---
284
+
285
+ ## PR Requirements
286
+
287
+ Before opening a pull request:
288
+
289
+ 1. **Tests pass** — `npm test` exits 0 with no failures
290
+ 2. **Type checker clean** — `npm run typecheck` exits 0
291
+ 3. **Build succeeds** — `npm run build` exits 0
292
+ 4. **Benchmark does not regress** — run `/memory eval --compare baseline` and include the output in your PR description if you changed retrieval, scoring, or compaction logic
293
+ 5. **New behavior is tested** — new importers, parsers, and content types all require corresponding test files
294
+
295
+ If you're adding a new host tool importer, include the `detect()` logic rationale in your PR description — false positives will silently corrupt imports for users who don't have the tool installed.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 strvmarv
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,239 @@
1
+ # total-recall
2
+
3
+ **Multi-tiered memory and knowledge base for TUI coding assistants.**
4
+
5
+ Your AI coding tool forgets everything. total-recall doesn't.
6
+
7
+ A cross-platform plugin that gives Claude Code, GitHub Copilot CLI, OpenCode, Cline, and Cursor persistent, semantically searchable memory with a hierarchical knowledge base — backed by local SQLite + vector embeddings, zero external dependencies.
8
+
9
+ ---
10
+
11
+ ## The Problem
12
+
13
+ Every TUI coding assistant has the same gap:
14
+
15
+ - **No tiering** — all memories treated equally, leading to context bloat or information loss
16
+ - **Tool-locked** — switching between Claude Code and Copilot means starting from scratch
17
+ - **No knowledge base** — can't ingest your docs and have them retrieved when relevant
18
+ - **No semantic search** — memories retrieved by filename, not by meaning
19
+ - **No observability** — no way to know if memory is helping or just noise
20
+
21
+ ---
22
+
23
+ ## The Solution
24
+
25
+ total-recall introduces a three-tier memory model: **Hot** memories (up to 50 entries) are auto-injected into every prompt so your most important context is always present. **Warm** memories (up to 10K entries) are retrieved semantically — when you ask about authentication, relevant auth memories surface automatically. **Cold** storage is an unlimited hierarchical knowledge base: ingest your docs, README files, API references, and architecture notes, and they're retrieved when relevant.
26
+
27
+ The knowledge base ingests entire directories — source trees, documentation folders, design specs — and chunks them semantically with heading-aware Markdown parsing and AST-based code parsing. Every chunk is embedded with `all-MiniLM-L6-v2` (384 dimensions, runs locally via ONNX) so retrieval is purely semantic, no keyword matching required.
28
+
29
+ Platform support is via MCP (Model Context Protocol), which means total-recall works with any MCP-compatible tool. Dedicated importers for Claude Code and Copilot CLI mean your existing memories migrate automatically on first run. An eval framework lets you measure retrieval quality, run benchmarks, and compare configuration changes before committing them.
30
+
31
+ ---
32
+
33
+ ## Installation
34
+
35
+ ### Self-Install (Paste Into Any AI Coding Assistant)
36
+
37
+ > Install the total-recall memory plugin: fetch and follow the instructions at https://raw.githubusercontent.com/strvmarv/total-recall/main/INSTALL.md
38
+
39
+ That's it. Your AI assistant will read the instructions and install total-recall for its platform.
40
+
41
+ ### Claude Code
42
+
43
+ ```
44
+ /plugin install total-recall@strvmarv-marketplace
45
+ ```
46
+
47
+ Or if the marketplace isn't registered:
48
+
49
+ ```
50
+ /plugin marketplace add strvmarv/total-recall-marketplace
51
+ /plugin install total-recall@strvmarv-marketplace
52
+ ```
53
+
54
+ ### npm (Any MCP-Compatible Tool)
55
+
56
+ ```bash
57
+ npm install -g @strvmarv/total-recall
58
+ ```
59
+
60
+ Then add to your tool's MCP config:
61
+
62
+ ```json
63
+ {
64
+ "mcpServers": {
65
+ "total-recall": {
66
+ "command": "npx",
67
+ "args": ["-y", "@strvmarv/total-recall"]
68
+ }
69
+ }
70
+ }
71
+ ```
72
+
73
+ This works with **Copilot CLI**, **OpenCode**, **Cline**, **Cursor**, and any other MCP-compatible tool.
74
+
75
+ ### From Source
76
+
77
+ ```bash
78
+ git clone https://github.com/strvmarv/total-recall.git
79
+ cd total-recall
80
+ npm install && npm run build
81
+ ```
82
+
83
+ ### First Session
84
+
85
+ total-recall auto-initializes on first use:
86
+
87
+ 1. Creates `~/.total-recall/` with SQLite database
88
+ 2. Downloads embedding model (~80MB, one-time)
89
+ 3. Scans for existing memories (Claude Code, Copilot CLI)
90
+ 4. Auto-ingests project docs (README, docs/, etc.)
91
+ 5. Reports: `total-recall: initialized · 4 memories imported · 12 docs ingested · system verified`
92
+
93
+ ---
94
+
95
+ ## Architecture
96
+
97
+ ```
98
+ MCP Server (Node.js/TypeScript)
99
+ ├── Always Loaded: SQLite + vec, MCP Tools, Event Logger
100
+ ├── Lazy Loaded: ONNX Embedder, Compactor, Ingestor
101
+ └── Host Importers: Claude Code, Copilot CLI, OpenCode
102
+
103
+ Tiers:
104
+ Hot (50 entries) → auto-injected every prompt
105
+ Warm (10K entries) → semantic search per query
106
+ Cold (unlimited) → hierarchical KB retrieval
107
+ ```
108
+
109
+ **Data flow:**
110
+
111
+ 1. `store` — write a memory, assign tier, embed, persist
112
+ 2. `search` — embed query, vector search warm + cold, return ranked results
113
+ 3. `compact` — decay scores, demote warm→cold, evict hot→warm, summarize clusters
114
+ 4. `ingest` — chunk files, embed chunks, store in cold tier with metadata
115
+
116
+ All state lives in `~/.total-recall/db.sqlite`. The embedding model is cached at `~/.total-recall/models/`. No network calls after initial model download.
117
+
118
+ ---
119
+
120
+ ## Commands
121
+
122
+ | Command | Description |
123
+ |---|---|
124
+ | `/memory status` | Dashboard overview |
125
+ | `/memory search <query>` | Semantic search across all tiers |
126
+ | `/memory ingest <path>` | Add files/dirs to knowledge base |
127
+ | `/memory forget <query>` | Find and delete entries |
128
+ | `/memory compact` | Force compaction with preview |
129
+ | `/memory inspect <id>` | Deep dive on single entry |
130
+ | `/memory promote <id>` | Move entry to higher tier |
131
+ | `/memory demote <id>` | Move entry to lower tier |
132
+ | `/memory export` | Export to portable format |
133
+ | `/memory import <file>` | Import from export file |
134
+ | `/memory eval` | Live performance metrics |
135
+ | `/memory eval --benchmark` | Run synthetic benchmark |
136
+ | `/memory eval --compare <name>` | Compare configs |
137
+ | `/memory eval --snapshot <name>` | Save current config baseline |
138
+ | `/memory eval --grow` | Add real misses to benchmark |
139
+ | `/memory config get <key>` | Read config value |
140
+ | `/memory config set <key> <value>` | Update config |
141
+ | `/memory history` | Show recent tier movements |
142
+ | `/memory lineage <id>` | Show compaction ancestry |
143
+
144
+ ---
145
+
146
+ ## Supported Platforms
147
+
148
+ | Platform | Support | Notes |
149
+ |---|---|---|
150
+ | Claude Code | Full | Native plugin, session hooks, auto-import |
151
+ | Copilot CLI | Full | Auto-import from existing Copilot memory files |
152
+ | OpenCode | MCP | Configure MCP server in opencode config |
153
+ | Cline | MCP | Configure MCP server in Cline settings |
154
+ | Cursor | Full | MCP server + `.cursor-plugin/` wrapper |
155
+
156
+ ---
157
+
158
+ ## Configuration
159
+
160
+ Edit `~/.total-recall/config.toml` to override defaults:
161
+
162
+ ```toml
163
+ # total-recall configuration
164
+
165
+ [tiers.hot]
166
+ max_entries = 50 # Max entries auto-injected per prompt
167
+ token_budget = 4000 # Max tokens for hot tier injection
168
+ carry_forward_threshold = 0.7 # Score threshold to stay in hot
169
+
170
+ [tiers.warm]
171
+ max_entries = 10000 # Max entries in warm tier
172
+ retrieval_top_k = 5 # Results returned per search
173
+ similarity_threshold = 0.65 # Min cosine similarity for retrieval
174
+ cold_decay_days = 30 # Days before unused warm entries decay to cold
175
+
176
+ [tiers.cold]
177
+ chunk_max_tokens = 512 # Max tokens per knowledge base chunk
178
+ chunk_overlap_tokens = 50 # Overlap between adjacent chunks
179
+ lazy_summary_threshold = 5 # Accesses before generating summary
180
+
181
+ [compaction]
182
+ decay_half_life_hours = 168 # Score half-life (168h = 1 week)
183
+ warm_threshold = 0.3 # Score below which warm→cold
184
+ promote_threshold = 0.7 # Score above which cold→warm
185
+ warm_sweep_interval_days = 7 # How often to run warm sweep
186
+
187
+ [embedding]
188
+ model = "all-MiniLM-L6-v2" # Embedding model name
189
+ dimensions = 384 # Embedding dimensions
190
+ ```
191
+
192
+ ---
193
+
194
+ ## Extending
195
+
196
+ ### Adding a New Host Tool
197
+
198
+ Implement the `HostImporter` interface (~50 lines). It requires four methods: `detect()` to check if the tool is present, `scan()` to report what's available, `importMemories()` to migrate existing memories, and `importKnowledge()` to migrate knowledge files. See [CONTRIBUTING.md](CONTRIBUTING.md) for a full example.
199
+
200
+ ### Adding a New Content Type
201
+
202
+ Add a row to the `content_type` lookup table in `src/db/schema.ts`, extend the `ContentType` union in `src/types.ts`, then register a type weight for relevance scoring. See [CONTRIBUTING.md](CONTRIBUTING.md) for details.
203
+
204
+ ### Adding a New Chunking Parser
205
+
206
+ Implement the `Chunk[]`-returning parser interface and register it in `src/ingestion/chunker.ts` alongside the existing Markdown and code parsers. See [CONTRIBUTING.md](CONTRIBUTING.md) for the interface definition.
207
+
208
+ ---
209
+
210
+ ## Built With & Inspired By
211
+
212
+ ### [superpowers](https://github.com/obra/superpowers) by [obra](https://github.com/obra)
213
+
214
+ total-recall's plugin architecture, skill format, hook system, multi-platform wrapper pattern, and development philosophy are directly inspired by and modeled after the **superpowers** plugin. superpowers demonstrated that a zero-dependency, markdown-driven skill system could fundamentally improve how AI coding assistants behave — total-recall extends that same philosophy to memory and knowledge management.
215
+
216
+ Specific patterns we learned from superpowers:
217
+
218
+ - **SKILL.md format** with YAML frontmatter and trigger-condition-focused descriptions
219
+ - **SessionStart hooks** for injecting core behavior at session start
220
+ - **Multi-platform wrappers** (`.claude-plugin/`, `.copilot-plugin/`, `.cursor-plugin/`, `.opencode/`)
221
+ - **Subagent architecture** for isolated, focused task execution
222
+ - **Zero-dependency philosophy** — no external services, no API keys, no cloud
223
+ - **Two-stage review pattern** for quality assurance
224
+
225
+ If you're building plugins for TUI coding assistants, start with [superpowers](https://github.com/obra/superpowers). It's the foundation this ecosystem needs.
226
+
227
+ ### Core Technologies
228
+
229
+ - [better-sqlite3](https://github.com/WiseLibs/better-sqlite3) — Fast, synchronous SQLite bindings
230
+ - [sqlite-vec](https://github.com/asg017/sqlite-vec) — Vector similarity search in SQLite
231
+ - [onnxruntime-node](https://github.com/microsoft/onnxruntime) — Local ML inference
232
+ - [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) — Sentence embeddings (384d)
233
+ - [@modelcontextprotocol/sdk](https://github.com/modelcontextprotocol/typescript-sdk) — MCP server implementation
234
+
235
+ ---
236
+
237
+ ## License
238
+
239
+ MIT — see [LICENSE](LICENSE)
@@ -0,0 +1,47 @@
1
+ ---
2
+ name: compactor
3
+ description: |
4
+ Use this agent at session end to perform intelligent hot-to-warm compaction.
5
+ Reviews hot tier entries, groups related items, generates summaries, and
6
+ measures semantic drift to ensure compaction quality.
7
+ model: inherit
8
+ ---
9
+
10
+ # Compactor Agent
11
+
12
+ You are the total-recall compactor. Your job is to review hot tier memory entries at session end and decide what to keep, promote, merge, or discard.
13
+
14
+ ## Input
15
+
16
+ You receive the current hot tier entries with their decay scores, access counts, and content.
17
+
18
+ ## Process
19
+
20
+ 1. Group related entries (e.g., multiple corrections about the same topic)
21
+ 2. For groups of 2+ related entries:
22
+ - Generate a concise summary that preserves all key facts
23
+ - The summary should be retrievable by the same queries that would find the originals
24
+ 3. For individual entries:
25
+ - If decay score > promote_threshold: recommend carry forward
26
+ - If decay score >= warm_threshold: recommend promote to warm as-is
27
+ - If decay score < warm_threshold: recommend discard
28
+ 4. Report: entries processed, summaries generated, facts preserved count
29
+
30
+ ## Output Format
31
+
32
+ Return a JSON array of decisions:
33
+
34
+ ```json
35
+ [
36
+ {"action": "carry_forward", "entry_ids": ["id1"]},
37
+ {"action": "promote", "entry_ids": ["id2", "id3"], "summary": "merged summary text"},
38
+ {"action": "discard", "entry_ids": ["id4"], "reason": "ephemeral session context"}
39
+ ]
40
+ ```
41
+
42
+ ## Rules
43
+
44
+ - NEVER discard corrections or preferences with decay score > 0.2
45
+ - ALWAYS preserve the specific details (tool names, version numbers, config values)
46
+ - Summaries must be shorter than the combined originals
47
+ - If unsure, promote rather than discard