autoctxd 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/CHANGELOG.md +62 -0
  2. package/CONTRIBUTING.md +80 -0
  3. package/LICENSE +21 -0
  4. package/README.md +301 -0
  5. package/SECURITY.md +81 -0
  6. package/package.json +55 -0
  7. package/scripts/install-hooks.ts +80 -0
  8. package/scripts/install.ps1 +71 -0
  9. package/scripts/install.sh +67 -0
  10. package/scripts/uninstall-hooks.ts +57 -0
  11. package/src/ai/active-guard.ts +96 -0
  12. package/src/ai/adaptive-ranker.ts +48 -0
  13. package/src/ai/classifier.ts +256 -0
  14. package/src/ai/compressor.ts +129 -0
  15. package/src/ai/decision-chains.ts +100 -0
  16. package/src/ai/decision-extractor.ts +148 -0
  17. package/src/ai/pattern-detector.ts +147 -0
  18. package/src/ai/proactive.ts +78 -0
  19. package/src/cli/doctor.ts +171 -0
  20. package/src/cli/embeddings.ts +209 -0
  21. package/src/cli/index.ts +574 -0
  22. package/src/cli/reclassify.ts +134 -0
  23. package/src/context/builder.ts +97 -0
  24. package/src/context/formatter.ts +109 -0
  25. package/src/context/ranker.ts +84 -0
  26. package/src/db/sqlite/decisions.ts +56 -0
  27. package/src/db/sqlite/feedback.ts +92 -0
  28. package/src/db/sqlite/observations.ts +58 -0
  29. package/src/db/sqlite/schema.ts +366 -0
  30. package/src/db/sqlite/sessions.ts +50 -0
  31. package/src/db/sqlite/summaries.ts +69 -0
  32. package/src/db/vector/client.ts +134 -0
  33. package/src/db/vector/embeddings.ts +119 -0
  34. package/src/db/vector/providers/factory.ts +99 -0
  35. package/src/db/vector/providers/minilm.ts +90 -0
  36. package/src/db/vector/providers/ollama.ts +92 -0
  37. package/src/db/vector/providers/tfidf.ts +98 -0
  38. package/src/db/vector/providers/types.ts +39 -0
  39. package/src/db/vector/search.ts +131 -0
  40. package/src/hooks/post-tool-use.ts +205 -0
  41. package/src/hooks/pre-tool-use.ts +305 -0
  42. package/src/hooks/stop.ts +334 -0
  43. package/src/mcp/server.ts +293 -0
  44. package/src/server/dashboard.html +268 -0
  45. package/src/server/dashboard.ts +170 -0
  46. package/src/util/debug.ts +56 -0
  47. package/src/util/ignore.ts +171 -0
  48. package/src/util/metrics.ts +236 -0
  49. package/src/util/path.ts +57 -0
  50. package/tsconfig.json +14 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,62 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.4.0] - 2026-05-15
9
+
10
+ Quality pass before the first public release. Same surface area, three pre-existing bugs in the classifier and decision extractor fixed.
11
+
12
+ ### Fixed
13
+ - Decision extractor no longer captures `npm install` / `bun add` / `pip install` mentions in Monitor commands, task descriptions, and log streams as architectural decisions. The dependency detector now validates the captured token against a reject list (`output`, `progress`, `tsx`, `tests`, `dev`, `prod`, …) and rejects numeric-only tokens, very short bare words, and anything without alphabetic characters.
14
+ - Classifier filters internal harness tools (`Monitor`, `ToolSearch`, `TaskStop`, `TaskOutput`, `TodoWrite`) and Bash/PowerShell summaries that carry their payloads. These are still persisted but at importance 1, so they fall out of "top observations" and never get injected as recovered context.
15
+ - Bash/PowerShell exploration commands (`List …`, `Check …`, `Find …`, `git status`, `git log`, `git diff`, `git blame`, …) are now classified as `research` with low importance instead of dropping to `other`. PowerShell gets the same Bash heuristics it lacked before.
16
+ - Decisions no longer duplicate across sessions. A unique index on `(project_path, title)` plus `INSERT OR IGNORE` makes re-extraction idempotent.
17
+ - The decision extractor's "tool-prefixed noise" guard was extended to catch `Bash:` / `PowerShell:` / `Edit:` / `Edited ` / `Monitor:` / `ToolSearch:` / `TodoWrite` / `TaskStop:` / `TaskOutput:` summaries that previously slipped through when the classifier promoted them to type=decision via a stray keyword.
18
+
19
+ ### Added
20
+ - `autoctxd cleanup-decisions [--dry-run]` — deletes generic-word noise (`Added npm dep: output`, …) and tool-prefixed leftovers from the pre-0.4 extractor, and collapses any cross-session duplicates that predate the unique index. Idempotent; safe to re-run.
21
+ - Schema migration: cross-session duplicate decisions in legacy DBs are collapsed to the earliest row on open, then the unique index is enforced going forward.
22
+
23
+ ### Improved
24
+ - On the author's real DB, this collapsed the architectural-decisions feed from 142 noisy rows to 4 actual decisions.
25
+
26
+ ## [0.3.0] - 2026-05-15
27
+
28
+ First public release.
29
+
30
+ ### Added
31
+ - **MCP server** with 7 tools (`recall_decisions`, `recall_unfinished`, `search_memory`, `get_project_history`, `check_intent`, `record_feedback`, `record_decision`) for active memory recall in Claude Code, Cursor, Windsurf, Cline, and Claude Desktop.
32
+ - **Active Guard** — `check_intent` flags actions that contradict past decisions before they happen.
33
+ - **Feedback loop** — `record_feedback` lets the system learn which memories are useful, irrelevant, or wrong per user.
34
+ - **Pluggable embeddings** — TF-IDF (default, zero-install), MiniLM-L6-v2 via `@xenova/transformers` (opt-in), and Ollama for any local embedding model. Switch with `autoctxd embeddings switch`; cache is partitioned per provider.
35
+ - **Local web dashboard** at `http://localhost:4589` — metric cards, activity timeline, decision feed, decision chains, pattern panel, hybrid search.
36
+ - **Decision chains** — automatically detects sequences like `mysql → postgres → sqlite` across sessions.
37
+ - **Token-savings metrics** — accumulated estimate of tokens injected vs. tokens that would otherwise be re-explained.
38
+ - **One-command installer** for macOS, Linux (`install.sh`) and Windows (`install.ps1`) — bootstraps Bun if missing, clones the repo, installs deps, initializes the DB, registers the hooks in `settings.json` (non-destructive merge).
39
+ - **Health-check doctor** (`autoctxd doctor`) — validates runtime, deps, DB schema, LanceDB, hook registration, data directory, and accumulated metrics.
40
+ - **`.autoctxd-ignore`** — gitignore-style opt-out for sensitive projects (single `*` opts the whole project out).
41
+ - **Cross-session pattern detection** — tool preferences, work-type focus, file hotspots, TDD vs. fix-first, peak coding hours (after 3+ sessions per project).
42
+ - **Hybrid search** — semantic (vector) + full-text (FTS5) with project filter.
43
+ - **Re-classification** — `autoctxd reclassify` re-runs the classifier on old observations after improvements.
44
+ - **Export** — `autoctxd export ./my-app > context.md` for project memory as markdown.
45
+
46
+ ### Stack
47
+ - Bun runtime
48
+ - SQLite + FTS5 for structured storage and full-text search
49
+ - LanceDB for vector storage (dim adapts to active provider)
50
+ - Keyword + tool/file heuristics for classification (no LLM calls)
51
+
52
+ ### Privacy
53
+ - No telemetry. No cloud. No network calls during operation.
54
+ - Everything in `~/.claude/autoctxd/data/` — delete to wipe.
55
+ - See [SECURITY.md](SECURITY.md) for the full threat model.
56
+
57
+ ### Testing
58
+ - 75 tests, isolated temp DB (never touches real memory).
59
+ - Cross-provider embeddings benchmark (`bun run benchmark:embeddings`).
60
+ - Tests cover classifier, compressor, decision extractor, decision chains, ignore matcher, embedding providers, and end-to-end integration.
61
+
62
+ [0.4.0]: https://github.com/autoctxd/autoctxd/releases/tag/v0.4.0
+ [0.3.0]: https://github.com/autoctxd/autoctxd/releases/tag/v0.3.0
package/CONTRIBUTING.md ADDED
@@ -0,0 +1,80 @@
1
+ # Contributing to autoctxd
2
+
3
+ Thanks for the interest. This is a small, opinionated project — keep PRs focused and the scope tight.
4
+
5
+ ## Dev setup
6
+
7
+ Requires [Bun](https://bun.sh) ≥ 1.1 and Git.
8
+
9
+ ```bash
10
+ git clone https://github.com/autoctxd/autoctxd.git
11
+ cd autoctxd
12
+ bun install
13
+ bun run src/cli/index.ts init # creates an isolated DB at ./data/autoctxd.db
14
+ bun test
15
+ ```
16
+
17
+ The test suite uses an isolated temp DB — it never touches your real `~/.claude/autoctxd/data/`.
18
+
19
+ ## Running the hooks against a real Claude Code session
20
+
21
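+ 1. Install hooks pointing at your dev clone. The script registers the clone it is run from and edits the `settings.json` that sits in the clone's parent directory, so keep the clone directly under `~/.claude`: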
22
+ ```bash
23
+ AUTOCTXD_REPO_DIR="$(pwd)" bun run scripts/install-hooks.ts
24
+ ```
25
+ 2. Set `AUTOCTXD_DEBUG=1` so every hook invocation appends to `data/debug.log`.
26
+ 3. Start a Claude Code session in any project. `tail -f data/debug.log` while you work.
27
+ 4. To uninstall: `bun run scripts/uninstall-hooks.ts`.
28
+
29
+ ## Running the MCP server standalone
30
+
31
+ ```bash
32
+ bun run src/mcp/server.ts
33
+ ```
34
+
35
+ It speaks stdio MCP. Easiest way to poke it is to wire it into Claude Desktop / Cursor / Cline (see `docs/INTEGRATIONS.md`) and call the tools from there.
36
+
37
+ ## Project layout
38
+
39
+ ```
40
+ src/
41
+ hooks/ pre-tool-use, post-tool-use, stop — the passive capture path
42
+ mcp/server.ts 7 MCP tools — the active query path
43
+ ai/ classifier, compressor, decision extractor, active-guard, patterns
44
+ context/ builder + ranker + formatter for the injected context block
45
+ db/sqlite/ schema, observations, sessions, decisions, summaries, feedback
46
+ db/vector/ LanceDB client, embeddings (128-dim TF-IDF hashing), search
47
+ cli/index.ts all CLI commands
48
+ util/ path normalization, debug logger, metrics, ignore matcher
49
+ scripts/ install / uninstall, demo seeders, observation cleanup
50
+ tests/ bun test — classifier, compressor, decisions, integration, ignore
51
+ docs/ INTEGRATIONS.md + assets
52
+ ```
53
+
54
+ ## What kind of PRs are welcome
55
+
56
+ - Bug fixes (with a regression test).
57
+ - Better classification / heuristics. Add a test case in `tests/classifier.test.ts` showing the before/after.
58
+ - New CLI commands that surface data already in the DB.
59
+ - Editor integrations beyond the five already documented.
60
+ - Performance improvements with a measurement, not a guess.
61
+
62
+ ## What is out of scope (for now)
63
+
64
+ - Cloud sync, accounts, or any network call during operation. The "100% local, zero API" promise is the product.
65
+ - Swapping SQLite or LanceDB for something else.
66
+ - Heavyweight ML dependencies. The MiniLM upgrade on the roadmap is intentionally bounded to `transformers.js` running locally.
67
+
68
+ If you're not sure whether something is in scope, open an issue first and we'll talk before you write code.
69
+
70
+ ## PR checklist
71
+
72
+ - [ ] `bun test` passes
73
+ - [ ] `bunx tsc --noEmit` is clean
74
+ - [ ] New behavior has a test
75
+ - [ ] Public API changes are reflected in the README
76
+ - [ ] Commits are scoped and have meaningful messages
77
+
78
+ ## License
79
+
80
+ By contributing, you agree your contributions will be licensed under the MIT License (see `LICENSE`).
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Eliel Jatib
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,301 @@
1
+ # autoctxd
2
+
3
+ **Persistent, local memory for Claude Code.**
4
+
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
6
+ [![Bun](https://img.shields.io/badge/runtime-Bun%20%E2%89%A51.1-black)](https://bun.sh)
7
+ [![CI](https://github.com/autoctxd/autoctxd/actions/workflows/ci.yml/badge.svg)](https://github.com/autoctxd/autoctxd/actions/workflows/ci.yml)
8
+ [![Zero API](https://img.shields.io/badge/API%20calls-0-success)](#privacy)
9
+ [![MCP](https://img.shields.io/badge/MCP-7%20tools-purple)](#how-it-works)
10
+ [![PRs welcome](https://img.shields.io/badge/PRs-welcome-orange)](CONTRIBUTING.md)
11
+
12
+ ![Active Guard Demo](docs/assets/active-guard-demo.gif)
13
+
14
+ Every Claude Code session starts from scratch. You re-explain your project, re-state decisions you already made, re-describe what you were in the middle of. `autoctxd` fixes that — silently, locally, with zero API cost.
15
+
16
+ - **100% local, zero API cost** — pure heuristics, no LLM calls, nothing leaves your machine
17
+ - Captures every tool use automatically through Claude Code hooks
18
+ - Injects the right ~600 tokens of context into each new session
19
+ - Tracks architectural decisions separately — they never get forgotten
20
+ - Detects work patterns across sessions
21
+ - **Active Guard** — flags actions that contradict past decisions, before you make them
22
+
23
+ ## Why autoctxd?
24
+
25
+ | | autoctxd | Mem0 / memori (cloud) | Plain CLAUDE.md |
26
+ |---|---|---|---|
27
+ | Where your code context lives | **Your disk** | Their servers | A static file you maintain |
28
+ | Cost per session | **$0** | API calls billed | $0 |
29
+ | Captures automatically | **Yes** (hooks) | Via SDK calls | No — you write it |
30
+ | Active recall during reasoning | **Yes** (7 MCP tools) | Yes | No |
31
+ | Flags decision contradictions | **Yes** (Active Guard) | No | No |
32
+ | Works offline | **Yes** | No | Yes |
33
+ | Cross-session patterns / hotspots | **Yes** | Limited | No |
34
+ | Setup | **One command** | Account + API key | Edit a file |
35
+
36
+ Built specifically for the **Claude Code workflow** — hooks, MCP, and a local dashboard, not a generic SDK you bolt onto something.
37
+
38
+ ## How it works
39
+
40
+ ```
41
+ Session 1 Session 2 (days later)
42
+ ───────── ─────────────────────
43
+ You work on auth refactor ───┐ ┌──► Claude already knows:
44
+ Claude uses Edit/Write/Bash │ │ • "Migrated from jwt to iron-session"
45
+ ▼ │ • Hot files: auth/session.ts
46
+ ┌──────────┐ │ • Last session left the login route
47
+ │ SQLite │ │ half-wired (blocked)
48
+ │ LanceDB │─────┘
49
+ └──────────┘ no re-explaining needed
50
+ ```
51
+
52
+ Two integration modes, use either or both:
53
+
54
+ **1. Passive (hooks, Claude Code only)** — injects context at session start.
55
+
56
+ | Hook | What it does |
57
+ |---|---|
58
+ | `PreToolUse` | Builds context block from past sessions and injects it |
59
+ | `PostToolUse` | Classifies and stores each tool use as an observation |
60
+ | `Stop` | Compresses the session, extracts decisions, detects patterns, generates embeddings |
61
+
62
+ **2. Active (MCP server, works everywhere)** — Claude/Cursor/Windsurf/Cline query memory **during** reasoning via 7 tools:
63
+
64
+ | Tool | What it enables |
65
+ |---|---|
66
+ | `recall_decisions` | Look up past architectural decisions for a project |
67
+ | `recall_unfinished` | Surface blockers from past sessions |
68
+ | `search_memory` | Semantic + FTS search across all your coding memory |
69
+ | `get_project_history` | Recent session summaries |
70
+ | **`check_intent`** | **Active Guard** — flags actions that contradict past decisions |
71
+ | `record_feedback` | Learns from "useful/irrelevant/wrong" signals |
72
+ | `record_decision` | Persists decisions made in-conversation (never forgotten) |
73
+
74
+ See [docs/INTEGRATIONS.md](docs/INTEGRATIONS.md) for setup in each editor.
75
+
76
+ Nothing leaves your machine.
77
+
78
+ ## Install
79
+
80
+ ### One-command install
81
+
82
+ **macOS / Linux**
83
+ ```bash
84
+ curl -fsSL https://raw.githubusercontent.com/autoctxd/autoctxd/main/scripts/install.sh | bash
85
+ ```
86
+
87
+ **Windows (PowerShell)**
88
+ ```powershell
89
+ irm https://raw.githubusercontent.com/autoctxd/autoctxd/main/scripts/install.ps1 | iex
90
+ ```
91
+
92
+ That's it — installs Bun if missing, clones the repo into `~/.claude/autoctxd`, installs deps, initializes the DB, and registers the three hooks in your existing `settings.json` (preserving anything already there).
93
+
94
+ Verify with `autoctxd doctor`. Uninstall cleanly with `bun run scripts/uninstall-hooks.ts`.
95
+
96
+ ### Manual install
97
+
98
+ If you prefer step-by-step, requires [Bun](https://bun.sh) and Claude Code:
99
+
100
+ ```bash
101
+ cd ~/.claude
102
+ git clone https://github.com/autoctxd/autoctxd.git
103
+ cd autoctxd
104
+ bun install
105
+ bun run src/cli/index.ts init
106
+ bun run scripts/install-hooks.ts # merges hooks into settings.json
107
+ bun run src/cli/index.ts doctor # verify
108
+ ```
109
+
110
+ <details>
111
+ <summary>Or if you want to add the hooks manually, here's the block:</summary>
112
+
113
+ ```json
114
+ {
115
+ "hooks": {
116
+ "PreToolUse": [{
117
+ "matcher": "",
118
+ "hooks": [{
119
+ "type": "command",
120
+ "command": "bun run ~/.claude/autoctxd/src/hooks/pre-tool-use.ts"
121
+ }]
122
+ }],
123
+ "PostToolUse": [{
124
+ "matcher": "",
125
+ "hooks": [{
126
+ "type": "command",
127
+ "command": "bun run ~/.claude/autoctxd/src/hooks/post-tool-use.ts"
128
+ }]
129
+ }],
130
+ "Stop": [{
131
+ "matcher": "",
132
+ "hooks": [{
133
+ "type": "command",
134
+ "command": "bun run ~/.claude/autoctxd/src/hooks/stop.ts"
135
+ }]
136
+ }]
137
+ }
138
+ }
139
+ ```
140
+
141
+
142
+ </details>
143
+
144
+ Restart Claude Code. The next session starts collecting. The one after that starts getting context.
145
+
146
+ ## CLI
147
+
148
+ ```bash
149
+ # What has autoctxd learned about you?
150
+ bun run src/cli/index.ts stats
151
+
152
+ # Every decision you've made across projects
153
+ bun run src/cli/index.ts decisions
154
+
155
+ # Hybrid search (semantic + full-text) across everything
156
+ bun run src/cli/index.ts search "race condition async"
157
+
158
+ # Drill into one session
159
+ bun run src/cli/index.ts show session <session-id>
160
+
161
+ # Patterns detected in your workflow
162
+ bun run src/cli/index.ts patterns
163
+
164
+ # Export a project's memory to markdown
165
+ bun run src/cli/index.ts export ./my-app > context.md
166
+
167
+ # Re-run the classifier on old observations (after classifier improvements)
168
+ bun run src/cli/index.ts reclassify
169
+
170
+ # Token accounting: what autoctxd has saved you
171
+ bun run src/cli/index.ts metrics
172
+
173
+ # Launch the local web dashboard
174
+ bun run src/cli/index.ts dashboard
175
+ # → http://localhost:4589
176
+
177
+ # Health check your install
178
+ bun run src/cli/index.ts doctor
179
+ ```
180
+
181
+ ## Dashboard
182
+
183
+ ![Dashboard](docs/assets/dashboard.png)
184
+
185
+ Running `autoctxd dashboard` launches a local web UI at `http://localhost:4589`:
186
+
187
+ - Metric cards: sessions, observations, decisions, projects, context hit rate, avg explore calls, tokens saved
188
+ - Activity timeline of every observation across all projects
189
+ - Architectural decision feed with alternatives and rationale
190
+ - **Decision chains** — automatically detected sequences like `mysql → postgres → sqlite` across sessions
191
+ - Pattern panel (tool preferences, work focus, hotspots, TDD vs fix-first, peak coding hours)
192
+ - Hybrid search (semantic + full-text) with project filter
193
+
194
+ Everything read-only, everything local. No accounts, no network.
195
+
196
+ ## Debug mode
197
+
198
+ Set `AUTOCTXD_DEBUG=1` to log every hook invocation to `data/debug.log`:
199
+
200
+ ```bash
201
+ AUTOCTXD_DEBUG=1 claude # then tail data/debug.log
202
+ ```
203
+
204
+ You'll see what was injected, why, and timings.
205
+
206
+ ## What gets captured
207
+
208
+ Each `PostToolUse` becomes an **observation**, classified as one of:
209
+
210
+ `bug_fix` · `refactor` · `new_feature` · `config` · `research` · `test` · `decision` · `blocked` · `deploy` · `other`
211
+
212
+ The classifier is pure heuristics. It scores each observation 0–10 for importance. Critical files (`auth`, `payment`, `migration`, `schema`) get boosted; docs and READMEs get demoted.
213
+
214
+ **Decisions** are first-class. Every `bun add`, `npm install`, `cargo add` becomes a stack decision. Every "switched from X to Y" becomes an architectural decision. These never get compressed away.
215
+
216
+ **Patterns** emerge after 3+ sessions in the same project: tool preferences, work-type focus, file hotspots, TDD vs. fix-first workflows, peak coding hours.
217
+
218
+ ## What gets injected
219
+
220
+ At session start, `autoctxd` assembles a context block from:
221
+
222
+ - **Architectural decisions** on this project (all of them, always)
223
+ - **Recent session highlights** (top 3 sessions by recency)
224
+ - **Semantically similar past sessions** (cross-project if relevant)
225
+ - **Hot files** (modified 3+ times recently)
226
+ - **Your patterns in this project**
227
+
228
+ Format is compact — targets ~600-800 tokens so it doesn't crowd your context window.
229
+
230
+ ## Privacy
231
+
232
+ Everything stays in `~/.claude/autoctxd/data/`. Delete the folder to wipe all memory. No telemetry, no cloud, no network calls during operation.
233
+
234
+ For sensitive projects, drop a `.autoctxd-ignore` file at the project root. Patterns follow a `.gitignore`-style subset — a single `*` opts the whole project out. See [SECURITY.md](SECURITY.md) for the full threat model and the list of what does/doesn't get persisted.
235
+
236
+ ## Embedding providers
237
+
238
+ Pick the right trade-off for your machine and threat model. **TF-IDF is the default** — zero install pain, deterministic, fast. Upgrade when you want richer retrieval.
239
+
240
+ | Provider | Dim | Latency | Discrimination | Setup |
241
+ |---|---:|---:|---:|---|
242
+ | `tfidf` *(default)* | 128 | **0.1 ms** | gap 0.170, 92% wins | nothing — built in |
243
+ | `minilm` | 384 | 4.5 ms | gap 0.621, **100% wins** | `autoctxd embeddings switch minilm --yes` (~25MB model download on first use) |
244
+ | `ollama` | 768 | varies | run `bun run benchmark:embeddings` on your box | Requires [Ollama](https://ollama.com) running locally with `ollama pull nomic-embed-text` |
245
+
246
+ *Numbers from `scripts/benchmark-embeddings.ts` over 12 dev-text pairs (anchor / paraphrase / unrelated). "Wins" is the share of pairs where the model puts the paraphrase closer to the anchor than the unrelated text. Higher = better.*
247
+
248
+ ```bash
249
+ # What's active and what's available?
250
+ autoctxd embeddings list
251
+ autoctxd embeddings status
252
+
253
+ # Switch — re-embeds everything you've stored, automatically
254
+ autoctxd embeddings switch minilm --yes
255
+
256
+ # Or pin via env (no persistence)
257
+ AUTOCTXD_EMBEDDING=minilm autoctxd ...
258
+ ```
259
+
260
+ The cache is partitioned by provider, so switching back is cheap. `@xenova/transformers` is an `optionalDependency` — if `bun install` fails to fetch it, MiniLM is just unavailable and TF-IDF keeps working.
261
+
262
+ ## Stack
263
+
264
+ | Component | Tech |
265
+ |---|---|
266
+ | Runtime | Bun |
267
+ | Structured DB | SQLite + FTS5 (full-text search) |
268
+ | Vector DB | LanceDB (dim adapts to active provider) |
269
+ | Embeddings | TF-IDF (default) · MiniLM-L6-v2 via transformers.js · Ollama (any embedding model) |
270
+ | Classification | Keyword + tool/file heuristics |
271
+
272
+ ## Roadmap
273
+
274
+ - [x] Local web dashboard
275
+ - [x] Decision chains across sessions
276
+ - [x] Token savings metrics
277
+ - [x] One-command installer for all platforms
278
+ - [x] Health-check doctor
279
+ - [x] Integration test suite
280
+ - [x] **MCP server mode** — Claude queries memory actively, not just at session start
281
+ - [x] **Active Guard** — flags actions that contradict past decisions
282
+ - [x] **Feedback loop** — system learns what's useful to each user
283
+ - [x] **Multi-editor** — Cursor, Windsurf, Cline, Claude Desktop, Claude Code
284
+ - [x] **Pluggable embeddings** — TF-IDF default, MiniLM and Ollama opt-in
285
+ - [x] **`.autoctxd-ignore`** — gitignore-style opt-out for sensitive projects
286
+ - [ ] Codebase awareness — integrate git blame + AST analysis
287
+ - [ ] Predictive context — surface what you'll need next based on patterns
288
+
289
+ ## Testing
290
+
291
+ ```bash
292
+ bun test # 75 tests, isolated temp DB
293
+ bun run benchmark:embeddings # cross-provider quality + latency
294
+ bun run typecheck # tsc --noEmit
295
+ ```
296
+
297
+ Coverage spans the classifier, compressor, decision extractor, decision chains, ignore matcher, embedding providers, and an end-to-end integration flow. Tests never touch your real memory.
298
+
299
+ ## License
300
+
301
+ MIT.
package/SECURITY.md ADDED
@@ -0,0 +1,81 @@
1
+ # Security & Privacy
2
+
3
+ `autoctxd` is designed around a single promise: **nothing leaves your machine.** This document explains exactly what that means in practice, what is and isn't persisted, and how to report a vulnerability.
4
+
5
+ ## Privacy model
6
+
7
+ | Property | Status |
8
+ |---|---|
9
+ | Network calls during operation | None |
10
+ | Telemetry | None |
11
+ | Third-party API keys required | None |
12
+ | LLM calls (yours or ours) | None — classification is heuristic, and embeddings are computed locally (TF-IDF by default; MiniLM and Ollama are opt-in and run on your machine) |
13
+ | Cloud sync | None |
14
+ | Data location | `~/.claude/autoctxd/data/` (or `$AUTOCTXD_DATA_DIR` if set) |
15
+
16
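+ The only network activity in the project comes from the one-shot installer scripts (cloning the repo, installing Bun if missing) and, if you opt into the MiniLM embedding provider, a one-time model download (~25MB) on first use. With the default TF-IDF provider, the runtime is fully offline once installed.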
17
+
18
+ ## What gets persisted
19
+
20
+ The hooks intercept Claude Code tool calls and store **summaries** — not raw content:
21
+
22
+ - **Edit / Write**: the file path plus the first ~80 characters of old/new snippets. The full file content is **not** stored.
23
+ - **Bash**: the command's `description` field if present, otherwise the first command, truncated to 200 characters. **Stdout and stderr are never stored.**
24
+ - **WebFetch / WebSearch / Agent**: prompt and URL/query, truncated to ~160 characters.
25
+ - **Read / Glob / Grep**: counted for metrics only — never stored as observations.
26
+
27
+ The `tool_response` field exists in the hook payload but is intentionally never read. See `src/hooks/post-tool-use.ts`.
28
+
29
+ Generated artifacts that live in `~/.claude/autoctxd/data/`:
30
+
31
+ - `autoctxd.db` — SQLite (sessions, observations, decisions, summaries, FTS index, feedback, MCP access log)
32
+ - `vectors/` — LanceDB vector index of session embeddings
33
+ - `error.log` / `debug.log` — diagnostic logs (only present after errors or when `AUTOCTXD_DEBUG=1`)
34
+
35
+ ## Opting out for sensitive projects
36
+
37
+ Drop a `.autoctxd-ignore` file at a project's root. Patterns follow a `.gitignore`-style subset (no negation):
38
+
39
+ ```
40
+ # anything matching these is dropped before being written to the DB
41
+ .env
42
+ .env.*
43
+ *.pem
44
+ *.key
45
+ secrets/
46
+ config/credentials.*
47
+ ```
48
+
49
+ Use `*` (or `**`) on a line by itself to opt the entire project out.
50
+
51
+ The matcher lives in `src/util/ignore.ts` and runs inside the post-tool-use hook before any DB write.
52
+
53
+ ## Wiping all memory
54
+
55
+ Everything is in one folder. Delete it:
56
+
57
+ ```bash
58
+ rm -rf ~/.claude/autoctxd/data/
59
+ ```
60
+
61
+ Or run `bun run src/cli/index.ts reset` for a structured reset that re-creates an empty database.
62
+
63
+ ## Reporting a vulnerability
64
+
65
+ Please **do not** open a public GitHub issue for security problems.
66
+
67
+ Email **jatibeliel@gmail.com** with:
68
+
69
+ - A description of the issue and impact
70
+ - Steps to reproduce
71
+ - The commit hash you tested against
72
+
73
+ You can expect an initial response within 5 days. If the issue is confirmed, a fix will be prioritized over feature work and credited in the release notes (unless you prefer to remain anonymous).
74
+
75
+ ## Threat model — what `autoctxd` does not protect against
76
+
77
+ Being clear about scope:
78
+
79
+ - **A compromised local machine.** If an attacker has read access to `~/.claude/autoctxd/data/`, they can read your observation history. Treat it like any other file in your home directory.
80
+ - **Malicious MCP clients.** The MCP server trusts its caller. Don't expose it over the network.
81
+ - **Hostile prompts in tool inputs.** The classifier is heuristic and won't sanitize adversarial content embedded in commit messages, file contents, or commands. Summaries are length-capped, but the design assumption is that you are the only user driving Claude Code on your machine.
package/package.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "name": "autoctxd",
3
+ "version": "0.4.1",
4
+ "description": "Persistent, local, zero-API memory for Claude Code.",
5
+ "type": "module",
6
+ "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "git+https://github.com/autoctxd/autoctxd.git"
10
+ },
11
+ "homepage": "https://github.com/autoctxd/autoctxd#readme",
12
+ "bugs": {
13
+ "url": "https://github.com/autoctxd/autoctxd/issues"
14
+ },
15
+ "preferGlobal": true,
16
+ "engines": {
17
+ "bun": ">=1.1.0"
18
+ },
19
+ "scripts": {
20
+ "ctx": "bun run src/cli/index.ts",
21
+ "test": "bun test",
22
+ "typecheck": "bunx tsc --noEmit",
23
+ "dashboard": "bun run src/cli/index.ts dashboard",
24
+ "doctor": "bun run src/cli/index.ts doctor",
25
+ "install-hooks": "bun run scripts/install-hooks.ts",
26
+ "uninstall-hooks": "bun run scripts/uninstall-hooks.ts",
27
+ "benchmark:embeddings": "bun run scripts/benchmark-embeddings.ts"
28
+ },
29
+ "bin": {
30
+ "autoctxd": "src/cli/index.ts"
31
+ },
32
+ "keywords": [
33
+ "claude",
34
+ "claude-code",
35
+ "memory",
36
+ "context",
37
+ "local-first",
38
+ "sqlite",
39
+ "lancedb",
40
+ "ai",
41
+ "coding-assistant",
42
+ "mcp",
43
+ "embeddings"
44
+ ],
45
+ "dependencies": {
46
+ "@lancedb/lancedb": "^0.27.2",
47
+ "@modelcontextprotocol/sdk": "^1.29.0"
48
+ },
49
+ "optionalDependencies": {
50
+ "@xenova/transformers": "^2.17.2"
51
+ },
52
+ "devDependencies": {
53
+ "@types/bun": "^1.3.14"
54
+ }
55
+ }
package/scripts/install-hooks.ts ADDED
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env bun
2
+ // Merges autoctxd hooks into ~/.claude/settings.json, preserving any existing
3
+ // hooks the user has configured. Idempotent: running twice does the same thing.
4
+
5
+ import { existsSync, readFileSync, writeFileSync, copyFileSync } from "fs";
6
+ import { join, resolve } from "path";
7
+
8
// Filesystem layout, derived from the location of this script:
//   CTX_ROOT   — the directory one level above this script's directory
//   CLAUDE_DIR — the directory one level above CTX_ROOT
//   SETTINGS   — settings.json inside CLAUDE_DIR (the file this script edits)
// NOTE(review): CLAUDE_DIR is purely positional, so it is only ~/.claude when
// the checkout lives directly under ~/.claude — confirm that is intended for
// checkouts placed elsewhere.
const CTX_ROOT = resolve(import.meta.dir, "..");
const CLAUDE_DIR = resolve(CTX_ROOT, "..");
const SETTINGS = join(CLAUDE_DIR, "settings.json");

// Hook name (key under "hooks" in settings.json) → hook script path,
// relative to CTX_ROOT.
const HOOK_MAPPING: Record<string, string> = {
  PreToolUse: "src/hooks/pre-tool-use.ts",
  PostToolUse: "src/hooks/post-tool-use.ts",
  Stop: "src/hooks/stop.ts",
};
17
+
18
/**
 * Reports whether a hook entry belongs to autoctxd.
 *
 * The entry is serialized to JSON and searched for the substring "autoctxd",
 * so a match anywhere in the entry (matcher, command, any nested field)
 * counts.
 *
 * @param entry - One element of a hook list.
 * @returns `true` when the serialized entry contains "autoctxd"; `false`
 *   otherwise, including for values JSON cannot serialize.
 */
function isClaudeCtxHookEntry(entry: unknown): boolean {
  // JSON.stringify yields undefined (not a string) for values it cannot
  // serialize, such as `undefined` or a function. Treat those as "not ours"
  // instead of throwing on `.includes`.
  const serialized = JSON.stringify(entry) as string | undefined;
  return serialized !== undefined && serialized.includes("autoctxd");
}
21
+
22
/**
 * Builds the hook entry that runs one autoctxd hook script with Bun.
 *
 * @param relativePath - Hook script path relative to CTX_ROOT.
 * @returns An entry with an empty matcher and a single "command" hook whose
 *   command is `bun run <absolute script path>`.
 */
function buildHookEntry(relativePath: string): any {
  // Backslashes are rewritten to forward slashes in the stored path.
  const abs = join(CTX_ROOT, relativePath).replace(/\\/g, "/");

  // An unquoted path containing whitespace (e.g. "C:/Users/Jane Doe/...")
  // would be split into several arguments when the command line is parsed.
  // Quote only when needed, so the command for whitespace-free paths stays
  // exactly what earlier versions wrote.
  const scriptArg = /\s/.test(abs) ? `"${abs}"` : abs;

  return {
    matcher: "",
    hooks: [
      {
        type: "command",
        command: `bun run ${scriptArg}`,
      },
    ],
  };
}
34
+
35
/**
 * Adds one autoctxd entry per hook listed in HOOK_MAPPING to SETTINGS.
 *
 * Steps:
 *  1. If SETTINGS exists, copy it to a timestamped backup and parse it.
 *  2. Check that the parsed value, its "hooks" member and each targeted hook
 *     list have the expected shape (object / object / array).
 *  3. For each hook, drop entries recognized by isClaudeCtxHookEntry, append a
 *     fresh entry from buildHookEntry, and keep every other entry.
 *  4. Write the result back as 2-space-indented JSON with a trailing newline.
 *
 * If the file cannot be parsed or has an unexpected shape, the process exits
 * with code 1 before SETTINGS is written.
 */
async function main(): Promise<void> {
  let cfg: any = {};

  if (existsSync(SETTINGS)) {
    // Backup before modifying
    const backup = `${SETTINGS}.backup.${Date.now()}`;
    copyFileSync(SETTINGS, backup);
    console.log(` Backed up existing settings.json → ${backup}`);

    try {
      cfg = JSON.parse(readFileSync(SETTINGS, "utf8"));
    } catch (e) {
      console.error(` Cannot parse existing settings.json: ${e}`);
      console.error(` Keeping current file as-is. Edit it manually or delete it to proceed.`);
      process.exit(1);
    }
  }

  const isPlainObject = (value: unknown): boolean =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  // Valid JSON is not necessarily an object (`null`, `[]`, `"text"`, ...).
  // Refuse to continue rather than crash or attach properties to an array.
  if (!isPlainObject(cfg)) {
    console.error(` settings.json does not contain a JSON object at the top level.`);
    console.error(` Keeping current file as-is. Edit it manually or delete it to proceed.`);
    process.exit(1);
  }

  cfg.hooks ||= {};
  if (!isPlainObject(cfg.hooks)) {
    console.error(` "hooks" in settings.json is not a JSON object.`);
    console.error(` Keeping current file as-is. Edit it manually or delete it to proceed.`);
    process.exit(1);
  }

  let installed = 0;
  let replaced = 0;

  for (const [hookName, relPath] of Object.entries(HOOK_MAPPING)) {
    const current = cfg.hooks[hookName] || [];
    if (!Array.isArray(current)) {
      console.error(` "hooks.${hookName}" in settings.json is not a list.`);
      console.error(` Keeping current file as-is. Edit it manually or delete it to proceed.`);
      process.exit(1);
    }

    // Strip any pre-existing autoctxd entries — a fresh one is appended below,
    // so re-running the script does not accumulate duplicates.
    const kept = current.filter((e: any) => !isClaudeCtxHookEntry(e));
    replaced += current.length - kept.length;

    kept.push(buildHookEntry(relPath));
    cfg.hooks[hookName] = kept;
    installed++;
  }

  writeFileSync(SETTINGS, JSON.stringify(cfg, null, 2) + "\n");

  console.log(`\n Installed ${installed} autoctxd hooks in ${SETTINGS}`);
  if (replaced > 0) {
    console.log(` Replaced ${replaced} previous autoctxd hook entries.`);
  }
  console.log(`\n Restart Claude Code for hooks to take effect.`);
}

// Report any failure (e.g. a filesystem error) and exit non-zero instead of
// leaving the promise returned by main() unhandled.
main().catch((err: unknown) => {
  console.error(` install-hooks failed: ${err instanceof Error ? err.message : String(err)}`);
  process.exit(1);
});