prism-mem 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prism_mem-0.1.0/.claude/CLAUDE.md +200 -0
- prism_mem-0.1.0/.claude/context.md +141 -0
- prism_mem-0.1.0/.claude/plan.md +271 -0
- prism_mem-0.1.0/.claude/settings.local.json +31 -0
- prism_mem-0.1.0/.cursorrules +39 -0
- prism_mem-0.1.0/.gitignore +9 -0
- prism_mem-0.1.0/AGENTS.md +32 -0
- prism_mem-0.1.0/CLAUDE.md +44 -0
- prism_mem-0.1.0/LICENSE +21 -0
- prism_mem-0.1.0/PKG-INFO +146 -0
- prism_mem-0.1.0/README.md +116 -0
- prism_mem-0.1.0/prism_mem/__init__.py +1 -0
- prism_mem-0.1.0/prism_mem/cli.py +275 -0
- prism_mem-0.1.0/prism_mem/config.py +54 -0
- prism_mem-0.1.0/prism_mem/constitution/__init__.py +0 -0
- prism_mem-0.1.0/prism_mem/constitution/generator.py +153 -0
- prism_mem-0.1.0/prism_mem/extraction/__init__.py +0 -0
- prism_mem-0.1.0/prism_mem/extraction/extractor.py +77 -0
- prism_mem-0.1.0/prism_mem/ingestion/__init__.py +0 -0
- prism_mem-0.1.0/prism_mem/ingestion/git_reader.py +35 -0
- prism_mem-0.1.0/prism_mem/ingestion/session_reader.py +158 -0
- prism_mem-0.1.0/prism_mem/linking/__init__.py +0 -0
- prism_mem-0.1.0/prism_mem/linking/linker.py +148 -0
- prism_mem-0.1.0/prism_mem/server/__init__.py +0 -0
- prism_mem-0.1.0/prism_mem/server/mcp_server.py +84 -0
- prism_mem-0.1.0/prism_mem/server/ui_server.py +325 -0
- prism_mem-0.1.0/prism_mem/storage/__init__.py +0 -0
- prism_mem-0.1.0/prism_mem/storage/db.py +147 -0
- prism_mem-0.1.0/prism_mem/storage/models.py +23 -0
- prism_mem-0.1.0/pyproject.toml +44 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# Prism — CLAUDE.md
|
|
2
|
+
|
|
3
|
+
## What This Project Is
|
|
4
|
+
|
|
5
|
+
Prism is a post-session knowledge crystallizer for AI coding agents. It reads Claude Code session transcripts (JSONL files) and git history, extracts structured semantic knowledge as triples, links them into a graph, and auto-regenerates the context files that every agent reads at session start (CLAUDE.md, .cursorrules, AGENTS.md).
|
|
6
|
+
|
|
7
|
+
**The one-liner:** Every coding session leaves behind artifacts. Prism reads them and turns them into structured, linked, reusable knowledge — automatically, with no manual input from the user.
|
|
8
|
+
|
|
9
|
+
**Package name:** `prism-mem` on PyPI. The CLI command is `prism`.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Architecture
|
|
14
|
+
|
|
15
|
+
### Data Flow
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
[User ends a coding session / makes a git commit]
|
|
19
|
+
↓
|
|
20
|
+
[git post-commit hook calls: prism crystallize --project .]
|
|
21
|
+
↓
|
|
22
|
+
[1] INGEST
|
|
23
|
+
- Read ~/.claude/projects/<encoded-path>/<session-uuid>.jsonl
|
|
24
|
+
- Read: git diff HEAD~1 HEAD + git log --oneline -20
|
|
25
|
+
↓
|
|
26
|
+
[2] EXTRACT
|
|
27
|
+
- Feed text chunks to kg-gen
|
|
28
|
+
- kg-gen returns NetworkX graph of (subject, predicate, object) triples
|
|
29
|
+
↓
|
|
30
|
+
[3] STORE + LINK
|
|
31
|
+
- Embed each triple locally via sentence-transformers (`all-MiniLM-L6-v2`, 384-dim)
|
|
32
|
+
- Store in SQLite (sqlite-vec) at ~/.prism/projects/<hash>/graph.db
|
|
33
|
+
- For each new triple: query sqlite-vec for nearest existing triples
|
|
34
|
+
- Create edges where similarity > threshold
|
|
35
|
+
- Detect staleness: same subject+predicate, different object → flag old triple
|
|
36
|
+
↓
|
|
37
|
+
[4] GENERATE
|
|
38
|
+
- Score all triples (recency + confidence)
|
|
39
|
+
- Take top-N triples → Claude Haiku → write CLAUDE.md
|
|
40
|
+
- Write same knowledge to .cursorrules and AGENTS.md formats
|
|
41
|
+
↓
|
|
42
|
+
[graph.db updated] + [constitution files written to project root]
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Storage Layout
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
~/.prism/
|
|
49
|
+
└── projects/
|
|
50
|
+
└── <project-hash>/
|
|
51
|
+
└── graph.db ← SQLite + sqlite-vec, one per project
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Two tables:
|
|
55
|
+
- `triples`: id, subject, predicate, object, confidence, embedding (blob), session_id, timestamp, stale (bool)
|
|
56
|
+
- `edges`: from_id, to_id, edge_type, weight
|
|
57
|
+
|
|
58
|
+
### MCP Server — 3 tools only, no more
|
|
59
|
+
|
|
60
|
+
| Tool | What it does |
|
|
61
|
+
|---|---|
|
|
62
|
+
| `get_context()` | Returns current CLAUDE.md content — injected at session start |
|
|
63
|
+
| `query_knowledge(question)` | Semantic search over the triple graph |
|
|
64
|
+
| `crystallize(session_id)` | Manually trigger extraction for a session |
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## Project Structure
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
prism-mem/
|
|
72
|
+
├── CLAUDE.md ← You are here
|
|
73
|
+
├── plan.md ← Build phases and completion criteria
|
|
74
|
+
├── pyproject.toml ← Package config, deps, entry point
|
|
75
|
+
├── requirements.txt ← Dev dependencies (for venv)
|
|
76
|
+
├── prism_mem/
|
|
77
|
+
│ ├── __init__.py
|
|
78
|
+
│ ├── cli.py ← Click CLI: prism serve / ui / crystallize / hook / config
|
|
79
|
+
│ ├── ingestion/
|
|
80
|
+
│ │ ├── __init__.py
|
|
81
|
+
│ │ ├── session_reader.py ← Read ~/.claude/projects/ JSONL files
|
|
82
|
+
│ │ └── git_reader.py ← Read git diff + log via subprocess
|
|
83
|
+
│ ├── extraction/
|
|
84
|
+
│ │ ├── __init__.py
|
|
85
|
+
│ │ └── extractor.py ← kg-gen integration, chunking, triple output
|
|
86
|
+
│ ├── storage/
|
|
87
|
+
│ │ ├── __init__.py
|
|
88
|
+
│ │ ├── db.py ← SQLite setup, schema, migrations
|
|
89
|
+
│ │ └── models.py ← Triple and Edge dataclasses
|
|
90
|
+
│ ├── linking/
|
|
91
|
+
│ │ ├── __init__.py
|
|
92
|
+
│ │ └── linker.py ← Embed triples, find similar, create edges, staleness
|
|
93
|
+
│ ├── constitution/
|
|
94
|
+
│ │ ├── __init__.py
|
|
95
|
+
│ │ └── generator.py ← Score triples → Haiku → write CLAUDE.md etc.
|
|
96
|
+
│ ├── server/
|
|
97
|
+
│ │ ├── __init__.py
|
|
98
|
+
│ │ ├── mcp_server.py ← FastMCP server with 3 tools
|
|
99
|
+
│ │ └── ui_server.py ← FastAPI + Pyvis UI at localhost:7823
|
|
100
|
+
│ └── config.py ← Paths, constants, TOML config (load_config, get_model_string, get_api_key, is_config_complete, validate_provider)
|
|
101
|
+
└── tests/
|
|
102
|
+
└── ...
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## Tech Stack — Do Not Deviate
|
|
108
|
+
|
|
109
|
+
| Layer | Tool | Reason |
|
|
110
|
+
|---|---|---|
|
|
111
|
+
| Triple extraction | `kg-gen` | Already built, LLM + clustering, do not re-implement |
|
|
112
|
+
| Vector storage | `sqlite-vec` | Local-first, no cloud, no ChromaDB |
|
|
113
|
+
| In-memory graph | `networkx` | Interops directly with kg-gen output |
|
|
114
|
+
| Graph visualization | `pyvis` | Generates self-contained vis.js HTML from NetworkX, no hand-written JS needed |
|
|
115
|
+
| MCP server | `fastmcp` | uvx-friendly, decorator-based |
|
|
116
|
+
| Web UI | `fastapi` + `uvicorn` | Serves pyvis HTML + constitution at localhost:7823 |
|
|
117
|
+
| CLI | `click` | Standard, clean |
|
|
118
|
+
| LLM calls | `litellm` | Multi-provider abstraction — Anthropic, OpenAI, Gemini, Ollama, etc. |
|
|
119
|
+
| Config | `~/.prism/config.toml` + built-in `tomllib` | Flat TOML, no external TOML dep, provider validated at set-time |
|
|
120
|
+
|
|
121
|
+
No Memorix dependency. No LangChain. No ChromaDB. No cloud services. Everything runs locally.
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Key Decisions — Do Not Second-Guess These
|
|
126
|
+
|
|
127
|
+
**Local-first.** All data lives in `~/.prism/`. No network calls except to the configured LLM provider (Anthropic by default). Users own their data.
|
|
128
|
+
|
|
129
|
+
**sqlite-vec, not ChromaDB.** One file, no server, no Docker. sqlite-vec is a SQLite extension that adds vector similarity search. It is sufficient for this use case.
|
|
130
|
+
|
|
131
|
+
**kg-gen for extraction.** Do not write a custom triple extractor. kg-gen handles chunking, LLM calls, and entity clustering. Trust it.
|
|
132
|
+
|
|
133
|
+
**3 MCP tools only.** `get_context`, `query_knowledge`, `crystallize`. Do not add tools. Scope is the whole point.
|
|
134
|
+
|
|
135
|
+
**LiteLLM for all LLM calls.** Multi-provider via a single abstraction layer. Config stored in `~/.prism/config.toml` (provider + model + api_key). Default is Anthropic Haiku — fast and cheap. Provider validated at `prism config set` time using `litellm.provider_list`. extractor.py passes `provider/model` string to kg-gen (already dspy.LM / LiteLLM-compatible). generator.py uses `litellm.completion()` directly with OpenAI-compatible response format.
|
|
136
|
+
|
|
137
|
+
**No Memorix dependency.** Prism is completely independent. Different database, different MCP server, different storage path. They are complementary but not coupled.
|
|
138
|
+
|
|
139
|
+
**Constitution generates 3 files.** CLAUDE.md, .cursorrules, and AGENTS.md from the same triple set, formatted differently for each agent.
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Build Order
|
|
144
|
+
|
|
145
|
+
Follow this order. Do not skip ahead. Each phase must produce testable output before moving to the next.
|
|
146
|
+
|
|
147
|
+
1. **Project setup** — pyproject.toml, prism_mem/ package skeleton, move session_reader.py in
|
|
148
|
+
2. **Session reader** — decode project path, find JSONL, parse events, return clean text chunks
|
|
149
|
+
3. **Git reader** — subprocess calls to git, return diff + recent log as text
|
|
150
|
+
4. **Extraction (kg-gen)** — chunk text, call kg-gen, inspect triple quality, iterate on chunking
|
|
151
|
+
5. **Storage** — SQLite schema, sqlite-vec setup, store triples + embeddings
|
|
152
|
+
6. **Linking** — embed triples, find similar via sqlite-vec, create edges, stale detection
|
|
153
|
+
7. **Constitution generator** — score triples, call Haiku, write CLAUDE.md / .cursorrules / AGENTS.md
|
|
154
|
+
8. **CLI** — `prism crystallize` wires phases 2–7 end to end
|
|
155
|
+
9. **Git hook** — `prism hook install` writes the post-commit hook
|
|
156
|
+
10. **MCP server** — FastMCP with 3 tools
|
|
157
|
+
11. **Graph UI** — FastAPI + Pyvis, 3 routes: /graph, /memory, /constitution
|
|
158
|
+
|
|
159
|
+
Phase 4 is the critical validation gate. If kg-gen produces poor-quality triples from real Claude Code sessions, the entire write path needs to change. Do not build phases 5–11 before validating phase 4 output on real data.
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## What Prism Is Not
|
|
164
|
+
|
|
165
|
+
- Not a memory store (Memorix does that)
|
|
166
|
+
- Not a retrieval system (agentmemory does that)
|
|
167
|
+
- Not a multi-agent orchestrator
|
|
168
|
+
- Not a team coordination tool
|
|
169
|
+
- Not a chat interface
|
|
170
|
+
- Not a cloud service
|
|
171
|
+
|
|
172
|
+
If a feature request does not directly serve "read session → extract triples → regenerate constitution," it does not belong in the v1 scope.
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## Environment
|
|
177
|
+
|
|
178
|
+
- Python 3.13
|
|
179
|
+
- Virtual env at `.venv/`
|
|
180
|
+
- LLM provider configured via `prism config set provider/model/api-key` (stored in `~/.prism/config.toml`)
|
|
181
|
+
|
|
182
|
+
## Current State
|
|
183
|
+
|
|
184
|
+
**Phases complete: 1–11, plus follow-up enhancements (multi-provider config, code review fixes, UI enhancements). Next: Shipping.**
|
|
185
|
+
|
|
186
|
+
### Done (summary)
|
|
187
|
+
- **Phase 1**: Package skeleton, `pyproject.toml`, CLI stubs, `config.py`, `models.py`
|
|
188
|
+
- **Phase 2**: `session_reader.py` — parses JSONL transcripts + subagents, chunk schema: `{role, content_type, content, timestamp, session_id, source}`
|
|
189
|
+
- **Phase 3**: `git_reader.py` — `read_git_diff`, `read_git_log`, graceful on all edge cases
|
|
190
|
+
- **Phase 4**: `extractor.py` — kg-gen + Haiku via LiteLLM, `extract_triples(text, context) -> list[tuple]`, `cluster=True`. Validated: 161 triples, good quality.
|
|
191
|
+
- **Phase 5**: `db.py` — SQLite + sqlite-vec, `open_db`, `store_triple`, `get_all_triples`, `get_triple_by_id`, `mark_stale`. Embeddings via `sentence-transformers/all-MiniLM-L6-v2` (384-dim, local/free). DB at `~/.prism/projects/<hash>/graph.db`.
|
|
192
|
+
- **Phase 6**: `linker.py` — `ingest_triple` (store → link → stale), `find_similar` (KNN via sqlite-vec, cosine sim from L2), `create_edge`, `check_and_mark_stale`. Order: link first while old triples are still non-stale, then mark stale.
|
|
193
|
+
- **Phase 7**: `generator.py` — `write_constitution(project_path)`, `select_top_triples` (score = recency + confidence, top 30), `generate_claude_md/cursorrules/agents_md` via Haiku. Verified on real triples.
|
|
194
|
+
- **Phase 8**: `cli.py` — `prism crystallize` wires all phases end-to-end. Progress output at each step, graceful errors, `--session` flag. Verified: 385 triples, 3 files written. Note: pipeline takes ~10min (kg-gen API calls are the bottleneck).
|
|
195
|
+
- **Phase 9**: `cli.py` hook group — `prism hook install` writes `.git/hooks/post-commit` (shebang + prism block), appends if hook exists, idempotent. `prism hook uninstall` strips prism block, removes file if empty. Verified all three cases.
|
|
196
|
+
- **Phase 10**: `mcp_server.py` — FastMCP server with 3 tools: `get_context` (reads CLAUDE.md), `query_knowledge` (embeds question → sqlite-vec KNN → returns top-5 triples with cosine similarity), `crystallize` (spawns `prism crystallize` in background, returns immediately). `prism serve --project .` wires it. Verified all 3 tools via `mcp.call_tool`.
|
|
197
|
+
- **Phase 11**: `ui_server.py` — FastAPI + Pyvis at localhost:7823. Three routes: `/constitution` (CLAUDE.md in `<pre>` + Regenerate button via POST), `/memory` (searchable table of all 385 triples, active/stale badges, JS filter), `/graph` (Pyvis force-directed graph with nav injected, vis.js network). `prism ui --project . [--no-browser]` wires it. Verified: 200 on all routes, 385 total / 241 active shown, vis.js loaded in graph.
|
|
198
|
+
- **Multi-provider config**: `config.py` refactored — removed `ANTHROPIC_API_KEY`/`HAIKU_MODEL` constants, replaced with `load_config()`, `get_model_string()`, `get_api_key()`, `is_config_complete()`, `validate_provider()` reading from `~/.prism/config.toml`. `generator.py` migrated from `anthropic.Anthropic` to `litellm.completion()`. `extractor.py` uses config accessors. `cli.py` adds `prism config set/show` commands with provider validation; `crystallize` checks `is_config_complete()` with clear setup instructions. `pyproject.toml`: removed `anthropic` direct dep, added `litellm>=1.0.0`.
|
|
199
|
+
- **Code review fixes**: removed unused imports (`timezone` in generator.py), dead variables (`_CURATED_PROVIDERS` in config.py, `_VALID_KEYS` in cli.py); `ui_server.py` regenerate endpoint now surfaces errors instead of silently swallowing; `mcp_server.py` crystallize tool checks `is_config_complete()` before spawning subprocess.
|
|
200
|
+
- **UI enhancements**: `/memory` table gains `Session` column (8-char truncation, full ID on hover via `title`). `/graph` node click shows a fixed sidebar listing all contributing session IDs with active/stale badges — data embedded as JSON at page-load, injected vis.js `click` listener accesses Pyvis's `var network` global. `/constitution` adds a `Copy` button beside Regenerate — reads `pre.innerText` via `navigator.clipboard.writeText`, shows `Copied!` for 1.5s.
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# Prism — Implementation Context
|
|
2
|
+
|
|
3
|
+
Quick reference for coding agents. Read this before touching any file.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## What's built vs what's a stub
|
|
8
|
+
|
|
9
|
+
| File | Status | Notes |
|
|
10
|
+
|---|---|---|
|
|
11
|
+
| `prism_mem/__init__.py` | stub | just `__version__` |
|
|
12
|
+
| `prism_mem/config.py` | done | paths, model names, constants |
|
|
13
|
+
| `prism_mem/cli.py` | **done** | `crystallize`, `serve`, `ui`, `hook` (install/uninstall), and `config` (set/show) all wired |
|
|
14
|
+
| `prism_mem/ingestion/session_reader.py` | **done** | fully implemented, tested on real data |
|
|
15
|
+
| `prism_mem/ingestion/git_reader.py` | **done** | `read_git_diff`, `read_git_log` |
|
|
16
|
+
| `prism_mem/extraction/extractor.py` | **done** | `extract_triples(text, context) -> list[tuple]` |
|
|
17
|
+
| `prism_mem/storage/models.py` | **done** | `Triple` and `Edge` dataclasses |
|
|
18
|
+
| `prism_mem/storage/db.py` | **done** | `open_db`, `store_triple`, `get_all_triples`, `get_triple_by_id`, `mark_stale` |
|
|
19
|
+
| `prism_mem/linking/linker.py` | **done** | `ingest_triple`, `find_similar`, `create_edge`, `check_and_mark_stale`, `link_triple` |
|
|
20
|
+
| `prism_mem/constitution/generator.py` | **done** | `write_constitution`, `select_top_triples`, `generate_claude_md/cursorrules/agents_md` |
|
|
21
|
+
| `prism_mem/server/mcp_server.py` | **done** | FastMCP server with 3 tools: `get_context`, `query_knowledge`, `crystallize` |
|
|
22
|
+
| `prism_mem/server/ui_server.py` | **done** | FastAPI + Pyvis UI at localhost:7823 with routes `/graph`, `/memory`, `/constitution` |
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## session_reader.py — public API
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from prism_mem.ingestion.session_reader import (
|
|
30
|
+
read_latest_session, # (project_path: str) -> list[dict]
|
|
31
|
+
read_session_by_id, # (project_path: str, session_id: str) -> list[dict]
|
|
32
|
+
list_sessions, # (project_path: str) -> list[Path] newest first
|
|
33
|
+
parse_session, # (jsonl_path: Path) -> list[dict]
|
|
34
|
+
parse_jsonl, # (jsonl_path: Path, source: str) -> list[dict]
|
|
35
|
+
encode_project_path, # (project_path: str) -> str
|
|
36
|
+
find_project_sessions, # (project_path: str) -> Path
|
|
37
|
+
)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
**Chunk schema:**
|
|
41
|
+
```python
|
|
42
|
+
{
|
|
43
|
+
"role": "user" | "assistant",
|
|
44
|
+
"content_type": "text" | "thinking" | "summary",
|
|
45
|
+
"content": str,
|
|
46
|
+
"timestamp": str, # ISO8601
|
|
47
|
+
"session_id": str, # UUID from the JSONL line
|
|
48
|
+
"source": "main" | "<agent-id>", # "main" = top-level session, agent-id = subagent
|
|
49
|
+
}
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
**What's included / excluded:**
|
|
53
|
+
- Included: user text, assistant text, assistant thinking blocks, summary events (compaction)
|
|
54
|
+
- Excluded: tool_use, tool_result, file-history-snapshot, hook events, system injections
|
|
55
|
+
- Subagent transcripts at `<uuid>/subagents/*.jsonl` are merged in, tagged with `source=<agent-id>`
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## config.py — key constants
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from prism_mem.config import (
|
|
63
|
+
PRISM_HOME, # ~/.prism
|
|
64
|
+
PROJECTS_DIR, # ~/.prism/projects
|
|
65
|
+
CLAUDE_PROJECTS_DIR, # ~/.claude/projects
|
|
66
|
+
get_api_key, # accessor replacing the removed ANTHROPIC_API_KEY constant; reads ~/.prism/config.toml
|
|
67
|
+
SIMILARITY_THRESHOLD, # 0.85
|
|
68
|
+
TOP_TRIPLES_FOR_CONSTITUTION, # 30
|
|
69
|
+
get_model_string, # accessor replacing the removed HAIKU_MODEL constant; returns the configured "provider/model" string
|
|
70
|
+
UI_PORT, # 7823
|
|
71
|
+
)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## storage/models.py — dataclasses
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
@dataclass
|
|
80
|
+
class Triple:
|
|
81
|
+
subject: str
|
|
82
|
+
predicate: str
|
|
83
|
+
object: str
|
|
84
|
+
confidence: float = 1.0
|
|
85
|
+
embedding: bytes = b""
|
|
86
|
+
session_id: str = ""
|
|
87
|
+
timestamp: datetime = field(default_factory=datetime.utcnow)
|
|
88
|
+
stale: bool = False
|
|
89
|
+
id: int = 0
|
|
90
|
+
|
|
91
|
+
@dataclass
|
|
92
|
+
class Edge:
|
|
93
|
+
from_id: int
|
|
94
|
+
to_id: int
|
|
95
|
+
edge_type: str
|
|
96
|
+
weight: float
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Environment
|
|
102
|
+
|
|
103
|
+
- Python 3.13.5, venv at `.venv/`
|
|
104
|
+
- **Always run with:** `DYLD_LIBRARY_PATH=/opt/homebrew/opt/expat/lib .venv/bin/python`
|
|
105
|
+
(macOS pyexpat linking workaround — required or imports fail)
|
|
106
|
+
- LLM provider, model, and API key must be configured via `prism config set` (stored in `~/.prism/config.toml`) for any LLM calls
|
|
107
|
+
- Package installed editable: `pip install -e .` already done
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## Running session_reader manually
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
# from project root
|
|
115
|
+
DYLD_LIBRARY_PATH=/opt/homebrew/opt/expat/lib .venv/bin/python prism_mem/ingestion/session_reader.py
|
|
116
|
+
# or pass a project path:
|
|
117
|
+
DYLD_LIBRARY_PATH=/opt/homebrew/opt/expat/lib .venv/bin/python prism_mem/ingestion/session_reader.py /path/to/project
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## git_reader.py — public API
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from prism_mem.ingestion.git_reader import read_git_diff, read_git_log
|
|
126
|
+
|
|
127
|
+
read_git_diff(project_path: str) -> str # git diff HEAD~1 HEAD, "" on any error/edge case
|
|
128
|
+
read_git_log(project_path: str, n: int = 20) -> str # git log --oneline -N
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Both return `""` safely for: not a git repo, no commits, single commit (no HEAD~1).
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## What's next (Phase 4 — critical gate)
|
|
136
|
+
|
|
137
|
+
Implement `prism_mem/extraction/extractor.py` using kg-gen:
|
|
138
|
+
- Chunk the combined text (session chunks + git diff + log)
|
|
139
|
+
- Call kg-gen on each chunk with the Anthropic backend
|
|
140
|
+
- Collect the NetworkX graph of `(subject, predicate, object)` triples
|
|
141
|
+
- **Do not proceed to Phase 5 until triple quality is validated on real sessions**
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
# Prism — Build Plan
|
|
2
|
+
|
|
3
|
+
Each phase has a clear completion test. Do not move to the next phase until the current one passes its test.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Phase 1 — Project Setup ✅ DONE
|
|
8
|
+
|
|
9
|
+
**Goal:** A proper Python package that can be installed and run as a CLI.
|
|
10
|
+
|
|
11
|
+
**What to do:**
|
|
12
|
+
- Create `pyproject.toml` with package metadata, dependencies, and the `prism` entry point
|
|
13
|
+
- Create `prism_mem/` directory with `__init__.py`
|
|
14
|
+
- Create all subdirectory stubs: `ingestion/`, `extraction/`, `storage/`, `linking/`, `constitution/`, `server/`
|
|
15
|
+
- Move `ingestion/session_reader.py` into `prism_mem/ingestion/session_reader.py`
|
|
16
|
+
- Create `prism_mem/config.py` for paths and constants
|
|
17
|
+
- Create `prism_mem/cli.py` with stub Click commands: `crystallize`, `serve`, `ui`, `hook`
|
|
18
|
+
|
|
19
|
+
**Done when:**
|
|
20
|
+
- `pip install -e .` works with no errors
|
|
21
|
+
- `prism --help` runs and shows the available commands
|
|
22
|
+
- All stub commands run without crashing (they can just print "not implemented yet")
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Phase 2 — Session Reader ✅ DONE
|
|
27
|
+
|
|
28
|
+
**Goal:** Given a project path, find and parse the most recent Claude Code session into clean text chunks.
|
|
29
|
+
|
|
30
|
+
**What to do:**
|
|
31
|
+
- Implement the project path encoder (slashes → hyphens)
|
|
32
|
+
- Implement the session directory finder: look in `~/.claude/projects/<encoded-path>/`
|
|
33
|
+
- List JSONL files sorted by modification time (newest first)
|
|
34
|
+
- Parse JSONL: one JSON object per line
|
|
35
|
+
- Extract text chunks from `type: "assistant"` events: pull `text` blocks and `thinking` blocks from `message.content`
|
|
36
|
+
- Extract text chunks from `type: "user"` events: pull the user's question
|
|
37
|
+
- Return a list of chunks with role, type, content, and timestamp
|
|
38
|
+
|
|
39
|
+
**Done when:**
|
|
40
|
+
- Point it at a real project that has Claude Code sessions
|
|
41
|
+
- It prints the chunks cleanly: role, content preview, timestamp
|
|
42
|
+
- The output is readable English, not raw JSON
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Phase 3 — Git Reader ✅ DONE
|
|
47
|
+
|
|
48
|
+
**Goal:** Given a project path, return the last commit's diff and recent commit history as text.
|
|
49
|
+
|
|
50
|
+
**What to do:**
|
|
51
|
+
- Use `subprocess` to call `git diff HEAD~1 HEAD` in the project directory
|
|
52
|
+
- Use `subprocess` to call `git log --oneline -20`
|
|
53
|
+
- Handle the case where there are no commits or git is not initialized
|
|
54
|
+
- Return both as plain text strings
|
|
55
|
+
|
|
56
|
+
**Done when:**
|
|
57
|
+
- Run it against any git repo
|
|
58
|
+
- The diff output and log output print cleanly as text
|
|
59
|
+
- No crashes on edge cases (new repo, no commits, not a git repo)
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Phase 4 — Extraction (Critical Validation Gate) ✅ DONE
|
|
64
|
+
|
|
65
|
+
**Goal:** Feed real session text to kg-gen and evaluate the quality of the triples it returns.
|
|
66
|
+
|
|
67
|
+
**What to do:**
|
|
68
|
+
- Install kg-gen
|
|
69
|
+
- Chunk the session text (kg-gen has a per-call input limit — find it and chunk accordingly)
|
|
70
|
+
- Call kg-gen on each chunk with the appropriate LLM backend (Anthropic)
|
|
71
|
+
- Collect the NetworkX graph output
|
|
72
|
+
- Print every triple: subject, predicate, object
|
|
73
|
+
|
|
74
|
+
**Done when:**
|
|
75
|
+
- Run it on 2–3 real Claude Code sessions from different projects
|
|
76
|
+
- The triples are specific and meaningful (e.g., `(auth module, uses, JWT)` not `(it, does, things)`)
|
|
77
|
+
- Entities are being clustered correctly (same thing not appearing as 3 different node names)
|
|
78
|
+
- You feel confident the triples would be useful 6 months from now
|
|
79
|
+
|
|
80
|
+
**If the triples are low quality:**
|
|
81
|
+
- Try different chunking strategies (smaller chunks, overlap, no overlap)
|
|
82
|
+
- Try different kg-gen configuration options
|
|
83
|
+
- Try filtering the input text (assistant-only, skip tool_use, include thinking)
|
|
84
|
+
- Do not move to Phase 5 until you are satisfied with triple quality on real data
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## Phase 5 — Storage ✅ DONE
|
|
89
|
+
|
|
90
|
+
**Goal:** Persist triples and their embeddings in a local SQLite database with vector search.
|
|
91
|
+
|
|
92
|
+
**What to do:**
|
|
93
|
+
- Install sqlite-vec
|
|
94
|
+
- Define the `Triple` dataclass: id, subject, predicate, object, confidence, embedding, session_id, timestamp, stale
|
|
95
|
+
- Define the `Edge` dataclass: from_id, to_id, edge_type, weight
|
|
96
|
+
- Create `~/.prism/projects/<project-hash>/graph.db` on first use
|
|
97
|
+
- Implement `store_triple()`: embed the triple text, store row + embedding in sqlite-vec
|
|
98
|
+
- Implement `get_all_triples()` and `get_triple_by_id()`
|
|
99
|
+
|
|
100
|
+
**Done when:**
|
|
101
|
+
- Run Phase 4 and pipe the output into storage
|
|
102
|
+
- Query the DB with sqlite3 CLI and verify rows are there
|
|
103
|
+
- Verify embeddings are stored (not null, not empty)
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## Phase 6 — Linking ✅ DONE
|
|
108
|
+
|
|
109
|
+
**Goal:** When a new triple is stored, find related existing triples and connect them. Detect stale facts.
|
|
110
|
+
|
|
111
|
+
**What to do:**
|
|
112
|
+
- Implement `find_similar(triple, top_k=5)`: query sqlite-vec for nearest neighbors by embedding
|
|
113
|
+
- Implement `create_edge(from_id, to_id, edge_type, weight)`: write to edges table
|
|
114
|
+
- Link triples where similarity > 0.85
|
|
115
|
+
- Implement staleness: before storing a new triple, check if any existing triple has the same subject and predicate but a different object — if so, set `stale = True` on the old one
|
|
116
|
+
|
|
117
|
+
**Done when:**
|
|
118
|
+
- Store 20+ triples from a real session
|
|
119
|
+
- Print all edges and verify they represent meaningful connections (JWT links to auth module, etc.)
|
|
120
|
+
- Manually introduce a conflicting triple and verify the old one is flagged stale
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Phase 7 — Constitution Generator ✅ DONE
|
|
125
|
+
|
|
126
|
+
**Goal:** Read the knowledge graph and produce a CLAUDE.md that accurately describes the project.
|
|
127
|
+
|
|
128
|
+
**What to do:**
|
|
129
|
+
- Score every non-stale triple: `score = recency_weight + confidence + retrieval_count`
|
|
130
|
+
- Take the top 30 triples by score
|
|
131
|
+
- Format them as a structured prompt for Claude Haiku
|
|
132
|
+
- Call Haiku with the prompt and receive a CLAUDE.md
|
|
133
|
+
- Write the output to `<project-root>/CLAUDE.md`
|
|
134
|
+
- Also write `.cursorrules` (same content, simpler format for Cursor)
|
|
135
|
+
- Also write `AGENTS.md` (same content, Codex format)
|
|
136
|
+
|
|
137
|
+
**Done when:**
|
|
138
|
+
- Run it against a project you've been working in with Claude Code
|
|
139
|
+
- Open the generated CLAUDE.md
|
|
140
|
+
- The content is accurate: it correctly identifies your tech stack, conventions, and key decisions
|
|
141
|
+
- You would actually find it useful if you started a new session with it loaded
|
|
142
|
+
|
|
143
|
+
This is the payoff moment. If the generated constitution is good, everything is working.
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## Phase 8 — CLI (End-to-End Wire-Up) ✅ DONE
|
|
148
|
+
|
|
149
|
+
**Goal:** `prism crystallize` runs the full pipeline from ingestion to constitution in one command.
|
|
150
|
+
|
|
151
|
+
**What to do:**
|
|
152
|
+
- Wire `prism crystallize --project <path>` to: read session → read git → extract → store → link → generate
|
|
153
|
+
- Add a `--session <id>` flag to target a specific session instead of the most recent one
|
|
154
|
+
- Print progress to stdout: "Reading session...", "Extracting triples...", "Generated CLAUDE.md"
|
|
155
|
+
- Handle errors gracefully: missing sessions, no git repo, API errors
|
|
156
|
+
|
|
157
|
+
**Done when:**
|
|
158
|
+
- Run `prism crystallize` in any project that has Claude Code sessions
|
|
159
|
+
- CLAUDE.md, .cursorrules, and AGENTS.md appear (or are updated) in the project root
|
|
160
|
+
- The whole pipeline takes under 60 seconds
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Phase 9 — Git Hook ✅ DONE
|
|
165
|
+
|
|
166
|
+
**Goal:** Prism runs automatically after every commit without the user thinking about it.
|
|
167
|
+
|
|
168
|
+
**What to do:**
|
|
169
|
+
- Implement `prism hook install`: write `.git/hooks/post-commit` in the current project
|
|
170
|
+
- The hook calls `prism crystallize --project $(git rev-parse --show-toplevel)` in the background (`&`)
|
|
171
|
+
- Implement `prism hook uninstall`: remove the hook
|
|
172
|
+
|
|
173
|
+
**Done when:**
|
|
174
|
+
- Install the hook in a test repo
|
|
175
|
+
- Make a commit
|
|
176
|
+
- Within 60 seconds, CLAUDE.md is updated without you doing anything
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Phase 10 — MCP Server ✅ DONE
|
|
181
|
+
|
|
182
|
+
**Goal:** Any agent that supports MCP can call Prism's 3 tools.
|
|
183
|
+
|
|
184
|
+
**What to do:**
|
|
185
|
+
- Implement FastMCP server in `server/mcp_server.py`
|
|
186
|
+
- Tool 1: `get_context()` — returns the current CLAUDE.md content for the project
|
|
187
|
+
- Tool 2: `query_knowledge(question: str)` — embeds the question, queries sqlite-vec, returns top-5 matching triples as structured text
|
|
188
|
+
- Tool 3: `crystallize(session_id: str = None)` — triggers the full pipeline, returns confirmation
|
|
189
|
+
- Wire `prism serve` CLI command to start the FastMCP server in stdio mode
|
|
190
|
+
|
|
191
|
+
**Done when:**
|
|
192
|
+
- Add Prism to Claude Code: `claude mcp add prism -- prism serve`
|
|
193
|
+
- Start a new Claude Code session in a project
|
|
194
|
+
- Call `get_context()` via Claude Code and verify it returns the correct CLAUDE.md
|
|
195
|
+
- Call `query_knowledge("how does auth work")` and verify it returns relevant triples
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Phase 11 — Graph UI ✅ DONE
|
|
200
|
+
|
|
201
|
+
**Goal:** `prism ui` opens a local browser UI where the user can explore the knowledge graph.
|
|
202
|
+
|
|
203
|
+
**What to do:**
|
|
204
|
+
- Implement FastAPI app in `server/ui_server.py`
|
|
205
|
+
- Route `/graph`: load graph.db into NetworkX, render with Pyvis, serve the HTML
|
|
206
|
+
- Route `/memory`: serve a searchable table of all triples (HTML, no JS framework needed)
|
|
207
|
+
- Route `/constitution`: serve current CLAUDE.md content with a "Regenerate" button that calls the constitution generator
|
|
208
|
+
- Wire `prism ui` CLI command to start uvicorn and open `http://localhost:7823` in the browser
|
|
209
|
+
|
|
210
|
+
**Done when:**
|
|
211
|
+
- Run `prism ui`
|
|
212
|
+
- Browser opens at localhost:7823
|
|
213
|
+
- The force-directed graph is visible and interactive (draggable nodes, hoverable edges)
|
|
214
|
+
- The constitution tab shows the current CLAUDE.md with a working Regenerate button
|
|
215
|
+
|
|
216
|
+
**Enhancements added post-completion (see the Post-Shipping Enhancements section):**
|
|
217
|
+
- `/memory`: Session column with hover for full ID
|
|
218
|
+
- `/graph`: Node-click sidebar showing contributing sessions with active/stale status
|
|
219
|
+
- `/constitution`: Copy button with clipboard feedback
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## Shipping
|
|
224
|
+
|
|
225
|
+
**When all 11 phases pass their tests:**
|
|
226
|
+
- Finalize `pyproject.toml` (version 0.1.0, correct classifiers, description)
|
|
227
|
+
- Write `README.md`: one-liner, 3-command quickstart, architecture diagram, link to PyPI
|
|
228
|
+
- `uv build` + `uv publish` to PyPI
|
|
229
|
+
- Test `uvx prism-mem serve` from a fresh environment (no install needed)
|
|
230
|
+
- Add to Claude Code: `claude mcp add prism -- uvx prism-mem serve`
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## Post-Shipping Enhancements
|
|
235
|
+
|
|
236
|
+
Work completed after all 11 phases. None of these change the DB schema or add new routes.
|
|
237
|
+
|
|
238
|
+
### Multi-provider LLM config ✅ DONE
|
|
239
|
+
- `~/.prism/config.toml` stores `provider`, `model`, `api_key` (flat TOML, built-in `tomllib`)
|
|
240
|
+
- `config.py` exports `load_config`, `save_config`, `get_model_string` (`provider/model`), `get_api_key`, `is_config_complete`, `validate_provider` (uses `litellm.provider_list`)
|
|
241
|
+
- `generator.py` switched from `anthropic.Anthropic` client to `litellm.completion()`; response via `response.choices[0].message.content`
|
|
242
|
+
- `extractor.py` uses `get_model_string()` / `get_api_key()` — kg-gen/dspy.LM already speaks LiteLLM format
|
|
243
|
+
- `cli.py` adds `prism config set {provider|model|api-key}` (provider validated at set-time) and `prism config show` (api-key masked)
|
|
244
|
+
- `crystallize` now checks `is_config_complete()` and reports an actionable error instead of crashing on a missing env var
|
|
245
|
+
- `pyproject.toml`: removed `anthropic` direct dep, added `litellm>=1.0.0`
|
|
246
|
+
|
|
247
|
+
### Code review fixes ✅ DONE
|
|
248
|
+
- Removed unused `timezone` import in `generator.py`
|
|
249
|
+
- Removed dead `_CURATED_PROVIDERS` in `config.py`, `_VALID_KEYS` in `cli.py`
|
|
250
|
+
- `ui_server.py` `/constitution/regenerate` now returns error page instead of silently swallowing exceptions
|
|
251
|
+
- `mcp_server.py` `crystallize` tool checks `is_config_complete()` before spawning subprocess
|
|
252
|
+
|
|
253
|
+
### UI enhancements ✅ DONE
|
|
254
|
+
- `/memory`: added `Session` column after `Timestamp` — 8-char truncation with full ID in `title` attribute
|
|
255
|
+
- `/graph`: node click opens a fixed sidebar listing contributing session IDs with active/stale badges. Node info is built server-side from all triples (including stale ones), embedded in the page as JSON, and read by an injected vis.js `network.on('click', ...)` listener. `</` is escaped to `<\/` in the embedded JSON so the payload cannot prematurely close the enclosing `<script>` element.
|
|
256
|
+
- `/constitution`: added `Copy` button beside Regenerate. Reads `pre.innerText` and copies it to the clipboard with `navigator.clipboard.writeText`; the label swaps to `Copied!` for 1.5s, then restores.
|
|
257
|
+
|
|
258
|
+
---
|
|
259
|
+
|
|
260
|
+
## Scope Rules
|
|
261
|
+
|
|
262
|
+
If it is not in the 11 phases above, it is not in v1. Specifically:
|
|
263
|
+
|
|
264
|
+
- No multi-agent orchestration
|
|
265
|
+
- No team features
|
|
266
|
+
- No cloud sync or remote storage
|
|
267
|
+
- No chat interface
|
|
268
|
+
- No Memorix integration (that is v2, if ever)
|
|
269
|
+
- No support for agents other than Claude Code for ingestion (Cursor/Codex use the MCP read path, not ingestion)
|
|
270
|
+
|
|
271
|
+
The v1 goal is: one user, one machine, any agent that speaks MCP, automatic context regeneration after every commit.
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(python3 -m ensurepip --version)",
|
|
5
|
+
"Bash(python3 -c \"import ensurepip; print\\(ensurepip.__file__\\)\")",
|
|
6
|
+
"Bash(brew list *)",
|
|
7
|
+
"Bash(pyenv versions *)",
|
|
8
|
+
"Read(//usr/local/lib/**)",
|
|
9
|
+
"Read(//opt/homebrew/lib/python3.14/**)",
|
|
10
|
+
"Bash(/opt/homebrew/bin/python3.14 *)",
|
|
11
|
+
"Bash(otool *)",
|
|
12
|
+
"Read(//opt/homebrew/Cellar/expat/2.8.1/lib/**)",
|
|
13
|
+
"Bash(nm /opt/homebrew/opt/expat/lib/libexpat.dylib)",
|
|
14
|
+
"Bash(/Users/rahul/Desktop/Projects/prism-mem/.venv/bin/python *)",
|
|
15
|
+
"mcp__plugin_claude-mem_mcp-search__get_observations",
|
|
16
|
+
"Bash(curl -sS https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py)",
|
|
17
|
+
"Bash(DYLD_LIBRARY_PATH=/opt/homebrew/opt/expat/lib /Users/rahul/Desktop/Projects/prism-mem/.venv/bin/python /tmp/get-pip.py)",
|
|
18
|
+
"Bash(DYLD_LIBRARY_PATH=/opt/homebrew/opt/expat/lib .venv/bin/pip install *)",
|
|
19
|
+
"Bash(sort -k6 -r)",
|
|
20
|
+
"Bash(DYLD_LIBRARY_PATH=/opt/homebrew/opt/expat/lib .venv/bin/python *)",
|
|
21
|
+
"Bash(grep -v \"^$\\\\|LiteLLM\\\\|WARNING\\\\|INFO\")",
|
|
22
|
+
"Bash(grep -v \"^$\")",
|
|
23
|
+
"Bash(sqlite3 *)",
|
|
24
|
+
"Skill(claude-mem:make-plan)",
|
|
25
|
+
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://127.0.0.1:7823/graph)",
|
|
26
|
+
"Bash(curl -s http://127.0.0.1:7823/graph)",
|
|
27
|
+
"Bash(curl -s http://127.0.0.1:7823/memory)",
|
|
28
|
+
"Bash(kill %1)"
|
|
29
|
+
]
|
|
30
|
+
}
|
|
31
|
+
}
|