imprint-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ # imprint-memory: Persistent memory system for Claude Code
imprint_memory/bus.py ADDED
@@ -0,0 +1,53 @@
1
+ """
2
+ Message bus.
3
+ Shared log for messages sent/received across different sources.
4
+ """
5
+
6
+ import os
7
+ from .db import _get_db, now_str
8
+
9
+ MESSAGE_BUS_LIMIT = int(os.environ.get("MESSAGE_BUS_LIMIT", 40))
10
+
11
+
12
def bus_post(source: str, direction: str, content: str) -> None:
    """Append one message to the shared bus, pruning entries beyond the limit.

    Args:
        source: free-form origin label (e.g. cc, chat, api, webhook).
        direction: "in" for received, "out" for sent.
        content: message body; truncated to 200 characters.
    """
    # Keep entries short: anything over 200 chars is cut and marked with "...".
    truncated = content if len(content) <= 200 else content[:197] + "..."

    conn = _get_db()
    conn.execute(
        "INSERT INTO message_bus (source, direction, content, created_at) VALUES (?, ?, ?, ?)",
        (source, direction, truncated, now_str()),
    )
    # Retain only the newest MESSAGE_BUS_LIMIT rows; older ones are dropped.
    conn.execute(
        "DELETE FROM message_bus WHERE id NOT IN (SELECT id FROM message_bus ORDER BY id DESC LIMIT ?)",
        (MESSAGE_BUS_LIMIT,),
    )
    conn.commit()
    conn.close()
31
+
32
+
33
def bus_read(limit: int = 20) -> list[dict]:
    """Return up to *limit* recent bus messages as dicts, oldest first."""
    conn = _get_db()
    recent = conn.execute(
        "SELECT source, direction, content, created_at FROM message_bus ORDER BY id DESC LIMIT ?",
        (limit,),
    ).fetchall()
    conn.close()
    # Rows arrive newest-first; flip them so callers read chronologically.
    return [dict(row) for row in reversed(recent)]
42
+
43
+
44
def bus_format(limit: int = 20) -> str:
    """Render recent bus messages as a text block for context injection."""
    entries = bus_read(limit)
    if not entries:
        return "(No recent messages)"
    out = ["# Recent Messages\n"]
    for entry in entries:
        # Right arrow for sent, left arrow for received.
        marker = "\u2192" if entry["direction"] == "out" else "\u2190"
        out.append(f"[{entry['created_at']}] [{entry['source']}] {marker} {entry['content']}")
    return "\n".join(out)
@@ -0,0 +1,390 @@
1
+ """
2
+ Chat Sync Receiver — HTTP endpoint that ingests conversations from the
3
+ imprint-chat-sync browser extension (or any client posting to /api/ingest).
4
+
5
+ Run with:
6
+ imprint-memory-receiver # listens on 127.0.0.1:8001
7
+ PORT=9001 imprint-memory-receiver # custom port
8
+
9
+ Pipeline per ingest call:
10
+ POST /api/ingest (browser extension)
11
+
12
+ log_message() (writes to conversation_log)
13
+ ↓ background task
14
+ embed_new_messages() (Gemini Embedding 2 per message)
15
+
16
+ detect_topic_shifts() (adjacent user-msg cosine → topic edges)
17
+
18
+ incremental_chunk_update() (chunk → summarize → embed → top-K edges)
19
+
20
+ The receiver shares the same SQLite database as the main imprint-memory server
21
+ (configured via IMPRINT_DATA_DIR / IMPRINT_DB).
22
+ """
23
+
24
+ import math
25
+ import os
26
+ import re
27
+ import struct
28
+ import threading
29
+ import time
30
+
31
+ from starlette.applications import Starlette
32
+ from starlette.background import BackgroundTask
33
+ from starlette.middleware.cors import CORSMiddleware
34
+ from starlette.responses import JSONResponse
35
+ from starlette.routing import Route
36
+
37
+ from .conversation import log_message, get_recent
38
+ from .db import _get_db as _get_app_db
39
+
40
+ DEFAULT_PORT = 8001
41
+ THINK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)
42
+ EMBED_DELAY = float(os.environ.get("IMPRINT_RECEIVER_EMBED_DELAY", "0.7"))
43
+ SHIFT_THRESHOLD = float(os.environ.get("IMPRINT_RECEIVER_SHIFT_THRESHOLD", "0.50"))
44
+ RECEIVER_HOST_ENV = os.environ.get("IMPRINT_RECEIVER_HOST", os.environ.get("HOST", "127.0.0.1"))
45
+ RECEIVER_PORT_ENV = int(os.environ.get("IMPRINT_RECEIVER_PORT", os.environ.get("PORT", DEFAULT_PORT)))
46
+ RECEIVER_CORS_ORIGIN_REGEX = os.environ.get(
47
+ "IMPRINT_RECEIVER_CORS_ORIGIN_REGEX",
48
+ r"^chrome-extension://.*$",
49
+ )
50
+
51
+
52
+ def _blob_to_vec(blob):
53
+ """Decode a float32 embedding blob."""
54
+ return list(struct.unpack(f"{len(blob)//4}f", blob))
55
+
56
+
57
+ def _cosine_sim(a, b):
58
+ """Return cosine similarity for same-length vectors."""
59
+ if len(a) != len(b):
60
+ return 0.0
61
+ dot = sum(x * y for x, y in zip(a, b))
62
+ na = math.sqrt(sum(x * x for x in a))
63
+ nb = math.sqrt(sum(x * x for x in b))
64
+ return dot / (na * nb) if na and nb else 0.0
65
+
66
+
67
def _get_db():
    """Open the shared app database and ensure receiver-owned tables exist."""
    conn = _get_app_db()
    # Per-message embedding vectors: packed float32 blobs keyed by message id.
    conn.execute("""CREATE TABLE IF NOT EXISTS conversation_vectors (
        msg_id INTEGER PRIMARY KEY,
        embedding BLOB NOT NULL,
        model TEXT DEFAULT 'gemini-embedding-2'
    )""")
    # Topic-shift edges between adjacent user messages within a session.
    conn.execute("""CREATE TABLE IF NOT EXISTS conversation_edges (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        msg_before INTEGER NOT NULL,
        msg_after INTEGER NOT NULL,
        session_id TEXT DEFAULT '',
        similarity REAL NOT NULL,
        strength REAL DEFAULT 1.0,
        surfaced_count INTEGER DEFAULT 0,
        last_surfaced_at TEXT,
        status TEXT DEFAULT 'active',
        created_at TEXT NOT NULL,
        UNIQUE(msg_before, msg_after)
    )""")
    return conn
89
+
90
+
91
def embed_new_messages(msg_ids):
    """Background task: embed newly ingested messages, then update edges/chunks.

    Best-effort throughout — embedding failures are swallowed so ingest is
    never broken by the embedder being down or rate-limited.
    """
    if not msg_ids:
        return
    try:
        from .memory_manager import EMBED_MODEL, _embed
    except Exception:
        # No embedder available; nothing to do.
        return

    db = _get_db()
    try:
        for mid in msg_ids:
            already = db.execute(
                "SELECT 1 FROM conversation_vectors WHERE msg_id=?", (mid,)
            ).fetchone()
            if already:
                continue

            rec = db.execute(
                "SELECT content, direction FROM conversation_log WHERE id=?", (mid,)
            ).fetchone()
            if not rec:
                continue

            text = rec["content"]
            if rec["direction"] == "out":
                # Strip model "thinking" blocks before embedding assistant output.
                text = THINK_RE.sub("", text).strip()
            if len(text) < 10:
                # Too short to embed meaningfully.
                continue

            try:
                vec = _embed(text[:2000])
                if vec:
                    db.execute(
                        "INSERT OR IGNORE INTO conversation_vectors (msg_id, embedding, model) VALUES (?, ?, ?)",
                        (mid, struct.pack(f"{len(vec)}f", *vec), EMBED_MODEL),
                    )
                    db.commit()
            except Exception:
                pass

            # Gentle pacing between embedding API calls.
            time.sleep(EMBED_DELAY)

        detect_topic_shifts(db, msg_ids)
    except Exception:
        pass
    finally:
        db.close()

    # Incremental chunk processing: split -> summarize -> embed -> Top-K edges.
    try:
        from .conversation_chunker import incremental_chunk_update
        result = incremental_chunk_update(batch_size=200)
        if result.get("chunks_created", 0) > 0:
            print(
                f"[chunk] Created {result['chunks_created']} chunks, "
                f"{result.get('edges_created', 0)} edges",
                flush=True,
            )
    except Exception as e:
        print(f"[chunk] Error: {e}", flush=True)
152
+
153
+
154
def detect_topic_shifts(db, msg_ids):
    """Record an edge wherever adjacent user messages diverge in topic.

    For each session touched by *msg_ids*, walks the embedded user ("in")
    messages in id order and inserts a conversation_edges row whenever the
    cosine similarity of consecutive messages falls below SHIFT_THRESHOLD.
    """
    if not msg_ids:
        return

    marks = ",".join("?" * len(msg_ids))
    session_rows = db.execute(
        f"SELECT DISTINCT session_id FROM conversation_log WHERE id IN ({marks})",
        msg_ids,
    ).fetchall()

    inserted = 0
    for (sid,) in session_rows:
        if not sid:
            continue

        embedded = db.execute(
            """SELECT c.id, v.embedding
               FROM conversation_log c
               JOIN conversation_vectors v ON c.id = v.msg_id
               WHERE c.session_id = ? AND c.direction = 'in'
               ORDER BY c.id""",
            (sid,),
        ).fetchall()

        if len(embedded) < 2:
            continue

        # Compare each consecutive pair of embedded user messages.
        for prev, curr in zip(embedded, embedded[1:]):
            sim = _cosine_sim(
                _blob_to_vec(prev["embedding"]),
                _blob_to_vec(curr["embedding"]),
            )
            if sim >= SHIFT_THRESHOLD:
                continue
            try:
                db.execute(
                    """INSERT OR IGNORE INTO conversation_edges
                       (msg_before, msg_after, session_id, similarity, created_at)
                       VALUES (?, ?, ?, ?, datetime('now'))""",
                    (prev["id"], curr["id"], sid, sim),
                )
                inserted += 1
            except Exception:
                pass

    if inserted:
        db.commit()
201
+
202
+
203
async def ingest(request):
    """Ingest a batch of conversation messages from the browser extension.

    Responds with per-batch counts and schedules embedding of any newly
    logged messages as a Starlette background task (runs after the response).
    """
    try:
        payload = await request.json()
    except Exception:
        return JSONResponse({"ok": False, "error": "invalid JSON"}, status_code=400)

    conversation_id = payload.get("conversation_id", "")
    batch = payload.get("messages", [])

    if not batch:
        return JSONResponse({"ok": True, "ingested": 0, "skipped": 0, "errors": 0})

    counts = {"ingested": 0, "skipped": 0, "errors": 0}
    new_ids = []

    for item in batch:
        text = (item.get("content") or "").strip()
        if not text:
            counts["errors"] += 1
            continue

        outcome = log_message(
            platform=payload.get("platform", "claude.ai"),
            direction=item.get("direction", "in"),
            content=text,
            speaker=item.get("speaker", ""),
            session_id=conversation_id,
            entrypoint="browser_extension",
            created_at=item.get("created_at", ""),
            summary=item.get("summary", ""),
            model=item.get("model", payload.get("model", "")),
            external_id=item.get("external_id") or item.get("uuid", ""),
        )

        if outcome.get("skipped"):
            counts["skipped"] += 1
        elif outcome.get("ok"):
            counts["ingested"] += 1
            if outcome.get("id"):
                new_ids.append(outcome["id"])
        else:
            counts["errors"] += 1

    # Embedding happens after the HTTP response is sent.
    return JSONResponse(
        {"ok": True, **counts},
        background=BackgroundTask(embed_new_messages, new_ids),
    )
251
+
252
+
253
async def health(request):
    """Liveness probe so clients can confirm the receiver is running."""
    payload = {"ok": True, "service": "imprint-chat-sync-receiver"}
    return JSONResponse(payload)
256
+
257
+
258
async def status(request):
    """Summarize recent ingest activity and vector counts for the popup UI."""
    latest = get_recent(platform="claude.ai", limit=5)
    db = _get_db()
    try:
        total_vectors = db.execute("SELECT count(*) FROM conversation_vectors").fetchone()[0]
    finally:
        db.close()
    body = {
        "ok": True,
        "recent_count": len(latest),
        # get_recent returns oldest-first; the last row is the newest message.
        "last_message": latest[-1]["created_at"] if latest else None,
        "vectors": total_vectors,
    }
    return JSONResponse(body)
272
+
273
+
274
# HTTP surface: a single ingest endpoint plus health/status probes.
app = Starlette(
    routes=[
        Route("/api/ingest", ingest, methods=["POST"]),
        Route("/api/health", health, methods=["GET"]),
        Route("/api/status", status, methods=["GET"]),
    ],
)

# Allow the browser-extension origin (chrome-extension://... by default) to
# call the API; the regex is configurable via IMPRINT_RECEIVER_CORS_ORIGIN_REGEX.
app.add_middleware(
    CORSMiddleware,
    allow_origin_regex=RECEIVER_CORS_ORIGIN_REGEX,
    allow_methods=["GET", "POST", "OPTIONS"],
    allow_headers=["Content-Type"],
)
288
+
289
+
290
def _backfill_on_startup():
    """Spawn a daemon thread that embeds any messages still missing a vector."""

    def _worker():
        # Cheap count first so we can exit early when nothing is pending.
        db = _get_db()
        try:
            pending = db.execute("""
                SELECT COUNT(*) as c FROM conversation_log cl
                LEFT JOIN conversation_vectors cv ON cl.id = cv.msg_id
                WHERE cv.msg_id IS NULL AND cl.platform NOT IN ('cc')
                AND cl.content IS NOT NULL AND length(cl.content) > 0
            """).fetchone()[0]
        finally:
            db.close()

        if pending == 0:
            print("[backfill] No unembedded messages, skipping", flush=True)
            return

        print(
            f"[backfill] Found {pending} unembedded messages, processing...",
            flush=True,
        )

        db = _get_db()
        try:
            id_rows = db.execute("""
                SELECT cl.id FROM conversation_log cl
                LEFT JOIN conversation_vectors cv ON cl.id = cv.msg_id
                WHERE cv.msg_id IS NULL AND cl.platform NOT IN ('cc')
                AND cl.content IS NOT NULL AND length(cl.content) > 0
                ORDER BY cl.id
            """).fetchall()
            todo = [r[0] for r in id_rows]
        finally:
            db.close()

        # Embed in batches so progress stays visible in the logs.
        step = 100
        for start in range(0, len(todo), step):
            embed_new_messages(todo[start:start + step])
            print(
                f"[backfill] Embedded {min(start + step, len(todo))}/{len(todo)}",
                flush=True,
            )

        print("[backfill] Embedding complete. Running incremental chunk update...", flush=True)
        try:
            from .conversation_chunker import incremental_chunk_update
            result = incremental_chunk_update(batch_size=200)
            print(f"[backfill] Chunk update: {result}", flush=True)
        except Exception as e:
            print(f"[backfill] Chunk update error: {e}", flush=True)

    # Daemon thread: must not block server startup or shutdown.
    threading.Thread(target=_worker, daemon=True).start()
346
+
347
+
348
def main():
    """Parse CLI flags and run the chat-sync receiver HTTP service.

    Defaults for --host/--port come from IMPRINT_RECEIVER_HOST / HOST and
    IMPRINT_RECEIVER_PORT / PORT. Exits with status 1 if uvicorn is missing.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Imprint chat sync receiver")
    parser.add_argument(
        "--host",
        default=RECEIVER_HOST_ENV,
        # Fix: the help text previously hard-coded 127.0.0.1 even when the
        # effective default came from the environment.
        help=f"Bind address (default: {RECEIVER_HOST_ENV})",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=RECEIVER_PORT_ENV,
        # Fix: show the effective (env-derived) default, not DEFAULT_PORT.
        help=f"Port (default: {RECEIVER_PORT_ENV})",
    )
    parser.add_argument(
        "--no-backfill",
        action="store_true",
        help="Skip the startup backfill pass over unembedded messages",
    )
    args = parser.parse_args()

    try:
        import uvicorn
    except ImportError:
        print(
            "uvicorn is required. Install with: pip install 'imprint-memory[receiver]'",
            flush=True,
        )
        raise SystemExit(1)

    print(
        f"imprint-memory chat-sync receiver listening on {args.host}:{args.port}",
        flush=True,
    )
    # Backfill runs in a background thread; the server starts immediately.
    if not args.no_backfill:
        _backfill_on_startup()
    uvicorn.run(app, host=args.host, port=args.port, log_level="info")


if __name__ == "__main__":
    main()
@@ -0,0 +1,127 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Compress old messages in a context file using a local Ollama model.
4
+
5
+ Can be called standalone or imported as a library.
6
+ Designed to keep a rolling context file (e.g. recent_context.md) from growing
7
+ unbounded by summarizing older messages with a local LLM.
8
+
9
+ Usage:
10
+ python3 -m imprint_memory.compress /path/to/recent_context.md
11
+
12
+ Environment variables:
13
+ OLLAMA_URL — Ollama API endpoint (default: http://localhost:11434)
14
+ COMPRESS_MODEL — Model to use (default: qwen3:8b)
15
+ COMPRESS_KEEP — Number of recent lines to keep as-is (default: 30)
16
+ COMPRESS_THRESHOLD — Line count that triggers compression (default: 50)
17
+ """
18
+
19
+ import json
20
+ import os
21
+ import sys
22
+ import urllib.request
23
+ from pathlib import Path
24
+
25
+ OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
26
+ COMPRESS_MODEL = os.environ.get("COMPRESS_MODEL", "qwen3:8b")
27
+ KEEP_RECENT = int(os.environ.get("COMPRESS_KEEP", 30))
28
+ THRESHOLD = int(os.environ.get("COMPRESS_THRESHOLD", 50))
29
+
30
+ SYSTEM_PROMPT = (
31
+ "You are a log compressor. "
32
+ "Compress the following log into 3-5 summary lines. "
33
+ "Preserve all topics faithfully. "
34
+ "Keep names, timestamps, decisions, and context. "
35
+ "Output ONLY the summary lines, nothing else."
36
+ )
37
+
38
+
39
+ def compress_messages(messages: list[str], model: str = "", ollama_url: str = "") -> str | None:
40
+ """Call local Ollama to summarize messages into 3-5 lines.
41
+
42
+ Returns the summary string, or None if Ollama is unavailable.
43
+ """
44
+ text = "\n".join(messages)
45
+ url = ollama_url or OLLAMA_URL
46
+ mdl = model or COMPRESS_MODEL
47
+
48
+ try:
49
+ req = urllib.request.Request(
50
+ f"{url}/api/chat",
51
+ data=json.dumps({
52
+ "model": mdl,
53
+ "messages": [
54
+ {"role": "system", "content": SYSTEM_PROMPT},
55
+ {"role": "user", "content": f"Compress this log:\n\n{text}"},
56
+ ],
57
+ "stream": False,
58
+ "think": False,
59
+ "options": {"temperature": 0.3, "num_predict": 500},
60
+ }).encode(),
61
+ headers={"Content-Type": "application/json"},
62
+ )
63
+ resp = json.loads(urllib.request.urlopen(req, timeout=60).read())
64
+ result = resp.get("message", {}).get("content", "").strip()
65
+ lines = [l.strip() for l in result.splitlines() if l.strip()]
66
+ return "\n".join(lines) if lines else None
67
+ except Exception as e:
68
+ print(f"Ollama compression failed: {e}", file=sys.stderr)
69
+ return None
70
+
71
+
72
def compress_file(context_file: Path, keep: int = 0, threshold: int = 0) -> bool:
    """Compress a context file in place. Returns True if compression happened.

    A keep/threshold of 0 falls back to the COMPRESS_KEEP / COMPRESS_THRESHOLD
    environment defaults.
    """
    keep = keep or KEEP_RECENT
    threshold = threshold or THRESHOLD

    if not context_file.exists():
        return False

    raw_lines = context_file.read_text(encoding="utf-8").splitlines()

    # HTML comment lines and blank lines count as header; everything else is
    # treated as a message line.
    header: list[str] = []
    messages: list[str] = []
    for line in raw_lines:
        if line.startswith("<!--") or not line.strip():
            header.append(line)
        else:
            messages.append(line)

    if len(messages) <= threshold:
        print(f"Only {len(messages)} messages, below threshold {threshold}", file=sys.stderr)
        return False

    stale = messages[:-keep]
    fresh = messages[-keep:]

    print(f"Compressing {len(stale)} old messages, keeping {len(fresh)} recent", file=sys.stderr)

    summary = compress_messages(stale)
    if summary is None:
        # Ollama unavailable: fall back to plain truncation of old messages.
        print("Compression failed, truncating to recent messages only", file=sys.stderr)
        rebuilt = "\n".join(header + [""] + fresh) + "\n"
    else:
        rebuilt = "\n".join(header + [""] + [summary, ""] + fresh) + "\n"

    # Write to a sibling temp file, then atomically swap it into place.
    tmp = context_file.with_suffix(".tmp")
    tmp.write_text(rebuilt, encoding="utf-8")
    os.replace(str(tmp), str(context_file))

    summary_lines = len(summary.splitlines()) if summary else 0
    print(f"Compressed: {len(stale)} -> {summary_lines} lines", file=sys.stderr)
    return True
116
+
117
+
118
def main():
    """CLI entry point: compress the context file named on the command line."""
    args = sys.argv[1:]
    if not args:
        print("Usage: python3 -m imprint_memory.compress <context_file>", file=sys.stderr)
        sys.exit(1)
    compress_file(Path(args[0]))


if __name__ == "__main__":
    main()