imprint-memory 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imprint_memory/__init__.py +1 -0
- imprint_memory/bus.py +53 -0
- imprint_memory/chat_sync_receiver.py +390 -0
- imprint_memory/compress.py +127 -0
- imprint_memory/console.py +250 -0
- imprint_memory/conversation.py +208 -0
- imprint_memory/conversation_chunker.py +1031 -0
- imprint_memory/conversation_search.py +208 -0
- imprint_memory/db.py +346 -0
- imprint_memory/hooks/__init__.py +0 -0
- imprint_memory/hooks/memory-check.sh +172 -0
- imprint_memory/memory_manager.py +2576 -0
- imprint_memory/server.py +544 -0
- imprint_memory/tasks.py +133 -0
- imprint_memory-0.1.0.dist-info/METADATA +339 -0
- imprint_memory-0.1.0.dist-info/RECORD +19 -0
- imprint_memory-0.1.0.dist-info/WHEEL +4 -0
- imprint_memory-0.1.0.dist-info/entry_points.txt +4 -0
- imprint_memory-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# imprint-memory: Persistent memory system for Claude Code
|
imprint_memory/bus.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Message bus.
|
|
3
|
+
Shared log for messages sent/received across different sources.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
from .db import _get_db, now_str
|
|
8
|
+
|
|
9
|
+
# Maximum number of rows retained in message_bus; older rows are pruned on write.
MESSAGE_BUS_LIMIT = int(os.environ.get("MESSAGE_BUS_LIMIT", 40))
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def bus_post(source: str, direction: str, content: str) -> None:
    """Write a message to the bus. Auto-prunes old messages beyond limit.

    source: free-form label (e.g. cc, chat, api, webhook)
    direction: in (received) / out (sent)
    content: message content (auto-truncated to 200 chars)"""
    # Keep bus entries short; 197 chars + "..." keeps total at 200.
    if len(content) > 200:
        content = content[:197] + "..."

    db = _get_db()
    try:
        db.execute(
            "INSERT INTO message_bus (source, direction, content, created_at) VALUES (?, ?, ?, ?)",
            (source, direction, content, now_str()),
        )
        # Prune anything beyond the newest MESSAGE_BUS_LIMIT rows.
        db.execute(
            "DELETE FROM message_bus WHERE id NOT IN (SELECT id FROM message_bus ORDER BY id DESC LIMIT ?)",
            (MESSAGE_BUS_LIMIT,),
        )
        db.commit()
    finally:
        # Fix: close the connection even when an INSERT/DELETE/commit raises,
        # otherwise the handle leaks on error.
        db.close()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def bus_read(limit: int = 20) -> list[dict]:
    """Read recent bus messages, oldest first.

    limit: maximum number of messages to return (the most recent N).
    Returns a list of dicts with source, direction, content, created_at.
    """
    db = _get_db()
    try:
        rows = db.execute(
            "SELECT source, direction, content, created_at FROM message_bus ORDER BY id DESC LIMIT ?",
            (limit,),
        ).fetchall()
    finally:
        # Fix: release the connection even if the query raises.
        db.close()
    # Query returns newest-first; reverse to chronological order for callers.
    return [dict(r) for r in reversed(rows)]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def bus_format(limit: int = 20) -> str:
    """Render recent bus messages as a markdown snippet for context injection."""
    entries = bus_read(limit)
    if not entries:
        return "(No recent messages)"

    def _render(entry: dict) -> str:
        # Outgoing messages get a right arrow, incoming a left arrow.
        marker = "\u2192" if entry["direction"] == "out" else "\u2190"
        return f"[{entry['created_at']}] [{entry['source']}] {marker} {entry['content']}"

    return "\n".join(["# Recent Messages\n"] + [_render(e) for e in entries])
|
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Chat Sync Receiver — HTTP endpoint that ingests conversations from the
|
|
3
|
+
imprint-chat-sync browser extension (or any client posting to /api/ingest).
|
|
4
|
+
|
|
5
|
+
Run with:
|
|
6
|
+
imprint-memory-receiver # listens on 127.0.0.1:8001
|
|
7
|
+
PORT=9001 imprint-memory-receiver # custom port
|
|
8
|
+
|
|
9
|
+
Pipeline per ingest call:
|
|
10
|
+
POST /api/ingest (browser extension)
|
|
11
|
+
↓
|
|
12
|
+
log_message() (writes to conversation_log)
|
|
13
|
+
↓ background task
|
|
14
|
+
embed_new_messages() (Gemini Embedding 2 per message)
|
|
15
|
+
↓
|
|
16
|
+
detect_topic_shifts() (adjacent user-msg cosine → topic edges)
|
|
17
|
+
↓
|
|
18
|
+
incremental_chunk_update() (chunk → summarize → embed → top-K edges)
|
|
19
|
+
|
|
20
|
+
The receiver shares the same SQLite database as the main imprint-memory server
|
|
21
|
+
(configured via IMPRINT_DATA_DIR / IMPRINT_DB).
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import math
|
|
25
|
+
import os
|
|
26
|
+
import re
|
|
27
|
+
import struct
|
|
28
|
+
import threading
|
|
29
|
+
import time
|
|
30
|
+
|
|
31
|
+
from starlette.applications import Starlette
|
|
32
|
+
from starlette.background import BackgroundTask
|
|
33
|
+
from starlette.middleware.cors import CORSMiddleware
|
|
34
|
+
from starlette.responses import JSONResponse
|
|
35
|
+
from starlette.routing import Route
|
|
36
|
+
|
|
37
|
+
from .conversation import log_message, get_recent
|
|
38
|
+
from .db import _get_db as _get_app_db
|
|
39
|
+
|
|
40
|
+
# --- Receiver configuration (all overridable via environment variables) ---
DEFAULT_PORT = 8001
# Matches <think>...</think> reasoning spans so they can be stripped before embedding.
THINK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)
# Seconds slept between per-message embedding calls (simple rate limiting).
EMBED_DELAY = float(os.environ.get("IMPRINT_RECEIVER_EMBED_DELAY", "0.7"))
# Adjacent user messages with cosine similarity below this are treated as a topic shift.
SHIFT_THRESHOLD = float(os.environ.get("IMPRINT_RECEIVER_SHIFT_THRESHOLD", "0.50"))
# Bind address/port: IMPRINT_RECEIVER_* wins, then generic HOST/PORT, then defaults.
RECEIVER_HOST_ENV = os.environ.get("IMPRINT_RECEIVER_HOST", os.environ.get("HOST", "127.0.0.1"))
RECEIVER_PORT_ENV = int(os.environ.get("IMPRINT_RECEIVER_PORT", os.environ.get("PORT", DEFAULT_PORT)))
# CORS origin pattern; default allows any Chrome extension origin.
RECEIVER_CORS_ORIGIN_REGEX = os.environ.get(
    "IMPRINT_RECEIVER_CORS_ORIGIN_REGEX",
    r"^chrome-extension://.*$",
)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _blob_to_vec(blob):
|
|
53
|
+
"""Decode a float32 embedding blob."""
|
|
54
|
+
return list(struct.unpack(f"{len(blob)//4}f", blob))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _cosine_sim(a, b):
|
|
58
|
+
"""Return cosine similarity for same-length vectors."""
|
|
59
|
+
if len(a) != len(b):
|
|
60
|
+
return 0.0
|
|
61
|
+
dot = sum(x * y for x, y in zip(a, b))
|
|
62
|
+
na = math.sqrt(sum(x * x for x in a))
|
|
63
|
+
nb = math.sqrt(sum(x * x for x in b))
|
|
64
|
+
return dot / (na * nb) if na and nb else 0.0
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _get_db():
    """Open the receiver database connection and ensure receiver-owned tables.

    Reuses the app-wide connection factory from .db; the CREATE TABLE
    statements are idempotent. The caller is responsible for closing the
    returned connection.
    """
    db = _get_app_db()
    # Per-message embedding vectors, stored as packed float32 blobs.
    db.execute("""CREATE TABLE IF NOT EXISTS conversation_vectors (
        msg_id INTEGER PRIMARY KEY,
        embedding BLOB NOT NULL,
        model TEXT DEFAULT 'gemini-embedding-2'
    )""")
    # Topic-shift edges between adjacent user messages within a session;
    # UNIQUE(msg_before, msg_after) lets inserts use INSERT OR IGNORE.
    db.execute("""CREATE TABLE IF NOT EXISTS conversation_edges (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        msg_before INTEGER NOT NULL,
        msg_after INTEGER NOT NULL,
        session_id TEXT DEFAULT '',
        similarity REAL NOT NULL,
        strength REAL DEFAULT 1.0,
        surfaced_count INTEGER DEFAULT 0,
        last_surfaced_at TEXT,
        status TEXT DEFAULT 'active',
        created_at TEXT NOT NULL,
        UNIQUE(msg_before, msg_after)
    )""")
    return db
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def embed_new_messages(msg_ids):
    """Background: embed newly ingested messages with the configured embedder.

    For each id in *msg_ids*: skip if already embedded, strip <think> spans
    from assistant ('out') messages, embed the first 2000 chars, and store
    the vector. Then run topic-shift detection on the batch and trigger an
    incremental chunk update. Best-effort throughout: embedding failures
    are swallowed per-message; chunking errors are printed.
    """
    if not msg_ids:
        return
    try:
        # Imported lazily so the receiver still starts without the embedder.
        from .memory_manager import EMBED_MODEL, _embed
    except Exception:
        return

    db = _get_db()
    try:
        for msg_id in msg_ids:
            # Skip messages that already have a stored vector.
            existing = db.execute(
                "SELECT 1 FROM conversation_vectors WHERE msg_id=?", (msg_id,)
            ).fetchone()
            if existing:
                continue

            row = db.execute(
                "SELECT content, direction FROM conversation_log WHERE id=?", (msg_id,)
            ).fetchone()
            if not row:
                continue

            content = row["content"]
            if row["direction"] == "out":
                # Assistant output: drop <think>...</think> reasoning spans.
                content = THINK_RE.sub("", content).strip()
            if len(content) < 10:
                # Too short to embed meaningfully.
                continue

            try:
                vec = _embed(content[:2000])
                if vec:
                    # Vector is stored as a packed float32 blob; commit per
                    # message so partial progress survives a crash.
                    db.execute(
                        "INSERT OR IGNORE INTO conversation_vectors (msg_id, embedding, model) VALUES (?, ?, ?)",
                        (msg_id, struct.pack(f"{len(vec)}f", *vec), EMBED_MODEL),
                    )
                    db.commit()
            except Exception:
                # Best-effort: one failed embedding must not stop the batch.
                pass

            # Rate-limit calls to the embedding backend.
            time.sleep(EMBED_DELAY)

        detect_topic_shifts(db, msg_ids)
    except Exception:
        pass
    finally:
        db.close()

    # Incremental chunk processing: split → summarize → embed → Top-K edges
    try:
        from .conversation_chunker import incremental_chunk_update
        result = incremental_chunk_update(batch_size=200)
        if result.get("chunks_created", 0) > 0:
            print(
                f"[chunk] Created {result['chunks_created']} chunks, "
                f"{result.get('edges_created', 0)} edges",
                flush=True,
            )
    except Exception as e:
        print(f"[chunk] Error: {e}", flush=True)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def detect_topic_shifts(db, msg_ids):
    """Detect topic shifts by cosine similarity between adjacent user messages.

    For each session touched by *msg_ids*, walks that session's embedded
    user ('in') messages in id order and records a conversation_edges row
    whenever adjacent-message similarity drops below SHIFT_THRESHOLD.

    db: open connection owned by the caller (not closed here).
    msg_ids: ids of newly ingested conversation_log rows.
    """
    if not msg_ids:
        return

    # Which sessions does this ingest batch touch?
    placeholders = ",".join("?" * len(msg_ids))
    sessions = db.execute(
        f"SELECT DISTINCT session_id FROM conversation_log WHERE id IN ({placeholders})",
        msg_ids,
    ).fetchall()

    shifts = 0
    for (session_id,) in sessions:
        if not session_id:
            continue

        # All embedded user messages for the session, oldest first.
        rows = db.execute(
            """SELECT c.id, v.embedding
            FROM conversation_log c
            JOIN conversation_vectors v ON c.id = v.msg_id
            WHERE c.session_id = ? AND c.direction = 'in'
            ORDER BY c.id""",
            (session_id,),
        ).fetchall()

        if len(rows) < 2:
            continue

        # Compare each user message with its immediate predecessor.
        for i in range(1, len(rows)):
            vec_a = _blob_to_vec(rows[i - 1]["embedding"])
            vec_b = _blob_to_vec(rows[i]["embedding"])
            sim = _cosine_sim(vec_a, vec_b)

            # Low similarity between consecutive turns marks a topic shift.
            if sim < SHIFT_THRESHOLD:
                try:
                    db.execute(
                        """INSERT OR IGNORE INTO conversation_edges
                        (msg_before, msg_after, session_id, similarity, created_at)
                        VALUES (?, ?, ?, ?, datetime('now'))""",
                        (rows[i - 1]["id"], rows[i]["id"], session_id, sim),
                    )
                    shifts += 1
                except Exception:
                    # Best-effort: a failed edge insert must not abort the scan.
                    pass

    # Single commit covering all inserted edges.
    if shifts:
        db.commit()
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
async def ingest(request):
    """Ingest a batch of conversation messages from the browser extension.

    Each message is written via log_message; embedding runs as a background
    task after the HTTP response has been sent.
    """
    try:
        data = await request.json()
    except Exception:
        return JSONResponse({"ok": False, "error": "invalid JSON"}, status_code=400)

    conversation_id = data.get("conversation_id", "")
    messages = data.get("messages", [])

    if not messages:
        # Nothing to do; report an empty (but successful) ingest.
        return JSONResponse({"ok": True, "ingested": 0, "skipped": 0, "errors": 0})

    counts = {"ingested": 0, "skipped": 0, "errors": 0}
    new_ids = []

    for msg in messages:
        body = (msg.get("content") or "").strip()
        if not body:
            counts["errors"] += 1
            continue

        outcome = log_message(
            platform=data.get("platform", "claude.ai"),
            direction=msg.get("direction", "in"),
            content=body,
            speaker=msg.get("speaker", ""),
            session_id=conversation_id,
            entrypoint="browser_extension",
            created_at=msg.get("created_at", ""),
            summary=msg.get("summary", ""),
            model=msg.get("model", data.get("model", "")),
            external_id=msg.get("external_id") or msg.get("uuid", ""),
        )

        # Classify the outcome: dedupe-skip, success, or failure.
        if outcome.get("skipped"):
            counts["skipped"] += 1
        elif outcome.get("ok"):
            counts["ingested"] += 1
            if outcome.get("id"):
                new_ids.append(outcome["id"])
        else:
            counts["errors"] += 1

    # Embedding of the newly stored messages happens after the response.
    return JSONResponse(
        {"ok": True, **counts},
        background=BackgroundTask(embed_new_messages, new_ids),
    )
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
async def health(request):
    """Liveness probe: always reports the service as up."""
    payload = {"ok": True, "service": "imprint-chat-sync-receiver"}
    return JSONResponse(payload)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
async def status(request):
    """Return recent ingest and embedding status for the extension popup."""
    recent = get_recent(platform="claude.ai", limit=5)

    db = _get_db()
    try:
        vec_count = db.execute("SELECT count(*) FROM conversation_vectors").fetchone()[0]
    finally:
        db.close()

    payload = {
        "ok": True,
        "recent_count": len(recent),
        # get_recent returns oldest-first; the last entry is the newest.
        "last_message": recent[-1]["created_at"] if recent else None,
        "vectors": vec_count,
    }
    return JSONResponse(payload)
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
# ASGI application exposing the receiver's three endpoints.
app = Starlette(
    routes=[
        Route("/api/ingest", ingest, methods=["POST"]),
        Route("/api/health", health, methods=["GET"]),
        Route("/api/status", status, methods=["GET"]),
    ],
)

# Allow cross-origin calls from the browser extension
# (chrome-extension:// origins by default; see RECEIVER_CORS_ORIGIN_REGEX).
app.add_middleware(
    CORSMiddleware,
    allow_origin_regex=RECEIVER_CORS_ORIGIN_REGEX,
    allow_methods=["GET", "POST", "OPTIONS"],
    allow_headers=["Content-Type"],
)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _backfill_on_startup():
    """Background-embed any messages that don't have a vector yet.

    Spawns a daemon thread so server startup is not blocked. The thread
    counts unembedded messages, collects their ids, embeds them in batches
    of 100 via embed_new_messages, then runs an incremental chunk update.
    """

    def _run():
        # First pass: cheap count of what needs embedding, on its own
        # short-lived connection.
        db = _get_db()
        try:
            unembedded = db.execute("""
                SELECT COUNT(*) as c FROM conversation_log cl
                LEFT JOIN conversation_vectors cv ON cl.id = cv.msg_id
                WHERE cv.msg_id IS NULL AND cl.platform NOT IN ('cc')
                AND cl.content IS NOT NULL AND length(cl.content) > 0
            """).fetchone()[0]
        finally:
            db.close()

        if unembedded == 0:
            print("[backfill] No unembedded messages, skipping", flush=True)
            return

        print(
            f"[backfill] Found {unembedded} unembedded messages, processing...",
            flush=True,
        )

        # Second pass: collect ids, then release the connection before the
        # long-running embedding work starts.
        db = _get_db()
        try:
            rows = db.execute("""
                SELECT cl.id FROM conversation_log cl
                LEFT JOIN conversation_vectors cv ON cl.id = cv.msg_id
                WHERE cv.msg_id IS NULL AND cl.platform NOT IN ('cc')
                AND cl.content IS NOT NULL AND length(cl.content) > 0
                ORDER BY cl.id
            """).fetchall()
            msg_ids = [r[0] for r in rows]
        finally:
            db.close()

        # Embed in batches so progress is visible and interruptible.
        batch_size = 100
        for i in range(0, len(msg_ids), batch_size):
            batch = msg_ids[i:i + batch_size]
            embed_new_messages(batch)
            print(
                f"[backfill] Embedded {min(i + batch_size, len(msg_ids))}/{len(msg_ids)}",
                flush=True,
            )

        print("[backfill] Embedding complete. Running incremental chunk update...", flush=True)
        try:
            from .conversation_chunker import incremental_chunk_update
            result = incremental_chunk_update(batch_size=200)
            print(f"[backfill] Chunk update: {result}", flush=True)
        except Exception as e:
            print(f"[backfill] Chunk update error: {e}", flush=True)

    # Daemon thread: dies with the process; backfill is best-effort.
    t = threading.Thread(target=_run, daemon=True)
    t.start()
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def main():
    """Run the chat-sync receiver HTTP service.

    Parses CLI flags (with env-derived defaults), optionally kicks off the
    startup backfill thread, then serves the ASGI app with uvicorn.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Imprint chat sync receiver")
    parser.add_argument(
        "--host", default=RECEIVER_HOST_ENV, help="Bind address (default: 127.0.0.1)"
    )
    parser.add_argument(
        "--port", type=int, default=RECEIVER_PORT_ENV, help=f"Port (default: {DEFAULT_PORT})"
    )
    parser.add_argument(
        "--no-backfill",
        action="store_true",
        help="Skip the startup backfill pass over unembedded messages",
    )
    args = parser.parse_args()

    # uvicorn is an optional dependency; fail with install instructions.
    try:
        import uvicorn
    except ImportError:
        print(
            "uvicorn is required. Install with: pip install 'imprint-memory[receiver]'",
            flush=True,
        )
        raise SystemExit(1)

    print(
        f"imprint-memory chat-sync receiver listening on {args.host}:{args.port}",
        flush=True,
    )
    if not args.no_backfill:
        _backfill_on_startup()
    uvicorn.run(app, host=args.host, port=args.port, log_level="info")
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
# Allow running the receiver module directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Compress old messages in a context file using a local Ollama model.
|
|
4
|
+
|
|
5
|
+
Can be called standalone or imported as a library.
|
|
6
|
+
Designed to keep a rolling context file (e.g. recent_context.md) from growing
|
|
7
|
+
unbounded by summarizing older messages with a local LLM.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
python3 -m imprint_memory.compress /path/to/recent_context.md
|
|
11
|
+
|
|
12
|
+
Environment variables:
|
|
13
|
+
OLLAMA_URL — Ollama API endpoint (default: http://localhost:11434)
|
|
14
|
+
COMPRESS_MODEL — Model to use (default: qwen3:8b)
|
|
15
|
+
COMPRESS_KEEP — Number of recent lines to keep as-is (default: 30)
|
|
16
|
+
COMPRESS_THRESHOLD — Line count that triggers compression (default: 50)
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import sys
|
|
22
|
+
import urllib.request
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
|
|
25
|
+
# Ollama endpoint and model used for summarization (env-overridable).
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
COMPRESS_MODEL = os.environ.get("COMPRESS_MODEL", "qwen3:8b")
# Number of recent message lines kept verbatim after compression.
KEEP_RECENT = int(os.environ.get("COMPRESS_KEEP", 30))
# Compression only triggers once the file holds more than this many message lines.
THRESHOLD = int(os.environ.get("COMPRESS_THRESHOLD", 50))

# Instruction for the local model; the reply must contain only summary lines.
SYSTEM_PROMPT = (
    "You are a log compressor. "
    "Compress the following log into 3-5 summary lines. "
    "Preserve all topics faithfully. "
    "Keep names, timestamps, decisions, and context. "
    "Output ONLY the summary lines, nothing else."
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def compress_messages(messages: list[str], model: str = "", ollama_url: str = "") -> str | None:
    """Call local Ollama to summarize messages into 3-5 lines.

    Returns the summary string, or None if Ollama is unavailable.
    """
    joined = "\n".join(messages)
    base_url = ollama_url or OLLAMA_URL
    chosen_model = model or COMPRESS_MODEL

    # Non-streaming chat request; low temperature for faithful summaries.
    payload = {
        "model": chosen_model,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"Compress this log:\n\n{joined}"},
        ],
        "stream": False,
        "think": False,
        "options": {"temperature": 0.3, "num_predict": 500},
    }

    try:
        req = urllib.request.Request(
            f"{base_url}/api/chat",
            data=json.dumps(payload).encode(),
            headers={"Content-Type": "application/json"},
        )
        resp = json.loads(urllib.request.urlopen(req, timeout=60).read())
        raw = resp.get("message", {}).get("content", "").strip()
        # Drop blank lines and surrounding whitespace from the model output.
        cleaned = [line.strip() for line in raw.splitlines() if line.strip()]
        return "\n".join(cleaned) if cleaned else None
    except Exception as e:
        print(f"Ollama compression failed: {e}", file=sys.stderr)
        return None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def compress_file(context_file: Path, keep: int = 0, threshold: int = 0) -> bool:
    """Compress a context file in-place. Returns True if compression happened.

    keep/threshold of 0 fall back to the module-level env defaults.
    """
    keep = keep or KEEP_RECENT
    threshold = threshold or THRESHOLD

    if not context_file.exists():
        return False

    all_lines = context_file.read_text(encoding="utf-8").splitlines()

    # Split HTML-comment headers and blank lines from actual message lines.
    header_lines: list[str] = []
    message_lines: list[str] = []
    for line in all_lines:
        bucket = header_lines if (line.startswith("<!--") or not line.strip()) else message_lines
        bucket.append(line)

    if len(message_lines) <= threshold:
        print(f"Only {len(message_lines)} messages, below threshold {threshold}", file=sys.stderr)
        return False

    old_messages = message_lines[:-keep]
    recent_messages = message_lines[-keep:]

    print(f"Compressing {len(old_messages)} old messages, keeping {len(recent_messages)} recent", file=sys.stderr)

    summary = compress_messages(old_messages)
    if summary is None:
        # Ollama unavailable: drop the old lines rather than let the file grow.
        print("Compression failed, truncating to recent messages only", file=sys.stderr)
        middle: list[str] = []
    else:
        middle = [summary, ""]
    new_content = "\n".join(header_lines + [""] + middle + recent_messages) + "\n"

    # Atomic write: write a sibling temp file, then rename over the original.
    tmp = context_file.with_suffix(".tmp")
    tmp.write_text(new_content, encoding="utf-8")
    os.replace(str(tmp), str(context_file))

    n = len(summary.splitlines()) if summary else 0
    print(f"Compressed: {len(old_messages)} -> {n} lines", file=sys.stderr)
    return True
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def main():
    """CLI entry point for context-file compression."""
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python3 -m imprint_memory.compress <context_file>", file=sys.stderr)
        sys.exit(1)
    compress_file(Path(cli_args[0]))
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# Allow running the compressor directly as a script.
if __name__ == "__main__":
    main()
|