@199-bio/engram 0.11.1 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,297 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Jina v5 embedding bridge for Engram
4
+ Uses jina-embeddings-v5-small with MLX Metal acceleration (~9ms/query, 1.6GB RAM)
5
+
6
+ Drop-in replacement for colbert-bridge.py.
7
+ Run as subprocess from Node.js, communicates via JSON over stdin/stdout.
8
+
9
+ Vector store: numpy arrays in-memory, persisted to .npz + .json sidecar files.
10
+ Search: cosine similarity via normalized dot product (vectors normalized at embed time).
11
+ """
12
+
13
+ import sys
14
+ import json
15
+ import os
16
+ from pathlib import Path
17
+
18
+ import numpy as np
19
+
20
EMBEDDING_DIM = 256  # Matryoshka truncation dimension
MODEL_NAME = "jina-embeddings-v5-small"  # model name passed to the jina_grep embedder
22
+
23
+
24
def lazy_load_embedder():
    """Import and return jina_grep's LocalEmbedder class, or None if absent.

    The import is deferred to first use so that bridge startup stays fast
    even when the embedding stack is not installed.
    """
    try:
        from jina_grep.embedder import LocalEmbedder
    except ImportError:
        return None
    return LocalEmbedder
31
+
32
+
33
class JinaBridge:
    """Embedding index for Engram backed by jina-embeddings-v5-small.

    Vectors live in memory as a single (n, EMBEDDING_DIM) float32 array,
    L2-normalized at embed time so a plain dot product equals cosine
    similarity.  State is persisted to an .npz file (vectors) plus a .json
    sidecar (doc ids and contents) under the index directory.
    """

    def __init__(self, index_path: str):
        """Open (or create) an index rooted at *index_path*."""
        self.index_path = Path(index_path)
        self.index_path.mkdir(parents=True, exist_ok=True)

        self.vectors_file = self.index_path / "engram_vectors.npz"
        self.meta_file = self.index_path / "engram_meta.json"

        self.embedder = None  # lazy-loaded on first _embed() call
        self.vectors: np.ndarray | None = None  # shape (n, EMBEDDING_DIM), L2-normalized
        self.doc_ids: list[str] = []  # row i of self.vectors belongs to doc_ids[i]
        self.doc_contents: dict[str, str] = {}  # id -> content

        self._load_from_disk()

    # ------------------------------------------------------------------
    # Embedder lifecycle
    # ------------------------------------------------------------------

    def _ensure_embedder(self):
        """Load the embedder model on first use.

        Raises:
            RuntimeError: if the jina_grep package is not installed.
        """
        if self.embedder is None:
            LocalEmbedder = lazy_load_embedder()
            if LocalEmbedder is None:
                raise RuntimeError(
                    "jina_grep not installed. Run: pip install jina-grep"
                )
            self.embedder = LocalEmbedder()

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    def _load_from_disk(self):
        """Load existing vectors and metadata from disk if present.

        Any failure (missing key, bad JSON, vector/metadata mismatch) resets
        the index to empty rather than propagating.
        """
        if self.vectors_file.exists() and self.meta_file.exists():
            try:
                # Context manager closes the npz file handle after the
                # "vectors" array has been materialized.
                with np.load(self.vectors_file) as data:
                    self.vectors = data["vectors"]

                with open(self.meta_file, "r") as f:
                    meta = json.load(f)
                self.doc_ids = meta.get("doc_ids", [])
                self.doc_contents = meta.get("doc_contents", {})

                # Sidecar files can get out of sync (partial write, manual
                # edit); treat a row-count mismatch as corruption.
                if self.vectors.shape[0] != len(self.doc_ids):
                    raise ValueError("vector/metadata count mismatch")
            except Exception:
                # Corrupted files -- start fresh
                self.vectors = None
                self.doc_ids = []
                self.doc_contents = {}

    def _persist(self):
        """Write current vectors and metadata to disk.

        An empty index removes any stale files instead of writing empties.
        """
        if self.vectors is not None and len(self.doc_ids) > 0:
            np.savez(self.vectors_file, vectors=self.vectors)
            with open(self.meta_file, "w") as f:
                json.dump(
                    {"doc_ids": self.doc_ids, "doc_contents": self.doc_contents},
                    f,
                )
        else:
            # Empty index -- remove stale files
            if self.vectors_file.exists():
                self.vectors_file.unlink()
            if self.meta_file.exists():
                self.meta_file.unlink()

    # ------------------------------------------------------------------
    # Embedding helpers
    # ------------------------------------------------------------------

    def _embed(self, texts: list[str]) -> np.ndarray:
        """Embed a list of texts and return L2-normalized vectors (n, EMBEDDING_DIM).

        Raises:
            RuntimeError: if the embedder cannot be loaded (see _ensure_embedder).
        """
        self._ensure_embedder()
        raw = self.embedder.embed(texts, model=MODEL_NAME, task="retrieval")
        vecs = np.array(raw, dtype=np.float32)

        # Matryoshka truncation to target dimension
        if vecs.shape[1] > EMBEDDING_DIM:
            vecs = vecs[:, :EMBEDDING_DIM]

        # L2-normalize so dot product == cosine similarity; zero-norm rows
        # are left as-is instead of dividing by zero.
        norms = np.linalg.norm(vecs, axis=1, keepdims=True)
        norms = np.where(norms == 0, 1, norms)
        return vecs / norms

    # ------------------------------------------------------------------
    # Actions
    # ------------------------------------------------------------------

    def index_documents(self, documents: list[dict]) -> dict:
        """
        Build a fresh index from documents, replacing any existing index.
        documents: [{"id": "...", "content": "..."}]
        Returns {"success": True, "count": <indexed>}.
        """
        if not documents:
            return {"success": True, "count": 0}

        ids = [d["id"] for d in documents]
        contents = [d["content"] for d in documents]

        vecs = self._embed(contents)

        self.vectors = vecs
        self.doc_ids = ids
        self.doc_contents = {d["id"]: d["content"] for d in documents}
        self._persist()

        return {"success": True, "count": len(documents)}

    def add_documents(self, documents: list[dict]) -> dict:
        """
        Append documents to the existing index.
        Falls back to a full index build if no index exists yet.
        Returns {"success": True, "count": <added>}.
        """
        if not documents:
            return {"success": True, "count": 0}

        if self.vectors is None or len(self.doc_ids) == 0:
            return self.index_documents(documents)

        ids = [d["id"] for d in documents]
        contents = [d["content"] for d in documents]

        new_vecs = self._embed(contents)

        self.vectors = np.vstack([self.vectors, new_vecs])
        self.doc_ids.extend(ids)
        for d in documents:
            self.doc_contents[d["id"]] = d["content"]
        self._persist()

        return {"success": True, "count": len(documents)}

    def search(self, query: str, k: int = 10) -> dict:
        """
        Search indexed documents by cosine similarity.
        Returns: {"results": [{"id": "...", "score": 0.9, "content": "..."}]}
        sorted by descending score; an "error" key is added on failure.
        """
        if self.vectors is None or len(self.doc_ids) == 0:
            return {"results": []}

        try:
            q_vec = self._embed([query])  # (1, EMBEDDING_DIM)
            # atleast_1d: with exactly one indexed document, squeeze() yields
            # a 0-d array that argsort()/indexing below would reject.
            scores = np.atleast_1d((self.vectors @ q_vec.T).squeeze())  # (n,)

            top_k = min(k, len(self.doc_ids))
            top_indices = np.argsort(scores)[::-1][:top_k]

            results = []
            for idx in top_indices:
                doc_id = self.doc_ids[idx]
                results.append({
                    "id": doc_id,
                    "score": float(scores[idx]),
                    "content": self.doc_contents.get(doc_id, ""),
                })

            return {"results": results}
        except Exception as e:
            return {"results": [], "error": str(e)}

    def rerank(self, query: str, documents: list[dict], k: int = 10) -> dict:
        """
        Rerank provided documents by embedding similarity (no persistent index used).
        documents: [{"id": "...", "content": "..."}]
        Returns the same shape as search().
        """
        if not documents:
            return {"results": []}

        try:
            contents = [d["content"] for d in documents]
            doc_vecs = self._embed(contents)
            q_vec = self._embed([query])

            # Same single-document guard as search().
            scores = np.atleast_1d((doc_vecs @ q_vec.T).squeeze())

            top_k = min(k, len(documents))
            top_indices = np.argsort(scores)[::-1][:top_k]

            results = []
            for idx in top_indices:
                results.append({
                    "id": documents[idx]["id"],
                    "score": float(scores[idx]),
                    "content": documents[idx]["content"],
                })

            return {"results": results}
        except Exception as e:
            return {"results": [], "error": str(e)}

    def delete_documents(self, doc_ids: list[str]) -> dict:
        """Remove documents by ID and persist.

        Returns {"success": True, "count": <actually removed>}; IDs that are
        not present in the index are ignored rather than counted.
        """
        if self.vectors is None or len(self.doc_ids) == 0:
            return {"success": True, "count": 0}

        ids_to_remove = set(doc_ids)
        keep_mask = [i for i, did in enumerate(self.doc_ids) if did not in ids_to_remove]
        removed = len(self.doc_ids) - len(keep_mask)

        if len(keep_mask) == 0:
            self.vectors = None
            self.doc_ids = []
            self.doc_contents = {}
        else:
            self.vectors = self.vectors[keep_mask]
            self.doc_ids = [self.doc_ids[i] for i in keep_mask]
            for did in doc_ids:
                self.doc_contents.pop(did, None)

        self._persist()
        return {"success": True, "count": removed}
248
+
249
+
250
def main():
    """Bridge loop: read one JSON command per stdin line, reply on stdout."""
    index_path = os.environ.get("ENGRAM_INDEX_PATH", os.path.expanduser("~/.engram"))
    bridge = JinaBridge(index_path)

    # Tell the Node.js parent we are ready to accept commands.
    print(json.dumps({"status": "ready"}), flush=True)

    for raw_line in sys.stdin:
        raw_line = raw_line.strip()
        if not raw_line:
            continue

        try:
            cmd = json.loads(raw_line)
            action = cmd.get("action")

            # "quit" terminates the loop without emitting a response.
            if action == "quit":
                break

            if action == "ping":
                result = {"status": "ok"}
            elif action == "index":
                result = bridge.index_documents(cmd.get("documents", []))
            elif action == "add":
                result = bridge.add_documents(cmd.get("documents", []))
            elif action == "search":
                result = bridge.search(cmd.get("query", ""), cmd.get("k", 10))
            elif action == "rerank":
                result = bridge.rerank(
                    cmd.get("query", ""),
                    cmd.get("documents", []),
                    cmd.get("k", 10),
                )
            elif action == "delete":
                result = bridge.delete_documents(cmd.get("ids", []))
            else:
                result = {"error": f"Unknown action: {action}"}

            print(json.dumps(result), flush=True)

        except json.JSONDecodeError as e:
            print(json.dumps({"error": f"Invalid JSON: {e}"}), flush=True)
        except Exception as e:
            print(json.dumps({"error": str(e)}), flush=True)
294
+
295
+
296
# Entry point: run the stdin/stdout JSON bridge loop when executed directly.
if __name__ == "__main__":
    main()
@@ -1,5 +1,5 @@
1
1
  /**
2
- * ColBERT retriever - TypeScript wrapper for Python bridge
2
+ * Jina v5 retriever - TypeScript wrapper for Python bridge
3
3
  */
4
4
 
5
5
  import { spawn, ChildProcess } from "child_process";
@@ -11,7 +11,7 @@ const __filename = fileURLToPath(import.meta.url);
11
11
  const __dirname = path.dirname(__filename);
12
12
 
13
13
  // Python bridge is in src/, not dist/ - go up from dist/retrieval to project root, then into src/
14
- const BRIDGE_PATH = path.join(__dirname, "..", "..", "src", "retrieval", "colbert-bridge.py");
14
+ const BRIDGE_PATH = path.join(__dirname, "..", "..", "src", "retrieval", "jina-bridge.py");
15
15
 
16
16
  export interface Document {
17
17
  id: string;
@@ -32,7 +32,7 @@ interface BridgeResponse {
32
32
  error?: string;
33
33
  }
34
34
 
35
- export class ColBERTRetriever {
35
+ export class JinaRetriever {
36
36
  private process: ChildProcess | null = null;
37
37
  private readline: Interface | null = null;
38
38
  private pendingRequests: Map<number, {
@@ -76,14 +76,20 @@ export class ColBERTRetriever {
76
76
 
77
77
  this.process.stderr?.on("data", (data) => {
78
78
  // Log Python errors for debugging
79
- console.error(`[ColBERT] ${data.toString()}`);
79
+ console.error(`[Jina] ${data.toString()}`);
80
80
  });
81
81
 
82
82
  this.process.on("exit", (code) => {
83
- console.error(`[ColBERT] Process exited with code ${code}`);
83
+ console.error(`[Jina] Process exited with code ${code}`);
84
84
  this.ready = false;
85
85
  this.process = null;
86
86
  this.readline = null;
87
+
88
+ // Reject all pending requests
89
+ for (const [id, { reject }] of this.pendingRequests) {
90
+ reject(new Error("Bridge process exited"));
91
+ }
92
+ this.pendingRequests.clear();
87
93
  });
88
94
 
89
95
  // Wait for ready signal
@@ -109,7 +115,7 @@ export class ColBERTRetriever {
109
115
  resolve(response);
110
116
  }
111
117
  } catch (error) {
112
- console.error(`[ColBERT] Failed to parse: ${line}`);
118
+ console.error(`[Jina] Failed to parse: ${line}`);
113
119
  }
114
120
  }
115
121
 
@@ -120,7 +126,16 @@ export class ColBERTRetriever {
120
126
 
121
127
  return new Promise((resolve, reject) => {
122
128
  const id = this.requestId++;
123
- this.pendingRequests.set(id, { resolve, reject });
129
+
130
+ const timeout = setTimeout(() => {
131
+ this.pendingRequests.delete(id);
132
+ reject(new Error(`Bridge request timed out after 30s: ${command.action}`));
133
+ }, 30_000);
134
+
135
+ this.pendingRequests.set(id, {
136
+ resolve: (value) => { clearTimeout(timeout); resolve(value); },
137
+ reject: (error) => { clearTimeout(timeout); reject(error); },
138
+ });
124
139
 
125
140
  const json = JSON.stringify(command) + "\n";
126
141
  this.process!.stdin!.write(json);
@@ -171,7 +186,7 @@ export class ColBERTRetriever {
171
186
  }
172
187
 
173
188
  /**
174
- * Rerank documents using ColBERT
189
+ * Rerank documents using Jina v5
175
190
  */
176
191
  async rerank(query: string, documents: Document[], k: number = 10): Promise<SearchResult[]> {
177
192
  const response = await this.send({
@@ -230,7 +245,7 @@ export class ColBERTRetriever {
230
245
  }
231
246
 
232
247
  /**
233
- * Fallback retriever when ColBERT is not available
248
+ * Fallback retriever when Jina is not available
234
249
  * Uses simple TF-IDF-like scoring
235
250
  */
236
251
  export class SimpleRetriever {
@@ -299,17 +314,17 @@ export class SimpleRetriever {
299
314
  /**
300
315
  * Create the best available retriever
301
316
  */
302
- export async function createRetriever(indexPath: string): Promise<ColBERTRetriever | SimpleRetriever> {
303
- const colbert = new ColBERTRetriever(indexPath);
317
+ export async function createRetriever(indexPath: string): Promise<JinaRetriever | SimpleRetriever> {
318
+ const jina = new JinaRetriever(indexPath);
304
319
 
305
320
  try {
306
- await colbert.start();
307
- if (await colbert.ping()) {
308
- console.error("[Engram] Using ColBERT retriever");
309
- return colbert;
321
+ await jina.start();
322
+ if (await jina.ping()) {
323
+ console.error("[Engram] Using Jina v5 retriever");
324
+ return jina;
310
325
  }
311
326
  } catch (error) {
312
- console.error("[Engram] ColBERT not available, using simple retriever:", error);
327
+ console.error("[Engram] Jina not available, using simple retriever:", error);
313
328
  }
314
329
 
315
330
  console.error("[Engram] Using simple fallback retriever");
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * Chat Handler for Engram Web Interface
3
- * Uses Claude Opus 4.5 with tools for entity/memory management
3
+ * Uses Claude Opus 4.6 with tools for entity/memory management
4
4
  */
5
5
 
6
6
  import Anthropic from "@anthropic-ai/sdk";
@@ -414,7 +414,7 @@ export class ChatHandler {
414
414
 
415
415
  while (continueLoop) {
416
416
  const stream = this.client.messages.stream({
417
- model: "claude-opus-4-5-20251101",
417
+ model: "claude-opus-4-6-20250514",
418
418
  max_tokens: 16000,
419
419
  system: SYSTEM_PROMPT,
420
420
  tools: TOOLS,
@@ -528,7 +528,7 @@ export class ChatHandler {
528
528
  }
529
529
 
530
530
  let response = await this.client.messages.create({
531
- model: "claude-opus-4-5-20251101",
531
+ model: "claude-opus-4-6-20250514",
532
532
  max_tokens: 16000,
533
533
  system: SYSTEM_PROMPT,
534
534
  tools: TOOLS,
@@ -569,7 +569,7 @@ export class ChatHandler {
569
569
 
570
570
  // Continue the conversation
571
571
  response = await this.client.messages.create({
572
- model: "claude-opus-4-5-20251101",
572
+ model: "claude-opus-4-6-20250514",
573
573
  max_tokens: 16000,
574
574
  system: SYSTEM_PROMPT,
575
575
  tools: TOOLS,
package/src/web/server.ts CHANGED
@@ -223,7 +223,16 @@ export class EngramWebServer {
223
223
  url: URL
224
224
  ): Promise<void> {
225
225
  const method = req.method || "GET";
226
- const body = method !== "GET" ? await this.parseBody(req) : null;
226
+ let body: unknown = null;
227
+ if (method !== "GET") {
228
+ try {
229
+ body = await this.parseBody(req);
230
+ } catch (e) {
231
+ res.writeHead(400, { "Content-Type": "application/json" });
232
+ res.end(JSON.stringify({ error: e instanceof Error ? e.message : "Invalid request body" }));
233
+ return;
234
+ }
235
+ }
227
236
 
228
237
  res.setHeader("Content-Type", "application/json");
229
238
 
@@ -237,8 +246,10 @@ export class EngramWebServer {
237
246
  // GET /api/memories
238
247
  if (pathname === "/api/memories" && method === "GET") {
239
248
  const query = url.searchParams.get("q");
240
- const limit = parseInt(url.searchParams.get("limit") || "50");
241
- const offset = parseInt(url.searchParams.get("offset") || "0");
249
+ const rawLimit = parseInt(url.searchParams.get("limit") || "50");
250
+ const rawOffset = parseInt(url.searchParams.get("offset") || "0");
251
+ const limit = Math.max(1, Math.min(500, isNaN(rawLimit) ? 50 : rawLimit));
252
+ const offset = Math.max(0, isNaN(rawOffset) ? 0 : rawOffset);
242
253
 
243
254
  if (query) {
244
255
  const response = await this.search.search(query, { limit });
@@ -259,7 +270,17 @@ export class EngramWebServer {
259
270
  // POST /api/memories
260
271
  if (pathname === "/api/memories" && method === "POST") {
261
272
  const { content, source, importance } = body as any;
262
- const memory = this.db.createMemory(content, source || "web", importance || 0.5);
273
+ if (!content || typeof content !== "string" || content.trim().length === 0) {
274
+ res.writeHead(400);
275
+ res.end(JSON.stringify({ error: "content is required and must be a non-empty string" }));
276
+ return;
277
+ }
278
+ if (importance !== undefined && (typeof importance !== "number" || importance < 0 || importance > 1)) {
279
+ res.writeHead(400);
280
+ res.end(JSON.stringify({ error: "importance must be a number between 0 and 1" }));
281
+ return;
282
+ }
283
+ const memory = this.db.createMemory(content, source || "web", importance ?? 0.5);
263
284
  await this.search.indexMemory(memory);
264
285
  res.writeHead(201);
265
286
  res.end(JSON.stringify({ memory }));
@@ -271,6 +292,16 @@ export class EngramWebServer {
271
292
  if (memoryMatch && method === "PUT") {
272
293
  const id = memoryMatch[1];
273
294
  const { content, importance } = body as any;
295
+ if (content !== undefined && (typeof content !== "string" || content.trim().length === 0)) {
296
+ res.writeHead(400);
297
+ res.end(JSON.stringify({ error: "content must be a non-empty string" }));
298
+ return;
299
+ }
300
+ if (importance !== undefined && (typeof importance !== "number" || importance < 0 || importance > 1)) {
301
+ res.writeHead(400);
302
+ res.end(JSON.stringify({ error: "importance must be a number between 0 and 1" }));
303
+ return;
304
+ }
274
305
  const updated = this.db.updateMemory(id, { content, importance });
275
306
  if (updated) {
276
307
  res.end(JSON.stringify({ memory: updated }));
@@ -444,6 +475,11 @@ export class EngramWebServer {
444
475
  res.end(JSON.stringify({ error: "Message is required" }));
445
476
  return;
446
477
  }
478
+ if (typeof message !== "string" || message.length > 100_000) {
479
+ res.writeHead(400);
480
+ res.end(JSON.stringify({ error: "message must be a string under 100,000 characters" }));
481
+ return;
482
+ }
447
483
 
448
484
  const response = await this.chat.chat(message);
449
485
  res.end(JSON.stringify({ response }));
@@ -465,6 +501,11 @@ export class EngramWebServer {
465
501
  res.end(JSON.stringify({ error: "Message is required" }));
466
502
  return;
467
503
  }
504
+ if (typeof message !== "string" || message.length > 100_000) {
505
+ res.writeHead(400, { "Content-Type": "application/json" });
506
+ res.end(JSON.stringify({ error: "message must be a string under 100,000 characters" }));
507
+ return;
508
+ }
468
509
 
469
510
  // Check if chat is busy
470
511
  if (this.chat.isBusy()) {
@@ -612,6 +653,64 @@ export class EngramWebServer {
612
653
  return;
613
654
  }
614
655
 
656
+ // GET /api/export - export database as CSV
657
+ if (pathname === "/api/export" && method === "GET") {
658
+ const format = url.searchParams.get("format") || "csv";
659
+
660
+ // Get all data
661
+ const memories = this.db.getAllMemories();
662
+ const entities = this.graph.listEntities(undefined, 10000);
663
+ const digests = this.db.getDigests(undefined, 10000);
664
+
665
+ if (format === "csv") {
666
+ // Build CSV content
667
+ const csvParts: string[] = [];
668
+
669
+ // Memories CSV
670
+ csvParts.push("=== MEMORIES ===");
671
+ csvParts.push("id,content,importance,emotional_weight,timestamp,source");
672
+ for (const m of memories) {
673
+ const content = m.content.replace(/"/g, '""').replace(/\n/g, ' ');
674
+ csvParts.push(`"${m.id}","${content}",${m.importance || 0.5},${m.emotional_weight || 0.5},"${m.timestamp.toISOString()}","${m.source || ''}"`);
675
+ }
676
+
677
+ csvParts.push("");
678
+ csvParts.push("=== ENTITIES ===");
679
+ csvParts.push("id,name,type");
680
+ for (const e of entities) {
681
+ const name = e.name.replace(/"/g, '""');
682
+ csvParts.push(`"${e.id}","${name}","${e.type}"`);
683
+ }
684
+
685
+ csvParts.push("");
686
+ csvParts.push("=== DIGESTS ===");
687
+ csvParts.push("id,level,topic,content,source_count,created_at");
688
+ for (const d of digests) {
689
+ const content = d.content.replace(/"/g, '""').replace(/\n/g, ' ');
690
+ const topic = (d.topic || '').replace(/"/g, '""');
691
+ csvParts.push(`"${d.id}",${d.level},"${topic}","${content}",${d.source_count},"${d.created_at.toISOString()}"`);
692
+ }
693
+
694
+ const csv = csvParts.join("\n");
695
+ const timestamp = new Date().toISOString().slice(0, 10);
696
+
697
+ res.writeHead(200, {
698
+ "Content-Type": "text/csv",
699
+ "Content-Disposition": `attachment; filename="engram-export-${timestamp}.csv"`,
700
+ });
701
+ res.end(csv);
702
+ return;
703
+ }
704
+
705
+ // JSON format (default fallback)
706
+ res.writeHead(200, {
707
+ "Content-Type": "application/json",
708
+ "Content-Disposition": `attachment; filename="engram-export.json"`,
709
+ });
710
+ res.end(JSON.stringify({ memories, entities, digests }, null, 2));
711
+ return;
712
+ }
713
+
615
714
  // 404 for unknown API routes
616
715
  res.writeHead(404);
617
716
  res.end(JSON.stringify({ error: "Not found" }));
@@ -651,10 +750,22 @@ export class EngramWebServer {
651
750
 
652
751
  private parseBody(req: http.IncomingMessage): Promise<unknown> {
653
752
  return new Promise((resolve, reject) => {
654
- let data = "";
655
- req.on("data", (chunk) => (data += chunk));
753
+ const chunks: Buffer[] = [];
754
+ let size = 0;
755
+ const MAX_BODY = 1_048_576; // 1MB
756
+
757
+ req.on("data", (chunk: Buffer) => {
758
+ size += chunk.length;
759
+ if (size > MAX_BODY) {
760
+ req.destroy();
761
+ reject(new Error("Payload too large (max 1MB)"));
762
+ return;
763
+ }
764
+ chunks.push(chunk);
765
+ });
656
766
  req.on("end", () => {
657
767
  try {
768
+ const data = Buffer.concat(chunks).toString();
658
769
  resolve(data ? JSON.parse(data) : {});
659
770
  } catch (e) {
660
771
  reject(e);
@@ -1213,6 +1213,18 @@ clearApiKeyBtn?.addEventListener('click', async () => {
1213
1213
  }
1214
1214
  });
1215
1215
 
1216
+ // Export buttons
1217
+ const exportCsvBtn = document.getElementById('export-csv');
1218
+ const exportJsonBtn = document.getElementById('export-json');
1219
+
1220
+ exportCsvBtn?.addEventListener('click', () => {
1221
+ window.location.href = '/api/export?format=csv';
1222
+ });
1223
+
1224
+ exportJsonBtn?.addEventListener('click', () => {
1225
+ window.location.href = '/api/export?format=json';
1226
+ });
1227
+
1216
1228
  // Chat
1217
1229
  chatToggle?.addEventListener('click', toggleChat);
1218
1230
  chatClose?.addEventListener('click', toggleChat);
@@ -308,6 +308,41 @@
308
308
  </select>
309
309
  </div>
310
310
  </div>
311
+
312
+ <div class="settings-section">
313
+ <div class="settings-section-header">
314
+ <h3>
315
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
316
+ <path d="M21 15v4a2 2 0 01-2 2H5a2 2 0 01-2-2v-4"/>
317
+ <polyline points="7 10 12 15 17 10"/>
318
+ <line x1="12" y1="15" x2="12" y2="3"/>
319
+ </svg>
320
+ Data Export
321
+ </h3>
322
+ </div>
323
+ <p class="help-text" style="margin-top: 0; margin-bottom: 20px;">
324
+ Export your memories, entities, and digests.
325
+ </p>
326
+
327
+ <div class="settings-row" style="gap: 12px;">
328
+ <button class="btn btn-secondary" id="export-csv">
329
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="width: 16px; height: 16px;">
330
+ <path d="M14 2H6a2 2 0 00-2 2v16a2 2 0 002 2h12a2 2 0 002-2V8z"/>
331
+ <polyline points="14 2 14 8 20 8"/>
332
+ <line x1="16" y1="13" x2="8" y2="13"/>
333
+ <line x1="16" y1="17" x2="8" y2="17"/>
334
+ </svg>
335
+ Export CSV
336
+ </button>
337
+ <button class="btn btn-secondary" id="export-json">
338
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="width: 16px; height: 16px;">
339
+ <polyline points="16 18 22 12 16 6"/>
340
+ <polyline points="8 6 2 12 8 18"/>
341
+ </svg>
342
+ Export JSON
343
+ </button>
344
+ </div>
345
+ </div>
311
346
  </div>
312
347
  </section>
313
348
  </main>