get-claudia 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/memory-daemon/claudia_memory/database.py +19 -0
- package/memory-daemon/claudia_memory/mcp/server.py +108 -1
- package/memory-daemon/claudia_memory/schema.sql +5 -0
- package/memory-daemon/claudia_memory/services/recall.py +176 -24
- package/memory-daemon/claudia_memory/services/remember.py +97 -20
- package/package.json +1 -1
|
@@ -240,6 +240,25 @@ class Database:
|
|
|
240
240
|
conn.commit()
|
|
241
241
|
logger.info("Applied migration 2: turn buffer and session narratives")
|
|
242
242
|
|
|
243
|
+
if current_version < 3:
|
|
244
|
+
# Migration 3: Add source_context to memories, is_archived to turn_buffer
|
|
245
|
+
migration_stmts = [
|
|
246
|
+
"ALTER TABLE memories ADD COLUMN source_context TEXT",
|
|
247
|
+
"ALTER TABLE turn_buffer ADD COLUMN is_archived INTEGER DEFAULT 0",
|
|
248
|
+
]
|
|
249
|
+
for stmt in migration_stmts:
|
|
250
|
+
try:
|
|
251
|
+
conn.execute(stmt)
|
|
252
|
+
except sqlite3.OperationalError as e:
|
|
253
|
+
if "duplicate column" not in str(e).lower():
|
|
254
|
+
logger.warning(f"Migration 3 statement failed: {e}")
|
|
255
|
+
|
|
256
|
+
conn.execute(
|
|
257
|
+
"INSERT OR IGNORE INTO schema_migrations (version, description) VALUES (3, 'Add source_context to memories, is_archived to turn_buffer for episodic provenance')"
|
|
258
|
+
)
|
|
259
|
+
conn.commit()
|
|
260
|
+
logger.info("Applied migration 3: episodic memory provenance")
|
|
261
|
+
|
|
243
262
|
def execute(
|
|
244
263
|
self, sql: str, params: Tuple = (), fetch: bool = False
|
|
245
264
|
) -> Optional[List[sqlite3.Row]]:
|
|
@@ -31,6 +31,7 @@ from ..services.recall import (
|
|
|
31
31
|
recall_about,
|
|
32
32
|
recall_episodes,
|
|
33
33
|
search_entities,
|
|
34
|
+
trace_memory,
|
|
34
35
|
)
|
|
35
36
|
from ..services.remember import (
|
|
36
37
|
buffer_turn,
|
|
@@ -79,6 +80,18 @@ async def list_tools() -> ListToolsResult:
|
|
|
79
80
|
"description": "Importance score from 0.0 to 1.0",
|
|
80
81
|
"default": 1.0,
|
|
81
82
|
},
|
|
83
|
+
"source": {
|
|
84
|
+
"type": "string",
|
|
85
|
+
"description": "Source type: email, transcript, document, conversation, user_input",
|
|
86
|
+
},
|
|
87
|
+
"source_context": {
|
|
88
|
+
"type": "string",
|
|
89
|
+
"description": "One-line breadcrumb describing origin (e.g., 'Email from Jim Ferry re: Forum V+, 2025-01-28')",
|
|
90
|
+
},
|
|
91
|
+
"source_material": {
|
|
92
|
+
"type": "string",
|
|
93
|
+
"description": "Full raw text of the source (email body, transcript, etc.). Saved to disk, not stored in DB.",
|
|
94
|
+
},
|
|
82
95
|
},
|
|
83
96
|
"required": ["content"],
|
|
84
97
|
},
|
|
@@ -303,6 +316,18 @@ async def list_tools() -> ListToolsResult:
|
|
|
303
316
|
"description": "Entity names this fact relates to",
|
|
304
317
|
},
|
|
305
318
|
"importance": {"type": "number", "default": 1.0},
|
|
319
|
+
"source": {
|
|
320
|
+
"type": "string",
|
|
321
|
+
"description": "Override source type (default: session_summary)",
|
|
322
|
+
},
|
|
323
|
+
"source_context": {
|
|
324
|
+
"type": "string",
|
|
325
|
+
"description": "One-line breadcrumb describing origin",
|
|
326
|
+
},
|
|
327
|
+
"source_material": {
|
|
328
|
+
"type": "string",
|
|
329
|
+
"description": "Full raw source text, saved to disk",
|
|
330
|
+
},
|
|
306
331
|
},
|
|
307
332
|
"required": ["content"],
|
|
308
333
|
},
|
|
@@ -319,6 +344,18 @@ async def list_tools() -> ListToolsResult:
|
|
|
319
344
|
"items": {"type": "string"},
|
|
320
345
|
},
|
|
321
346
|
"importance": {"type": "number", "default": 1.0},
|
|
347
|
+
"source": {
|
|
348
|
+
"type": "string",
|
|
349
|
+
"description": "Override source type (default: session_summary)",
|
|
350
|
+
},
|
|
351
|
+
"source_context": {
|
|
352
|
+
"type": "string",
|
|
353
|
+
"description": "One-line breadcrumb describing origin",
|
|
354
|
+
},
|
|
355
|
+
"source_material": {
|
|
356
|
+
"type": "string",
|
|
357
|
+
"description": "Full raw source text, saved to disk",
|
|
358
|
+
},
|
|
322
359
|
},
|
|
323
360
|
"required": ["content"],
|
|
324
361
|
},
|
|
@@ -436,7 +473,15 @@ async def list_tools() -> ListToolsResult:
|
|
|
436
473
|
},
|
|
437
474
|
"source": {
|
|
438
475
|
"type": "string",
|
|
439
|
-
"description": "Source entity (for 'relate' op)",
|
|
476
|
+
"description": "Source entity (for 'relate' op) or source type (for 'remember' op)",
|
|
477
|
+
},
|
|
478
|
+
"source_context": {
|
|
479
|
+
"type": "string",
|
|
480
|
+
"description": "One-line breadcrumb (for 'remember' op)",
|
|
481
|
+
},
|
|
482
|
+
"source_material": {
|
|
483
|
+
"type": "string",
|
|
484
|
+
"description": "Full raw source text, saved to disk (for 'remember' op)",
|
|
440
485
|
},
|
|
441
486
|
"target": {
|
|
442
487
|
"type": "string",
|
|
@@ -458,6 +503,25 @@ async def list_tools() -> ListToolsResult:
|
|
|
458
503
|
"required": ["operations"],
|
|
459
504
|
},
|
|
460
505
|
),
|
|
506
|
+
Tool(
|
|
507
|
+
name="memory.trace",
|
|
508
|
+
description=(
|
|
509
|
+
"Reconstruct full provenance for a memory. Returns the memory with all fields, "
|
|
510
|
+
"the source episode narrative and archived conversation turns (if applicable), "
|
|
511
|
+
"related entities, and a preview of any source material file saved on disk. "
|
|
512
|
+
"Zero cost until invoked -- use when asked 'where did that come from?'"
|
|
513
|
+
),
|
|
514
|
+
inputSchema={
|
|
515
|
+
"type": "object",
|
|
516
|
+
"properties": {
|
|
517
|
+
"memory_id": {
|
|
518
|
+
"type": "integer",
|
|
519
|
+
"description": "The memory ID to trace provenance for",
|
|
520
|
+
},
|
|
521
|
+
},
|
|
522
|
+
"required": ["memory_id"],
|
|
523
|
+
},
|
|
524
|
+
),
|
|
461
525
|
]
|
|
462
526
|
return ListToolsResult(tools=tools)
|
|
463
527
|
|
|
@@ -472,7 +536,20 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
472
536
|
memory_type=arguments.get("type", "fact"),
|
|
473
537
|
about_entities=arguments.get("about"),
|
|
474
538
|
importance=arguments.get("importance", 1.0),
|
|
539
|
+
source=arguments.get("source"),
|
|
540
|
+
source_context=arguments.get("source_context"),
|
|
475
541
|
)
|
|
542
|
+
# Save source material to disk if provided
|
|
543
|
+
if memory_id and arguments.get("source_material"):
|
|
544
|
+
svc = get_remember_service()
|
|
545
|
+
svc.save_source_material(
|
|
546
|
+
memory_id,
|
|
547
|
+
arguments["source_material"],
|
|
548
|
+
metadata={
|
|
549
|
+
"source": arguments.get("source"),
|
|
550
|
+
"source_context": arguments.get("source_context"),
|
|
551
|
+
},
|
|
552
|
+
)
|
|
476
553
|
return CallToolResult(
|
|
477
554
|
content=[
|
|
478
555
|
TextContent(
|
|
@@ -504,6 +581,9 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
504
581
|
"importance": r.importance,
|
|
505
582
|
"entities": r.entities,
|
|
506
583
|
"created_at": r.created_at,
|
|
584
|
+
"source": r.source,
|
|
585
|
+
"source_id": r.source_id,
|
|
586
|
+
"source_context": r.source_context,
|
|
507
587
|
}
|
|
508
588
|
for r in results
|
|
509
589
|
]
|
|
@@ -528,6 +608,9 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
528
608
|
"type": m.type,
|
|
529
609
|
"importance": m.importance,
|
|
530
610
|
"created_at": m.created_at,
|
|
611
|
+
"source": m.source,
|
|
612
|
+
"source_id": m.source_id,
|
|
613
|
+
"source_context": m.source_context,
|
|
531
614
|
}
|
|
532
615
|
for m in result["memories"]
|
|
533
616
|
]
|
|
@@ -698,9 +781,22 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
698
781
|
memory_type=op.get("type", "fact"),
|
|
699
782
|
about_entities=op.get("about"),
|
|
700
783
|
importance=op.get("importance", 1.0),
|
|
784
|
+
source=op.get("source"),
|
|
785
|
+
source_context=op.get("source_context"),
|
|
701
786
|
)
|
|
702
787
|
op_result["success"] = True
|
|
703
788
|
op_result["memory_id"] = memory_id
|
|
789
|
+
# Save source material to disk if provided
|
|
790
|
+
if memory_id and op.get("source_material"):
|
|
791
|
+
svc = get_remember_service()
|
|
792
|
+
svc.save_source_material(
|
|
793
|
+
memory_id,
|
|
794
|
+
op["source_material"],
|
|
795
|
+
metadata={
|
|
796
|
+
"source": op.get("source"),
|
|
797
|
+
"source_context": op.get("source_context"),
|
|
798
|
+
},
|
|
799
|
+
)
|
|
704
800
|
elif op_type == "relate":
|
|
705
801
|
relationship_id = relate_entities(
|
|
706
802
|
source=op["source"],
|
|
@@ -736,6 +832,17 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
736
832
|
]
|
|
737
833
|
)
|
|
738
834
|
|
|
835
|
+
elif name == "memory.trace":
|
|
836
|
+
result = trace_memory(memory_id=arguments["memory_id"])
|
|
837
|
+
return CallToolResult(
|
|
838
|
+
content=[
|
|
839
|
+
TextContent(
|
|
840
|
+
type="text",
|
|
841
|
+
text=json.dumps(result),
|
|
842
|
+
)
|
|
843
|
+
]
|
|
844
|
+
)
|
|
845
|
+
|
|
739
846
|
else:
|
|
740
847
|
return CallToolResult(
|
|
741
848
|
content=[
|
|
@@ -53,6 +53,7 @@ CREATE TABLE IF NOT EXISTS memories (
|
|
|
53
53
|
confidence REAL DEFAULT 1.0, -- How sure we are about this
|
|
54
54
|
source TEXT, -- Where this came from (conversation, document, etc.)
|
|
55
55
|
source_id TEXT, -- Reference to source (episode_id, etc.)
|
|
56
|
+
source_context TEXT, -- One-line breadcrumb (e.g., "Email from Jim re: Forum V+, 2025-01-28")
|
|
56
57
|
created_at TEXT DEFAULT (datetime('now')),
|
|
57
58
|
updated_at TEXT DEFAULT (datetime('now')),
|
|
58
59
|
last_accessed_at TEXT, -- For rehearsal-based importance boost
|
|
@@ -220,6 +221,7 @@ CREATE TABLE IF NOT EXISTS turn_buffer (
|
|
|
220
221
|
turn_number INTEGER NOT NULL,
|
|
221
222
|
user_content TEXT,
|
|
222
223
|
assistant_content TEXT,
|
|
224
|
+
is_archived INTEGER DEFAULT 0,
|
|
223
225
|
created_at TEXT DEFAULT (datetime('now'))
|
|
224
226
|
);
|
|
225
227
|
|
|
@@ -247,3 +249,6 @@ VALUES (1, 'Initial schema with entities, memories, relationships, episodes, pat
|
|
|
247
249
|
|
|
248
250
|
INSERT OR IGNORE INTO schema_migrations (version, description)
|
|
249
251
|
VALUES (2, 'Add turn_buffer table, episode narrative/summary columns, episode_embeddings');
|
|
252
|
+
|
|
253
|
+
INSERT OR IGNORE INTO schema_migrations (version, description)
|
|
254
|
+
VALUES (3, 'Add source_context to memories, is_archived to turn_buffer for episodic provenance');
|
|
@@ -10,6 +10,7 @@ import logging
|
|
|
10
10
|
import math
|
|
11
11
|
from dataclasses import dataclass
|
|
12
12
|
from datetime import datetime, timedelta
|
|
13
|
+
from pathlib import Path
|
|
13
14
|
from typing import Any, Dict, List, Optional, Tuple
|
|
14
15
|
|
|
15
16
|
from ..config import get_config
|
|
@@ -32,6 +33,9 @@ class RecallResult:
|
|
|
32
33
|
created_at: str
|
|
33
34
|
entities: List[str] # Related entity names
|
|
34
35
|
metadata: Optional[Dict] = None
|
|
36
|
+
source: Optional[str] = None
|
|
37
|
+
source_id: Optional[str] = None
|
|
38
|
+
source_context: Optional[str] = None
|
|
35
39
|
|
|
36
40
|
|
|
37
41
|
@dataclass
|
|
@@ -196,6 +200,12 @@ class RecallService:
|
|
|
196
200
|
# Parse metadata
|
|
197
201
|
metadata_val = row["metadata"] if "metadata" in row.keys() else None
|
|
198
202
|
|
|
203
|
+
# Extract source fields (may not exist in older DBs)
|
|
204
|
+
row_keys = row.keys()
|
|
205
|
+
source_val = row["source"] if "source" in row_keys else None
|
|
206
|
+
source_id_val = row["source_id"] if "source_id" in row_keys else None
|
|
207
|
+
source_context_val = row["source_context"] if "source_context" in row_keys else None
|
|
208
|
+
|
|
199
209
|
results.append(
|
|
200
210
|
RecallResult(
|
|
201
211
|
id=row["id"],
|
|
@@ -206,6 +216,9 @@ class RecallService:
|
|
|
206
216
|
created_at=row["created_at"],
|
|
207
217
|
entities=entity_names,
|
|
208
218
|
metadata=json.loads(metadata_val) if metadata_val else None,
|
|
219
|
+
source=source_val,
|
|
220
|
+
source_id=source_id_val,
|
|
221
|
+
source_context=source_context_val,
|
|
209
222
|
)
|
|
210
223
|
)
|
|
211
224
|
|
|
@@ -288,19 +301,24 @@ class RecallService:
|
|
|
288
301
|
|
|
289
302
|
memory_rows = self.db.execute(sql, tuple(params), fetch=True) or []
|
|
290
303
|
|
|
291
|
-
memories = [
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
304
|
+
memories = []
|
|
305
|
+
for row in memory_rows:
|
|
306
|
+
row_keys = row.keys()
|
|
307
|
+
memories.append(
|
|
308
|
+
RecallResult(
|
|
309
|
+
id=row["id"],
|
|
310
|
+
content=row["content"],
|
|
311
|
+
type=row["type"],
|
|
312
|
+
score=row["importance"],
|
|
313
|
+
importance=row["importance"],
|
|
314
|
+
created_at=row["created_at"],
|
|
315
|
+
entities=[entity["name"]],
|
|
316
|
+
metadata=json.loads(row["metadata"]) if row["metadata"] else None,
|
|
317
|
+
source=row["source"] if "source" in row_keys else None,
|
|
318
|
+
source_id=row["source_id"] if "source_id" in row_keys else None,
|
|
319
|
+
source_context=row["source_context"] if "source_context" in row_keys else None,
|
|
320
|
+
)
|
|
301
321
|
)
|
|
302
|
-
for row in memory_rows
|
|
303
|
-
]
|
|
304
322
|
|
|
305
323
|
# Get relationships
|
|
306
324
|
rel_sql = """
|
|
@@ -456,19 +474,25 @@ class RecallService:
|
|
|
456
474
|
|
|
457
475
|
rows = self.db.execute(sql, tuple(params), fetch=True) or []
|
|
458
476
|
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
477
|
+
results = []
|
|
478
|
+
for row in rows:
|
|
479
|
+
row_keys = row.keys()
|
|
480
|
+
results.append(
|
|
481
|
+
RecallResult(
|
|
482
|
+
id=row["id"],
|
|
483
|
+
content=row["content"],
|
|
484
|
+
type=row["type"],
|
|
485
|
+
score=row["importance"],
|
|
486
|
+
importance=row["importance"],
|
|
487
|
+
created_at=row["created_at"],
|
|
488
|
+
entities=row["entity_names"].split(",") if row["entity_names"] else [],
|
|
489
|
+
metadata=json.loads(row["metadata"]) if row["metadata"] else None,
|
|
490
|
+
source=row["source"] if "source" in row_keys else None,
|
|
491
|
+
source_id=row["source_id"] if "source_id" in row_keys else None,
|
|
492
|
+
source_context=row["source_context"] if "source_context" in row_keys else None,
|
|
493
|
+
)
|
|
469
494
|
)
|
|
470
|
-
|
|
471
|
-
]
|
|
495
|
+
return results
|
|
472
496
|
|
|
473
497
|
def recall_episodes(
|
|
474
498
|
self,
|
|
@@ -547,6 +571,129 @@ class RecallService:
|
|
|
547
571
|
for row in rows
|
|
548
572
|
]
|
|
549
573
|
|
|
574
|
+
def trace_memory(self, memory_id: int) -> Dict[str, Any]:
    """
    Reconstruct full provenance for a memory.

    Looks up the memory row, the entities linked to it, the episode that
    ``source_id`` points at (when ``source_id`` is numeric) together with
    that episode's archived turns, and a short preview of the source
    material file stored next to the database.

    Args:
        memory_id: The memory ID to trace

    Returns:
        Dict with keys ``memory``, ``episode``, ``archived_turns``,
        ``source_file``, ``source_file_preview`` and ``entities``.
        Every value except ``entities`` stays ``None`` when the
        corresponding piece of provenance does not exist; if the memory
        itself is unknown the untouched skeleton is returned.
    """
    result: Dict[str, Any] = {
        "memory": None,
        "episode": None,
        "archived_turns": None,
        "source_file": None,
        "source_file_preview": None,
        "entities": [],
    }

    # 1. Fetch the memory row
    memory_row = self.db.get_one(
        "memories", where="id = ?", where_params=(memory_id,)
    )
    if not memory_row:
        return result

    # Source columns may not exist in older DBs, so they are looked up
    # defensively; the remaining columns are required.
    row_keys = memory_row.keys()
    result["memory"] = {
        "id": memory_row["id"],
        "content": memory_row["content"],
        "type": memory_row["type"],
        "importance": memory_row["importance"],
        "confidence": memory_row["confidence"],
        "source": memory_row["source"] if "source" in row_keys else None,
        "source_id": memory_row["source_id"] if "source_id" in row_keys else None,
        "source_context": memory_row["source_context"] if "source_context" in row_keys else None,
        "created_at": memory_row["created_at"],
        "updated_at": memory_row["updated_at"],
        "access_count": memory_row["access_count"],
    }

    # 2. Fetch related entities
    entity_rows = self.db.execute(
        """
        SELECT e.name, e.type FROM entities e
        JOIN memory_entities me ON e.id = me.entity_id
        WHERE me.memory_id = ?
        """,
        (memory_id,),
        fetch=True,
    ) or []
    result["entities"] = [
        {"name": row["name"], "type": row["type"]} for row in entity_rows
    ]

    # 3. If source_id points to an episode, fetch it with archived turns.
    # Only the int() conversion is guarded: a non-numeric source_id simply
    # means "not an episode", while errors from the lookups below must not
    # be mistaken for that case.
    source_id = result["memory"]["source_id"]
    episode_id = None
    if source_id:
        try:
            episode_id = int(source_id)
        except (ValueError, TypeError):
            episode_id = None  # source_id wasn't a numeric episode ID

    episode_row = None
    if episode_id is not None:
        episode_row = self.db.get_one(
            "episodes", where="id = ?", where_params=(episode_id,)
        )

    if episode_row:
        ep_keys = episode_row.keys()

        # Rows are only accessed through keys() and indexing: sqlite3.Row
        # offers no .get(), so a dict-style lookup would raise here.
        raw_topics = episode_row["key_topics"] if "key_topics" in ep_keys else None
        try:
            key_topics = json.loads(raw_topics) if raw_topics else []
        except (ValueError, TypeError):
            # Malformed topics should not hide the rest of the episode.
            key_topics = []

        result["episode"] = {
            "id": episode_row["id"],
            "narrative": episode_row["narrative"] if "narrative" in ep_keys else None,
            "started_at": episode_row["started_at"],
            "ended_at": episode_row["ended_at"] if "ended_at" in ep_keys else None,
            "key_topics": key_topics,
        }

        # Fetch archived turns
        turn_rows = self.db.execute(
            """
            SELECT turn_number, user_content, assistant_content, created_at
            FROM turn_buffer
            WHERE episode_id = ? AND is_archived = 1
            ORDER BY turn_number ASC
            """,
            (episode_id,),
            fetch=True,
        ) or []
        if turn_rows:
            result["archived_turns"] = [
                {
                    "turn": row["turn_number"],
                    "user": row["user_content"],
                    "assistant": row["assistant_content"],
                    "timestamp": row["created_at"],
                }
                for row in turn_rows
            ]

    # 4. Check for source material file on disk
    sources_dir = self.db.db_path.parent / "sources"
    source_file = sources_dir / f"{memory_id}.md"
    if source_file.exists():
        result["source_file"] = str(source_file)
        try:
            file_text = source_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            result["source_file_preview"] = "(could not read file)"
        else:
            # Skip frontmatter for preview. The closing delimiter must be
            # a line of its own, so a "---" inside a frontmatter value is
            # not mistaken for the end of the header.
            body = file_text
            lines = file_text.splitlines(keepends=True)
            if lines and lines[0].strip() == "---":
                for idx, line in enumerate(lines[1:], start=1):
                    if line.strip() == "---":
                        body = "".join(lines[idx + 1:]).strip()
                        break
            result["source_file_preview"] = body[:200]

    return result
|
|
696
|
+
|
|
550
697
|
def _keyword_search(
|
|
551
698
|
self,
|
|
552
699
|
query: str,
|
|
@@ -610,3 +757,8 @@ def search_entities(query: str, **kwargs) -> List[EntityResult]:
|
|
|
610
757
|
def recall_episodes(query: str, **kwargs) -> List[Dict[str, Any]]:
|
|
611
758
|
"""Search episode narratives"""
|
|
612
759
|
return get_recall_service().recall_episodes(query, **kwargs)
|
|
760
|
+
|
|
761
|
+
|
|
762
|
+
def trace_memory(memory_id: int) -> Dict[str, Any]:
    """Trace a memory's provenance through the shared recall service.

    Module-level convenience wrapper: obtains the service from
    ``get_recall_service()`` and forwards ``memory_id`` to its
    ``trace_memory`` method, returning that result unchanged.
    """
    service = get_recall_service()
    return service.trace_memory(memory_id)
|
|
@@ -9,6 +9,7 @@ import json
|
|
|
9
9
|
import logging
|
|
10
10
|
import uuid
|
|
11
11
|
from datetime import datetime
|
|
12
|
+
from pathlib import Path
|
|
12
13
|
from typing import Any, Dict, List, Optional
|
|
13
14
|
|
|
14
15
|
from ..database import content_hash, get_db
|
|
@@ -127,6 +128,7 @@ class RememberService:
|
|
|
127
128
|
confidence: float = 1.0,
|
|
128
129
|
source: Optional[str] = None,
|
|
129
130
|
source_id: Optional[str] = None,
|
|
131
|
+
source_context: Optional[str] = None,
|
|
130
132
|
metadata: Optional[Dict] = None,
|
|
131
133
|
) -> Optional[int]:
|
|
132
134
|
"""
|
|
@@ -140,6 +142,7 @@ class RememberService:
|
|
|
140
142
|
confidence: How confident we are (0.0-1.0)
|
|
141
143
|
source: Where this came from
|
|
142
144
|
source_id: Reference to source
|
|
145
|
+
source_context: One-line breadcrumb describing the source material
|
|
143
146
|
metadata: Additional metadata
|
|
144
147
|
|
|
145
148
|
Returns:
|
|
@@ -164,21 +167,22 @@ class RememberService:
|
|
|
164
167
|
return existing["id"]
|
|
165
168
|
|
|
166
169
|
# Insert new memory
|
|
167
|
-
|
|
168
|
-
"
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
170
|
+
insert_data = {
|
|
171
|
+
"content": content,
|
|
172
|
+
"content_hash": mem_hash,
|
|
173
|
+
"type": memory_type,
|
|
174
|
+
"importance": importance,
|
|
175
|
+
"confidence": confidence,
|
|
176
|
+
"source": source,
|
|
177
|
+
"source_id": source_id,
|
|
178
|
+
"created_at": datetime.utcnow().isoformat(),
|
|
179
|
+
"updated_at": datetime.utcnow().isoformat(),
|
|
180
|
+
"metadata": json.dumps(metadata) if metadata else None,
|
|
181
|
+
}
|
|
182
|
+
if source_context:
|
|
183
|
+
insert_data["source_context"] = source_context
|
|
184
|
+
|
|
185
|
+
memory_id = self.db.insert("memories", insert_data)
|
|
182
186
|
|
|
183
187
|
# Generate and store embedding
|
|
184
188
|
embedding = embed_sync(content)
|
|
@@ -485,11 +489,22 @@ class RememberService:
|
|
|
485
489
|
memory_type=fact.get("type", "fact"),
|
|
486
490
|
about_entities=fact.get("about"),
|
|
487
491
|
importance=fact.get("importance", 1.0),
|
|
488
|
-
source="session_summary",
|
|
492
|
+
source=fact.get("source", "session_summary"),
|
|
489
493
|
source_id=str(episode_id),
|
|
494
|
+
source_context=fact.get("source_context"),
|
|
490
495
|
)
|
|
491
496
|
if memory_id:
|
|
492
497
|
result["facts_stored"] += 1
|
|
498
|
+
# Save source material to disk if provided
|
|
499
|
+
if fact.get("source_material"):
|
|
500
|
+
self.save_source_material(
|
|
501
|
+
memory_id,
|
|
502
|
+
fact["source_material"],
|
|
503
|
+
metadata={
|
|
504
|
+
"source": fact.get("source", "session_summary"),
|
|
505
|
+
"source_context": fact.get("source_context"),
|
|
506
|
+
},
|
|
507
|
+
)
|
|
493
508
|
|
|
494
509
|
# 4. Store commitments
|
|
495
510
|
if commitments:
|
|
@@ -499,11 +514,21 @@ class RememberService:
|
|
|
499
514
|
memory_type="commitment",
|
|
500
515
|
about_entities=commitment.get("about"),
|
|
501
516
|
importance=commitment.get("importance", 1.0),
|
|
502
|
-
source="session_summary",
|
|
517
|
+
source=commitment.get("source", "session_summary"),
|
|
503
518
|
source_id=str(episode_id),
|
|
519
|
+
source_context=commitment.get("source_context"),
|
|
504
520
|
)
|
|
505
521
|
if memory_id:
|
|
506
522
|
result["commitments_stored"] += 1
|
|
523
|
+
if commitment.get("source_material"):
|
|
524
|
+
self.save_source_material(
|
|
525
|
+
memory_id,
|
|
526
|
+
commitment["source_material"],
|
|
527
|
+
metadata={
|
|
528
|
+
"source": commitment.get("source", "session_summary"),
|
|
529
|
+
"source_context": commitment.get("source_context"),
|
|
530
|
+
},
|
|
531
|
+
)
|
|
507
532
|
|
|
508
533
|
# 5. Store entities
|
|
509
534
|
if entities:
|
|
@@ -529,8 +554,11 @@ class RememberService:
|
|
|
529
554
|
if rel_id:
|
|
530
555
|
result["relationships_stored"] += 1
|
|
531
556
|
|
|
532
|
-
# 7.
|
|
533
|
-
self.db.
|
|
557
|
+
# 7. Archive turn buffer for this episode (preserve for provenance tracing)
|
|
558
|
+
self.db.execute(
|
|
559
|
+
"UPDATE turn_buffer SET is_archived = 1 WHERE episode_id = ?",
|
|
560
|
+
(episode_id,),
|
|
561
|
+
)
|
|
534
562
|
|
|
535
563
|
logger.info(
|
|
536
564
|
f"Session {episode_id} summarized: {result['facts_stored']} facts, "
|
|
@@ -568,7 +596,7 @@ class RememberService:
|
|
|
568
596
|
"""
|
|
569
597
|
SELECT turn_number, user_content, assistant_content, created_at
|
|
570
598
|
FROM turn_buffer
|
|
571
|
-
WHERE episode_id = ?
|
|
599
|
+
WHERE episode_id = ? AND (is_archived = 0 OR is_archived IS NULL)
|
|
572
600
|
ORDER BY turn_number ASC
|
|
573
601
|
""",
|
|
574
602
|
(ep["id"],),
|
|
@@ -594,6 +622,55 @@ class RememberService:
|
|
|
594
622
|
|
|
595
623
|
return results
|
|
596
624
|
|
|
625
|
+
def save_source_material(
    self,
    memory_id: int,
    content: str,
    metadata: Optional[Dict] = None,
) -> Optional[Path]:
    """
    Save raw source material (email, transcript, document) to disk.

    The file is plain markdown with a YAML frontmatter header, written to
    ``sources/{memory_id}.md`` inside the directory that holds the
    database file. The directory is created lazily on first write.

    Args:
        memory_id: The memory this source material belongs to
        content: Full raw text of the source material
        metadata: Optional dict with source, source_context, etc.
            Entries whose value is None are left out of the header.

    Returns:
        Path to the saved file, or None on failure
    """
    try:
        sources_dir = self.db.db_path.parent / "sources"
        sources_dir.mkdir(parents=True, exist_ok=True)

        file_path = sources_dir / f"{memory_id}.md"

        # Build frontmatter
        header_lines = ["---", f"memory_id: {memory_id}"]
        if metadata:
            for key, value in metadata.items():
                if value is None:
                    continue
                # A JSON string is a valid YAML double-quoted scalar, so
                # json.dumps escapes quotes, backslashes and newlines that
                # would otherwise break the header or fake a closing
                # "---" line. Plain values render exactly as before.
                quoted = json.dumps(str(value), ensure_ascii=False)
                header_lines.append(f"{key}: {quoted}")
        header_lines.append(f"saved_at: {datetime.utcnow().isoformat()}")
        header_lines.append("---")
        header_lines.append("")  # so the body starts on its own line

        file_content = "\n".join(header_lines) + content

        file_path.write_text(file_content, encoding="utf-8")
        logger.debug(f"Saved source material for memory {memory_id} to {file_path}")
        return file_path

    except Exception as e:
        # Deliberately best-effort: a failed write must not fail the
        # memory operation that triggered it, so log and report None.
        logger.warning(f"Could not save source material for memory {memory_id}: {e}")
        return None
|
|
673
|
+
|
|
597
674
|
def _ensure_entity(self, extracted: ExtractedEntity) -> Optional[int]:
|
|
598
675
|
"""Ensure an extracted entity exists in the database"""
|
|
599
676
|
existing = self.db.get_one(
|