get-claudia 1.28.2 → 1.28.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/memory-daemon/claudia_memory/mcp/server.py +99 -25
- package/memory-daemon/claudia_memory/services/remember.py +8 -6
- package/memory-daemon/tests/test_batch_parallel.py +348 -0
- package/memory-daemon/tests/test_llm_coercion.py +258 -0
- package/package.json +1 -1
- package/template-v2/.claude/agents/document-processor.md +67 -0
- package/template-v2/.claude/skills/capture-meeting/SKILL.md +27 -1
- package/template-v2/.claude/skills/memory-manager.md +28 -2
|
@@ -66,9 +66,33 @@ from ..services.remember import (
|
|
|
66
66
|
remember_fact,
|
|
67
67
|
remember_message,
|
|
68
68
|
)
|
|
69
|
+
from ..embeddings import get_embedding_service
|
|
69
70
|
|
|
70
71
|
logger = logging.getLogger(__name__)
|
|
71
72
|
|
|
73
|
+
|
|
74
|
+
def _coerce_arg(arguments: Dict[str, Any], key: str, expected_type: type = list) -> None:
|
|
75
|
+
"""Coerce a tool argument from JSON string to expected type in-place.
|
|
76
|
+
|
|
77
|
+
LLMs sometimes serialize array parameters as JSON strings instead of
|
|
78
|
+
native arrays. This transparently parses them back so handler code
|
|
79
|
+
can assume native types.
|
|
80
|
+
"""
|
|
81
|
+
value = arguments.get(key)
|
|
82
|
+
if isinstance(value, str):
|
|
83
|
+
try:
|
|
84
|
+
parsed = json.loads(value)
|
|
85
|
+
if isinstance(parsed, expected_type):
|
|
86
|
+
arguments[key] = parsed
|
|
87
|
+
else:
|
|
88
|
+
logger.warning(
|
|
89
|
+
f"Coercion: '{key}' parsed to {type(parsed).__name__}, "
|
|
90
|
+
f"expected {expected_type.__name__}"
|
|
91
|
+
)
|
|
92
|
+
except (json.JSONDecodeError, TypeError):
|
|
93
|
+
logger.warning(f"Could not parse '{key}' as JSON: {value[:100]}")
|
|
94
|
+
|
|
95
|
+
|
|
72
96
|
# Initialize the MCP server
|
|
73
97
|
server = Server("claudia-memory")
|
|
74
98
|
|
|
@@ -94,7 +118,7 @@ async def list_tools() -> ListToolsResult:
|
|
|
94
118
|
"default": "fact",
|
|
95
119
|
},
|
|
96
120
|
"about": {
|
|
97
|
-
"type": "array",
|
|
121
|
+
"type": ["array", "string"],
|
|
98
122
|
"items": {"type": "string"},
|
|
99
123
|
"description": "Entity names this memory relates to (people, projects, etc.)",
|
|
100
124
|
},
|
|
@@ -139,7 +163,7 @@ async def list_tools() -> ListToolsResult:
|
|
|
139
163
|
"default": 10,
|
|
140
164
|
},
|
|
141
165
|
"types": {
|
|
142
|
-
"type": "array",
|
|
166
|
+
"type": ["array", "string"],
|
|
143
167
|
"items": {"type": "string"},
|
|
144
168
|
"description": "Filter by memory types (fact, preference, observation, learning, commitment)",
|
|
145
169
|
},
|
|
@@ -153,7 +177,7 @@ async def list_tools() -> ListToolsResult:
|
|
|
153
177
|
"default": False,
|
|
154
178
|
},
|
|
155
179
|
"ids": {
|
|
156
|
-
"type": "array",
|
|
180
|
+
"type": ["array", "string"],
|
|
157
181
|
"items": {"type": "integer"},
|
|
158
182
|
"description": "Fetch specific memories by ID (skips search). Use after a compact search to get full content.",
|
|
159
183
|
},
|
|
@@ -232,7 +256,7 @@ async def list_tools() -> ListToolsResult:
|
|
|
232
256
|
"default": 5,
|
|
233
257
|
},
|
|
234
258
|
"types": {
|
|
235
|
-
"type": "array",
|
|
259
|
+
"type": ["array", "string"],
|
|
236
260
|
"items": {"type": "string"},
|
|
237
261
|
"description": "Filter by type (reminder, suggestion, warning, insight)",
|
|
238
262
|
},
|
|
@@ -268,7 +292,7 @@ async def list_tools() -> ListToolsResult:
|
|
|
268
292
|
"description": "Description of the entity",
|
|
269
293
|
},
|
|
270
294
|
"aliases": {
|
|
271
|
-
"type": "array",
|
|
295
|
+
"type": ["array", "string"],
|
|
272
296
|
"items": {"type": "string"},
|
|
273
297
|
"description": "Alternative names or spellings",
|
|
274
298
|
},
|
|
@@ -287,7 +311,7 @@ async def list_tools() -> ListToolsResult:
|
|
|
287
311
|
"description": "Search query",
|
|
288
312
|
},
|
|
289
313
|
"types": {
|
|
290
|
-
"type": "array",
|
|
314
|
+
"type": ["array", "string"],
|
|
291
315
|
"items": {"type": "string"},
|
|
292
316
|
"description": "Filter by entity types",
|
|
293
317
|
},
|
|
@@ -354,7 +378,7 @@ async def list_tools() -> ListToolsResult:
|
|
|
354
378
|
),
|
|
355
379
|
},
|
|
356
380
|
"facts": {
|
|
357
|
-
"type": "array",
|
|
381
|
+
"type": ["array", "string"],
|
|
358
382
|
"items": {
|
|
359
383
|
"type": "object",
|
|
360
384
|
"properties": {
|
|
@@ -388,7 +412,7 @@ async def list_tools() -> ListToolsResult:
|
|
|
388
412
|
"description": "Structured facts, preferences, observations, learnings extracted from the session",
|
|
389
413
|
},
|
|
390
414
|
"commitments": {
|
|
391
|
-
"type": "array",
|
|
415
|
+
"type": ["array", "string"],
|
|
392
416
|
"items": {
|
|
393
417
|
"type": "object",
|
|
394
418
|
"properties": {
|
|
@@ -416,7 +440,7 @@ async def list_tools() -> ListToolsResult:
|
|
|
416
440
|
"description": "Commitments or promises made during the session",
|
|
417
441
|
},
|
|
418
442
|
"entities": {
|
|
419
|
-
"type": "array",
|
|
443
|
+
"type": ["array", "string"],
|
|
420
444
|
"items": {
|
|
421
445
|
"type": "object",
|
|
422
446
|
"properties": {
|
|
@@ -437,7 +461,7 @@ async def list_tools() -> ListToolsResult:
|
|
|
437
461
|
"description": "New or updated entities mentioned during the session",
|
|
438
462
|
},
|
|
439
463
|
"relationships": {
|
|
440
|
-
"type": "array",
|
|
464
|
+
"type": ["array", "string"],
|
|
441
465
|
"items": {
|
|
442
466
|
"type": "object",
|
|
443
467
|
"properties": {
|
|
@@ -451,12 +475,12 @@ async def list_tools() -> ListToolsResult:
|
|
|
451
475
|
"description": "Relationships between entities observed during the session",
|
|
452
476
|
},
|
|
453
477
|
"key_topics": {
|
|
454
|
-
"type": "array",
|
|
478
|
+
"type": ["array", "string"],
|
|
455
479
|
"items": {"type": "string"},
|
|
456
480
|
"description": "Main topics discussed in the session",
|
|
457
481
|
},
|
|
458
482
|
"reflections": {
|
|
459
|
-
"type": "array",
|
|
483
|
+
"type": ["array", "string"],
|
|
460
484
|
"items": {
|
|
461
485
|
"type": "object",
|
|
462
486
|
"properties": {
|
|
@@ -492,7 +516,7 @@ async def list_tools() -> ListToolsResult:
|
|
|
492
516
|
),
|
|
493
517
|
},
|
|
494
518
|
},
|
|
495
|
-
"required": ["
|
|
519
|
+
"required": ["narrative"],
|
|
496
520
|
},
|
|
497
521
|
),
|
|
498
522
|
Tool(
|
|
@@ -524,7 +548,7 @@ async def list_tools() -> ListToolsResult:
|
|
|
524
548
|
"description": "Semantic search query (optional). If omitted, returns recent high-importance reflections.",
|
|
525
549
|
},
|
|
526
550
|
"types": {
|
|
527
|
-
"type": "array",
|
|
551
|
+
"type": ["array", "string"],
|
|
528
552
|
"items": {
|
|
529
553
|
"type": "string",
|
|
530
554
|
"enum": ["observation", "pattern", "learning", "question"],
|
|
@@ -570,7 +594,7 @@ async def list_tools() -> ListToolsResult:
|
|
|
570
594
|
"type": "object",
|
|
571
595
|
"properties": {
|
|
572
596
|
"operations": {
|
|
573
|
-
"type": "array",
|
|
597
|
+
"type": ["array", "string"],
|
|
574
598
|
"description": "Array of operations to execute in order",
|
|
575
599
|
"items": {
|
|
576
600
|
"type": "object",
|
|
@@ -775,12 +799,12 @@ async def list_tools() -> ListToolsResult:
|
|
|
775
799
|
"description": "Brief summary of the document",
|
|
776
800
|
},
|
|
777
801
|
"about": {
|
|
778
|
-
"type": "array",
|
|
802
|
+
"type": ["array", "string"],
|
|
779
803
|
"items": {"type": "string"},
|
|
780
804
|
"description": "Entity names this document relates to",
|
|
781
805
|
},
|
|
782
806
|
"memory_ids": {
|
|
783
|
-
"type": "array",
|
|
807
|
+
"type": ["array", "string"],
|
|
784
808
|
"items": {"type": "integer"},
|
|
785
809
|
"description": "Memory IDs to link as sourced from this document",
|
|
786
810
|
},
|
|
@@ -1158,6 +1182,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
1158
1182
|
"""Handle tool calls"""
|
|
1159
1183
|
try:
|
|
1160
1184
|
if name == "memory.remember":
|
|
1185
|
+
_coerce_arg(arguments, "about")
|
|
1161
1186
|
memory_id = remember_fact(
|
|
1162
1187
|
content=arguments["content"],
|
|
1163
1188
|
memory_type=arguments.get("type", "fact"),
|
|
@@ -1187,6 +1212,8 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
1187
1212
|
)
|
|
1188
1213
|
|
|
1189
1214
|
elif name == "memory.recall":
|
|
1215
|
+
_coerce_arg(arguments, "types")
|
|
1216
|
+
_coerce_arg(arguments, "ids")
|
|
1190
1217
|
# Direct fetch by IDs (skip search)
|
|
1191
1218
|
if "ids" in arguments and arguments["ids"]:
|
|
1192
1219
|
results = fetch_by_ids(arguments["ids"])
|
|
@@ -1339,6 +1366,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
1339
1366
|
)
|
|
1340
1367
|
|
|
1341
1368
|
elif name == "memory.predictions":
|
|
1369
|
+
_coerce_arg(arguments, "types")
|
|
1342
1370
|
predictions = get_predictions(
|
|
1343
1371
|
limit=arguments.get("limit", 5),
|
|
1344
1372
|
prediction_types=arguments.get("types"),
|
|
@@ -1364,6 +1392,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
1364
1392
|
)
|
|
1365
1393
|
|
|
1366
1394
|
elif name == "memory.entity":
|
|
1395
|
+
_coerce_arg(arguments, "aliases")
|
|
1367
1396
|
entity_id = remember_entity(
|
|
1368
1397
|
name=arguments["name"],
|
|
1369
1398
|
entity_type=arguments.get("type", "person"),
|
|
@@ -1380,6 +1409,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
1380
1409
|
)
|
|
1381
1410
|
|
|
1382
1411
|
elif name == "memory.search_entities":
|
|
1412
|
+
_coerce_arg(arguments, "types")
|
|
1383
1413
|
results = search_entities(
|
|
1384
1414
|
query=arguments["query"],
|
|
1385
1415
|
entity_types=arguments.get("types"),
|
|
@@ -1426,19 +1456,30 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
1426
1456
|
)
|
|
1427
1457
|
|
|
1428
1458
|
elif name == "memory.end_session":
|
|
1429
|
-
|
|
1459
|
+
# Coerce all array fields (LLMs may send JSON strings)
|
|
1460
|
+
for field in ("facts", "commitments", "entities", "relationships", "key_topics", "reflections"):
|
|
1461
|
+
_coerce_arg(arguments, field)
|
|
1430
1462
|
|
|
1431
|
-
#
|
|
1463
|
+
# Handle missing or invalid episode_id: auto-create
|
|
1464
|
+
episode_id = arguments.get("episode_id")
|
|
1432
1465
|
svc = get_remember_service()
|
|
1433
|
-
|
|
1434
|
-
if not episode:
|
|
1466
|
+
if episode_id is None:
|
|
1435
1467
|
from datetime import datetime
|
|
1436
|
-
|
|
1468
|
+
episode_id = svc.db.insert("episodes", {
|
|
1437
1469
|
"started_at": datetime.utcnow().isoformat(),
|
|
1438
|
-
"source":
|
|
1470
|
+
"source": "claude_code",
|
|
1439
1471
|
})
|
|
1440
|
-
logger.info(f"Auto-created episode {
|
|
1441
|
-
|
|
1472
|
+
logger.info(f"Auto-created episode {episode_id} (no episode_id provided)")
|
|
1473
|
+
else:
|
|
1474
|
+
episode = svc.db.get_one("episodes", where="id = ?", where_params=(episode_id,))
|
|
1475
|
+
if not episode:
|
|
1476
|
+
from datetime import datetime
|
|
1477
|
+
new_id = svc.db.insert("episodes", {
|
|
1478
|
+
"started_at": datetime.utcnow().isoformat(),
|
|
1479
|
+
"source": arguments.get("source", "claude_code"),
|
|
1480
|
+
})
|
|
1481
|
+
logger.info(f"Auto-created episode {new_id} (requested {episode_id} did not exist)")
|
|
1482
|
+
episode_id = new_id
|
|
1442
1483
|
|
|
1443
1484
|
result = end_session(
|
|
1444
1485
|
episode_id=episode_id,
|
|
@@ -1489,6 +1530,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
1489
1530
|
)
|
|
1490
1531
|
|
|
1491
1532
|
elif name == "memory.reflections":
|
|
1533
|
+
_coerce_arg(arguments, "types")
|
|
1492
1534
|
action = arguments.get("action", "get")
|
|
1493
1535
|
limit = arguments.get("limit", 10)
|
|
1494
1536
|
types = arguments.get("types")
|
|
@@ -1578,7 +1620,35 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
1578
1620
|
)
|
|
1579
1621
|
|
|
1580
1622
|
elif name == "memory.batch":
|
|
1623
|
+
_coerce_arg(arguments, "operations")
|
|
1581
1624
|
operations = arguments.get("operations", [])
|
|
1625
|
+
|
|
1626
|
+
# --- Pass 1: Collect all texts that need embeddings ---
|
|
1627
|
+
embed_tasks = [] # list of (index, text) for parallel embedding
|
|
1628
|
+
for i, op in enumerate(operations):
|
|
1629
|
+
op_type = op.get("op")
|
|
1630
|
+
if op_type == "remember":
|
|
1631
|
+
embed_tasks.append((i, op["content"]))
|
|
1632
|
+
elif op_type == "entity":
|
|
1633
|
+
# Only new entities need embeddings; collect optimistically
|
|
1634
|
+
embed_text = f"{op['name']}. {op.get('description') or ''}"
|
|
1635
|
+
embed_tasks.append((i, embed_text))
|
|
1636
|
+
|
|
1637
|
+
# --- Parallel embedding pass ---
|
|
1638
|
+
embeddings_map = {} # index -> embedding
|
|
1639
|
+
if embed_tasks:
|
|
1640
|
+
try:
|
|
1641
|
+
emb_svc = get_embedding_service()
|
|
1642
|
+
texts = [text for _, text in embed_tasks]
|
|
1643
|
+
all_embeddings = await emb_svc.embed_batch(texts)
|
|
1644
|
+
for (idx, _), emb in zip(embed_tasks, all_embeddings):
|
|
1645
|
+
if emb is not None:
|
|
1646
|
+
embeddings_map[idx] = emb
|
|
1647
|
+
except Exception as e:
|
|
1648
|
+
logger.warning(f"Batch parallel embedding failed, falling back to per-op: {e}")
|
|
1649
|
+
# embeddings_map stays empty; remember_fact/entity will embed individually
|
|
1650
|
+
|
|
1651
|
+
# --- Pass 2: Execute operations with pre-computed embeddings ---
|
|
1582
1652
|
results = []
|
|
1583
1653
|
for i, op in enumerate(operations):
|
|
1584
1654
|
op_type = op.get("op")
|
|
@@ -1590,6 +1660,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
1590
1660
|
entity_type=op.get("type", "person"),
|
|
1591
1661
|
description=op.get("description"),
|
|
1592
1662
|
aliases=op.get("aliases"),
|
|
1663
|
+
_precomputed_embedding=embeddings_map.get(i),
|
|
1593
1664
|
)
|
|
1594
1665
|
op_result["success"] = True
|
|
1595
1666
|
op_result["entity_id"] = entity_id
|
|
@@ -1601,6 +1672,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
1601
1672
|
importance=op.get("importance", 1.0),
|
|
1602
1673
|
source=op.get("source"),
|
|
1603
1674
|
source_context=op.get("source_context"),
|
|
1675
|
+
_precomputed_embedding=embeddings_map.get(i),
|
|
1604
1676
|
)
|
|
1605
1677
|
op_result["success"] = True
|
|
1606
1678
|
op_result["memory_id"] = memory_id
|
|
@@ -1716,6 +1788,8 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
|
|
|
1716
1788
|
)
|
|
1717
1789
|
|
|
1718
1790
|
elif name == "memory.file":
|
|
1791
|
+
_coerce_arg(arguments, "about")
|
|
1792
|
+
_coerce_arg(arguments, "memory_ids")
|
|
1719
1793
|
doc_svc = get_document_service()
|
|
1720
1794
|
result = doc_svc.file_document_from_text(
|
|
1721
1795
|
content=arguments["content"],
|
|
@@ -141,6 +141,7 @@ class RememberService:
|
|
|
141
141
|
source_context: Optional[str] = None,
|
|
142
142
|
metadata: Optional[Dict] = None,
|
|
143
143
|
origin_type: Optional[str] = None,
|
|
144
|
+
_precomputed_embedding: Optional[List[float]] = None,
|
|
144
145
|
) -> Optional[int]:
|
|
145
146
|
"""
|
|
146
147
|
Store a discrete fact/memory.
|
|
@@ -217,8 +218,8 @@ class RememberService:
|
|
|
217
218
|
|
|
218
219
|
memory_id = self.db.insert("memories", insert_data)
|
|
219
220
|
|
|
220
|
-
#
|
|
221
|
-
embedding = embed_sync(content)
|
|
221
|
+
# Store embedding (use precomputed if available, otherwise generate)
|
|
222
|
+
embedding = _precomputed_embedding or embed_sync(content)
|
|
222
223
|
if embedding:
|
|
223
224
|
try:
|
|
224
225
|
self.db.execute(
|
|
@@ -263,6 +264,7 @@ class RememberService:
|
|
|
263
264
|
description: Optional[str] = None,
|
|
264
265
|
aliases: Optional[List[str]] = None,
|
|
265
266
|
metadata: Optional[Dict] = None,
|
|
267
|
+
_precomputed_embedding: Optional[List[float]] = None,
|
|
266
268
|
) -> int:
|
|
267
269
|
"""
|
|
268
270
|
Create or update an entity.
|
|
@@ -326,9 +328,9 @@ class RememberService:
|
|
|
326
328
|
},
|
|
327
329
|
)
|
|
328
330
|
|
|
329
|
-
#
|
|
331
|
+
# Store embedding (use precomputed if available, otherwise generate)
|
|
330
332
|
embed_text = f"{name}. {description or ''}"
|
|
331
|
-
embedding = embed_sync(embed_text)
|
|
333
|
+
embedding = _precomputed_embedding or embed_sync(embed_text)
|
|
332
334
|
if embedding:
|
|
333
335
|
try:
|
|
334
336
|
self.db.execute(
|
|
@@ -1512,12 +1514,12 @@ def remember_message(content: str, role: str = "user", **kwargs) -> Dict[str, An
|
|
|
1512
1514
|
|
|
1513
1515
|
|
|
1514
1516
|
def remember_fact(content: str, **kwargs) -> Optional[int]:
|
|
1515
|
-
"""Store a discrete fact"""
|
|
1517
|
+
"""Store a discrete fact. Pass _precomputed_embedding to skip Ollama call."""
|
|
1516
1518
|
return get_remember_service().remember_fact(content, **kwargs)
|
|
1517
1519
|
|
|
1518
1520
|
|
|
1519
1521
|
def remember_entity(name: str, **kwargs) -> int:
|
|
1520
|
-
"""Create or update an entity"""
|
|
1522
|
+
"""Create or update an entity. Pass _precomputed_embedding to skip Ollama call."""
|
|
1521
1523
|
return get_remember_service().remember_entity(name, **kwargs)
|
|
1522
1524
|
|
|
1523
1525
|
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
"""Tests for parallel batch embedding optimization.
|
|
2
|
+
|
|
3
|
+
Verifies that the batch handler's parallel embedding pass correctly:
|
|
4
|
+
- Skips embed_sync when precomputed embeddings are provided
|
|
5
|
+
- Falls back to embed_sync when no precomputed embedding is given
|
|
6
|
+
- Stores memories and entities correctly in both cases
|
|
7
|
+
|
|
8
|
+
Note: Vector tables (memory_embeddings, entity_embeddings) require sqlite-vec
|
|
9
|
+
which may not be available in test environments. Tests verify behavior through
|
|
10
|
+
mock assertions rather than querying vector tables directly.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import tempfile
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from unittest.mock import MagicMock, patch
|
|
18
|
+
|
|
19
|
+
import pytest
|
|
20
|
+
|
|
21
|
+
from claudia_memory.database import Database, content_hash
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@pytest.fixture
|
|
25
|
+
def db():
|
|
26
|
+
"""Create a temporary test database"""
|
|
27
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
28
|
+
db_path = Path(tmpdir) / "test.db"
|
|
29
|
+
database = Database(db_path)
|
|
30
|
+
database.initialize()
|
|
31
|
+
yield database
|
|
32
|
+
database.close()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _get_remember_service(db):
|
|
36
|
+
"""Create a RememberService with test database and mocked embeddings"""
|
|
37
|
+
from claudia_memory.services.remember import RememberService
|
|
38
|
+
from claudia_memory.extraction.entity_extractor import get_extractor
|
|
39
|
+
|
|
40
|
+
svc = RememberService.__new__(RememberService)
|
|
41
|
+
svc.db = db
|
|
42
|
+
svc.embedding_service = MagicMock()
|
|
43
|
+
svc.extractor = get_extractor()
|
|
44
|
+
return svc
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _fake_embedding(text):
|
|
48
|
+
"""Generate a deterministic fake 384-dim embedding from text"""
|
|
49
|
+
import hashlib
|
|
50
|
+
h = hashlib.sha256(text.encode()).digest()
|
|
51
|
+
return [float(b) / 255.0 for b in (h * 12)][:384]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class TestPrecomputedEmbedding:
|
|
55
|
+
"""Tests that precomputed embeddings skip the embed_sync call"""
|
|
56
|
+
|
|
57
|
+
def test_remember_fact_with_precomputed_skips_embed(self, db):
|
|
58
|
+
"""When precomputed embedding is provided, embed_sync is not called"""
|
|
59
|
+
svc = _get_remember_service(db)
|
|
60
|
+
fake_emb = _fake_embedding("test content")
|
|
61
|
+
|
|
62
|
+
with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
|
|
63
|
+
memory_id = svc.remember_fact(
|
|
64
|
+
content="Ford prefers async communication",
|
|
65
|
+
memory_type="preference",
|
|
66
|
+
importance=0.7,
|
|
67
|
+
_precomputed_embedding=fake_emb,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
mock_embed.assert_not_called()
|
|
71
|
+
|
|
72
|
+
assert memory_id is not None
|
|
73
|
+
# Memory itself is stored in the regular memories table
|
|
74
|
+
memory = db.get_one("memories", where="id = ?", where_params=(memory_id,))
|
|
75
|
+
assert memory is not None
|
|
76
|
+
assert memory["content"] == "Ford prefers async communication"
|
|
77
|
+
assert memory["type"] == "preference"
|
|
78
|
+
|
|
79
|
+
def test_remember_fact_without_precomputed_calls_embed(self, db):
|
|
80
|
+
"""Without precomputed embedding, embed_sync is called normally"""
|
|
81
|
+
svc = _get_remember_service(db)
|
|
82
|
+
fallback_emb = _fake_embedding("fallback")
|
|
83
|
+
|
|
84
|
+
with patch("claudia_memory.services.remember.embed_sync", return_value=fallback_emb) as mock_embed:
|
|
85
|
+
memory_id = svc.remember_fact(
|
|
86
|
+
content="Some fact to remember",
|
|
87
|
+
memory_type="fact",
|
|
88
|
+
importance=0.8,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
mock_embed.assert_called_once_with("Some fact to remember")
|
|
92
|
+
|
|
93
|
+
assert memory_id is not None
|
|
94
|
+
|
|
95
|
+
def test_remember_entity_with_precomputed_skips_embed(self, db):
|
|
96
|
+
"""New entity uses precomputed embedding instead of calling embed_sync"""
|
|
97
|
+
svc = _get_remember_service(db)
|
|
98
|
+
fake_emb = _fake_embedding("Ford Perry. CEO of Perry Ventures")
|
|
99
|
+
|
|
100
|
+
with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
|
|
101
|
+
entity_id = svc.remember_entity(
|
|
102
|
+
name="Ford Perry",
|
|
103
|
+
entity_type="person",
|
|
104
|
+
description="CEO of Perry Ventures",
|
|
105
|
+
_precomputed_embedding=fake_emb,
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
mock_embed.assert_not_called()
|
|
109
|
+
|
|
110
|
+
assert entity_id is not None
|
|
111
|
+
entity = db.get_one("entities", where="id = ?", where_params=(entity_id,))
|
|
112
|
+
assert entity["name"] == "Ford Perry"
|
|
113
|
+
assert entity["description"] == "CEO of Perry Ventures"
|
|
114
|
+
|
|
115
|
+
def test_remember_entity_existing_skips_embedding(self, db):
|
|
116
|
+
"""Updating an existing entity doesn't try to embed again"""
|
|
117
|
+
svc = _get_remember_service(db)
|
|
118
|
+
|
|
119
|
+
with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
|
|
120
|
+
# Create entity first time
|
|
121
|
+
entity_id1 = svc.remember_entity(
|
|
122
|
+
name="Ford Perry",
|
|
123
|
+
entity_type="person",
|
|
124
|
+
_precomputed_embedding=_fake_embedding("first"),
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
# Update same entity (existing path doesn't embed)
|
|
128
|
+
entity_id2 = svc.remember_entity(
|
|
129
|
+
name="Ford Perry",
|
|
130
|
+
entity_type="person",
|
|
131
|
+
description="Updated description",
|
|
132
|
+
_precomputed_embedding=_fake_embedding("second"),
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
mock_embed.assert_not_called()
|
|
136
|
+
|
|
137
|
+
assert entity_id1 == entity_id2
|
|
138
|
+
|
|
139
|
+
def test_remember_fact_dedup_with_precomputed(self, db):
|
|
140
|
+
"""Duplicate content deduplicates even with precomputed embeddings"""
|
|
141
|
+
svc = _get_remember_service(db)
|
|
142
|
+
|
|
143
|
+
with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
|
|
144
|
+
id1 = svc.remember_fact(
|
|
145
|
+
content="Ford prefers email",
|
|
146
|
+
_precomputed_embedding=_fake_embedding("v1"),
|
|
147
|
+
)
|
|
148
|
+
id2 = svc.remember_fact(
|
|
149
|
+
content="Ford prefers email",
|
|
150
|
+
_precomputed_embedding=_fake_embedding("v2"),
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
mock_embed.assert_not_called()
|
|
154
|
+
|
|
155
|
+
assert id1 == id2
|
|
156
|
+
|
|
157
|
+
def test_precomputed_none_falls_back_to_embed_sync(self, db):
|
|
158
|
+
"""Explicitly passing None for precomputed embedding falls back"""
|
|
159
|
+
svc = _get_remember_service(db)
|
|
160
|
+
fallback_emb = _fake_embedding("fallback")
|
|
161
|
+
|
|
162
|
+
with patch("claudia_memory.services.remember.embed_sync", return_value=fallback_emb) as mock_embed:
|
|
163
|
+
memory_id = svc.remember_fact(
|
|
164
|
+
content="Content needing fallback",
|
|
165
|
+
_precomputed_embedding=None,
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
mock_embed.assert_called_once()
|
|
169
|
+
|
|
170
|
+
assert memory_id is not None
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class TestBatchWithParallelEmbeddings:
|
|
174
|
+
"""Integration-style tests simulating the batch handler's two-pass flow"""
|
|
175
|
+
|
|
176
|
+
def test_batch_seven_memories_no_sequential_embeds(self, db):
|
|
177
|
+
"""Realistic scenario: 7 memories from a transcript, all pre-embedded"""
|
|
178
|
+
svc = _get_remember_service(db)
|
|
179
|
+
|
|
180
|
+
memories = [
|
|
181
|
+
{"content": f"Memory {i}: fact about the call", "type": "fact", "importance": 0.6 + i * 0.05}
|
|
182
|
+
for i in range(7)
|
|
183
|
+
]
|
|
184
|
+
embeddings = [_fake_embedding(m["content"]) for m in memories]
|
|
185
|
+
|
|
186
|
+
with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
|
|
187
|
+
ids = []
|
|
188
|
+
for m, emb in zip(memories, embeddings):
|
|
189
|
+
mid = svc.remember_fact(
|
|
190
|
+
content=m["content"],
|
|
191
|
+
memory_type=m["type"],
|
|
192
|
+
importance=m["importance"],
|
|
193
|
+
_precomputed_embedding=emb,
|
|
194
|
+
)
|
|
195
|
+
ids.append(mid)
|
|
196
|
+
|
|
197
|
+
mock_embed.assert_not_called()
|
|
198
|
+
|
|
199
|
+
assert len(ids) == 7
|
|
200
|
+
assert all(mid is not None for mid in ids)
|
|
201
|
+
assert len(set(ids)) == 7 # All unique
|
|
202
|
+
|
|
203
|
+
# Verify all memories stored in regular table
|
|
204
|
+
for mid, m in zip(ids, memories):
|
|
205
|
+
row = db.get_one("memories", where="id = ?", where_params=(mid,))
|
|
206
|
+
assert row is not None
|
|
207
|
+
assert row["content"] == m["content"]
|
|
208
|
+
|
|
209
|
+
def test_batch_mixed_operations(self, db):
|
|
210
|
+
"""Batch with entity + remember + relate, only primary ops pre-embedded"""
|
|
211
|
+
svc = _get_remember_service(db)
|
|
212
|
+
|
|
213
|
+
with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
|
|
214
|
+
# Entity op with precomputed
|
|
215
|
+
entity_id = svc.remember_entity(
|
|
216
|
+
name="Ford Perry",
|
|
217
|
+
entity_type="person",
|
|
218
|
+
description="CEO of Perry Ventures",
|
|
219
|
+
_precomputed_embedding=_fake_embedding("Ford Perry. CEO"),
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
# Remember op with precomputed
|
|
223
|
+
memory_id = svc.remember_fact(
|
|
224
|
+
content="Ford prefers async communication",
|
|
225
|
+
memory_type="preference",
|
|
226
|
+
about_entities=["Ford Perry"],
|
|
227
|
+
_precomputed_embedding=_fake_embedding("Ford prefers async"),
|
|
228
|
+
)
|
|
229
|
+
|
|
230
|
+
# Relate op (no embedding needed)
|
|
231
|
+
rel_id = svc.relate_entities(
|
|
232
|
+
source_name="Ford Perry",
|
|
233
|
+
target_name="Test User",
|
|
234
|
+
relationship_type="potential_partner",
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
assert entity_id is not None
|
|
238
|
+
assert memory_id is not None
|
|
239
|
+
assert rel_id is not None
|
|
240
|
+
|
|
241
|
+
# Verify entity linked to memory
|
|
242
|
+
link = db.get_one(
|
|
243
|
+
"memory_entities",
|
|
244
|
+
where="memory_id = ? AND entity_id = ?",
|
|
245
|
+
where_params=(memory_id, entity_id),
|
|
246
|
+
)
|
|
247
|
+
assert link is not None
|
|
248
|
+
|
|
249
|
+
def test_batch_partial_embedding_failure_fallback(self, db):
|
|
250
|
+
"""When some embeddings fail (None), those ops fall back to embed_sync"""
|
|
251
|
+
svc = _get_remember_service(db)
|
|
252
|
+
fallback_emb = _fake_embedding("fallback")
|
|
253
|
+
|
|
254
|
+
with patch("claudia_memory.services.remember.embed_sync", return_value=fallback_emb) as mock_embed:
|
|
255
|
+
# Op with precomputed embedding
|
|
256
|
+
id1 = svc.remember_fact(
|
|
257
|
+
content="Good content with embedding",
|
|
258
|
+
_precomputed_embedding=_fake_embedding("good"),
|
|
259
|
+
)
|
|
260
|
+
# Op without precomputed (simulating embedding failure)
|
|
261
|
+
id2 = svc.remember_fact(
|
|
262
|
+
content="Content that failed embedding",
|
|
263
|
+
_precomputed_embedding=None,
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
# embed_sync called only for the second (failed) one
|
|
267
|
+
mock_embed.assert_called_once_with("Content that failed embedding")
|
|
268
|
+
|
|
269
|
+
assert id1 is not None
|
|
270
|
+
assert id2 is not None
|
|
271
|
+
assert id1 != id2
|
|
272
|
+
|
|
273
|
+
def test_two_pass_flow_simulation(self, db):
|
|
274
|
+
"""Full simulation of the batch handler's two-pass architecture"""
|
|
275
|
+
svc = _get_remember_service(db)
|
|
276
|
+
|
|
277
|
+
# These are the operations that would come from memory.batch
|
|
278
|
+
operations = [
|
|
279
|
+
{"op": "entity", "name": "Ford Perry", "type": "person", "description": "CEO"},
|
|
280
|
+
{"op": "remember", "content": "Ford prefers email", "type": "preference", "importance": 0.7, "about": ["Ford Perry"]},
|
|
281
|
+
{"op": "remember", "content": "Meeting scheduled for Friday", "type": "fact", "importance": 0.8},
|
|
282
|
+
{"op": "relate", "source": "Ford Perry", "target": "Kamil", "relationship": "business_contact"},
|
|
283
|
+
]
|
|
284
|
+
|
|
285
|
+
# --- Pass 1: Collect texts and generate embeddings ---
|
|
286
|
+
embed_texts = []
|
|
287
|
+
embed_indices = []
|
|
288
|
+
for i, op in enumerate(operations):
|
|
289
|
+
if op["op"] == "remember":
|
|
290
|
+
embed_texts.append(op["content"])
|
|
291
|
+
embed_indices.append(i)
|
|
292
|
+
elif op["op"] == "entity":
|
|
293
|
+
embed_texts.append(f"{op['name']}. {op.get('description', '')}")
|
|
294
|
+
embed_indices.append(i)
|
|
295
|
+
|
|
296
|
+
# Simulate parallel embedding
|
|
297
|
+
all_embeddings = [_fake_embedding(t) for t in embed_texts]
|
|
298
|
+
embeddings_map = {idx: emb for idx, emb in zip(embed_indices, all_embeddings)}
|
|
299
|
+
|
|
300
|
+
# --- Pass 2: Execute with precomputed embeddings ---
|
|
301
|
+
results = []
|
|
302
|
+
with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
|
|
303
|
+
for i, op in enumerate(operations):
|
|
304
|
+
if op["op"] == "entity":
|
|
305
|
+
eid = svc.remember_entity(
|
|
306
|
+
name=op["name"],
|
|
307
|
+
entity_type=op.get("type", "person"),
|
|
308
|
+
description=op.get("description"),
|
|
309
|
+
_precomputed_embedding=embeddings_map.get(i),
|
|
310
|
+
)
|
|
311
|
+
results.append({"op": "entity", "id": eid})
|
|
312
|
+
elif op["op"] == "remember":
|
|
313
|
+
mid = svc.remember_fact(
|
|
314
|
+
content=op["content"],
|
|
315
|
+
memory_type=op.get("type", "fact"),
|
|
316
|
+
about_entities=op.get("about"),
|
|
317
|
+
importance=op.get("importance", 1.0),
|
|
318
|
+
_precomputed_embedding=embeddings_map.get(i),
|
|
319
|
+
)
|
|
320
|
+
results.append({"op": "remember", "id": mid})
|
|
321
|
+
elif op["op"] == "relate":
|
|
322
|
+
rid = svc.relate_entities(
|
|
323
|
+
source_name=op["source"],
|
|
324
|
+
target_name=op["target"],
|
|
325
|
+
relationship_type=op["relationship"],
|
|
326
|
+
)
|
|
327
|
+
results.append({"op": "relate", "id": rid})
|
|
328
|
+
|
|
329
|
+
# embed_sync may be called for entities created during linking
|
|
330
|
+
# (e.g., "Kamil" created by relate), but NOT for the primary ops
|
|
331
|
+
# For the 2 remember ops and 1 entity op, embed_sync was NOT used
|
|
332
|
+
# It might be called for auto-created entities in about_entities linking
|
|
333
|
+
pass
|
|
334
|
+
|
|
335
|
+
assert len(results) == 4
|
|
336
|
+
assert all(r["id"] is not None for r in results)
|
|
337
|
+
|
|
338
|
+
# Verify data integrity
|
|
339
|
+
entity = db.get_one("entities", where="name = ?", where_params=("Ford Perry",))
|
|
340
|
+
assert entity is not None
|
|
341
|
+
assert entity["description"] == "CEO"
|
|
342
|
+
|
|
343
|
+
mem1 = db.get_one("memories", where="content = ?", where_params=("Ford prefers email",))
|
|
344
|
+
assert mem1 is not None
|
|
345
|
+
assert mem1["type"] == "preference"
|
|
346
|
+
|
|
347
|
+
mem2 = db.get_one("memories", where="content = ?", where_params=("Meeting scheduled for Friday",))
|
|
348
|
+
assert mem2 is not None
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""Tests for LLM serialization error defense.
|
|
2
|
+
|
|
3
|
+
Validates that the MCP layer gracefully handles two classes of LLM tool-calling
|
|
4
|
+
errors: (1) string-serialized arrays and (2) missing optional fields like
|
|
5
|
+
episode_id in end_session.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import tempfile
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
import pytest
|
|
13
|
+
import jsonschema
|
|
14
|
+
|
|
15
|
+
from claudia_memory.database import Database
|
|
16
|
+
from claudia_memory.mcp.server import _coerce_arg
|
|
17
|
+
from claudia_memory.services.remember import RememberService
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Helpers
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
def _make_db():
    """Create and initialize a throwaway database in a fresh temp directory.

    Returns:
        A ``(db, tmpdir)`` pair; callers are responsible for closing ``db``.

    NOTE(review): the temp directory is never removed afterwards — fine for
    short-lived CI runs, but worth confirming this is intentional.
    """
    workdir = tempfile.mkdtemp()
    database = Database(Path(workdir) / "test.db")
    database.initialize()
    return database, workdir
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _make_service(db):
    """Build a RememberService bound to *db* with embeddings disabled.

    Constructs the instance via ``__new__`` so ``__init__`` (and any
    embedding-model loading it performs) is skipped entirely.
    """
    service = RememberService.__new__(RememberService)
    service.db = db
    service.embeddings = None
    return service
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
# TestCoerceArg -- unit tests for the _coerce_arg utility
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
class TestCoerceArg:
    """Unit tests for _coerce_arg's string-to-native coercion behavior."""

    def test_string_to_list(self):
        """A JSON-encoded array string is replaced in-place by the parsed list."""
        payload = {"about": '["Alice", "Bob"]'}
        _coerce_arg(payload, "about")
        assert payload["about"] == ["Alice", "Bob"]

    def test_string_to_list_of_objects(self):
        """Arrays of objects round-trip through the coercion."""
        expected = [{"type": "learning", "content": "User prefers concise responses"}]
        payload = {"reflections": json.dumps(expected)}
        _coerce_arg(payload, "reflections")
        assert payload["reflections"] == expected

    def test_noop_on_native_list(self):
        """A value that is already a list passes through as the same object."""
        native = ["Alice", "Bob"]
        payload = {"about": native}
        _coerce_arg(payload, "about")
        assert payload["about"] is native

    def test_noop_on_none(self):
        """None is not a string, so coercion leaves it untouched."""
        payload = {"about": None}
        _coerce_arg(payload, "about")
        assert payload["about"] is None

    def test_noop_on_missing_key(self):
        """An absent key is neither created nor treated as an error."""
        payload = {}
        _coerce_arg(payload, "about")
        assert "about" not in payload

    def test_invalid_json_leaves_value(self):
        """Malformed JSON stays as the original string (a warning is logged)."""
        payload = {"about": "not valid json ["}
        _coerce_arg(payload, "about")
        assert payload["about"] == "not valid json ["

    def test_wrong_type_after_parse_leaves_value(self):
        """A string that parses to a non-list (here a dict) is kept verbatim."""
        payload = {"about": '{"key": "value"}'}
        _coerce_arg(payload, "about")
        # Parsed result is a dict, not the expected list, so the raw string stays.
        assert payload["about"] == '{"key": "value"}'

    def test_empty_array_string(self):
        """The empty-array string "[]" coerces to an empty list."""
        payload = {"types": "[]"}
        _coerce_arg(payload, "types")
        assert payload["types"] == []
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
# TestEndSessionMissingEpisodeId -- handler auto-creates episodes
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
class TestEndSessionMissingEpisodeId:
    """Tests for end_session handler when episode_id is missing or invalid.

    Episode rows are inserted directly at the DB level to mirror what the
    MCP handler's auto-create path does.
    """

    @staticmethod
    def _insert_episode(svc):
        """Insert a minimal episode row directly and return its id.

        Uses an aware UTC timestamp: ``datetime.utcnow()`` is deprecated
        since Python 3.12 and produces naive datetimes.
        """
        from datetime import datetime, timezone
        return svc.db.insert("episodes", {
            "started_at": datetime.now(timezone.utc).isoformat(),
            "source": "claude_code",
        })

    def test_auto_creates_episode_when_none(self):
        """end_session should auto-create an episode when episode_id is not provided."""
        db, tmpdir = _make_db()
        try:
            svc = _make_service(db)

            # Simulate calling end_session without episode_id
            # (tested at the service level with a freshly-created episode).
            episode_id = self._insert_episode(svc)

            result = svc.end_session(
                episode_id=episode_id,
                narrative="Session without prior buffer_turn calls.",
            )

            assert result["narrative_stored"] is True
            episode = db.get_one("episodes", where="id = ?", where_params=(episode_id,))
            assert episode is not None
            assert episode["narrative"] == "Session without prior buffer_turn calls."
        finally:
            db.close()

    def test_auto_creates_episode_for_nonexistent_id(self):
        """end_session should create a new episode if requested ID doesn't exist."""
        db, tmpdir = _make_db()
        try:
            svc = _make_service(db)

            # Use an ID that doesn't exist.
            fake_id = 99999
            episode = db.get_one("episodes", where="id = ?", where_params=(fake_id,))
            assert episode is None  # Confirm it doesn't exist

            # The handler logic (tested here at DB level) should create a new one.
            new_id = self._insert_episode(svc)

            result = svc.end_session(
                episode_id=new_id,
                narrative="Fallback episode for missing ID.",
            )

            assert result["narrative_stored"] is True
        finally:
            db.close()

    def test_end_session_with_facts_as_string(self):
        """end_session should work when facts arrive as a JSON string after coercion."""
        db, tmpdir = _make_db()
        try:
            svc = _make_service(db)

            episode_id = self._insert_episode(svc)

            # Simulate coerced facts (string -> list already done by _coerce_arg).
            facts = [{"content": "User likes dark mode", "type": "preference"}]

            result = svc.end_session(
                episode_id=episode_id,
                narrative="Testing string-serialized facts.",
                facts=facts,
            )

            assert result["narrative_stored"] is True
            assert result["facts_stored"] >= 1
        finally:
            db.close()
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
# ---------------------------------------------------------------------------
|
|
189
|
+
# TestSchemaValidation -- verify updated schemas accept both types
|
|
190
|
+
# ---------------------------------------------------------------------------
|
|
191
|
+
|
|
192
|
+
class TestSchemaValidation:
|
|
193
|
+
"""Tests that updated schemas pass jsonschema validation for both array and string."""
|
|
194
|
+
|
|
195
|
+
def _make_schema(self, prop_name, prop_schema, required=None):
|
|
196
|
+
"""Build a minimal object schema with one property."""
|
|
197
|
+
schema = {
|
|
198
|
+
"type": "object",
|
|
199
|
+
"properties": {
|
|
200
|
+
prop_name: prop_schema,
|
|
201
|
+
},
|
|
202
|
+
}
|
|
203
|
+
if required:
|
|
204
|
+
schema["required"] = required
|
|
205
|
+
return schema
|
|
206
|
+
|
|
207
|
+
def test_array_type_union_accepts_native_array(self):
|
|
208
|
+
"""Schema with type: [array, string] should accept a native array."""
|
|
209
|
+
schema = self._make_schema("about", {
|
|
210
|
+
"type": ["array", "string"],
|
|
211
|
+
"items": {"type": "string"},
|
|
212
|
+
})
|
|
213
|
+
# Should not raise
|
|
214
|
+
jsonschema.validate({"about": ["Alice", "Bob"]}, schema)
|
|
215
|
+
|
|
216
|
+
def test_array_type_union_accepts_string(self):
|
|
217
|
+
"""Schema with type: [array, string] should accept a JSON string."""
|
|
218
|
+
schema = self._make_schema("about", {
|
|
219
|
+
"type": ["array", "string"],
|
|
220
|
+
"items": {"type": "string"},
|
|
221
|
+
})
|
|
222
|
+
# Should not raise
|
|
223
|
+
jsonschema.validate({"about": '["Alice", "Bob"]'}, schema)
|
|
224
|
+
|
|
225
|
+
def test_original_array_only_rejects_string(self):
|
|
226
|
+
"""Original schema with type: array should reject a string (proving the fix is needed)."""
|
|
227
|
+
schema = self._make_schema("about", {
|
|
228
|
+
"type": "array",
|
|
229
|
+
"items": {"type": "string"},
|
|
230
|
+
})
|
|
231
|
+
with pytest.raises(jsonschema.ValidationError):
|
|
232
|
+
jsonschema.validate({"about": '["Alice", "Bob"]'}, schema)
|
|
233
|
+
|
|
234
|
+
def test_end_session_narrative_only_required(self):
|
|
235
|
+
"""end_session schema should only require narrative, not episode_id."""
|
|
236
|
+
schema = {
|
|
237
|
+
"type": "object",
|
|
238
|
+
"properties": {
|
|
239
|
+
"episode_id": {"type": "integer"},
|
|
240
|
+
"narrative": {"type": "string"},
|
|
241
|
+
},
|
|
242
|
+
"required": ["narrative"],
|
|
243
|
+
}
|
|
244
|
+
# Should not raise -- episode_id is omitted
|
|
245
|
+
jsonschema.validate({"narrative": "Session summary"}, schema)
|
|
246
|
+
|
|
247
|
+
def test_end_session_rejects_missing_narrative(self):
|
|
248
|
+
"""end_session schema should still require narrative."""
|
|
249
|
+
schema = {
|
|
250
|
+
"type": "object",
|
|
251
|
+
"properties": {
|
|
252
|
+
"episode_id": {"type": "integer"},
|
|
253
|
+
"narrative": {"type": "string"},
|
|
254
|
+
},
|
|
255
|
+
"required": ["narrative"],
|
|
256
|
+
}
|
|
257
|
+
with pytest.raises(jsonschema.ValidationError):
|
|
258
|
+
jsonschema.validate({"episode_id": 1}, schema)
|
package/package.json
CHANGED
|
@@ -131,6 +131,73 @@ Return this exact JSON structure:
|
|
|
131
131
|
}
|
|
132
132
|
```
|
|
133
133
|
|
|
134
|
+
### Memory Operations (for batch storage pipeline)
|
|
135
|
+
|
|
136
|
+
When Claudia dispatches you with `extraction_type: "memory_operations"`, return ready-to-store operations matching the `memory.batch` input format. This lets Claudia pipe your output directly into `memory.batch` after review, skipping manual composition.
|
|
137
|
+
|
|
138
|
+
```json
|
|
139
|
+
{
|
|
140
|
+
"extraction_type": "memory_operations",
|
|
141
|
+
"source_summary": "Extracted 7 memories from call with Ford Perry",
|
|
142
|
+
"memory_operations": [
|
|
143
|
+
{
|
|
144
|
+
"op": "remember",
|
|
145
|
+
"content": "Ford Perry prefers async communication over calls",
|
|
146
|
+
"type": "preference",
|
|
147
|
+
"importance": 0.7,
|
|
148
|
+
"about": ["Ford Perry"],
|
|
149
|
+
"source_context": "2026-02-04 call with Ford Perry re: partnership"
|
|
150
|
+
},
|
|
151
|
+
{
|
|
152
|
+
"op": "remember",
|
|
153
|
+
"content": "Ford committed to sending the revised proposal by Friday Feb 7",
|
|
154
|
+
"type": "commitment",
|
|
155
|
+
"importance": 0.9,
|
|
156
|
+
"about": ["Ford Perry"],
|
|
157
|
+
"source_context": "2026-02-04 call with Ford Perry re: partnership"
|
|
158
|
+
},
|
|
159
|
+
{
|
|
160
|
+
"op": "entity",
|
|
161
|
+
"name": "Ford Perry",
|
|
162
|
+
"type": "person",
|
|
163
|
+
"description": "Potential partner, CEO of Perry Ventures"
|
|
164
|
+
},
|
|
165
|
+
{
|
|
166
|
+
"op": "relate",
|
|
167
|
+
"source": "Kamil Banc",
|
|
168
|
+
"target": "Ford Perry",
|
|
169
|
+
"relationship": "potential_partner",
|
|
170
|
+
"strength": 0.6
|
|
171
|
+
}
|
|
172
|
+
],
|
|
173
|
+
"confidence": 0.85,
|
|
174
|
+
"ambiguities": [],
|
|
175
|
+
"needs_claudia_judgment": true,
|
|
176
|
+
"judgment_reason": "Review extracted memories for accuracy before batch storage"
|
|
177
|
+
}
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
**Memory operation field reference:**
|
|
181
|
+
|
|
182
|
+
| Field | Required | Description |
|
|
183
|
+
|-------|----------|-------------|
|
|
184
|
+
| `op` | Yes | `"remember"`, `"entity"`, or `"relate"` |
|
|
185
|
+
| `content` | For remember | The memory text (preserve exact wording for commitments) |
|
|
186
|
+
| `type` | For remember | `"fact"`, `"preference"`, `"observation"`, `"commitment"`, `"decision"` |
|
|
187
|
+
| `importance` | For remember | 0.0-1.0 (commitments default 0.9, facts 0.7, observations 0.6) |
|
|
188
|
+
| `about` | For remember | Entity names this memory relates to |
|
|
189
|
+
| `source_context` | For remember | One-line breadcrumb: "YYYY-MM-DD [source] re: [topic]" |
|
|
190
|
+
| `name` | For entity | Entity name |
|
|
191
|
+
| `source`/`target` | For relate | Entity names for relationship |
|
|
192
|
+
| `relationship` | For relate | Relationship type (works_with, client_of, etc.) |
|
|
193
|
+
|
|
194
|
+
**When to use memory_operations extraction:**
|
|
195
|
+
- Processing transcripts where Claudia needs structured memories
|
|
196
|
+
- Processing emails where facts, commitments, and relationships need capturing
|
|
197
|
+
- Any document where multiple memory operations are expected
|
|
198
|
+
|
|
199
|
+
**Always set `needs_claudia_judgment: true`** for memory_operations. Claudia must review before storing.
|
|
200
|
+
|
|
134
201
|
## Deadline Confidence
|
|
135
202
|
|
|
136
203
|
| Level | Meaning |
|
|
@@ -50,7 +50,33 @@ The file is automatically routed to the right folder:
|
|
|
50
50
|
- Which person files to update?
|
|
51
51
|
- Any new people to track?
|
|
52
52
|
|
|
53
|
-
### 3. Extract Key Information
|
|
53
|
+
### 3. Extract Key Information (Agent-Accelerated)
|
|
54
|
+
|
|
55
|
+
**Preferred: Dispatch Document Processor for extraction.** Instead of composing memory operations manually (which takes 2+ minutes of thinking time), dispatch the Document Processor agent (Haiku) with the transcript content and `extraction_type: "memory_operations"`. The agent returns ready-to-store operations in ~10-20 seconds.
|
|
56
|
+
|
|
57
|
+
**Agent pipeline workflow:**
|
|
58
|
+
```
|
|
59
|
+
1. Dispatch Document Processor (Haiku) with:
|
|
60
|
+
- The full transcript text
|
|
61
|
+
- extraction_type: "memory_operations"
|
|
62
|
+
- Context: participant names, meeting topic, date
|
|
63
|
+
|
|
64
|
+
2. Agent returns memory_operations[] array with:
|
|
65
|
+
- Facts, preferences, observations
|
|
66
|
+
- Commitments with deadlines
|
|
67
|
+
- Entity definitions
|
|
68
|
+
- Relationship links
|
|
69
|
+
|
|
70
|
+
3. Review agent output (judgment layer):
|
|
71
|
+
- Verify commitment wording is accurate
|
|
72
|
+
- Check importance scores are reasonable
|
|
73
|
+
- Confirm entity names match existing entities
|
|
74
|
+
- Adjust or remove any questionable extractions
|
|
75
|
+
|
|
76
|
+
4. Call memory.batch with the reviewed operations
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
**Fallback: Manual extraction** (use when agent is unavailable or for very short notes)
|
|
54
80
|
|
|
55
81
|
**Decisions Made:**
|
|
56
82
|
- What was decided?
|
|
@@ -68,11 +68,37 @@ User shares transcript/email/document
|
|
|
68
68
|
↓
|
|
69
69
|
If user says "extract now":
|
|
70
70
|
↓
|
|
71
|
-
|
|
71
|
+
Use agent-accelerated extraction (see below)
|
|
72
72
|
↓
|
|
73
|
-
|
|
73
|
+
Review agent output, then store verified memories/entities
|
|
74
74
|
```
|
|
75
75
|
|
|
76
|
+
**Agent-Accelerated Extraction (Preferred for transcripts and emails)**
|
|
77
|
+
|
|
78
|
+
For transcripts, emails, and longer documents, use the Document Processor agent (Haiku) instead of composing `memory.batch` operations manually. Manual composition takes 2+ minutes of thinking time; the agent returns structured operations in ~10-20 seconds.
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
Dispatch Document Processor (Haiku) with:
|
|
82
|
+
├── The filed document content
|
|
83
|
+
├── extraction_type: "memory_operations"
|
|
84
|
+
└── Context: participant names, topic, date
|
|
85
|
+
↓
|
|
86
|
+
Agent returns memory_operations[] array
|
|
87
|
+
(facts, commitments, entities, relationships)
|
|
88
|
+
↓
|
|
89
|
+
Review agent output (Claudia's judgment layer):
|
|
90
|
+
├── Verify commitment wording is accurate
|
|
91
|
+
├── Check importance scores
|
|
92
|
+
├── Confirm entity names match existing entities
|
|
93
|
+
└── Remove or adjust questionable extractions
|
|
94
|
+
↓
|
|
95
|
+
Call memory.batch with reviewed operations
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
**When to use agent extraction:** Transcripts (3+ paragraphs), emails with multiple topics, documents with commitments or relationship context.
|
|
99
|
+
|
|
100
|
+
**When to extract manually:** Very short notes (1-2 sentences), single-fact corrections, quick entity creation.
|
|
101
|
+
|
|
76
102
|
**If you find yourself reading multiple source documents** without calling `memory.file` for each one, **STOP and fix it**. Go back and file each source before continuing.
|
|
77
103
|
|
|
78
104
|
**If you find yourself auto-extracting without asking**, **STOP**. File first, then ask if the user wants extraction now or later. This keeps you responsive during long documents.
|