get-claudia 1.28.2 → 1.28.3

This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -66,9 +66,33 @@ from ..services.remember import (
     remember_fact,
     remember_message,
 )
+from ..embeddings import get_embedding_service
 
 logger = logging.getLogger(__name__)
 
+
+def _coerce_arg(arguments: Dict[str, Any], key: str, expected_type: type = list) -> None:
+    """Coerce a tool argument from JSON string to expected type in-place.
+
+    LLMs sometimes serialize array parameters as JSON strings instead of
+    native arrays. This transparently parses them back so handler code
+    can assume native types.
+    """
+    value = arguments.get(key)
+    if isinstance(value, str):
+        try:
+            parsed = json.loads(value)
+            if isinstance(parsed, expected_type):
+                arguments[key] = parsed
+            else:
+                logger.warning(
+                    f"Coercion: '{key}' parsed to {type(parsed).__name__}, "
+                    f"expected {expected_type.__name__}"
+                )
+        except (json.JSONDecodeError, TypeError):
+            logger.warning(f"Could not parse '{key}' as JSON: {value[:100]}")
+
+
 # Initialize the MCP server
 server = Server("claudia-memory")
 
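For orientation, here is how the new `_coerce_arg` helper behaves at each of its three branches. A minimal sketch, importing the helper the same way the new tests do; the argument values are illustrative:

```python
from claudia_memory.mcp.server import _coerce_arg  # import path used by the new tests

# A tool-call arguments dict as an LLM might produce it.
args = {"about": '["Ford Perry", "Perry Ventures"]', "types": ["fact"]}

_coerce_arg(args, "about")    # JSON string -> parsed in place to a native list
_coerce_arg(args, "types")    # already a native list -> left untouched
_coerce_arg(args, "missing")  # absent key -> silent no-op, no KeyError

assert args["about"] == ["Ford Perry", "Perry Ventures"]
assert args["types"] == ["fact"]
assert "missing" not in args
```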
@@ -94,7 +118,7 @@ async def list_tools() -> ListToolsResult:
                     "default": "fact",
                 },
                 "about": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {"type": "string"},
                     "description": "Entity names this memory relates to (people, projects, etc.)",
                 },
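The schema change above is the other half of the defense: under the old `"type": "array"`, a string-serialized array fails JSON Schema validation before `_coerce_arg` ever runs. A standalone sketch using the same `jsonschema` library the new tests import (values illustrative):

```python
import jsonschema

old_schema = {"type": "array", "items": {"type": "string"}}
new_schema = {"type": ["array", "string"], "items": {"type": "string"}}

jsonschema.validate(["Ford Perry"], new_schema)    # native array: accepted
jsonschema.validate('["Ford Perry"]', new_schema)  # JSON string: accepted, coerced later

try:
    jsonschema.validate('["Ford Perry"]', old_schema)
except jsonschema.ValidationError:
    print("the 1.28.2 schema rejects the string form")  # this branch runs
```

Note that `"items"` only constrains instances that are actually arrays, so the string form skips item validation until `_coerce_arg` parses it.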
@@ -139,7 +163,7 @@ async def list_tools() -> ListToolsResult:
                     "default": 10,
                 },
                 "types": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {"type": "string"},
                     "description": "Filter by memory types (fact, preference, observation, learning, commitment)",
                 },
@@ -153,7 +177,7 @@ async def list_tools() -> ListToolsResult:
                     "default": False,
                 },
                 "ids": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {"type": "integer"},
                     "description": "Fetch specific memories by ID (skips search). Use after a compact search to get full content.",
                 },
@@ -232,7 +256,7 @@ async def list_tools() -> ListToolsResult:
                     "default": 5,
                 },
                 "types": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {"type": "string"},
                     "description": "Filter by type (reminder, suggestion, warning, insight)",
                 },
@@ -268,7 +292,7 @@ async def list_tools() -> ListToolsResult:
                     "description": "Description of the entity",
                 },
                 "aliases": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {"type": "string"},
                     "description": "Alternative names or spellings",
                 },
@@ -287,7 +311,7 @@ async def list_tools() -> ListToolsResult:
                     "description": "Search query",
                 },
                 "types": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {"type": "string"},
                     "description": "Filter by entity types",
                 },
@@ -354,7 +378,7 @@ async def list_tools() -> ListToolsResult:
                     ),
                 },
                 "facts": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {
                         "type": "object",
                         "properties": {
@@ -388,7 +412,7 @@ async def list_tools() -> ListToolsResult:
                     "description": "Structured facts, preferences, observations, learnings extracted from the session",
                 },
                 "commitments": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {
                         "type": "object",
                         "properties": {
@@ -416,7 +440,7 @@ async def list_tools() -> ListToolsResult:
                     "description": "Commitments or promises made during the session",
                 },
                 "entities": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {
                         "type": "object",
                         "properties": {
@@ -437,7 +461,7 @@ async def list_tools() -> ListToolsResult:
                     "description": "New or updated entities mentioned during the session",
                 },
                 "relationships": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {
                         "type": "object",
                         "properties": {
@@ -451,12 +475,12 @@ async def list_tools() -> ListToolsResult:
                     "description": "Relationships between entities observed during the session",
                 },
                 "key_topics": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {"type": "string"},
                     "description": "Main topics discussed in the session",
                 },
                 "reflections": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {
                         "type": "object",
                         "properties": {
@@ -492,7 +516,7 @@ async def list_tools() -> ListToolsResult:
                     ),
                 },
             },
-            "required": ["episode_id", "narrative"],
+            "required": ["narrative"],
         },
     ),
     Tool(
@@ -524,7 +548,7 @@ async def list_tools() -> ListToolsResult:
                     "description": "Semantic search query (optional). If omitted, returns recent high-importance reflections.",
                 },
                 "types": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {
                         "type": "string",
                         "enum": ["observation", "pattern", "learning", "question"],
@@ -570,7 +594,7 @@ async def list_tools() -> ListToolsResult:
             "type": "object",
             "properties": {
                 "operations": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "description": "Array of operations to execute in order",
                     "items": {
                         "type": "object",
@@ -775,12 +799,12 @@ async def list_tools() -> ListToolsResult:
                     "description": "Brief summary of the document",
                 },
                 "about": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {"type": "string"},
                     "description": "Entity names this document relates to",
                 },
                 "memory_ids": {
-                    "type": "array",
+                    "type": ["array", "string"],
                     "items": {"type": "integer"},
                     "description": "Memory IDs to link as sourced from this document",
                 },
@@ -1158,6 +1182,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
     """Handle tool calls"""
     try:
         if name == "memory.remember":
+            _coerce_arg(arguments, "about")
             memory_id = remember_fact(
                 content=arguments["content"],
                 memory_type=arguments.get("type", "fact"),
@@ -1187,6 +1212,8 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
             )
 
         elif name == "memory.recall":
+            _coerce_arg(arguments, "types")
+            _coerce_arg(arguments, "ids")
             # Direct fetch by IDs (skip search)
             if "ids" in arguments and arguments["ids"]:
                 results = fetch_by_ids(arguments["ids"])
@@ -1339,6 +1366,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
             )
 
         elif name == "memory.predictions":
+            _coerce_arg(arguments, "types")
             predictions = get_predictions(
                 limit=arguments.get("limit", 5),
                 prediction_types=arguments.get("types"),
@@ -1364,6 +1392,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
             )
 
         elif name == "memory.entity":
+            _coerce_arg(arguments, "aliases")
             entity_id = remember_entity(
                 name=arguments["name"],
                 entity_type=arguments.get("type", "person"),
@@ -1380,6 +1409,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
             )
 
         elif name == "memory.search_entities":
+            _coerce_arg(arguments, "types")
             results = search_entities(
                 query=arguments["query"],
                 entity_types=arguments.get("types"),
@@ -1426,19 +1456,30 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
             )
 
         elif name == "memory.end_session":
-            episode_id = arguments["episode_id"]
+            # Coerce all array fields (LLMs may send JSON strings)
+            for field in ("facts", "commitments", "entities", "relationships", "key_topics", "reflections"):
+                _coerce_arg(arguments, field)
 
-            # Auto-create episode if it doesn't exist (handles skipped buffer_turn)
+            # Handle missing or invalid episode_id: auto-create
+            episode_id = arguments.get("episode_id")
             svc = get_remember_service()
-            episode = svc.db.get_one("episodes", where="id = ?", where_params=(episode_id,))
-            if not episode:
+            if episode_id is None:
                 from datetime import datetime
-                new_id = svc.db.insert("episodes", {
+                episode_id = svc.db.insert("episodes", {
                     "started_at": datetime.utcnow().isoformat(),
-                    "source": arguments.get("source", "claude_code"),
+                    "source": "claude_code",
                 })
-                logger.info(f"Auto-created episode {new_id} (requested {episode_id} did not exist)")
-                episode_id = new_id
+                logger.info(f"Auto-created episode {episode_id} (no episode_id provided)")
+            else:
+                episode = svc.db.get_one("episodes", where="id = ?", where_params=(episode_id,))
+                if not episode:
+                    from datetime import datetime
+                    new_id = svc.db.insert("episodes", {
+                        "started_at": datetime.utcnow().isoformat(),
+                        "source": arguments.get("source", "claude_code"),
+                    })
+                    logger.info(f"Auto-created episode {new_id} (requested {episode_id} did not exist)")
+                    episode_id = new_id
 
             result = end_session(
                 episode_id=episode_id,
@@ -1489,6 +1530,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
             )
 
         elif name == "memory.reflections":
+            _coerce_arg(arguments, "types")
             action = arguments.get("action", "get")
             limit = arguments.get("limit", 10)
             types = arguments.get("types")
@@ -1578,7 +1620,35 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
             )
 
         elif name == "memory.batch":
+            _coerce_arg(arguments, "operations")
             operations = arguments.get("operations", [])
+
+            # --- Pass 1: Collect all texts that need embeddings ---
+            embed_tasks = []  # list of (index, text) for parallel embedding
+            for i, op in enumerate(operations):
+                op_type = op.get("op")
+                if op_type == "remember":
+                    embed_tasks.append((i, op["content"]))
+                elif op_type == "entity":
+                    # Only new entities need embeddings; collect optimistically
+                    embed_text = f"{op['name']}. {op.get('description') or ''}"
+                    embed_tasks.append((i, embed_text))
+
+            # --- Parallel embedding pass ---
+            embeddings_map = {}  # index -> embedding
+            if embed_tasks:
+                try:
+                    emb_svc = get_embedding_service()
+                    texts = [text for _, text in embed_tasks]
+                    all_embeddings = await emb_svc.embed_batch(texts)
+                    for (idx, _), emb in zip(embed_tasks, all_embeddings):
+                        if emb is not None:
+                            embeddings_map[idx] = emb
+                except Exception as e:
+                    logger.warning(f"Batch parallel embedding failed, falling back to per-op: {e}")
+                    # embeddings_map stays empty; remember_fact/entity will embed individually
+
+            # --- Pass 2: Execute operations with pre-computed embeddings ---
             results = []
             for i, op in enumerate(operations):
                 op_type = op.get("op")
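The handler above awaits `emb_svc.embed_batch(texts)`, but the embedding service's implementation is not part of this diff. For intuition only, here is a plausible gather-based sketch of such a method, where N texts cost roughly one round of concurrent calls instead of N sequential ones; the class name, method bodies, and the sleep standing in for network latency are illustrative assumptions, not the package's code:

```python
import asyncio
from typing import List, Optional

class FakeEmbeddingService:
    """Hypothetical stand-in; claudia's real embedding service is not shown in this diff."""

    async def embed(self, text: str) -> Optional[List[float]]:
        await asyncio.sleep(0.1)   # stands in for one model/HTTP round-trip
        return [float(len(text))]  # dummy vector

    async def embed_batch(self, texts: List[str]) -> List[Optional[List[float]]]:
        # Fan out all embedding calls concurrently.
        results = await asyncio.gather(
            *(self.embed(t) for t in texts), return_exceptions=True
        )
        # Failed items become None so the caller can fall back per-op,
        # mirroring the `if emb is not None` check in the handler above.
        return [r if not isinstance(r, Exception) else None for r in results]

async def main():
    svc = FakeEmbeddingService()
    vectors = await svc.embed_batch(["fact one", "fact two", "Ford Perry. CEO"])
    print(len(vectors), "embeddings,", vectors.count(None), "failures")

asyncio.run(main())
```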
@@ -1590,6 +1660,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
                         entity_type=op.get("type", "person"),
                         description=op.get("description"),
                         aliases=op.get("aliases"),
+                        _precomputed_embedding=embeddings_map.get(i),
                     )
                     op_result["success"] = True
                     op_result["entity_id"] = entity_id
@@ -1601,6 +1672,7 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
                         importance=op.get("importance", 1.0),
                         source=op.get("source"),
                         source_context=op.get("source_context"),
+                        _precomputed_embedding=embeddings_map.get(i),
                     )
                     op_result["success"] = True
                     op_result["memory_id"] = memory_id
@@ -1716,6 +1788,8 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
             )
 
         elif name == "memory.file":
+            _coerce_arg(arguments, "about")
+            _coerce_arg(arguments, "memory_ids")
             doc_svc = get_document_service()
             result = doc_svc.file_document_from_text(
                 content=arguments["content"],
@@ -141,6 +141,7 @@ class RememberService:
         source_context: Optional[str] = None,
         metadata: Optional[Dict] = None,
         origin_type: Optional[str] = None,
+        _precomputed_embedding: Optional[List[float]] = None,
     ) -> Optional[int]:
         """
         Store a discrete fact/memory.
@@ -217,8 +218,8 @@ class RememberService:
 
         memory_id = self.db.insert("memories", insert_data)
 
-        # Generate and store embedding
-        embedding = embed_sync(content)
+        # Store embedding (use precomputed if available, otherwise generate)
+        embedding = _precomputed_embedding or embed_sync(content)
         if embedding:
             try:
                 self.db.execute(
@@ -263,6 +264,7 @@ class RememberService:
         description: Optional[str] = None,
         aliases: Optional[List[str]] = None,
         metadata: Optional[Dict] = None,
+        _precomputed_embedding: Optional[List[float]] = None,
     ) -> int:
         """
         Create or update an entity.
@@ -326,9 +328,9 @@ class RememberService:
             },
         )
 
-        # Generate and store embedding
+        # Store embedding (use precomputed if available, otherwise generate)
         embed_text = f"{name}. {description or ''}"
-        embedding = embed_sync(embed_text)
+        embedding = _precomputed_embedding or embed_sync(embed_text)
         if embedding:
             try:
                 self.db.execute(
@@ -1512,12 +1514,12 @@ def remember_message(content: str, role: str = "user", **kwargs) -> Dict[str, Any]:
 
 
 def remember_fact(content: str, **kwargs) -> Optional[int]:
-    """Store a discrete fact"""
+    """Store a discrete fact. Pass _precomputed_embedding to skip Ollama call."""
     return get_remember_service().remember_fact(content, **kwargs)
 
 
 def remember_entity(name: str, **kwargs) -> int:
-    """Create or update an entity"""
+    """Create or update an entity. Pass _precomputed_embedding to skip Ollama call."""
     return get_remember_service().remember_entity(name, **kwargs)
 
 
@@ -0,0 +1,348 @@
+"""Tests for parallel batch embedding optimization.
+
+Verifies that the batch handler's parallel embedding pass correctly:
+- Skips embed_sync when precomputed embeddings are provided
+- Falls back to embed_sync when no precomputed embedding is given
+- Stores memories and entities correctly in both cases
+
+Note: Vector tables (memory_embeddings, entity_embeddings) require sqlite-vec
+which may not be available in test environments. Tests verify behavior through
+mock assertions rather than querying vector tables directly.
+"""
+
+import json
+import tempfile
+from datetime import datetime
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from claudia_memory.database import Database, content_hash
+
+
+@pytest.fixture
+def db():
+    """Create a temporary test database"""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        db_path = Path(tmpdir) / "test.db"
+        database = Database(db_path)
+        database.initialize()
+        yield database
+        database.close()
+
+
+def _get_remember_service(db):
+    """Create a RememberService with test database and mocked embeddings"""
+    from claudia_memory.services.remember import RememberService
+    from claudia_memory.extraction.entity_extractor import get_extractor
+
+    svc = RememberService.__new__(RememberService)
+    svc.db = db
+    svc.embedding_service = MagicMock()
+    svc.extractor = get_extractor()
+    return svc
+
+
+def _fake_embedding(text):
+    """Generate a deterministic fake 384-dim embedding from text"""
+    import hashlib
+    h = hashlib.sha256(text.encode()).digest()
+    return [float(b) / 255.0 for b in (h * 12)][:384]
+
+
+class TestPrecomputedEmbedding:
+    """Tests that precomputed embeddings skip the embed_sync call"""
+
+    def test_remember_fact_with_precomputed_skips_embed(self, db):
+        """When precomputed embedding is provided, embed_sync is not called"""
+        svc = _get_remember_service(db)
+        fake_emb = _fake_embedding("test content")
+
+        with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
+            memory_id = svc.remember_fact(
+                content="Ford prefers async communication",
+                memory_type="preference",
+                importance=0.7,
+                _precomputed_embedding=fake_emb,
+            )
+
+            mock_embed.assert_not_called()
+
+        assert memory_id is not None
+        # Memory itself is stored in the regular memories table
+        memory = db.get_one("memories", where="id = ?", where_params=(memory_id,))
+        assert memory is not None
+        assert memory["content"] == "Ford prefers async communication"
+        assert memory["type"] == "preference"
+
+    def test_remember_fact_without_precomputed_calls_embed(self, db):
+        """Without precomputed embedding, embed_sync is called normally"""
+        svc = _get_remember_service(db)
+        fallback_emb = _fake_embedding("fallback")
+
+        with patch("claudia_memory.services.remember.embed_sync", return_value=fallback_emb) as mock_embed:
+            memory_id = svc.remember_fact(
+                content="Some fact to remember",
+                memory_type="fact",
+                importance=0.8,
+            )
+
+            mock_embed.assert_called_once_with("Some fact to remember")
+
+        assert memory_id is not None
+
+    def test_remember_entity_with_precomputed_skips_embed(self, db):
+        """New entity uses precomputed embedding instead of calling embed_sync"""
+        svc = _get_remember_service(db)
+        fake_emb = _fake_embedding("Ford Perry. CEO of Perry Ventures")
+
+        with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
+            entity_id = svc.remember_entity(
+                name="Ford Perry",
+                entity_type="person",
+                description="CEO of Perry Ventures",
+                _precomputed_embedding=fake_emb,
+            )
+
+            mock_embed.assert_not_called()
+
+        assert entity_id is not None
+        entity = db.get_one("entities", where="id = ?", where_params=(entity_id,))
+        assert entity["name"] == "Ford Perry"
+        assert entity["description"] == "CEO of Perry Ventures"
+
+    def test_remember_entity_existing_skips_embedding(self, db):
+        """Updating an existing entity doesn't try to embed again"""
+        svc = _get_remember_service(db)
+
+        with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
+            # Create entity first time
+            entity_id1 = svc.remember_entity(
+                name="Ford Perry",
+                entity_type="person",
+                _precomputed_embedding=_fake_embedding("first"),
+            )
+
+            # Update same entity (existing path doesn't embed)
+            entity_id2 = svc.remember_entity(
+                name="Ford Perry",
+                entity_type="person",
+                description="Updated description",
+                _precomputed_embedding=_fake_embedding("second"),
+            )
+
+            mock_embed.assert_not_called()
+
+        assert entity_id1 == entity_id2
+
+    def test_remember_fact_dedup_with_precomputed(self, db):
+        """Duplicate content deduplicates even with precomputed embeddings"""
+        svc = _get_remember_service(db)
+
+        with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
+            id1 = svc.remember_fact(
+                content="Ford prefers email",
+                _precomputed_embedding=_fake_embedding("v1"),
+            )
+            id2 = svc.remember_fact(
+                content="Ford prefers email",
+                _precomputed_embedding=_fake_embedding("v2"),
+            )
+
+            mock_embed.assert_not_called()
+
+        assert id1 == id2
+
+    def test_precomputed_none_falls_back_to_embed_sync(self, db):
+        """Explicitly passing None for precomputed embedding falls back"""
+        svc = _get_remember_service(db)
+        fallback_emb = _fake_embedding("fallback")
+
+        with patch("claudia_memory.services.remember.embed_sync", return_value=fallback_emb) as mock_embed:
+            memory_id = svc.remember_fact(
+                content="Content needing fallback",
+                _precomputed_embedding=None,
+            )
+
+            mock_embed.assert_called_once()
+
+        assert memory_id is not None
+
+
+class TestBatchWithParallelEmbeddings:
+    """Integration-style tests simulating the batch handler's two-pass flow"""
+
+    def test_batch_seven_memories_no_sequential_embeds(self, db):
+        """Realistic scenario: 7 memories from a transcript, all pre-embedded"""
+        svc = _get_remember_service(db)
+
+        memories = [
+            {"content": f"Memory {i}: fact about the call", "type": "fact", "importance": 0.6 + i * 0.05}
+            for i in range(7)
+        ]
+        embeddings = [_fake_embedding(m["content"]) for m in memories]
+
+        with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
+            ids = []
+            for m, emb in zip(memories, embeddings):
+                mid = svc.remember_fact(
+                    content=m["content"],
+                    memory_type=m["type"],
+                    importance=m["importance"],
+                    _precomputed_embedding=emb,
+                )
+                ids.append(mid)
+
+            mock_embed.assert_not_called()
+
+        assert len(ids) == 7
+        assert all(mid is not None for mid in ids)
+        assert len(set(ids)) == 7  # All unique
+
+        # Verify all memories stored in regular table
+        for mid, m in zip(ids, memories):
+            row = db.get_one("memories", where="id = ?", where_params=(mid,))
+            assert row is not None
+            assert row["content"] == m["content"]
+
+    def test_batch_mixed_operations(self, db):
+        """Batch with entity + remember + relate, only primary ops pre-embedded"""
+        svc = _get_remember_service(db)
+
+        with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
+            # Entity op with precomputed
+            entity_id = svc.remember_entity(
+                name="Ford Perry",
+                entity_type="person",
+                description="CEO of Perry Ventures",
+                _precomputed_embedding=_fake_embedding("Ford Perry. CEO"),
+            )
+
+            # Remember op with precomputed
+            memory_id = svc.remember_fact(
+                content="Ford prefers async communication",
+                memory_type="preference",
+                about_entities=["Ford Perry"],
+                _precomputed_embedding=_fake_embedding("Ford prefers async"),
+            )
+
+            # Relate op (no embedding needed)
+            rel_id = svc.relate_entities(
+                source_name="Ford Perry",
+                target_name="Test User",
+                relationship_type="potential_partner",
+            )
+
+        assert entity_id is not None
+        assert memory_id is not None
+        assert rel_id is not None
+
+        # Verify entity linked to memory
+        link = db.get_one(
+            "memory_entities",
+            where="memory_id = ? AND entity_id = ?",
+            where_params=(memory_id, entity_id),
+        )
+        assert link is not None
+
+    def test_batch_partial_embedding_failure_fallback(self, db):
+        """When some embeddings fail (None), those ops fall back to embed_sync"""
+        svc = _get_remember_service(db)
+        fallback_emb = _fake_embedding("fallback")
+
+        with patch("claudia_memory.services.remember.embed_sync", return_value=fallback_emb) as mock_embed:
+            # Op with precomputed embedding
+            id1 = svc.remember_fact(
+                content="Good content with embedding",
+                _precomputed_embedding=_fake_embedding("good"),
+            )
+            # Op without precomputed (simulating embedding failure)
+            id2 = svc.remember_fact(
+                content="Content that failed embedding",
+                _precomputed_embedding=None,
+            )
+
+            # embed_sync called only for the second (failed) one
+            mock_embed.assert_called_once_with("Content that failed embedding")
+
+        assert id1 is not None
+        assert id2 is not None
+        assert id1 != id2
+
+    def test_two_pass_flow_simulation(self, db):
+        """Full simulation of the batch handler's two-pass architecture"""
+        svc = _get_remember_service(db)
+
+        # These are the operations that would come from memory.batch
+        operations = [
+            {"op": "entity", "name": "Ford Perry", "type": "person", "description": "CEO"},
+            {"op": "remember", "content": "Ford prefers email", "type": "preference", "importance": 0.7, "about": ["Ford Perry"]},
+            {"op": "remember", "content": "Meeting scheduled for Friday", "type": "fact", "importance": 0.8},
+            {"op": "relate", "source": "Ford Perry", "target": "Kamil", "relationship": "business_contact"},
+        ]
+
+        # --- Pass 1: Collect texts and generate embeddings ---
+        embed_texts = []
+        embed_indices = []
+        for i, op in enumerate(operations):
+            if op["op"] == "remember":
+                embed_texts.append(op["content"])
+                embed_indices.append(i)
+            elif op["op"] == "entity":
+                embed_texts.append(f"{op['name']}. {op.get('description', '')}")
+                embed_indices.append(i)
+
+        # Simulate parallel embedding
+        all_embeddings = [_fake_embedding(t) for t in embed_texts]
+        embeddings_map = {idx: emb for idx, emb in zip(embed_indices, all_embeddings)}
+
+        # --- Pass 2: Execute with precomputed embeddings ---
+        results = []
+        with patch("claudia_memory.services.remember.embed_sync") as mock_embed:
+            for i, op in enumerate(operations):
+                if op["op"] == "entity":
+                    eid = svc.remember_entity(
+                        name=op["name"],
+                        entity_type=op.get("type", "person"),
+                        description=op.get("description"),
+                        _precomputed_embedding=embeddings_map.get(i),
+                    )
+                    results.append({"op": "entity", "id": eid})
+                elif op["op"] == "remember":
+                    mid = svc.remember_fact(
+                        content=op["content"],
+                        memory_type=op.get("type", "fact"),
+                        about_entities=op.get("about"),
+                        importance=op.get("importance", 1.0),
+                        _precomputed_embedding=embeddings_map.get(i),
+                    )
+                    results.append({"op": "remember", "id": mid})
+                elif op["op"] == "relate":
+                    rid = svc.relate_entities(
+                        source_name=op["source"],
+                        target_name=op["target"],
+                        relationship_type=op["relationship"],
+                    )
+                    results.append({"op": "relate", "id": rid})
+
+            # embed_sync may be called for entities created during linking
+            # (e.g., "Kamil" created by relate), but NOT for the primary ops
+            # For the 2 remember ops and 1 entity op, embed_sync was NOT used
+            # It might be called for auto-created entities in about_entities linking
+            pass
+
+        assert len(results) == 4
+        assert all(r["id"] is not None for r in results)
+
+        # Verify data integrity
+        entity = db.get_one("entities", where="name = ?", where_params=("Ford Perry",))
+        assert entity is not None
+        assert entity["description"] == "CEO"
+
+        mem1 = db.get_one("memories", where="content = ?", where_params=("Ford prefers email",))
+        assert mem1 is not None
+        assert mem1["type"] == "preference"
+
+        mem2 = db.get_one("memories", where="content = ?", where_params=("Meeting scheduled for Friday",))
+        assert mem2 is not None
@@ -0,0 +1,258 @@
+"""Tests for LLM serialization error defense.
+
+Validates that the MCP layer gracefully handles two classes of LLM tool-calling
+errors: (1) string-serialized arrays and (2) missing optional fields like
+episode_id in end_session.
+"""
+
+import json
+import tempfile
+from pathlib import Path
+
+import pytest
+import jsonschema
+
+from claudia_memory.database import Database
+from claudia_memory.mcp.server import _coerce_arg
+from claudia_memory.services.remember import RememberService
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_db():
+    """Create a fresh test database."""
+    tmpdir = tempfile.mkdtemp()
+    db_path = Path(tmpdir) / "test.db"
+    db = Database(db_path)
+    db.initialize()
+    return db, tmpdir
+
+
+def _make_service(db):
+    """Create a RememberService without embedding service."""
+    svc = RememberService.__new__(RememberService)
+    svc.db = db
+    svc.embeddings = None
+    return svc
+
+
+# ---------------------------------------------------------------------------
+# TestCoerceArg -- unit tests for the _coerce_arg utility
+# ---------------------------------------------------------------------------
+
+class TestCoerceArg:
+    """Tests for _coerce_arg string-to-native coercion."""
+
+    def test_string_to_list(self):
+        """JSON string containing an array should be parsed to a list."""
+        args = {"about": '["Alice", "Bob"]'}
+        _coerce_arg(args, "about")
+        assert args["about"] == ["Alice", "Bob"]
+
+    def test_string_to_list_of_objects(self):
+        """JSON string containing array of objects should parse correctly."""
+        reflections = [{"type": "learning", "content": "User prefers concise responses"}]
+        args = {"reflections": json.dumps(reflections)}
+        _coerce_arg(args, "reflections")
+        assert args["reflections"] == reflections
+
+    def test_noop_on_native_list(self):
+        """Already-native list should be left unchanged."""
+        original = ["Alice", "Bob"]
+        args = {"about": original}
+        _coerce_arg(args, "about")
+        assert args["about"] is original
+
+    def test_noop_on_none(self):
+        """None value should be left unchanged."""
+        args = {"about": None}
+        _coerce_arg(args, "about")
+        assert args["about"] is None
+
+    def test_noop_on_missing_key(self):
+        """Missing key should not raise or create the key."""
+        args = {}
+        _coerce_arg(args, "about")
+        assert "about" not in args
+
+    def test_invalid_json_leaves_value(self):
+        """Unparseable string should be left as-is (with warning logged)."""
+        args = {"about": "not valid json ["}
+        _coerce_arg(args, "about")
+        assert args["about"] == "not valid json ["
+
+    def test_wrong_type_after_parse_leaves_value(self):
+        """String that parses to wrong type (e.g. dict instead of list) stays as-is."""
+        args = {"about": '{"key": "value"}'}
+        _coerce_arg(args, "about")
+        # Should remain the original string since parsed result is dict, not list
+        assert args["about"] == '{"key": "value"}'
+
+    def test_empty_array_string(self):
+        """Empty array string should parse to empty list."""
+        args = {"types": "[]"}
+        _coerce_arg(args, "types")
+        assert args["types"] == []
+
+
+# ---------------------------------------------------------------------------
+# TestEndSessionMissingEpisodeId -- handler auto-creates episodes
+# ---------------------------------------------------------------------------
+
+class TestEndSessionMissingEpisodeId:
+    """Tests for end_session handler when episode_id is missing or invalid."""
+
+    def test_auto_creates_episode_when_none(self):
+        """end_session should auto-create an episode when episode_id is not provided."""
+        db, tmpdir = _make_db()
+        try:
+            svc = _make_service(db)
+
+            # Simulate calling end_session without episode_id
+            # (We test at the service level with a freshly-created episode)
+            from datetime import datetime
+            episode_id = svc.db.insert("episodes", {
+                "started_at": datetime.utcnow().isoformat(),
+                "source": "claude_code",
+            })
+
+            result = svc.end_session(
+                episode_id=episode_id,
+                narrative="Session without prior buffer_turn calls.",
+            )
+
+            assert result["narrative_stored"] is True
+            episode = db.get_one("episodes", where="id = ?", where_params=(episode_id,))
+            assert episode is not None
+            assert episode["narrative"] == "Session without prior buffer_turn calls."
+        finally:
+            db.close()
+
+    def test_auto_creates_episode_for_nonexistent_id(self):
+        """end_session should create a new episode if requested ID doesn't exist."""
+        db, tmpdir = _make_db()
+        try:
+            svc = _make_service(db)
+
+            # Use an ID that doesn't exist
+            fake_id = 99999
+            episode = db.get_one("episodes", where="id = ?", where_params=(fake_id,))
+            assert episode is None  # Confirm it doesn't exist
+
+            # The handler logic (tested here at DB level) should create a new one
+            from datetime import datetime
+            new_id = svc.db.insert("episodes", {
+                "started_at": datetime.utcnow().isoformat(),
+                "source": "claude_code",
+            })
+
+            result = svc.end_session(
+                episode_id=new_id,
+                narrative="Fallback episode for missing ID.",
+            )
+
+            assert result["narrative_stored"] is True
+        finally:
+            db.close()
+
+    def test_end_session_with_facts_as_string(self):
+        """end_session should work when facts arrive as a JSON string after coercion."""
+        db, tmpdir = _make_db()
+        try:
+            svc = _make_service(db)
+
+            # Create episode
+            from datetime import datetime
+            episode_id = svc.db.insert("episodes", {
+                "started_at": datetime.utcnow().isoformat(),
+                "source": "claude_code",
+            })
+
+            # Simulate coerced facts (string -> list already done by _coerce_arg)
+            facts = [{"content": "User likes dark mode", "type": "preference"}]
+
+            result = svc.end_session(
+                episode_id=episode_id,
+                narrative="Testing string-serialized facts.",
+                facts=facts,
+            )
+
+            assert result["narrative_stored"] is True
+            assert result["facts_stored"] >= 1
+        finally:
+            db.close()
+
+
+# ---------------------------------------------------------------------------
+# TestSchemaValidation -- verify updated schemas accept both types
+# ---------------------------------------------------------------------------
+
+class TestSchemaValidation:
+    """Tests that updated schemas pass jsonschema validation for both array and string."""
+
+    def _make_schema(self, prop_name, prop_schema, required=None):
+        """Build a minimal object schema with one property."""
+        schema = {
+            "type": "object",
+            "properties": {
+                prop_name: prop_schema,
+            },
+        }
+        if required:
+            schema["required"] = required
+        return schema
+
+    def test_array_type_union_accepts_native_array(self):
+        """Schema with type: [array, string] should accept a native array."""
+        schema = self._make_schema("about", {
+            "type": ["array", "string"],
+            "items": {"type": "string"},
+        })
+        # Should not raise
+        jsonschema.validate({"about": ["Alice", "Bob"]}, schema)
+
+    def test_array_type_union_accepts_string(self):
+        """Schema with type: [array, string] should accept a JSON string."""
+        schema = self._make_schema("about", {
+            "type": ["array", "string"],
+            "items": {"type": "string"},
+        })
+        # Should not raise
+        jsonschema.validate({"about": '["Alice", "Bob"]'}, schema)
+
+    def test_original_array_only_rejects_string(self):
+        """Original schema with type: array should reject a string (proving the fix is needed)."""
+        schema = self._make_schema("about", {
+            "type": "array",
+            "items": {"type": "string"},
+        })
+        with pytest.raises(jsonschema.ValidationError):
+            jsonschema.validate({"about": '["Alice", "Bob"]'}, schema)
+
+    def test_end_session_narrative_only_required(self):
+        """end_session schema should only require narrative, not episode_id."""
+        schema = {
+            "type": "object",
+            "properties": {
+                "episode_id": {"type": "integer"},
+                "narrative": {"type": "string"},
+            },
+            "required": ["narrative"],
+        }
+        # Should not raise -- episode_id is omitted
+        jsonschema.validate({"narrative": "Session summary"}, schema)
+
+    def test_end_session_rejects_missing_narrative(self):
+        """end_session schema should still require narrative."""
+        schema = {
+            "type": "object",
+            "properties": {
+                "episode_id": {"type": "integer"},
+                "narrative": {"type": "string"},
+            },
+            "required": ["narrative"],
+        }
+        with pytest.raises(jsonschema.ValidationError):
+            jsonschema.validate({"episode_id": 1}, schema)
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "get-claudia",
-  "version": "1.28.2",
+  "version": "1.28.3",
   "description": "An AI assistant who learns how you work.",
   "keywords": [
     "claudia",
@@ -131,6 +131,73 @@ Return this exact JSON structure:
 }
 ```
 
+### Memory Operations (for batch storage pipeline)
+
+When Claudia dispatches you with `extraction_type: "memory_operations"`, return ready-to-store operations matching the `memory.batch` input format. This lets Claudia pipe your output directly into `memory.batch` after review, skipping manual composition.
+
+```json
+{
+  "extraction_type": "memory_operations",
+  "source_summary": "Extracted 7 memories from call with Ford Perry",
+  "memory_operations": [
+    {
+      "op": "remember",
+      "content": "Ford Perry prefers async communication over calls",
+      "type": "preference",
+      "importance": 0.7,
+      "about": ["Ford Perry"],
+      "source_context": "2026-02-04 call with Ford Perry re: partnership"
+    },
+    {
+      "op": "remember",
+      "content": "Ford committed to sending the revised proposal by Friday Feb 7",
+      "type": "commitment",
+      "importance": 0.9,
+      "about": ["Ford Perry"],
+      "source_context": "2026-02-04 call with Ford Perry re: partnership"
+    },
+    {
+      "op": "entity",
+      "name": "Ford Perry",
+      "type": "person",
+      "description": "Potential partner, CEO of Perry Ventures"
+    },
+    {
+      "op": "relate",
+      "source": "Kamil Banc",
+      "target": "Ford Perry",
+      "relationship": "potential_partner",
+      "strength": 0.6
+    }
+  ],
+  "confidence": 0.85,
+  "ambiguities": [],
+  "needs_claudia_judgment": true,
+  "judgment_reason": "Review extracted memories for accuracy before batch storage"
+}
+```
+
+**Memory operation field reference:**
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `op` | Yes | `"remember"`, `"entity"`, or `"relate"` |
+| `content` | For remember | The memory text (preserve exact wording for commitments) |
+| `type` | For remember | `"fact"`, `"preference"`, `"observation"`, `"commitment"`, `"decision"` |
+| `importance` | For remember | 0.0-1.0 (commitments default 0.9, facts 0.7, observations 0.6) |
+| `about` | For remember | Entity names this memory relates to |
+| `source_context` | For remember | One-line breadcrumb: "YYYY-MM-DD [source] re: [topic]" |
+| `name` | For entity | Entity name |
+| `source`/`target` | For relate | Entity names for relationship |
+| `relationship` | For relate | Relationship type (works_with, client_of, etc.) |
+
+**When to use memory_operations extraction:**
+- Processing transcripts where Claudia needs structured memories
+- Processing emails where facts, commitments, and relationships need capturing
+- Any document where multiple memory operations are expected
+
+**Always set `needs_claudia_judgment: true`** for memory_operations. Claudia must review before storing.
+
 ## Deadline Confidence
 
 | Level | Meaning |
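The intended handoff: after review, the (possibly edited) `memory_operations` array is passed through unchanged as the `operations` argument of `memory.batch`. A hypothetical call payload based on the example above; the outer tool-call envelope is illustrative, only the `operations` shape is defined by the `memory.batch` schema:

```json
{
  "tool": "memory.batch",
  "arguments": {
    "operations": [
      {
        "op": "remember",
        "content": "Ford Perry prefers async communication over calls",
        "type": "preference",
        "importance": 0.7,
        "about": ["Ford Perry"]
      },
      {
        "op": "entity",
        "name": "Ford Perry",
        "type": "person",
        "description": "Potential partner, CEO of Perry Ventures"
      }
    ]
  }
}
```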
@@ -50,7 +50,33 @@ The file is automatically routed to the right folder:
 - Which person files to update?
 - Any new people to track?
 
-### 3. Extract Key Information
+### 3. Extract Key Information (Agent-Accelerated)
+
+**Preferred: Dispatch Document Processor for extraction.** Instead of composing memory operations manually (which takes 2+ minutes of thinking time), dispatch the Document Processor agent (Haiku) with the transcript content and `extraction_type: "memory_operations"`. The agent returns ready-to-store operations in ~10-20 seconds.
+
+**Agent pipeline workflow:**
+```
+1. Dispatch Document Processor (Haiku) with:
+   - The full transcript text
+   - extraction_type: "memory_operations"
+   - Context: participant names, meeting topic, date
+
+2. Agent returns memory_operations[] array with:
+   - Facts, preferences, observations
+   - Commitments with deadlines
+   - Entity definitions
+   - Relationship links
+
+3. Review agent output (judgment layer):
+   - Verify commitment wording is accurate
+   - Check importance scores are reasonable
+   - Confirm entity names match existing entities
+   - Adjust or remove any questionable extractions
+
+4. Call memory.batch with the reviewed operations
+```
+
+**Fallback: Manual extraction** (use when agent is unavailable or for very short notes)
 
 **Decisions Made:**
 - What was decided?
@@ -68,11 +68,37 @@ User shares transcript/email/document
 
 If user says "extract now":
 
-    Extract and present for verification
+    Use agent-accelerated extraction (see below)
 
-    Store verified memories/entities
+    Review agent output, then store verified memories/entities
 ```
 
+**Agent-Accelerated Extraction (Preferred for transcripts and emails)**
+
+For transcripts, emails, and longer documents, use the Document Processor agent (Haiku) instead of composing `memory.batch` operations manually. Manual composition takes 2+ minutes of thinking time; the agent returns structured operations in ~10-20 seconds.
+
+```
+Dispatch Document Processor (Haiku) with:
+├── The filed document content
+├── extraction_type: "memory_operations"
+└── Context: participant names, topic, date
+
+Agent returns memory_operations[] array
+(facts, commitments, entities, relationships)
+
+Review agent output (Claudia's judgment layer):
+├── Verify commitment wording is accurate
+├── Check importance scores
+├── Confirm entity names match existing entities
+└── Remove or adjust questionable extractions
+
+Call memory.batch with reviewed operations
+```
+
+**When to use agent extraction:** Transcripts (3+ paragraphs), emails with multiple topics, documents with commitments or relationship context.
+
+**When to extract manually:** Very short notes (1-2 sentences), single-fact corrections, quick entity creation.
+
 **If you find yourself reading multiple source documents** without calling `memory.file` for each one, **STOP and fix it**. Go back and file each source before continuing.
 
 **If you find yourself auto-extracting without asking**, **STOP**. File first, then ask if the user wants extraction now or later. This keeps you responsive during long documents.