claude-memory-agent 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/README.md +206 -200
  2. package/agent_card.py +186 -0
  3. package/bin/cli.js +317 -181
  4. package/bin/postinstall.js +270 -216
  5. package/dashboard.html +4232 -2689
  6. package/hooks/__pycache__/grounding-hook.cpython-312.pyc +0 -0
  7. package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
  8. package/hooks/grounding-hook.py +422 -348
  9. package/hooks/session_end.py +293 -192
  10. package/hooks/session_start.py +227 -227
  11. package/install.py +919 -887
  12. package/main.py +4496 -2859
  13. package/package.json +47 -55
  14. package/services/__init__.py +50 -50
  15. package/services/__pycache__/__init__.cpython-312.pyc +0 -0
  16. package/services/__pycache__/curator.cpython-312.pyc +0 -0
  17. package/services/__pycache__/database.cpython-312.pyc +0 -0
  18. package/services/curator.py +1606 -0
  19. package/services/database.py +3637 -2485
  20. package/skills/__init__.py +21 -1
  21. package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
  22. package/skills/__pycache__/confidence_tracker.cpython-312.pyc +0 -0
  23. package/skills/__pycache__/context.cpython-312.pyc +0 -0
  24. package/skills/__pycache__/curator.cpython-312.pyc +0 -0
  25. package/skills/__pycache__/search.cpython-312.pyc +0 -0
  26. package/skills/__pycache__/session_review.cpython-312.pyc +0 -0
  27. package/skills/__pycache__/store.cpython-312.pyc +0 -0
  28. package/skills/confidence_tracker.py +441 -0
  29. package/skills/context.py +675 -0
  30. package/skills/curator.py +348 -0
  31. package/skills/search.py +369 -213
  32. package/skills/session_review.py +418 -0
  33. package/skills/store.py +377 -179
  34. package/update_system.py +829 -817
package/skills/__init__.py
@@ -2,5 +2,25 @@ from .store import store_memory
 from .retrieve import retrieve_memory
 from .search import semantic_search
 from .summarize import summarize_session
+from .confidence_tracker import (
+    report_solution_outcome,
+    get_reliability_stats,
+    get_unreliable_memories,
+    reset_memory_reliability,
+    memory_worked,
+    memory_failed
+)
 
-__all__ = ["store_memory", "retrieve_memory", "semantic_search", "summarize_session"]
+__all__ = [
+    "store_memory",
+    "retrieve_memory",
+    "semantic_search",
+    "summarize_session",
+    # Self-correcting confidence
+    "report_solution_outcome",
+    "get_reliability_stats",
+    "get_unreliable_memories",
+    "reset_memory_reliability",
+    "memory_worked",
+    "memory_failed"
+]
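
A quick sketch of what this re-export means for callers (assuming the published skills directory is importable as the skills package; adjust the import root to your install layout):

    from skills import memory_worked, memory_failed, get_reliability_stats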
package/skills/confidence_tracker.py (new file)
@@ -0,0 +1,441 @@
+"""Self-correcting confidence tracker for memory reliability.
+
+Tracks solution outcomes and adjusts confidence automatically:
+- When a solution works: increase confidence by 0.15 (max 1.0)
+- When a solution fails: decrease confidence by 0.2 (min 0.0)
+- After 3 consecutive failures: mark as unreliable
+
+This creates a learning loop where frequently successful solutions
+gain trust while failed solutions are demoted automatically.
+"""
+import json
+import logging
+from typing import Dict, Any, Optional, List
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+
+# Confidence adjustment constants
+CONFIDENCE_INCREASE = 0.15  # Boost when solution works
+CONFIDENCE_DECREASE = 0.20  # Penalty when solution fails
+MIN_CONFIDENCE = 0.0
+MAX_CONFIDENCE = 1.0
+UNRELIABLE_CONFIDENCE = 0.1  # Confidence floor for unreliable memories
+MAX_CONSECUTIVE_FAILURES = 3  # Mark unreliable after this many failures
+
+
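
The constants encode a simple saturating update rule. As a worked example of the arithmetic (illustrative only, not part of the package):

    confidence = 0.5                          # default for a new memory
    confidence = min(1.0, confidence + 0.15)  # worked -> 0.65
    confidence = min(1.0, confidence + 0.15)  # worked -> 0.80
    confidence = max(0.0, confidence - 0.20)  # failed -> 0.60 (streak = 1)
    # Two more consecutive failures reach the streak limit of 3, at which
    # point confidence is forced down to 0.1 and the memory is marked 'failed'.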
+async def report_solution_outcome(
+    db,
+    memory_id: int,
+    worked: bool,
+    context: Optional[Dict[str, Any]] = None
+) -> Dict[str, Any]:
+    """Report whether a solution from memory worked or failed.
+
+    This is the core feedback mechanism for self-correcting confidence.
+
+    Args:
+        db: Database service instance
+        memory_id: ID of the memory to update
+        worked: True if solution worked, False if it failed
+        context: Optional context about the usage (environment, problem, etc.)
+
+    Returns:
+        Dict with updated confidence, failure_count, and reliability status
+
+    Behavior:
+        - worked=True: confidence += 0.15, failure_count reset to 0
+        - worked=False: confidence -= 0.20, failure_count += 1
+        - After 3 consecutive failures: mark as unreliable (outcome_status='failed')
+    """
+    cursor = db.conn.cursor()
+
+    # Get current memory state
+    cursor.execute("""
+        SELECT id, confidence, outcome_status, failure_count,
+               times_worked, times_failed, metadata
+        FROM memories WHERE id = ?
+    """, [memory_id])
+
+    row = cursor.fetchone()
+    if not row:
+        return {
+            "success": False,
+            "error": f"Memory with ID {memory_id} not found",
+            "error_code": "MEMORY_NOT_FOUND"
+        }
+
+    # Extract current values (handle missing columns gracefully)
+    current_confidence = row["confidence"] if row["confidence"] is not None else 0.5
+    current_failure_count = row["failure_count"] if "failure_count" in row.keys() and row["failure_count"] is not None else 0
+    times_worked = row["times_worked"] if "times_worked" in row.keys() and row["times_worked"] is not None else 0
+    times_failed = row["times_failed"] if "times_failed" in row.keys() and row["times_failed"] is not None else 0
+    current_outcome_status = row["outcome_status"] if "outcome_status" in row.keys() else "pending"
+
+    # Load metadata
+    try:
+        metadata = json.loads(row["metadata"]) if row["metadata"] else {}
+    except (json.JSONDecodeError, TypeError):
+        metadata = {}
+
+    # Calculate new values
+    if worked:
+        # Solution worked - boost confidence and reset failure streak
+        new_confidence = min(MAX_CONFIDENCE, current_confidence + CONFIDENCE_INCREASE)
+        new_failure_count = 0  # Reset consecutive failures
+        times_worked += 1
+
+        # Update outcome status to success if not already set or was pending
+        if current_outcome_status in ('pending', 'partial', None):
+            new_outcome_status = 'success'
+        else:
+            new_outcome_status = current_outcome_status
+
+        action = "boosted"
+        message = f"Solution worked! Confidence increased from {current_confidence:.3f} to {new_confidence:.3f}"
+    else:
+        # Solution failed - decrease confidence and increment failure streak
+        new_confidence = max(MIN_CONFIDENCE, current_confidence - CONFIDENCE_DECREASE)
+        new_failure_count = current_failure_count + 1
+        times_failed += 1
+
+        # Check if memory should be marked as unreliable
+        if new_failure_count >= MAX_CONSECUTIVE_FAILURES:
+            new_confidence = UNRELIABLE_CONFIDENCE
+            new_outcome_status = 'failed'
+            action = "marked_unreliable"
+            message = f"Memory marked as unreliable after {new_failure_count} consecutive failures"
+            logger.warning(f"Memory {memory_id} marked unreliable: {new_failure_count} consecutive failures")
+        else:
+            new_outcome_status = current_outcome_status if current_outcome_status != 'success' else 'partial'
+            action = "penalized"
+            message = f"Solution failed. Confidence decreased from {current_confidence:.3f} to {new_confidence:.3f}"
+
+    # Record outcome in metadata history
+    outcome_history = metadata.get("outcome_history", [])
+    outcome_history.append({
+        "timestamp": datetime.now().isoformat(),
+        "worked": worked,
+        "confidence_before": current_confidence,
+        "confidence_after": new_confidence,
+        "context": context
+    })
+    # Keep last 20 outcomes to avoid unbounded growth
+    metadata["outcome_history"] = outcome_history[-20:]
+    metadata["last_outcome"] = {
+        "worked": worked,
+        "timestamp": datetime.now().isoformat()
+    }
+
+    # Update the memory
+    cursor.execute("""
+        UPDATE memories SET
+            confidence = ?,
+            failure_count = ?,
+            times_worked = ?,
+            times_failed = ?,
+            outcome_status = ?,
+            metadata = ?,
+            updated_at = datetime('now')
+        WHERE id = ?
+    """, [
+        new_confidence,
+        new_failure_count,
+        times_worked,
+        times_failed,
+        new_outcome_status,
+        json.dumps(metadata),
+        memory_id
+    ])
+    db.conn.commit()
+
+    # Calculate reliability classification
+    reliability = _classify_reliability(new_confidence, new_failure_count, times_worked, times_failed)
+
+    return {
+        "success": True,
+        "memory_id": memory_id,
+        "action": action,
+        "message": message,
+        "old_confidence": current_confidence,
+        "new_confidence": new_confidence,
+        "failure_count": new_failure_count,
+        "times_worked": times_worked,
+        "times_failed": times_failed,
+        "outcome_status": new_outcome_status,
+        "reliability": reliability,
+        "is_unreliable": new_failure_count >= MAX_CONSECUTIVE_FAILURES
+    }
+
+
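
A minimal, self-contained way to exercise report_solution_outcome against an in-memory SQLite database. The FakeDB class and the table layout are assumptions inferred from the columns the function reads; the package's real schema lives in services/database.py:

    import asyncio
    import sqlite3

    from skills.confidence_tracker import report_solution_outcome  # import path assumed

    class FakeDB:
        # Stand-in for the database service: only a .conn attribute is needed here.
        def __init__(self):
            self.conn = sqlite3.connect(":memory:")
            self.conn.row_factory = sqlite3.Row  # enables row["column"] and row.keys()
            self.conn.execute("""
                CREATE TABLE memories (
                    id INTEGER PRIMARY KEY,
                    content TEXT, type TEXT,
                    confidence REAL DEFAULT 0.5,
                    outcome_status TEXT DEFAULT 'pending',
                    failure_count INTEGER DEFAULT 0,
                    times_worked INTEGER DEFAULT 0,
                    times_failed INTEGER DEFAULT 0,
                    metadata TEXT, project_path TEXT,
                    created_at TEXT, updated_at TEXT
                )
            """)
            self.conn.execute(
                "INSERT INTO memories (id, content, type) VALUES (1, 'restart the daemon', 'solution')"
            )
            self.conn.commit()

    async def main():
        db = FakeDB()
        result = await report_solution_outcome(db, memory_id=1, worked=True)
        print(result["new_confidence"])  # 0.65: one success on top of the 0.5 default

    asyncio.run(main())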
+async def get_reliability_stats(
+    db,
+    memory_id: int
+) -> Dict[str, Any]:
+    """Get detailed reliability statistics for a memory.
+
+    Returns comprehensive reliability information including:
+    - Current confidence score
+    - Usage statistics (times worked/failed)
+    - Failure streak count
+    - Reliability classification
+    - Outcome history
+
+    Args:
+        db: Database service instance
+        memory_id: ID of the memory to analyze
+
+    Returns:
+        Dict with full reliability stats and history
+    """
+    cursor = db.conn.cursor()
+
+    cursor.execute("""
+        SELECT id, content, type, confidence, outcome_status,
+               failure_count, times_worked, times_failed,
+               created_at, updated_at, metadata
+        FROM memories WHERE id = ?
+    """, [memory_id])
+
+    row = cursor.fetchone()
+    if not row:
+        return {
+            "success": False,
+            "error": f"Memory with ID {memory_id} not found",
+            "error_code": "MEMORY_NOT_FOUND"
+        }
+
+    # Extract values (handle missing columns gracefully)
+    confidence = row["confidence"] if row["confidence"] is not None else 0.5
+    failure_count = row["failure_count"] if "failure_count" in row.keys() and row["failure_count"] is not None else 0
+    times_worked = row["times_worked"] if "times_worked" in row.keys() and row["times_worked"] is not None else 0
+    times_failed = row["times_failed"] if "times_failed" in row.keys() and row["times_failed"] is not None else 0
+    outcome_status = row["outcome_status"] if "outcome_status" in row.keys() else "pending"
+
+    # Load metadata for outcome history
+    try:
+        metadata = json.loads(row["metadata"]) if row["metadata"] else {}
+    except (json.JSONDecodeError, TypeError):
+        metadata = {}
+
+    outcome_history = metadata.get("outcome_history", [])
+    last_outcome = metadata.get("last_outcome")
+
+    # Calculate reliability classification
+    reliability = _classify_reliability(confidence, failure_count, times_worked, times_failed)
+
+    # Calculate success rate
+    total_uses = times_worked + times_failed
+    success_rate = (times_worked / total_uses) if total_uses > 0 else None
+
+    return {
+        "success": True,
+        "memory_id": memory_id,
+        "content_preview": row["content"][:200] if row["content"] else None,
+        "type": row["type"],
+        "confidence": confidence,
+        "times_worked": times_worked,
+        "times_failed": times_failed,
+        "total_uses": total_uses,
+        "success_rate": round(success_rate, 3) if success_rate is not None else None,
+        "failure_count": failure_count,
+        "consecutive_failures": failure_count,  # Same as failure_count for clarity
+        "outcome_status": outcome_status,
+        "reliability": reliability,
+        "is_unreliable": failure_count >= MAX_CONSECUTIVE_FAILURES,
+        "last_outcome": last_outcome,
+        "outcome_history": outcome_history,
+        "created_at": row["created_at"],
+        "updated_at": row["updated_at"],
+        "interpretation": _interpret_reliability(reliability, confidence, failure_count)
+    }
+
+
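
One way a retrieval layer might act on these stats (a sketch; apply_solution is a hypothetical caller-side function):

    stats = await get_reliability_stats(db, memory_id=1)
    if stats["success"] and stats["reliability"] in ("proven", "high"):
        apply_solution(stats["content_preview"])  # hypothetical
    elif stats["is_unreliable"]:
        pass  # skip; surface an alternative instead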
+async def get_unreliable_memories(
+    db,
+    project_path: Optional[str] = None,
+    limit: int = 50
+) -> Dict[str, Any]:
+    """Get all memories marked as unreliable.
+
+    Args:
+        db: Database service instance
+        project_path: Optional filter by project
+        limit: Maximum number of results
+
+    Returns:
+        Dict with list of unreliable memories
+    """
+    from services.database import normalize_path
+
+    cursor = db.conn.cursor()
+
+    query = """
+        SELECT id, content, type, confidence, outcome_status,
+               failure_count, times_worked, times_failed, project_path,
+               created_at, updated_at
+        FROM memories
+        WHERE (failure_count >= ? OR outcome_status = 'failed')
+    """
+    params = [MAX_CONSECUTIVE_FAILURES]
+
+    if project_path:
+        project_path = normalize_path(project_path)
+        query += " AND project_path = ?"
+        params.append(project_path)
+
+    query += " ORDER BY failure_count DESC, updated_at DESC LIMIT ?"
+    params.append(limit)
+
+    cursor.execute(query, params)
+    rows = cursor.fetchall()
+
+    memories = []
+    for row in rows:
+        confidence = row["confidence"] if row["confidence"] is not None else 0.5
+        failure_count = row["failure_count"] if "failure_count" in row.keys() and row["failure_count"] is not None else 0
+        times_worked = row["times_worked"] if "times_worked" in row.keys() and row["times_worked"] is not None else 0
+        times_failed = row["times_failed"] if "times_failed" in row.keys() and row["times_failed"] is not None else 0
+
+        memories.append({
+            "id": row["id"],
+            "content_preview": row["content"][:200] if row["content"] else None,
+            "type": row["type"],
+            "confidence": confidence,
+            "failure_count": failure_count,
+            "times_worked": times_worked,
+            "times_failed": times_failed,
+            "outcome_status": row["outcome_status"],
+            "project_path": row["project_path"],
+            "reliability": _classify_reliability(confidence, failure_count, times_worked, times_failed),
+            "updated_at": row["updated_at"]
+        })
+
+    return {
+        "success": True,
+        "unreliable_memories": memories,
+        "count": len(memories),
+        "project_path": project_path
+    }
+
+
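
Usage sketch (the project path is illustrative):

    report = await get_unreliable_memories(db, project_path="/home/user/myproj", limit=10)
    for mem in report["unreliable_memories"]:
        print(mem["id"], mem["failure_count"], mem["content_preview"])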
+async def reset_memory_reliability(
+    db,
+    memory_id: int,
+    new_confidence: float = 0.5
+) -> Dict[str, Any]:
+    """Reset a memory's reliability stats (admin function).
+
+    Useful when a memory has been fixed or updated and should be
+    given a fresh chance.
+
+    Args:
+        db: Database service instance
+        memory_id: ID of the memory to reset
+        new_confidence: Starting confidence (default 0.5)
+
+    Returns:
+        Dict with reset status
+    """
+    cursor = db.conn.cursor()
+
+    # Verify memory exists
+    cursor.execute("SELECT id FROM memories WHERE id = ?", [memory_id])
+    if not cursor.fetchone():
+        return {
+            "success": False,
+            "error": f"Memory with ID {memory_id} not found",
+            "error_code": "MEMORY_NOT_FOUND"
+        }
+
+    # Clamp confidence to valid range
+    new_confidence = max(MIN_CONFIDENCE, min(MAX_CONFIDENCE, new_confidence))
+
+    cursor.execute("""
+        UPDATE memories SET
+            confidence = ?,
+            failure_count = 0,
+            times_worked = 0,
+            times_failed = 0,
+            outcome_status = 'pending',
+            updated_at = datetime('now')
+        WHERE id = ?
+    """, [new_confidence, memory_id])
+    db.conn.commit()
+
+    return {
+        "success": True,
+        "memory_id": memory_id,
+        "message": "Reliability stats reset",
+        "new_confidence": new_confidence,
+        "failure_count": 0,
+        "outcome_status": "pending"
+    }
+
+
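
Usage sketch for the reset flow (the memory ID is illustrative):

    # After editing memory 7 to fix the bad advice it contained,
    # wipe its track record and let it re-earn confidence from 0.5:
    result = await reset_memory_reliability(db, memory_id=7)
    assert result["outcome_status"] == "pending"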
+def _classify_reliability(
+    confidence: float,
+    failure_count: int,
+    times_worked: int,
+    times_failed: int
+) -> str:
+    """Classify reliability based on confidence and usage stats.
+
+    Returns:
+        One of: 'proven', 'high', 'moderate', 'low', 'unreliable', 'untested'
+    """
+    total_uses = times_worked + times_failed
+
+    if total_uses == 0:
+        return "untested"
+
+    if failure_count >= MAX_CONSECUTIVE_FAILURES:
+        return "unreliable"
+
+    if confidence >= 0.85 and times_worked >= 3:
+        return "proven"
+    elif confidence >= 0.7:
+        return "high"
+    elif confidence >= 0.5:
+        return "moderate"
+    elif confidence >= 0.3:
+        return "low"
+    else:
+        return "unreliable"
+
+
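
The thresholds above map to labels as follows (illustrative calls to the private helper):

    _classify_reliability(0.90, 0, 5, 1)  # 'proven'     (>= 0.85 and worked >= 3)
    _classify_reliability(0.75, 1, 2, 1)  # 'high'       (>= 0.7)
    _classify_reliability(0.40, 2, 1, 2)  # 'low'        (>= 0.3)
    _classify_reliability(0.20, 2, 1, 2)  # 'unreliable' (< 0.3)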
+def _interpret_reliability(
+    reliability: str,
+    confidence: float,
+    failure_count: int
+) -> str:
+    """Human-readable interpretation of reliability status."""
+    interpretations = {
+        "proven": "This solution has been repeatedly verified and is highly reliable.",
+        "high": "This solution has a good track record and can be trusted.",
+        "moderate": "This solution may work, but consider verifying before relying on it.",
+        "low": "This solution has mixed results. Use with caution.",
+        "unreliable": f"This solution has failed {failure_count} times consecutively. Consider alternatives.",
+        "untested": "This solution has not been tested yet. Report outcome after use."
+    }
+    return interpretations.get(reliability, "Unknown reliability status")
+
+
+# Export functions that integrate with MCP
+async def memory_worked(
+    db,
+    memory_id: int,
+    context: Optional[Dict[str, Any]] = None
+) -> Dict[str, Any]:
+    """MCP-friendly wrapper: Report that a memory solution worked."""
+    return await report_solution_outcome(db, memory_id, worked=True, context=context)
+
+
+async def memory_failed(
+    db,
+    memory_id: int,
+    context: Optional[Dict[str, Any]] = None
+) -> Dict[str, Any]:
+    """MCP-friendly wrapper: Report that a memory solution failed."""
+    return await report_solution_outcome(db, memory_id, worked=False, context=context)
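
For tool integrations, these wrappers reduce outcome reporting to one call each; the shape of the context payload is up to the caller (illustrative):

    await memory_worked(db, memory_id=1, context={"task": "deploy"})
    await memory_failed(db, memory_id=1)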