claude-memory-agent 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/.env.example +107 -0
  2. package/README.md +200 -0
  3. package/agent_card.py +512 -0
  4. package/bin/cli.js +181 -0
  5. package/bin/postinstall.js +216 -0
  6. package/config.py +104 -0
  7. package/dashboard.html +2689 -0
  8. package/hooks/README.md +196 -0
  9. package/hooks/__pycache__/auto-detect-response.cpython-312.pyc +0 -0
  10. package/hooks/__pycache__/auto_capture.cpython-312.pyc +0 -0
  11. package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
  12. package/hooks/__pycache__/session_start.cpython-312.pyc +0 -0
  13. package/hooks/auto-detect-response.py +348 -0
  14. package/hooks/auto_capture.py +255 -0
  15. package/hooks/detect-correction.py +173 -0
  16. package/hooks/grounding-hook.py +348 -0
  17. package/hooks/log-tool-use.py +234 -0
  18. package/hooks/log-user-request.py +208 -0
  19. package/hooks/pre-tool-decision.py +218 -0
  20. package/hooks/problem-detector.py +343 -0
  21. package/hooks/session_end.py +192 -0
  22. package/hooks/session_start.py +227 -0
  23. package/install.py +887 -0
  24. package/main.py +2859 -0
  25. package/manager.py +997 -0
  26. package/package.json +55 -0
  27. package/requirements.txt +8 -0
  28. package/run_server.py +136 -0
  29. package/services/__init__.py +50 -0
  30. package/services/__pycache__/__init__.cpython-312.pyc +0 -0
  31. package/services/__pycache__/agent_registry.cpython-312.pyc +0 -0
  32. package/services/__pycache__/auth.cpython-312.pyc +0 -0
  33. package/services/__pycache__/auto_inject.cpython-312.pyc +0 -0
  34. package/services/__pycache__/claude_md_sync.cpython-312.pyc +0 -0
  35. package/services/__pycache__/cleanup.cpython-312.pyc +0 -0
  36. package/services/__pycache__/compaction_flush.cpython-312.pyc +0 -0
  37. package/services/__pycache__/confidence.cpython-312.pyc +0 -0
  38. package/services/__pycache__/daily_log.cpython-312.pyc +0 -0
  39. package/services/__pycache__/database.cpython-312.pyc +0 -0
  40. package/services/__pycache__/embeddings.cpython-312.pyc +0 -0
  41. package/services/__pycache__/insights.cpython-312.pyc +0 -0
  42. package/services/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
  43. package/services/__pycache__/memory_md_sync.cpython-312.pyc +0 -0
  44. package/services/__pycache__/retry_queue.cpython-312.pyc +0 -0
  45. package/services/__pycache__/timeline.cpython-312.pyc +0 -0
  46. package/services/__pycache__/vector_index.cpython-312.pyc +0 -0
  47. package/services/__pycache__/websocket.cpython-312.pyc +0 -0
  48. package/services/agent_registry.py +753 -0
  49. package/services/auth.py +331 -0
  50. package/services/auto_inject.py +250 -0
  51. package/services/claude_md_sync.py +275 -0
  52. package/services/cleanup.py +667 -0
  53. package/services/compaction_flush.py +447 -0
  54. package/services/confidence.py +301 -0
  55. package/services/daily_log.py +333 -0
  56. package/services/database.py +2485 -0
  57. package/services/embeddings.py +358 -0
  58. package/services/insights.py +632 -0
  59. package/services/llm_analyzer.py +595 -0
  60. package/services/memory_md_sync.py +409 -0
  61. package/services/retry_queue.py +453 -0
  62. package/services/timeline.py +579 -0
  63. package/services/vector_index.py +398 -0
  64. package/services/websocket.py +257 -0
  65. package/skills/__init__.py +6 -0
  66. package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
  67. package/skills/__pycache__/admin.cpython-312.pyc +0 -0
  68. package/skills/__pycache__/checkpoint.cpython-312.pyc +0 -0
  69. package/skills/__pycache__/claude_md.cpython-312.pyc +0 -0
  70. package/skills/__pycache__/cleanup.cpython-312.pyc +0 -0
  71. package/skills/__pycache__/grounding.cpython-312.pyc +0 -0
  72. package/skills/__pycache__/insights.cpython-312.pyc +0 -0
  73. package/skills/__pycache__/natural_language.cpython-312.pyc +0 -0
  74. package/skills/__pycache__/retrieve.cpython-312.pyc +0 -0
  75. package/skills/__pycache__/search.cpython-312.pyc +0 -0
  76. package/skills/__pycache__/state.cpython-312.pyc +0 -0
  77. package/skills/__pycache__/store.cpython-312.pyc +0 -0
  78. package/skills/__pycache__/summarize.cpython-312.pyc +0 -0
  79. package/skills/__pycache__/timeline.cpython-312.pyc +0 -0
  80. package/skills/__pycache__/verification.cpython-312.pyc +0 -0
  81. package/skills/admin.py +469 -0
  82. package/skills/checkpoint.py +198 -0
  83. package/skills/claude_md.py +363 -0
  84. package/skills/cleanup.py +241 -0
  85. package/skills/grounding.py +801 -0
  86. package/skills/insights.py +231 -0
  87. package/skills/natural_language.py +277 -0
  88. package/skills/retrieve.py +67 -0
  89. package/skills/search.py +213 -0
  90. package/skills/state.py +182 -0
  91. package/skills/store.py +179 -0
  92. package/skills/summarize.py +588 -0
  93. package/skills/timeline.py +387 -0
  94. package/skills/verification.py +391 -0
  95. package/start_daemon.py +155 -0
  96. package/test_automation.py +221 -0
  97. package/test_complete.py +338 -0
  98. package/test_full.py +322 -0
  99. package/update_system.py +817 -0
  100. package/verify_db.py +134 -0
@@ -0,0 +1,632 @@
1
+ """Cross-session learning and insight aggregation service.
2
+
3
+ Analyzes memories across sessions to identify patterns, recurring issues,
4
+ and aggregated learnings that can improve future interactions.
5
+ """
6
+ import json
7
+ from datetime import datetime, timedelta
8
+ from typing import Dict, Any, Optional, List, Tuple
9
+ from collections import defaultdict
10
+
11
+
12
class InsightsService:
    """Service for generating and managing cross-session insights.

    Aggregates stored memories (errors, decisions, corrections, frequently
    accessed items) into higher-level "insight" rows, and turns those into
    CLAUDE.md update suggestions.

    Features:
    - Error pattern detection (similar errors across sessions)
    - Decision aggregation (same problem -> same solution patterns)
    - User correction detection (Claude blind spots)
    - High-value memory identification
    - CLAUDE.md improvement suggestions
    """
23
    def __init__(self, db, embeddings):
        """Store the database and embedding service handles.

        Args:
            db: Database service; must expose async ``execute_query`` and a
                raw sqlite3-style ``conn`` (used directly by the write paths).
            embeddings: Service exposing async ``generate_embedding``; may be
                falsy, in which case insights are stored without embeddings.
        """
        self.db = db
        self.embeddings = embeddings
27
+ async def aggregate_error_patterns(
28
+ self,
29
+ days_back: int = 30,
30
+ min_occurrences: int = 2,
31
+ similarity_threshold: float = 0.85
32
+ ) -> List[Dict[str, Any]]:
33
+ """Find recurring error patterns across sessions.
34
+
35
+ Groups similar errors by embedding similarity and extracts
36
+ common resolution patterns.
37
+
38
+ Returns:
39
+ List of error pattern insights
40
+ """
41
+ # Get recent error memories
42
+ cutoff = (datetime.now() - timedelta(days=days_back)).isoformat()
43
+ errors = await self.db.execute_query(
44
+ """
45
+ SELECT id, content, embedding, session_id, outcome, project_path,
46
+ tech_stack, created_at
47
+ FROM memories
48
+ WHERE type = 'error'
49
+ AND created_at > ?
50
+ AND embedding IS NOT NULL
51
+ ORDER BY created_at DESC
52
+ LIMIT 500
53
+ """,
54
+ (cutoff,)
55
+ )
56
+
57
+ if not errors or len(errors) < min_occurrences:
58
+ return []
59
+
60
+ # Group similar errors
61
+ clusters = await self._cluster_by_embedding(
62
+ errors, similarity_threshold
63
+ )
64
+
65
+ insights = []
66
+ for cluster in clusters:
67
+ if len(cluster) < min_occurrences:
68
+ continue
69
+
70
+ # Extract common elements
71
+ content_samples = [e["content"][:200] for e in cluster[:3]]
72
+ sessions = list(set(e["session_id"] for e in cluster if e["session_id"]))
73
+ projects = list(set(e["project_path"] for e in cluster if e["project_path"]))
74
+
75
+ # Find successful resolutions
76
+ resolved = [e for e in cluster if e.get("outcome") and "fix" in e.get("outcome", "").lower()]
77
+
78
+ insight = {
79
+ "insight_type": "recurring_error",
80
+ "title": f"Recurring error pattern ({len(cluster)} occurrences)",
81
+ "description": self._summarize_cluster(cluster, "error"),
82
+ "evidence_count": len(cluster),
83
+ "evidence_ids": json.dumps([e["id"] for e in cluster]),
84
+ "source_sessions": json.dumps(sessions[:10]),
85
+ "confidence": min(0.9, 0.5 + (len(cluster) * 0.1)),
86
+ "impact_score": min(10, 5 + len(cluster)),
87
+ "project_path": projects[0] if len(projects) == 1 else None,
88
+ "category": "error_pattern",
89
+ "resolution_found": len(resolved) > 0,
90
+ "sample_content": content_samples
91
+ }
92
+ insights.append(insight)
93
+
94
+ return insights
95
+
96
+ async def aggregate_decision_patterns(
97
+ self,
98
+ days_back: int = 60,
99
+ min_occurrences: int = 2,
100
+ similarity_threshold: float = 0.80
101
+ ) -> List[Dict[str, Any]]:
102
+ """Find recurring decision patterns (same problem -> same solution).
103
+
104
+ Identifies when Claude makes the same type of decision repeatedly,
105
+ which could be codified into CLAUDE.md rules.
106
+
107
+ Returns:
108
+ List of decision pattern insights
109
+ """
110
+ cutoff = (datetime.now() - timedelta(days=days_back)).isoformat()
111
+ decisions = await self.db.execute_query(
112
+ """
113
+ SELECT id, content, embedding, session_id, outcome, success,
114
+ project_path, tech_stack, created_at
115
+ FROM memories
116
+ WHERE type = 'decision'
117
+ AND created_at > ?
118
+ AND embedding IS NOT NULL
119
+ ORDER BY created_at DESC
120
+ LIMIT 500
121
+ """,
122
+ (cutoff,)
123
+ )
124
+
125
+ if not decisions or len(decisions) < min_occurrences:
126
+ return []
127
+
128
+ # Group similar decisions
129
+ clusters = await self._cluster_by_embedding(
130
+ decisions, similarity_threshold
131
+ )
132
+
133
+ insights = []
134
+ for cluster in clusters:
135
+ if len(cluster) < min_occurrences:
136
+ continue
137
+
138
+ # Calculate success rate for this decision type
139
+ successful = sum(1 for d in cluster if d.get("success") == 1)
140
+ success_rate = successful / len(cluster) if cluster else 0
141
+
142
+ # Extract tech context
143
+ tech_stacks = []
144
+ for d in cluster:
145
+ if d.get("tech_stack"):
146
+ try:
147
+ stacks = json.loads(d["tech_stack"]) if isinstance(d["tech_stack"], str) else d["tech_stack"]
148
+ tech_stacks.extend(stacks if isinstance(stacks, list) else [stacks])
149
+ except:
150
+ pass
151
+
152
+ insight = {
153
+ "insight_type": "decision_pattern",
154
+ "title": f"Recurring decision pattern ({len(cluster)} times)",
155
+ "description": self._summarize_cluster(cluster, "decision"),
156
+ "evidence_count": len(cluster),
157
+ "evidence_ids": json.dumps([d["id"] for d in cluster]),
158
+ "source_sessions": json.dumps(list(set(d["session_id"] for d in cluster if d["session_id"]))[:10]),
159
+ "confidence": min(0.95, 0.5 + (success_rate * 0.3) + (len(cluster) * 0.05)),
160
+ "impact_score": min(10, 5 + (success_rate * 3)),
161
+ "category": "decision_pattern",
162
+ "success_rate": success_rate,
163
+ "tech_context": json.dumps(list(set(tech_stacks))[:5]) if tech_stacks else None,
164
+ "sample_content": [d["content"][:200] for d in cluster[:3]]
165
+ }
166
+ insights.append(insight)
167
+
168
+ return insights
169
+
170
+ async def detect_correction_patterns(
171
+ self,
172
+ days_back: int = 30
173
+ ) -> List[Dict[str, Any]]:
174
+ """Detect patterns where user had to correct Claude.
175
+
176
+ These indicate blind spots that should be addressed in CLAUDE.md.
177
+
178
+ Returns:
179
+ List of correction pattern insights
180
+ """
181
+ cutoff = (datetime.now() - timedelta(days=days_back)).isoformat()
182
+
183
+ # Look for memories with negative user feedback or failed outcomes
184
+ corrections = await self.db.execute_query(
185
+ """
186
+ SELECT id, content, type, session_id, outcome, user_feedback,
187
+ project_path, agent_type, created_at
188
+ FROM memories
189
+ WHERE created_at > ?
190
+ AND (
191
+ user_feedback LIKE '%wrong%' OR
192
+ user_feedback LIKE '%incorrect%' OR
193
+ user_feedback LIKE '%no%' OR
194
+ user_feedback LIKE '%fix%' OR
195
+ outcome LIKE '%failed%' OR
196
+ outcome LIKE '%error%' OR
197
+ success = 0
198
+ )
199
+ ORDER BY created_at DESC
200
+ LIMIT 200
201
+ """,
202
+ (cutoff,)
203
+ )
204
+
205
+ if not corrections:
206
+ return []
207
+
208
+ # Group by type/pattern
209
+ by_type = defaultdict(list)
210
+ for c in corrections:
211
+ key = c.get("type", "unknown")
212
+ by_type[key].append(c)
213
+
214
+ insights = []
215
+ for memory_type, items in by_type.items():
216
+ if len(items) < 2:
217
+ continue
218
+
219
+ sessions = list(set(i["session_id"] for i in items if i["session_id"]))
220
+
221
+ insight = {
222
+ "insight_type": "correction_pattern",
223
+ "title": f"Repeated corrections in {memory_type} ({len(items)} times)",
224
+ "description": f"User frequently corrected Claude on {memory_type} tasks. "
225
+ f"Consider adding specific guidance to CLAUDE.md.",
226
+ "evidence_count": len(items),
227
+ "evidence_ids": json.dumps([i["id"] for i in items]),
228
+ "source_sessions": json.dumps(sessions[:10]),
229
+ "confidence": min(0.8, 0.4 + (len(items) * 0.1)),
230
+ "impact_score": min(10, 6 + len(items)),
231
+ "category": "blind_spot",
232
+ "memory_type": memory_type,
233
+ "sample_feedback": [i.get("user_feedback", i.get("outcome", ""))[:100]
234
+ for i in items[:3] if i.get("user_feedback") or i.get("outcome")]
235
+ }
236
+ insights.append(insight)
237
+
238
+ return insights
239
+
240
+ async def identify_high_value_memories(
241
+ self,
242
+ days_back: int = 90,
243
+ min_access_count: int = 3
244
+ ) -> List[Dict[str, Any]]:
245
+ """Identify frequently accessed memories (high-value content).
246
+
247
+ Returns:
248
+ List of high-value memory insights
249
+ """
250
+ cutoff = (datetime.now() - timedelta(days=days_back)).isoformat()
251
+
252
+ high_value = await self.db.execute_query(
253
+ """
254
+ SELECT id, content, type, access_count, importance, project_path,
255
+ tech_stack, session_id, created_at
256
+ FROM memories
257
+ WHERE created_at > ?
258
+ AND access_count >= ?
259
+ ORDER BY access_count DESC, importance DESC
260
+ LIMIT 50
261
+ """,
262
+ (cutoff, min_access_count)
263
+ )
264
+
265
+ if not high_value:
266
+ return []
267
+
268
+ insights = []
269
+ for mem in high_value:
270
+ insight = {
271
+ "insight_type": "high_value_memory",
272
+ "title": f"High-value {mem['type']} (accessed {mem['access_count']} times)",
273
+ "description": mem["content"][:300],
274
+ "evidence_count": 1,
275
+ "evidence_ids": json.dumps([mem["id"]]),
276
+ "confidence": 0.9,
277
+ "impact_score": min(10, mem["importance"] + (mem["access_count"] * 0.5)),
278
+ "category": "valuable_content",
279
+ "access_count": mem["access_count"],
280
+ "memory_type": mem["type"],
281
+ "project_path": mem.get("project_path")
282
+ }
283
+ insights.append(insight)
284
+
285
+ return insights
286
+
287
+ async def suggest_claude_md_updates(
288
+ self,
289
+ min_confidence: float = 0.7
290
+ ) -> List[Dict[str, Any]]:
291
+ """Generate suggestions for CLAUDE.md updates based on insights.
292
+
293
+ Returns:
294
+ List of suggested instructions to add to CLAUDE.md
295
+ """
296
+ # Get high-confidence insights that haven't been applied
297
+ insights = await self.db.execute_query(
298
+ """
299
+ SELECT * FROM insights
300
+ WHERE status = 'active'
301
+ AND applied_to_claude_md = 0
302
+ AND confidence >= ?
303
+ ORDER BY impact_score DESC, confidence DESC
304
+ LIMIT 20
305
+ """,
306
+ (min_confidence,)
307
+ )
308
+
309
+ if not insights:
310
+ return []
311
+
312
+ suggestions = []
313
+ for insight in insights:
314
+ insight_type = insight["insight_type"]
315
+ title = insight["title"]
316
+ desc = insight["description"]
317
+
318
+ # Generate appropriate instruction based on type
319
+ if insight_type == "recurring_error":
320
+ instruction = f"- When encountering similar issues: {desc[:200]}"
321
+ section = "Debugging & Errors"
322
+ elif insight_type == "decision_pattern":
323
+ instruction = f"- Standard approach: {desc[:200]}"
324
+ section = "Development Patterns"
325
+ elif insight_type == "correction_pattern":
326
+ instruction = f"- Reminder: {desc[:200]}"
327
+ section = "Important Notes"
328
+ elif insight_type == "high_value_memory":
329
+ instruction = f"- Reference: {desc[:200]}"
330
+ section = "Quick Reference"
331
+ else:
332
+ instruction = f"- {desc[:200]}"
333
+ section = "General"
334
+
335
+ suggestions.append({
336
+ "insight_id": insight["id"],
337
+ "section": section,
338
+ "instruction": instruction,
339
+ "confidence": insight["confidence"],
340
+ "impact_score": insight["impact_score"],
341
+ "evidence_count": insight["evidence_count"]
342
+ })
343
+
344
+ return suggestions
345
+
346
+ async def run_aggregation(
347
+ self,
348
+ days_back: int = 30
349
+ ) -> Dict[str, Any]:
350
+ """Run full aggregation pipeline.
351
+
352
+ Returns:
353
+ Summary of generated insights
354
+ """
355
+ results = {
356
+ "error_patterns": 0,
357
+ "decision_patterns": 0,
358
+ "correction_patterns": 0,
359
+ "high_value_memories": 0,
360
+ "total_insights": 0
361
+ }
362
+
363
+ # Run each aggregation
364
+ error_insights = await self.aggregate_error_patterns(days_back)
365
+ for insight in error_insights:
366
+ await self._store_insight(insight)
367
+ results["error_patterns"] = len(error_insights)
368
+
369
+ decision_insights = await self.aggregate_decision_patterns(days_back)
370
+ for insight in decision_insights:
371
+ await self._store_insight(insight)
372
+ results["decision_patterns"] = len(decision_insights)
373
+
374
+ correction_insights = await self.detect_correction_patterns(days_back)
375
+ for insight in correction_insights:
376
+ await self._store_insight(insight)
377
+ results["correction_patterns"] = len(correction_insights)
378
+
379
+ high_value = await self.identify_high_value_memories(days_back * 3)
380
+ for insight in high_value:
381
+ await self._store_insight(insight)
382
+ results["high_value_memories"] = len(high_value)
383
+
384
+ results["total_insights"] = sum([
385
+ results["error_patterns"],
386
+ results["decision_patterns"],
387
+ results["correction_patterns"],
388
+ results["high_value_memories"]
389
+ ])
390
+
391
+ return results
392
+
393
+ async def _store_insight(self, insight: Dict[str, Any]) -> int:
394
+ """Store an insight in the database."""
395
+ # Generate embedding for the insight
396
+ embedding = None
397
+ if self.embeddings:
398
+ text = f"{insight.get('title', '')} {insight.get('description', '')}"
399
+ embedding = await self.embeddings.generate_embedding(text)
400
+
401
+ cursor = self.db.conn.cursor()
402
+ cursor.execute(
403
+ """
404
+ INSERT INTO insights (
405
+ insight_type, title, description, evidence_ids, evidence_count,
406
+ source_sessions, confidence, impact_score, category,
407
+ project_path, tech_context, embedding, status
408
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'active')
409
+ """,
410
+ (
411
+ insight.get("insight_type"),
412
+ insight.get("title"),
413
+ insight.get("description"),
414
+ insight.get("evidence_ids"),
415
+ insight.get("evidence_count", 1),
416
+ insight.get("source_sessions"),
417
+ insight.get("confidence", 0.5),
418
+ insight.get("impact_score", 5.0),
419
+ insight.get("category"),
420
+ insight.get("project_path"),
421
+ insight.get("tech_context"),
422
+ json.dumps(embedding) if embedding else None
423
+ )
424
+ )
425
+ self.db.conn.commit()
426
+ return cursor.lastrowid
427
+
428
+ async def _cluster_by_embedding(
429
+ self,
430
+ items: List[Dict[str, Any]],
431
+ threshold: float
432
+ ) -> List[List[Dict[str, Any]]]:
433
+ """Cluster items by embedding similarity.
434
+
435
+ Simple greedy clustering algorithm.
436
+ """
437
+ if not items:
438
+ return []
439
+
440
+ clusters = []
441
+ used = set()
442
+
443
+ for i, item in enumerate(items):
444
+ if i in used:
445
+ continue
446
+
447
+ cluster = [item]
448
+ used.add(i)
449
+
450
+ item_emb = self._parse_embedding(item.get("embedding"))
451
+ if not item_emb:
452
+ clusters.append(cluster)
453
+ continue
454
+
455
+ # Find similar items
456
+ for j, other in enumerate(items[i+1:], start=i+1):
457
+ if j in used:
458
+ continue
459
+
460
+ other_emb = self._parse_embedding(other.get("embedding"))
461
+ if not other_emb:
462
+ continue
463
+
464
+ similarity = self._cosine_similarity(item_emb, other_emb)
465
+ if similarity >= threshold:
466
+ cluster.append(other)
467
+ used.add(j)
468
+
469
+ clusters.append(cluster)
470
+
471
+ return [c for c in clusters if len(c) >= 1]
472
+
473
+ def _parse_embedding(self, embedding_str) -> Optional[List[float]]:
474
+ """Parse embedding from string or list."""
475
+ if not embedding_str:
476
+ return None
477
+ if isinstance(embedding_str, list):
478
+ return embedding_str
479
+ try:
480
+ return json.loads(embedding_str)
481
+ except:
482
+ return None
483
+
484
+ def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
485
+ """Calculate cosine similarity between two vectors."""
486
+ import numpy as np
487
+ a = np.array(vec1)
488
+ b = np.array(vec2)
489
+ norm_a = np.linalg.norm(a)
490
+ norm_b = np.linalg.norm(b)
491
+ if norm_a == 0 or norm_b == 0:
492
+ return 0.0
493
+ return float(np.dot(a, b) / (norm_a * norm_b))
494
+
495
+ def _summarize_cluster(self, cluster: List[Dict[str, Any]], cluster_type: str) -> str:
496
+ """Generate a summary description for a cluster."""
497
+ if not cluster:
498
+ return ""
499
+
500
+ # Use the first item as representative
501
+ first = cluster[0]
502
+ content = first.get("content", "")[:300]
503
+
504
+ if cluster_type == "error":
505
+ return f"Error pattern seen {len(cluster)} times: {content}"
506
+ elif cluster_type == "decision":
507
+ return f"Decision pattern applied {len(cluster)} times: {content}"
508
+ else:
509
+ return f"Pattern ({len(cluster)} occurrences): {content}"
510
+
511
+ async def get_insights(
512
+ self,
513
+ insight_type: Optional[str] = None,
514
+ project_path: Optional[str] = None,
515
+ min_confidence: float = 0.0,
516
+ limit: int = 20
517
+ ) -> List[Dict[str, Any]]:
518
+ """Retrieve stored insights.
519
+
520
+ Args:
521
+ insight_type: Filter by type (recurring_error, decision_pattern, etc.)
522
+ project_path: Filter by project
523
+ min_confidence: Minimum confidence threshold
524
+ limit: Maximum results
525
+
526
+ Returns:
527
+ List of insights
528
+ """
529
+ query = """
530
+ SELECT * FROM insights
531
+ WHERE status = 'active'
532
+ AND confidence >= ?
533
+ """
534
+ params = [min_confidence]
535
+
536
+ if insight_type:
537
+ query += " AND insight_type = ?"
538
+ params.append(insight_type)
539
+
540
+ if project_path:
541
+ query += " AND (project_path = ? OR project_path IS NULL)"
542
+ params.append(project_path)
543
+
544
+ query += " ORDER BY impact_score DESC, confidence DESC LIMIT ?"
545
+ params.append(limit)
546
+
547
+ results = await self.db.execute_query(query, tuple(params))
548
+ return [dict(r) for r in results] if results else []
549
+
550
+ async def record_feedback(
551
+ self,
552
+ insight_id: int,
553
+ helpful: bool,
554
+ session_id: Optional[str] = None,
555
+ comment: Optional[str] = None
556
+ ) -> bool:
557
+ """Record user feedback on an insight.
558
+
559
+ Args:
560
+ insight_id: The insight ID
561
+ helpful: Whether the insight was helpful
562
+ session_id: Current session
563
+ comment: Optional feedback comment
564
+
565
+ Returns:
566
+ True if recorded successfully
567
+ """
568
+ cursor = self.db.conn.cursor()
569
+
570
+ # Record feedback
571
+ cursor.execute(
572
+ """
573
+ INSERT INTO insight_feedback (insight_id, session_id, feedback_type, helpful, comment)
574
+ VALUES (?, ?, ?, ?, ?)
575
+ """,
576
+ (insight_id, session_id, "usage", 1 if helpful else 0, comment)
577
+ )
578
+
579
+ # Update insight validation counts
580
+ if helpful:
581
+ cursor.execute(
582
+ """
583
+ UPDATE insights
584
+ SET validation_count = validation_count + 1,
585
+ confidence = MIN(0.99, confidence + 0.02),
586
+ last_validated_at = datetime('now'),
587
+ updated_at = datetime('now')
588
+ WHERE id = ?
589
+ """,
590
+ (insight_id,)
591
+ )
592
+ else:
593
+ cursor.execute(
594
+ """
595
+ UPDATE insights
596
+ SET invalidation_count = invalidation_count + 1,
597
+ confidence = MAX(0.1, confidence - 0.05),
598
+ updated_at = datetime('now')
599
+ WHERE id = ?
600
+ """,
601
+ (insight_id,)
602
+ )
603
+
604
+ self.db.conn.commit()
605
+ return True
606
+
607
+ async def mark_applied_to_claude_md(self, insight_id: int) -> bool:
608
+ """Mark an insight as applied to CLAUDE.md."""
609
+ cursor = self.db.conn.cursor()
610
+ cursor.execute(
611
+ """
612
+ UPDATE insights
613
+ SET applied_to_claude_md = 1,
614
+ updated_at = datetime('now')
615
+ WHERE id = ?
616
+ """,
617
+ (insight_id,)
618
+ )
619
+ self.db.conn.commit()
620
+ return cursor.rowcount > 0
621
+
622
+
623
# Process-wide singleton holder.
_insights: Optional[InsightsService] = None


def get_insights_service(db, embeddings) -> InsightsService:
    """Return the global InsightsService instance, creating it on first use.

    NOTE(review): after the first call the ``db``/``embeddings`` arguments
    are ignored — later callers receive whatever instance was built first.
    """
    global _insights
    if _insights is None:
        _insights = InsightsService(db, embeddings)
    return _insights