alma-memory 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alma/__init__.py +121 -45
- alma/confidence/__init__.py +1 -1
- alma/confidence/engine.py +92 -58
- alma/confidence/types.py +34 -14
- alma/config/loader.py +3 -2
- alma/consolidation/__init__.py +23 -0
- alma/consolidation/engine.py +678 -0
- alma/consolidation/prompts.py +84 -0
- alma/core.py +136 -28
- alma/domains/__init__.py +6 -6
- alma/domains/factory.py +12 -9
- alma/domains/schemas.py +17 -3
- alma/domains/types.py +8 -4
- alma/events/__init__.py +75 -0
- alma/events/emitter.py +284 -0
- alma/events/storage_mixin.py +246 -0
- alma/events/types.py +126 -0
- alma/events/webhook.py +425 -0
- alma/exceptions.py +49 -0
- alma/extraction/__init__.py +31 -0
- alma/extraction/auto_learner.py +265 -0
- alma/extraction/extractor.py +420 -0
- alma/graph/__init__.py +106 -0
- alma/graph/backends/__init__.py +32 -0
- alma/graph/backends/kuzu.py +624 -0
- alma/graph/backends/memgraph.py +432 -0
- alma/graph/backends/memory.py +236 -0
- alma/graph/backends/neo4j.py +417 -0
- alma/graph/base.py +159 -0
- alma/graph/extraction.py +198 -0
- alma/graph/store.py +860 -0
- alma/harness/__init__.py +4 -4
- alma/harness/base.py +18 -9
- alma/harness/domains.py +27 -11
- alma/initializer/__init__.py +1 -1
- alma/initializer/initializer.py +51 -43
- alma/initializer/types.py +25 -17
- alma/integration/__init__.py +9 -9
- alma/integration/claude_agents.py +32 -20
- alma/integration/helena.py +32 -22
- alma/integration/victor.py +57 -33
- alma/learning/__init__.py +27 -27
- alma/learning/forgetting.py +198 -148
- alma/learning/heuristic_extractor.py +40 -24
- alma/learning/protocols.py +65 -17
- alma/learning/validation.py +7 -2
- alma/mcp/__init__.py +4 -4
- alma/mcp/__main__.py +2 -1
- alma/mcp/resources.py +17 -16
- alma/mcp/server.py +102 -44
- alma/mcp/tools.py +180 -45
- alma/observability/__init__.py +84 -0
- alma/observability/config.py +302 -0
- alma/observability/logging.py +424 -0
- alma/observability/metrics.py +583 -0
- alma/observability/tracing.py +440 -0
- alma/progress/__init__.py +3 -3
- alma/progress/tracker.py +26 -20
- alma/progress/types.py +8 -12
- alma/py.typed +0 -0
- alma/retrieval/__init__.py +11 -11
- alma/retrieval/cache.py +20 -21
- alma/retrieval/embeddings.py +4 -4
- alma/retrieval/engine.py +179 -39
- alma/retrieval/scoring.py +73 -63
- alma/session/__init__.py +2 -2
- alma/session/manager.py +5 -5
- alma/session/types.py +5 -4
- alma/storage/__init__.py +70 -0
- alma/storage/azure_cosmos.py +414 -133
- alma/storage/base.py +215 -4
- alma/storage/chroma.py +1443 -0
- alma/storage/constants.py +103 -0
- alma/storage/file_based.py +59 -28
- alma/storage/migrations/__init__.py +21 -0
- alma/storage/migrations/base.py +321 -0
- alma/storage/migrations/runner.py +323 -0
- alma/storage/migrations/version_stores.py +337 -0
- alma/storage/migrations/versions/__init__.py +11 -0
- alma/storage/migrations/versions/v1_0_0.py +373 -0
- alma/storage/pinecone.py +1080 -0
- alma/storage/postgresql.py +1559 -0
- alma/storage/qdrant.py +1306 -0
- alma/storage/sqlite_local.py +504 -60
- alma/testing/__init__.py +46 -0
- alma/testing/factories.py +301 -0
- alma/testing/mocks.py +389 -0
- alma/types.py +62 -14
- alma_memory-0.5.1.dist-info/METADATA +939 -0
- alma_memory-0.5.1.dist-info/RECORD +93 -0
- {alma_memory-0.4.0.dist-info → alma_memory-0.5.1.dist-info}/WHEEL +1 -1
- alma_memory-0.4.0.dist-info/METADATA +0 -488
- alma_memory-0.4.0.dist-info/RECORD +0 -52
- {alma_memory-0.4.0.dist-info → alma_memory-0.5.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,678 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ALMA Memory Consolidation Engine.
|
|
3
|
+
|
|
4
|
+
Implements LLM-powered deduplication that merges similar memories,
|
|
5
|
+
inspired by Mem0's core innovation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
import math
|
|
12
|
+
import uuid
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
16
|
+
|
|
17
|
+
from alma.consolidation.prompts import (
|
|
18
|
+
MERGE_ANTI_PATTERNS_PROMPT,
|
|
19
|
+
MERGE_DOMAIN_KNOWLEDGE_PROMPT,
|
|
20
|
+
MERGE_HEURISTICS_PROMPT,
|
|
21
|
+
)
|
|
22
|
+
from alma.retrieval.embeddings import EmbeddingProvider, LocalEmbedder
|
|
23
|
+
from alma.storage.base import StorageBackend
|
|
24
|
+
from alma.types import (
|
|
25
|
+
AntiPattern,
|
|
26
|
+
DomainKnowledge,
|
|
27
|
+
Heuristic,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class ConsolidationResult:
    """Summary of a single consolidation run."""

    # Memories removed by merging: N originals collapsed into 1 counts as N - 1.
    merged_count: int
    # Similarity clusters that contained more than one memory.
    groups_found: int
    # Total memories examined during the run.
    memories_processed: int
    errors: List[str] = field(default_factory=list)

    # Per-merge provenance records: {"merged_from", "merged_into", "count"}.
    merge_details: List[Dict[str, Any]] = field(default_factory=list)

    @property
    def success(self) -> bool:
        """True when no errors were recorded, or at least one merge landed anyway."""
        return not self.errors or self.merged_count > 0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the result, including the computed ``success`` flag."""
        return {
            "merged_count": self.merged_count,
            "groups_found": self.groups_found,
            "memories_processed": self.memories_processed,
            "errors": self.errors,
            "merge_details": self.merge_details,
            "success": self.success,
        }
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class ConsolidationEngine:
    """
    Memory consolidation engine for deduplicating and merging similar memories.

    Key features:
    - Cosine similarity-based grouping
    - Optional LLM-powered intelligent merging
    - Provenance tracking (merged_from metadata)
    - Dry-run mode for safety
    """

    def __init__(
        self,
        storage: StorageBackend,
        embedder: Optional[EmbeddingProvider] = None,
        llm_client: Optional[Any] = None,
    ):
        """
        Initialize the consolidation engine.

        Args:
            storage: Storage backend for memory operations
            embedder: Embedding provider (defaults to LocalEmbedder)
            llm_client: Optional LLM client for intelligent merging
                Should have a method `complete(prompt: str) -> str`
                (a `chat(messages)` method or a plain callable also works;
                see `_call_llm` for the accepted client shapes)
        """
        self.storage = storage
        # Fall back to a local embedding model when none is injected.
        self.embedder = embedder or LocalEmbedder()
        # May be None; merging then degrades to keeping the best member.
        self.llm_client = llm_client
|
|
91
|
+
|
|
92
|
+
    async def consolidate(
        self,
        agent: str,
        project_id: str,
        memory_type: str = "heuristics",
        similarity_threshold: float = 0.85,
        use_llm: bool = True,
        dry_run: bool = False,
    ) -> ConsolidationResult:
        """
        Merge similar memories to reduce redundancy.

        Algorithm:
        1. Get all memories for agent of the specified type
        2. Compute embeddings if not present
        3. Cluster by similarity (pairwise comparison)
        4. For each cluster > 1 memory:
           - If use_llm: Generate merged summary via LLM
           - Else: Keep highest confidence/most recent
        5. Delete originals, save merged (unless dry_run)

        Note:
            In dry_run mode the returned counts and merge_details still
            describe what *would* be merged; storage is left untouched.

        Args:
            agent: Agent name whose memories to consolidate
            project_id: Project ID
            memory_type: Type of memory to consolidate
                ("heuristics", "outcomes", "domain_knowledge", "anti_patterns")
            similarity_threshold: Minimum cosine similarity to group (0.0 to 1.0)
            use_llm: Whether to use LLM for intelligent merging
            dry_run: If True, don't actually modify storage

        Returns:
            ConsolidationResult with details of the operation
        """
        result = ConsolidationResult(
            merged_count=0,
            groups_found=0,
            memories_processed=0,
            errors=[],
            merge_details=[],
        )

        try:
            # 1. Get memories based on type
            memories = self._get_memories_by_type(
                agent=agent,
                project_id=project_id,
                memory_type=memory_type,
            )

            result.memories_processed = len(memories)

            # Nothing to merge with fewer than two memories.
            if len(memories) < 2:
                logger.info(f"Not enough memories to consolidate: {len(memories)}")
                return result

            # 2. Ensure embeddings are present
            memories = self._ensure_embeddings(memories, memory_type)

            # 3. Find similar groups
            groups = self._find_similar_groups(memories, similarity_threshold)
            # Only multi-member clusters count as mergeable groups.
            result.groups_found = len([g for g in groups if len(g) > 1])

            # 4. Process each group
            for group in groups:
                if len(group) <= 1:
                    continue  # Skip singletons

                try:
                    # Merge the group
                    merged, original_ids = await self._merge_group(
                        group=group,
                        memory_type=memory_type,
                        use_llm=use_llm,
                        project_id=project_id,
                        agent=agent,
                    )

                    if not dry_run:
                        # Save the merged memory *before* deleting originals so a
                        # mid-operation failure cannot lose the merged content.
                        self._save_merged(merged, memory_type)

                        # Delete originals
                        for original_id in original_ids:
                            self._delete_memory(original_id, memory_type)

                    # Counting happens even in dry_run so callers can preview.
                    result.merged_count += len(original_ids) - 1  # N merged into 1
                    result.merge_details.append(
                        {
                            "merged_from": original_ids,
                            "merged_into": (
                                merged.id if hasattr(merged, "id") else str(merged)
                            ),
                            "count": len(original_ids),
                        }
                    )

                except Exception as e:
                    # A failed group must not abort the run; record and continue.
                    error_msg = f"Failed to merge group: {str(e)}"
                    logger.exception(error_msg)
                    result.errors.append(error_msg)

        except Exception as e:
            # Top-level guard: report the failure through the result object
            # rather than raising, so callers always get a ConsolidationResult.
            error_msg = f"Consolidation failed: {str(e)}"
            logger.exception(error_msg)
            result.errors.append(error_msg)

        return result
|
|
199
|
+
|
|
200
|
+
def _get_memories_by_type(
|
|
201
|
+
self,
|
|
202
|
+
agent: str,
|
|
203
|
+
project_id: str,
|
|
204
|
+
memory_type: str,
|
|
205
|
+
) -> List[Any]:
|
|
206
|
+
"""Get all memories of a specific type for an agent."""
|
|
207
|
+
if memory_type == "heuristics":
|
|
208
|
+
return self.storage.get_heuristics(
|
|
209
|
+
project_id=project_id,
|
|
210
|
+
agent=agent,
|
|
211
|
+
top_k=1000, # Get all
|
|
212
|
+
)
|
|
213
|
+
elif memory_type == "outcomes":
|
|
214
|
+
return self.storage.get_outcomes(
|
|
215
|
+
project_id=project_id,
|
|
216
|
+
agent=agent,
|
|
217
|
+
top_k=1000,
|
|
218
|
+
)
|
|
219
|
+
elif memory_type == "domain_knowledge":
|
|
220
|
+
return self.storage.get_domain_knowledge(
|
|
221
|
+
project_id=project_id,
|
|
222
|
+
agent=agent,
|
|
223
|
+
top_k=1000,
|
|
224
|
+
)
|
|
225
|
+
elif memory_type == "anti_patterns":
|
|
226
|
+
return self.storage.get_anti_patterns(
|
|
227
|
+
project_id=project_id,
|
|
228
|
+
agent=agent,
|
|
229
|
+
top_k=1000,
|
|
230
|
+
)
|
|
231
|
+
else:
|
|
232
|
+
raise ValueError(f"Unknown memory type: {memory_type}")
|
|
233
|
+
|
|
234
|
+
def _ensure_embeddings(
|
|
235
|
+
self,
|
|
236
|
+
memories: List[Any],
|
|
237
|
+
memory_type: str,
|
|
238
|
+
) -> List[Any]:
|
|
239
|
+
"""Ensure all memories have embeddings, computing if needed."""
|
|
240
|
+
needs_embedding = []
|
|
241
|
+
needs_embedding_indices = []
|
|
242
|
+
|
|
243
|
+
for i, memory in enumerate(memories):
|
|
244
|
+
if not hasattr(memory, "embedding") or memory.embedding is None:
|
|
245
|
+
needs_embedding.append(self._get_embedding_text(memory, memory_type))
|
|
246
|
+
needs_embedding_indices.append(i)
|
|
247
|
+
|
|
248
|
+
if needs_embedding:
|
|
249
|
+
logger.info(f"Computing embeddings for {len(needs_embedding)} memories")
|
|
250
|
+
embeddings = self.embedder.encode_batch(needs_embedding)
|
|
251
|
+
|
|
252
|
+
for i, embedding in zip(needs_embedding_indices, embeddings, strict=False):
|
|
253
|
+
memories[i].embedding = embedding
|
|
254
|
+
|
|
255
|
+
return memories
|
|
256
|
+
|
|
257
|
+
def _get_embedding_text(self, memory: Any, memory_type: str) -> str:
|
|
258
|
+
"""Get the text to embed for a memory."""
|
|
259
|
+
if memory_type == "heuristics":
|
|
260
|
+
return f"{memory.condition} {memory.strategy}"
|
|
261
|
+
elif memory_type == "outcomes":
|
|
262
|
+
return f"{memory.task_description} {memory.strategy_used}"
|
|
263
|
+
elif memory_type == "domain_knowledge":
|
|
264
|
+
return f"{memory.domain} {memory.fact}"
|
|
265
|
+
elif memory_type == "anti_patterns":
|
|
266
|
+
return f"{memory.pattern} {memory.why_bad} {memory.better_alternative}"
|
|
267
|
+
else:
|
|
268
|
+
return str(memory)
|
|
269
|
+
|
|
270
|
+
def _find_similar_groups(
|
|
271
|
+
self,
|
|
272
|
+
memories: List[Any],
|
|
273
|
+
threshold: float,
|
|
274
|
+
) -> List[List[Any]]:
|
|
275
|
+
"""
|
|
276
|
+
Group memories by embedding similarity using union-find.
|
|
277
|
+
|
|
278
|
+
Args:
|
|
279
|
+
memories: List of memories with embeddings
|
|
280
|
+
threshold: Minimum cosine similarity to group
|
|
281
|
+
|
|
282
|
+
Returns:
|
|
283
|
+
List of groups (each group is a list of memories)
|
|
284
|
+
"""
|
|
285
|
+
n = len(memories)
|
|
286
|
+
if n == 0:
|
|
287
|
+
return []
|
|
288
|
+
|
|
289
|
+
# Union-Find data structure
|
|
290
|
+
parent = list(range(n))
|
|
291
|
+
rank = [0] * n
|
|
292
|
+
|
|
293
|
+
def find(x: int) -> int:
|
|
294
|
+
if parent[x] != x:
|
|
295
|
+
parent[x] = find(parent[x])
|
|
296
|
+
return parent[x]
|
|
297
|
+
|
|
298
|
+
def union(x: int, y: int) -> None:
|
|
299
|
+
px, py = find(x), find(y)
|
|
300
|
+
if px == py:
|
|
301
|
+
return
|
|
302
|
+
if rank[px] < rank[py]:
|
|
303
|
+
px, py = py, px
|
|
304
|
+
parent[py] = px
|
|
305
|
+
if rank[px] == rank[py]:
|
|
306
|
+
rank[px] += 1
|
|
307
|
+
|
|
308
|
+
# Compare all pairs
|
|
309
|
+
for i in range(n):
|
|
310
|
+
for j in range(i + 1, n):
|
|
311
|
+
emb1 = memories[i].embedding
|
|
312
|
+
emb2 = memories[j].embedding
|
|
313
|
+
|
|
314
|
+
if emb1 is not None and emb2 is not None:
|
|
315
|
+
similarity = self._compute_similarity(emb1, emb2)
|
|
316
|
+
if similarity >= threshold:
|
|
317
|
+
union(i, j)
|
|
318
|
+
|
|
319
|
+
# Build groups
|
|
320
|
+
groups_dict: Dict[int, List[Any]] = {}
|
|
321
|
+
for i in range(n):
|
|
322
|
+
root = find(i)
|
|
323
|
+
if root not in groups_dict:
|
|
324
|
+
groups_dict[root] = []
|
|
325
|
+
groups_dict[root].append(memories[i])
|
|
326
|
+
|
|
327
|
+
return list(groups_dict.values())
|
|
328
|
+
|
|
329
|
+
def _compute_similarity(
|
|
330
|
+
self,
|
|
331
|
+
emb1: List[float],
|
|
332
|
+
emb2: List[float],
|
|
333
|
+
) -> float:
|
|
334
|
+
"""
|
|
335
|
+
Compute cosine similarity between two embeddings.
|
|
336
|
+
|
|
337
|
+
Args:
|
|
338
|
+
emb1: First embedding vector
|
|
339
|
+
emb2: Second embedding vector
|
|
340
|
+
|
|
341
|
+
Returns:
|
|
342
|
+
Cosine similarity (0.0 to 1.0)
|
|
343
|
+
"""
|
|
344
|
+
if len(emb1) != len(emb2):
|
|
345
|
+
return 0.0
|
|
346
|
+
|
|
347
|
+
dot_product = sum(a * b for a, b in zip(emb1, emb2, strict=False))
|
|
348
|
+
norm1 = math.sqrt(sum(a * a for a in emb1))
|
|
349
|
+
norm2 = math.sqrt(sum(b * b for b in emb2))
|
|
350
|
+
|
|
351
|
+
if norm1 == 0 or norm2 == 0:
|
|
352
|
+
return 0.0
|
|
353
|
+
|
|
354
|
+
return dot_product / (norm1 * norm2)
|
|
355
|
+
|
|
356
|
+
async def _merge_group(
|
|
357
|
+
self,
|
|
358
|
+
group: List[Any],
|
|
359
|
+
memory_type: str,
|
|
360
|
+
use_llm: bool,
|
|
361
|
+
project_id: str,
|
|
362
|
+
agent: str,
|
|
363
|
+
) -> Tuple[Any, List[str]]:
|
|
364
|
+
"""
|
|
365
|
+
Merge a group of similar memories into one.
|
|
366
|
+
|
|
367
|
+
Args:
|
|
368
|
+
group: List of similar memories
|
|
369
|
+
memory_type: Type of memories
|
|
370
|
+
use_llm: Whether to use LLM for intelligent merging
|
|
371
|
+
project_id: Project ID
|
|
372
|
+
agent: Agent name
|
|
373
|
+
|
|
374
|
+
Returns:
|
|
375
|
+
Tuple of (merged_memory, list_of_original_ids)
|
|
376
|
+
"""
|
|
377
|
+
original_ids = [m.id for m in group]
|
|
378
|
+
|
|
379
|
+
if memory_type == "heuristics":
|
|
380
|
+
merged = await self._merge_heuristics(group, use_llm, project_id, agent)
|
|
381
|
+
elif memory_type == "domain_knowledge":
|
|
382
|
+
merged = await self._merge_domain_knowledge(
|
|
383
|
+
group, use_llm, project_id, agent
|
|
384
|
+
)
|
|
385
|
+
elif memory_type == "anti_patterns":
|
|
386
|
+
merged = await self._merge_anti_patterns(group, use_llm, project_id, agent)
|
|
387
|
+
elif memory_type == "outcomes":
|
|
388
|
+
# Outcomes typically aren't merged - just keep the most recent
|
|
389
|
+
merged = self._keep_most_recent(group)
|
|
390
|
+
else:
|
|
391
|
+
raise ValueError(f"Unknown memory type: {memory_type}")
|
|
392
|
+
|
|
393
|
+
return merged, original_ids
|
|
394
|
+
|
|
395
|
+
async def _merge_heuristics(
|
|
396
|
+
self,
|
|
397
|
+
group: List[Heuristic],
|
|
398
|
+
use_llm: bool,
|
|
399
|
+
project_id: str,
|
|
400
|
+
agent: str,
|
|
401
|
+
) -> Heuristic:
|
|
402
|
+
"""
|
|
403
|
+
Merge a group of similar heuristics into one.
|
|
404
|
+
|
|
405
|
+
Args:
|
|
406
|
+
group: List of similar heuristics
|
|
407
|
+
use_llm: Whether to use LLM for intelligent merging
|
|
408
|
+
project_id: Project ID
|
|
409
|
+
agent: Agent name
|
|
410
|
+
|
|
411
|
+
Returns:
|
|
412
|
+
Merged heuristic
|
|
413
|
+
"""
|
|
414
|
+
# Collect metadata
|
|
415
|
+
original_ids = [h.id for h in group]
|
|
416
|
+
total_occurrences = sum(h.occurrence_count for h in group)
|
|
417
|
+
total_successes = sum(h.success_count for h in group)
|
|
418
|
+
avg_confidence = sum(h.confidence for h in group) / len(group)
|
|
419
|
+
|
|
420
|
+
# Find the highest confidence heuristic as base
|
|
421
|
+
base = max(group, key=lambda h: h.confidence)
|
|
422
|
+
|
|
423
|
+
if use_llm and self.llm_client:
|
|
424
|
+
# Use LLM for intelligent merging
|
|
425
|
+
try:
|
|
426
|
+
merged_data = await self._llm_merge_heuristics(group)
|
|
427
|
+
condition = merged_data.get("condition", base.condition)
|
|
428
|
+
strategy = merged_data.get("strategy", base.strategy)
|
|
429
|
+
confidence = merged_data.get("confidence", avg_confidence)
|
|
430
|
+
except Exception as e:
|
|
431
|
+
logger.warning(f"LLM merge failed, using base: {e}")
|
|
432
|
+
condition = base.condition
|
|
433
|
+
strategy = base.strategy
|
|
434
|
+
confidence = avg_confidence
|
|
435
|
+
else:
|
|
436
|
+
# Without LLM, use the highest confidence heuristic
|
|
437
|
+
condition = base.condition
|
|
438
|
+
strategy = base.strategy
|
|
439
|
+
confidence = avg_confidence
|
|
440
|
+
|
|
441
|
+
# Create embedding for merged heuristic
|
|
442
|
+
embedding_text = f"{condition} {strategy}"
|
|
443
|
+
embedding = self.embedder.encode(embedding_text)
|
|
444
|
+
|
|
445
|
+
now = datetime.now(timezone.utc)
|
|
446
|
+
|
|
447
|
+
return Heuristic(
|
|
448
|
+
id=f"heuristic_{uuid.uuid4().hex[:12]}",
|
|
449
|
+
agent=agent,
|
|
450
|
+
project_id=project_id,
|
|
451
|
+
condition=condition,
|
|
452
|
+
strategy=strategy,
|
|
453
|
+
confidence=min(confidence, 1.0),
|
|
454
|
+
occurrence_count=total_occurrences,
|
|
455
|
+
success_count=total_successes,
|
|
456
|
+
last_validated=now,
|
|
457
|
+
created_at=now,
|
|
458
|
+
embedding=embedding,
|
|
459
|
+
metadata={
|
|
460
|
+
"merged_from": original_ids,
|
|
461
|
+
"merge_timestamp": now.isoformat(),
|
|
462
|
+
"original_count": len(group),
|
|
463
|
+
},
|
|
464
|
+
)
|
|
465
|
+
|
|
466
|
+
async def _merge_domain_knowledge(
|
|
467
|
+
self,
|
|
468
|
+
group: List[DomainKnowledge],
|
|
469
|
+
use_llm: bool,
|
|
470
|
+
project_id: str,
|
|
471
|
+
agent: str,
|
|
472
|
+
) -> DomainKnowledge:
|
|
473
|
+
"""Merge a group of similar domain knowledge items."""
|
|
474
|
+
original_ids = [dk.id for dk in group]
|
|
475
|
+
avg_confidence = sum(dk.confidence for dk in group) / len(group)
|
|
476
|
+
base = max(group, key=lambda dk: dk.confidence)
|
|
477
|
+
|
|
478
|
+
if use_llm and self.llm_client:
|
|
479
|
+
try:
|
|
480
|
+
merged_data = await self._llm_merge_domain_knowledge(group)
|
|
481
|
+
fact = merged_data.get("fact", base.fact)
|
|
482
|
+
confidence = merged_data.get("confidence", avg_confidence)
|
|
483
|
+
except Exception as e:
|
|
484
|
+
logger.warning(f"LLM merge failed, using base: {e}")
|
|
485
|
+
fact = base.fact
|
|
486
|
+
confidence = avg_confidence
|
|
487
|
+
else:
|
|
488
|
+
fact = base.fact
|
|
489
|
+
confidence = avg_confidence
|
|
490
|
+
|
|
491
|
+
embedding_text = f"{base.domain} {fact}"
|
|
492
|
+
embedding = self.embedder.encode(embedding_text)
|
|
493
|
+
|
|
494
|
+
now = datetime.now(timezone.utc)
|
|
495
|
+
|
|
496
|
+
return DomainKnowledge(
|
|
497
|
+
id=f"dk_{uuid.uuid4().hex[:12]}",
|
|
498
|
+
agent=agent,
|
|
499
|
+
project_id=project_id,
|
|
500
|
+
domain=base.domain,
|
|
501
|
+
fact=fact,
|
|
502
|
+
source="consolidation",
|
|
503
|
+
confidence=min(confidence, 1.0),
|
|
504
|
+
last_verified=now,
|
|
505
|
+
embedding=embedding,
|
|
506
|
+
metadata={
|
|
507
|
+
"merged_from": original_ids,
|
|
508
|
+
"merge_timestamp": now.isoformat(),
|
|
509
|
+
"original_count": len(group),
|
|
510
|
+
},
|
|
511
|
+
)
|
|
512
|
+
|
|
513
|
+
async def _merge_anti_patterns(
|
|
514
|
+
self,
|
|
515
|
+
group: List[AntiPattern],
|
|
516
|
+
use_llm: bool,
|
|
517
|
+
project_id: str,
|
|
518
|
+
agent: str,
|
|
519
|
+
) -> AntiPattern:
|
|
520
|
+
"""Merge a group of similar anti-patterns."""
|
|
521
|
+
original_ids = [ap.id for ap in group]
|
|
522
|
+
total_occurrences = sum(ap.occurrence_count for ap in group)
|
|
523
|
+
base = max(group, key=lambda ap: ap.occurrence_count)
|
|
524
|
+
|
|
525
|
+
if use_llm and self.llm_client:
|
|
526
|
+
try:
|
|
527
|
+
merged_data = await self._llm_merge_anti_patterns(group)
|
|
528
|
+
pattern = merged_data.get("pattern", base.pattern)
|
|
529
|
+
why_bad = merged_data.get("why_bad", base.why_bad)
|
|
530
|
+
better_alternative = merged_data.get(
|
|
531
|
+
"better_alternative", base.better_alternative
|
|
532
|
+
)
|
|
533
|
+
except Exception as e:
|
|
534
|
+
logger.warning(f"LLM merge failed, using base: {e}")
|
|
535
|
+
pattern = base.pattern
|
|
536
|
+
why_bad = base.why_bad
|
|
537
|
+
better_alternative = base.better_alternative
|
|
538
|
+
else:
|
|
539
|
+
pattern = base.pattern
|
|
540
|
+
why_bad = base.why_bad
|
|
541
|
+
better_alternative = base.better_alternative
|
|
542
|
+
|
|
543
|
+
embedding_text = f"{pattern} {why_bad} {better_alternative}"
|
|
544
|
+
embedding = self.embedder.encode(embedding_text)
|
|
545
|
+
|
|
546
|
+
now = datetime.now(timezone.utc)
|
|
547
|
+
|
|
548
|
+
return AntiPattern(
|
|
549
|
+
id=f"ap_{uuid.uuid4().hex[:12]}",
|
|
550
|
+
agent=agent,
|
|
551
|
+
project_id=project_id,
|
|
552
|
+
pattern=pattern,
|
|
553
|
+
why_bad=why_bad,
|
|
554
|
+
better_alternative=better_alternative,
|
|
555
|
+
occurrence_count=total_occurrences,
|
|
556
|
+
last_seen=now,
|
|
557
|
+
created_at=now,
|
|
558
|
+
embedding=embedding,
|
|
559
|
+
metadata={
|
|
560
|
+
"merged_from": original_ids,
|
|
561
|
+
"merge_timestamp": now.isoformat(),
|
|
562
|
+
"original_count": len(group),
|
|
563
|
+
},
|
|
564
|
+
)
|
|
565
|
+
|
|
566
|
+
def _keep_most_recent(self, group: List[Any]) -> Any:
|
|
567
|
+
"""Keep the most recent memory from a group."""
|
|
568
|
+
return max(
|
|
569
|
+
group,
|
|
570
|
+
key=lambda m: getattr(
|
|
571
|
+
m, "timestamp", getattr(m, "created_at", datetime.min)
|
|
572
|
+
),
|
|
573
|
+
)
|
|
574
|
+
|
|
575
|
+
async def _llm_merge_heuristics(self, group: List[Heuristic]) -> Dict[str, Any]:
|
|
576
|
+
"""Use LLM to intelligently merge heuristics."""
|
|
577
|
+
heuristics_text = "\n\n".join(
|
|
578
|
+
[
|
|
579
|
+
f"Heuristic {i + 1}:\n"
|
|
580
|
+
f" Condition: {h.condition}\n"
|
|
581
|
+
f" Strategy: {h.strategy}\n"
|
|
582
|
+
f" Confidence: {h.confidence:.2f}\n"
|
|
583
|
+
f" Occurrences: {h.occurrence_count}"
|
|
584
|
+
for i, h in enumerate(group)
|
|
585
|
+
]
|
|
586
|
+
)
|
|
587
|
+
|
|
588
|
+
prompt = MERGE_HEURISTICS_PROMPT.format(heuristics=heuristics_text)
|
|
589
|
+
|
|
590
|
+
response = await self._call_llm(prompt)
|
|
591
|
+
return json.loads(response)
|
|
592
|
+
|
|
593
|
+
async def _llm_merge_domain_knowledge(
|
|
594
|
+
self, group: List[DomainKnowledge]
|
|
595
|
+
) -> Dict[str, Any]:
|
|
596
|
+
"""Use LLM to intelligently merge domain knowledge."""
|
|
597
|
+
knowledge_text = "\n\n".join(
|
|
598
|
+
[
|
|
599
|
+
f"Knowledge {i + 1}:\n"
|
|
600
|
+
f" Domain: {dk.domain}\n"
|
|
601
|
+
f" Fact: {dk.fact}\n"
|
|
602
|
+
f" Confidence: {dk.confidence:.2f}"
|
|
603
|
+
for i, dk in enumerate(group)
|
|
604
|
+
]
|
|
605
|
+
)
|
|
606
|
+
|
|
607
|
+
prompt = MERGE_DOMAIN_KNOWLEDGE_PROMPT.format(knowledge_items=knowledge_text)
|
|
608
|
+
|
|
609
|
+
response = await self._call_llm(prompt)
|
|
610
|
+
return json.loads(response)
|
|
611
|
+
|
|
612
|
+
async def _llm_merge_anti_patterns(
|
|
613
|
+
self, group: List[AntiPattern]
|
|
614
|
+
) -> Dict[str, Any]:
|
|
615
|
+
"""Use LLM to intelligently merge anti-patterns."""
|
|
616
|
+
patterns_text = "\n\n".join(
|
|
617
|
+
[
|
|
618
|
+
f"Anti-Pattern {i + 1}:\n"
|
|
619
|
+
f" Pattern: {ap.pattern}\n"
|
|
620
|
+
f" Why Bad: {ap.why_bad}\n"
|
|
621
|
+
f" Alternative: {ap.better_alternative}\n"
|
|
622
|
+
f" Occurrences: {ap.occurrence_count}"
|
|
623
|
+
for i, ap in enumerate(group)
|
|
624
|
+
]
|
|
625
|
+
)
|
|
626
|
+
|
|
627
|
+
prompt = MERGE_ANTI_PATTERNS_PROMPT.format(anti_patterns=patterns_text)
|
|
628
|
+
|
|
629
|
+
response = await self._call_llm(prompt)
|
|
630
|
+
return json.loads(response)
|
|
631
|
+
|
|
632
|
+
async def _call_llm(self, prompt: str) -> str:
|
|
633
|
+
"""Call the LLM client."""
|
|
634
|
+
if not self.llm_client:
|
|
635
|
+
raise ValueError("LLM client not configured")
|
|
636
|
+
|
|
637
|
+
# Support different LLM client interfaces
|
|
638
|
+
if hasattr(self.llm_client, "complete"):
|
|
639
|
+
result = self.llm_client.complete(prompt)
|
|
640
|
+
if asyncio.iscoroutine(result):
|
|
641
|
+
return await result
|
|
642
|
+
return result
|
|
643
|
+
elif hasattr(self.llm_client, "chat"):
|
|
644
|
+
result = self.llm_client.chat([{"role": "user", "content": prompt}])
|
|
645
|
+
if asyncio.iscoroutine(result):
|
|
646
|
+
return await result
|
|
647
|
+
return result
|
|
648
|
+
elif callable(self.llm_client):
|
|
649
|
+
result = self.llm_client(prompt)
|
|
650
|
+
if asyncio.iscoroutine(result):
|
|
651
|
+
return await result
|
|
652
|
+
return result
|
|
653
|
+
else:
|
|
654
|
+
raise ValueError(
|
|
655
|
+
"LLM client must have 'complete', 'chat', or '__call__' method"
|
|
656
|
+
)
|
|
657
|
+
|
|
658
|
+
def _save_merged(self, memory: Any, memory_type: str) -> None:
|
|
659
|
+
"""Save a merged memory to storage."""
|
|
660
|
+
if memory_type == "heuristics":
|
|
661
|
+
self.storage.save_heuristic(memory)
|
|
662
|
+
elif memory_type == "domain_knowledge":
|
|
663
|
+
self.storage.save_domain_knowledge(memory)
|
|
664
|
+
elif memory_type == "anti_patterns":
|
|
665
|
+
self.storage.save_anti_pattern(memory)
|
|
666
|
+
elif memory_type == "outcomes":
|
|
667
|
+
self.storage.save_outcome(memory)
|
|
668
|
+
|
|
669
|
+
def _delete_memory(self, memory_id: str, memory_type: str) -> None:
|
|
670
|
+
"""Delete a memory from storage."""
|
|
671
|
+
if memory_type == "heuristics":
|
|
672
|
+
self.storage.delete_heuristic(memory_id)
|
|
673
|
+
elif memory_type == "domain_knowledge":
|
|
674
|
+
self.storage.delete_domain_knowledge(memory_id)
|
|
675
|
+
elif memory_type == "anti_patterns":
|
|
676
|
+
self.storage.delete_anti_pattern(memory_id)
|
|
677
|
+
elif memory_type == "outcomes":
|
|
678
|
+
self.storage.delete_outcome(memory_id)
|