alma-memory 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,374 @@
1
+ """
2
+ ALMA Heuristic Extraction.
3
+
4
+ Analyzes outcomes to identify patterns and create heuristics.
5
+ """
6
+
7
+ import uuid
8
+ import logging
9
+ from datetime import datetime, timezone
10
+ from typing import Optional, List, Dict, Any, Tuple
11
+ from dataclasses import dataclass, field
12
+ from collections import defaultdict
13
+
14
+ from alma.types import Heuristic, Outcome, MemoryScope
15
+ from alma.storage.base import StorageBackend
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@dataclass
class PatternCandidate:
    """A cluster of similar-strategy outcomes that may become a heuristic."""
    task_type: str
    strategy: str
    occurrence_count: int
    success_count: int
    failure_count: int
    outcomes: List[Outcome] = field(default_factory=list)

    @property
    def success_rate(self) -> float:
        """Fraction of occurrences that succeeded (0.0 when there are none)."""
        total = self.occurrence_count
        return self.success_count / total if total != 0 else 0.0

    @property
    def confidence(self) -> float:
        """
        Success rate discounted for small sample sizes.

        The success rate is scaled by a weight that grows linearly from
        0.5 to 1.0 as the occurrence count goes from 0 to 20, and stays
        at 1.0 beyond that. Returns 0.0 when there are no occurrences.
        """
        total = self.occurrence_count
        if total == 0:
            return 0.0

        # Sample-size factor: saturates at 1.0 once 20 samples are reached.
        saturation = total / 20.0
        if saturation > 1.0:
            saturation = 1.0

        weight = 0.5 + 0.5 * saturation
        return self.success_rate * weight
54
+
55
+
56
@dataclass
class ExtractionResult:
    """Counters summarising one heuristic-extraction run."""
    heuristics_created: int = 0
    heuristics_updated: int = 0
    patterns_analyzed: int = 0
    patterns_rejected: int = 0
    rejected_reasons: Dict[str, int] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the counters as a plain dictionary keyed by field name."""
        exported = (
            "heuristics_created",
            "heuristics_updated",
            "patterns_analyzed",
            "patterns_rejected",
            "rejected_reasons",
        )
        # Values are passed through as-is (rejected_reasons is not copied).
        return {name: getattr(self, name) for name in exported}
74
+
75
+
76
class HeuristicExtractor:
    """
    Extracts heuristics from outcome patterns.

    Analyzes historical outcomes to identify successful strategies
    and creates heuristics when patterns are validated. Outcomes are
    bucketed by (agent, task type); inside each bucket, strategies are
    clustered by word overlap, and every cluster that passes the
    occurrence and confidence thresholds is saved as a heuristic (or
    used to refresh a matching heuristic that already exists).
    """

    # Filler words dropped before two strategy descriptions are compared.
    _STOP_WORDS = frozenset({
        "the", "a", "an", "and", "or", "but", "in", "on", "at",
        "to", "for", "of", "with", "by", "then", "first", "next",
    })

    def __init__(
        self,
        storage: StorageBackend,
        scopes: Dict[str, MemoryScope],
        min_occurrences: int = 3,
        min_confidence: float = 0.5,
        strategy_similarity_threshold: float = 0.5,
    ):
        """
        Initialize extractor.

        Args:
            storage: Storage backend
            scopes: Agent scope definitions
            min_occurrences: Minimum outcomes before creating heuristic
                (used for agents that have no entry in ``scopes``)
            min_confidence: Minimum confidence to create heuristic
            strategy_similarity_threshold: Minimum Jaccard similarity
                (0..1) for two strategies to be grouped together
        """
        self.storage = storage
        self.scopes = scopes
        self.min_occurrences = min_occurrences
        self.min_confidence = min_confidence
        self.similarity_threshold = strategy_similarity_threshold

    def extract(
        self,
        project_id: str,
        agent: Optional[str] = None,
    ) -> ExtractionResult:
        """
        Extract heuristics from all outcomes.

        Args:
            project_id: Project to analyze
            agent: Specific agent or None for all

        Returns:
            ExtractionResult with summary. Newly created heuristics and
            refreshed existing heuristics are counted separately in
            ``heuristics_created`` and ``heuristics_updated``.
        """
        result = ExtractionResult()

        # Fetch up to 10000 outcomes, successes and failures alike.
        outcomes = self.storage.get_outcomes(
            project_id=project_id,
            agent=agent,
            top_k=10000,
            success_only=False,
        )

        if not outcomes:
            logger.info("No outcomes to analyze")
            return result

        grouped = self._group_outcomes(outcomes)

        for (ag, _task_type), type_outcomes in grouped.items():
            patterns = self._identify_patterns(type_outcomes)
            result.patterns_analyzed += len(patterns)

            for pattern in patterns:
                accepted, reason = self._maybe_create_heuristic(
                    agent=ag,
                    project_id=project_id,
                    pattern=pattern,
                )

                if not accepted:
                    result.patterns_rejected += 1
                    result.rejected_reasons[reason] = (
                        result.rejected_reasons.get(reason, 0) + 1
                    )
                elif reason == "updated":
                    # An existing heuristic was refreshed, not created.
                    result.heuristics_updated += 1
                else:
                    result.heuristics_created += 1

        logger.info(
            f"Extraction complete: {result.heuristics_created} heuristics created, "
            f"{result.heuristics_updated} heuristics updated, "
            f"{result.patterns_rejected} patterns rejected"
        )

        return result

    def _group_outcomes(
        self,
        outcomes: List[Outcome],
    ) -> Dict[Tuple[str, str], List[Outcome]]:
        """Group outcomes by (agent, task type), preserving input order."""
        grouped: Dict[Tuple[str, str], List[Outcome]] = defaultdict(list)
        for outcome in outcomes:
            grouped[(outcome.agent, outcome.task_type)].append(outcome)
        return grouped

    def _identify_patterns(
        self,
        outcomes: List[Outcome],
    ) -> List[PatternCandidate]:
        """
        Identify patterns in outcomes by grouping similar strategies.

        Each outcome joins the first existing group whose canonical
        strategy (the strategy text of the group's first member) is
        similar according to ``_strategies_similar``; otherwise it
        starts a new group keyed by its own strategy text.

        Args:
            outcomes: Outcomes that share one agent and task type

        Returns:
            One PatternCandidate per strategy group, in group-creation order
        """
        strategy_groups: Dict[str, List[Outcome]] = {}

        for outcome in outcomes:
            for canonical, members in strategy_groups.items():
                if self._strategies_similar(outcome.strategy_used, canonical):
                    members.append(outcome)
                    break
            else:
                # No similar group. setdefault (rather than assignment)
                # keeps an identically-keyed group intact when the
                # similarity threshold is so strict that a strategy does
                # not even match itself.
                strategy_groups.setdefault(outcome.strategy_used, []).append(outcome)

        patterns = []
        for strategy, group_outcomes in strategy_groups.items():
            success_count = sum(1 for o in group_outcomes if o.success)
            patterns.append(PatternCandidate(
                task_type=group_outcomes[0].task_type,
                strategy=strategy,
                occurrence_count=len(group_outcomes),
                success_count=success_count,
                failure_count=len(group_outcomes) - success_count,
                outcomes=group_outcomes,
            ))

        return patterns

    def _maybe_create_heuristic(
        self,
        agent: str,
        project_id: str,
        pattern: PatternCandidate,
    ) -> Tuple[bool, str]:
        """
        Create or refresh a heuristic if the pattern meets criteria.

        Args:
            agent: Agent the pattern belongs to
            project_id: Project the pattern belongs to
            pattern: Candidate pattern to evaluate

        Returns:
            Tuple of (accepted: bool, reason: str). ``reason`` is
            "created" or "updated" when accepted, otherwise a string
            starting with "insufficient_occurrences_" or
            "low_confidence_" followed by the offending value.
        """
        # A scope defined for the agent overrides the extractor-wide minimum.
        scope = self.scopes.get(agent)
        min_occ = self.min_occurrences
        if scope:
            min_occ = scope.min_occurrences_for_heuristic

        if pattern.occurrence_count < min_occ:
            return False, f"insufficient_occurrences_{pattern.occurrence_count}"

        if pattern.confidence < self.min_confidence:
            return False, f"low_confidence_{pattern.confidence:.2f}"

        existing = self._find_existing_heuristic(
            agent=agent,
            project_id=project_id,
            task_type=pattern.task_type,
            strategy=pattern.strategy,
        )

        if existing:
            self._update_heuristic(existing, pattern)
            return True, "updated"

        heuristic = Heuristic(
            id=f"heur_{uuid.uuid4().hex[:12]}",
            agent=agent,
            project_id=project_id,
            condition=f"task type: {pattern.task_type}",
            strategy=pattern.strategy,
            confidence=pattern.confidence,
            occurrence_count=pattern.occurrence_count,
            success_count=pattern.success_count,
            last_validated=datetime.now(timezone.utc),
            created_at=datetime.now(timezone.utc),
        )

        self.storage.save_heuristic(heuristic)
        logger.info(
            f"Created heuristic for {agent}: {pattern.strategy[:50]}... "
            f"(confidence: {pattern.confidence:.0%})"
        )

        return True, "created"

    def _find_existing_heuristic(
        self,
        agent: str,
        project_id: str,
        task_type: str,
        strategy: str,
    ) -> Optional[Heuristic]:
        """
        Find an existing heuristic that matches this pattern.

        Only the first 100 heuristics returned by storage are inspected.
        A heuristic matches when ``task_type`` occurs as a substring of
        its condition and its strategy is similar to ``strategy``.

        Returns:
            The first matching heuristic, or None
        """
        heuristics = self.storage.get_heuristics(
            project_id=project_id,
            agent=agent,
            top_k=100,
            min_confidence=0.0,
        )

        for h in heuristics:
            if (task_type in h.condition and
                    self._strategies_similar(h.strategy, strategy)):
                return h

        return None

    def _update_heuristic(
        self,
        heuristic: Heuristic,
        pattern: PatternCandidate,
    ):
        """
        Update an existing heuristic with new data and persist it.

        Counts never decrease: each is set to the larger of the stored
        value and the pattern's value. Confidence is replaced by the
        pattern's confidence and ``last_validated`` is set to now (UTC).
        """
        heuristic.occurrence_count = max(
            heuristic.occurrence_count,
            pattern.occurrence_count
        )
        heuristic.success_count = max(
            heuristic.success_count,
            pattern.success_count
        )

        heuristic.confidence = pattern.confidence
        heuristic.last_validated = datetime.now(timezone.utc)

        self.storage.save_heuristic(heuristic)
        logger.debug(f"Updated heuristic {heuristic.id}")

    def _strategies_similar(self, s1: str, s2: str) -> bool:
        """
        Check if two strategies are similar enough to be grouped.

        Compares the Jaccard similarity of the normalized word sets
        against ``self.similarity_threshold``. If either strategy has no
        words left after normalization, falls back to a
        case-insensitive exact comparison of the raw strings.
        """
        words1 = set(self._normalize_strategy(s1))
        words2 = set(self._normalize_strategy(s2))

        if not words1 or not words2:
            return s1.lower() == s2.lower()

        # Both sets are non-empty here, so the union is never empty.
        similarity = len(words1 & words2) / len(words1 | words2)

        return similarity >= self.similarity_threshold

    def _normalize_strategy(self, strategy: str) -> List[str]:
        """
        Normalize strategy text for comparison.

        Lower-cases the text, treats commas and periods as whitespace,
        and drops stop words and words of two characters or fewer.

        Returns:
            Remaining words in their original order
        """
        words = strategy.lower().replace(",", " ").replace(".", " ").split()
        return [w for w in words if w not in self._STOP_WORDS and len(w) > 2]
353
+
354
+
355
def extract_heuristics_from_outcome(
    outcome: Outcome,
    existing_heuristics: List[Heuristic],
    min_confidence: float = 0.5,
) -> Optional[Dict[str, Any]]:
    """
    Report which existing heuristic, if any, a single outcome bears on.

    The first heuristic owned by the outcome's agent whose condition
    contains the outcome's task type is selected.

    Args:
        outcome: Outcome to check
        existing_heuristics: Candidate heuristics, searched in order
        min_confidence: Accepted for interface compatibility; not
            consulted by this function

    Returns:
        Dict with "heuristic_id", "action" ("validate" for a successful
        outcome, "invalidate" otherwise) and "current_confidence", or
        None when no heuristic matches.
    """
    matched = next(
        (
            candidate
            for candidate in existing_heuristics
            if candidate.agent == outcome.agent
            and outcome.task_type in candidate.condition
        ),
        None,
    )
    if matched is None:
        return None

    action = "validate" if outcome.success else "invalidate"
    return {
        "heuristic_id": matched.id,
        "action": action,
        "current_confidence": matched.confidence,
    }
@@ -0,0 +1,326 @@
1
+ """
2
+ ALMA Learning Protocols.
3
+
4
+ Defines how agents learn from outcomes while respecting scope constraints.
5
+ """
6
+
7
+ import uuid
8
+ import logging
9
+ from datetime import datetime, timezone, timedelta
10
+ from typing import Optional, Dict, Any
11
+
12
+ from alma.types import (
13
+ Heuristic,
14
+ Outcome,
15
+ UserPreference,
16
+ DomainKnowledge,
17
+ AntiPattern,
18
+ MemoryScope,
19
+ )
20
+ from alma.storage.base import StorageBackend
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class LearningProtocol:
    """
    Manages how agents learn from task outcomes.

    Key principles:
    - Validate scope before any learning
    - Require minimum occurrences before creating heuristics
    - Support forgetting to prevent memory bloat
    """

    # Ordered (keywords, task type) rules for _infer_task_type; the first
    # rule with any keyword contained in the task description wins.
    # NOTE(review): because "test" is checked first, descriptions such as
    # "test the API" resolve to "testing", not "api_testing" - confirm
    # this ordering is intended.
    _TASK_TYPE_RULES = (
        (("test", "validate"), "testing"),
        (("api", "endpoint"), "api_testing"),
        (("form", "input"), "form_testing"),
        (("database", "query"), "database_validation"),
    )

    def __init__(
        self,
        storage: StorageBackend,
        scopes: Dict[str, MemoryScope],
    ):
        """
        Initialize learning protocol.

        Args:
            storage: Storage backend for persistence
            scopes: Dict of agent_name -> MemoryScope
        """
        self.storage = storage
        self.scopes = scopes

    def learn(
        self,
        agent: str,
        project_id: str,
        task: str,
        outcome: bool,
        strategy_used: str,
        task_type: Optional[str] = None,
        duration_ms: Optional[int] = None,
        error_message: Optional[str] = None,
        feedback: Optional[str] = None,
    ) -> bool:
        """
        Learn from a task outcome.

        Creates an Outcome record and potentially updates/creates
        heuristics and anti-patterns.

        Args:
            agent: Agent that executed the task
            project_id: Project context
            task: Task description
            outcome: True if successful, False if failed
            strategy_used: The approach taken
            task_type: Category for grouping; inferred from ``task``
                when omitted or empty
            duration_ms: Execution time
            error_message: Error details if failed
            feedback: User feedback

        Returns:
            True if learning was accepted (currently always True)
        """
        # A missing scope is only reported; learning still proceeds.
        scope = self.scopes.get(agent)
        if scope is None:
            logger.warning(f"Agent '{agent}' has no defined scope")

        outcome_record = Outcome(
            id=f"out_{uuid.uuid4().hex[:12]}",
            agent=agent,
            project_id=project_id,
            task_type=task_type or self._infer_task_type(task),
            task_description=task,
            success=outcome,
            strategy_used=strategy_used,
            duration_ms=duration_ms,
            error_message=error_message,
            user_feedback=feedback,
            timestamp=datetime.now(timezone.utc),
        )

        # The outcome is persisted first so the occurrence checks below
        # can see it.
        self.storage.save_outcome(outcome_record)
        logger.info(f"Recorded outcome for {agent}: {'success' if outcome else 'failure'}")

        self._maybe_create_heuristic(
            agent=agent,
            project_id=project_id,
            task_type=outcome_record.task_type,
            strategy=strategy_used,
            success=outcome,
            scope=scope,
        )

        # Only failures that carry an error message can form an anti-pattern.
        if not outcome and error_message:
            self._maybe_create_anti_pattern(
                agent=agent,
                project_id=project_id,
                task=task,
                strategy=strategy_used,
                error=error_message,
            )

        return True

    def _find_existing_heuristic(
        self,
        agent: str,
        project_id: str,
        condition: str,
        strategy: str,
    ) -> Optional[Heuristic]:
        """Return the stored heuristic with this condition and a similar strategy, if any."""
        heuristics = self.storage.get_heuristics(
            project_id=project_id,
            agent=agent,
            top_k=100,
            min_confidence=0.0,
        )
        for candidate in heuristics or []:
            if (candidate.condition == condition and
                    self._strategies_similar(candidate.strategy, strategy)):
                return candidate
        return None

    def _maybe_create_heuristic(
        self,
        agent: str,
        project_id: str,
        task_type: str,
        strategy: str,
        success: bool,
        scope: Optional[MemoryScope],
    ):
        """
        Create or update a heuristic if we have enough occurrences.

        If a heuristic for this task type and a similar strategy already
        exists, it is updated in place with this single outcome (counts
        incremented, confidence recomputed) instead of saving a
        duplicate. Otherwise a heuristic is created only after
        min_occurrences similar outcomes with a success ratio above 0.5.

        Args:
            agent: Agent the outcome belongs to
            project_id: Project context
            task_type: Task category of the outcome just recorded
            strategy: Strategy used for the outcome just recorded
            success: Whether the outcome just recorded succeeded
            scope: Agent scope, or None to use the default minimum of 3
        """
        condition = f"task type: {task_type}"

        existing = self._find_existing_heuristic(
            agent=agent,
            project_id=project_id,
            condition=condition,
            strategy=strategy,
        )
        if existing is not None:
            # Fold the outcome that was just recorded into the heuristic.
            existing.occurrence_count += 1
            if success:
                existing.success_count += 1
            existing.confidence = existing.success_count / existing.occurrence_count
            existing.last_validated = datetime.now(timezone.utc)
            self.storage.save_heuristic(existing)
            logger.debug(f"Updated heuristic {existing.id}")
            return

        min_occurrences = 3
        if scope:
            min_occurrences = scope.min_occurrences_for_heuristic

        # Only a small window of outcomes is needed to decide whether the
        # creation threshold has been reached.
        similar_outcomes = self.storage.get_outcomes(
            project_id=project_id,
            agent=agent,
            task_type=task_type,
            top_k=min_occurrences + 1,
            success_only=False,
        )

        same_strategy = [
            o for o in similar_outcomes
            if self._strategies_similar(o.strategy_used, strategy)
        ]

        if len(same_strategy) < min_occurrences:
            return

        success_count = sum(1 for o in same_strategy if o.success)
        confidence = success_count / len(same_strategy)

        # Only create heuristic if confidence is meaningful
        if confidence > 0.5:
            heuristic = Heuristic(
                id=f"heur_{uuid.uuid4().hex[:12]}",
                agent=agent,
                project_id=project_id,
                condition=condition,
                strategy=strategy,
                confidence=confidence,
                occurrence_count=len(same_strategy),
                success_count=success_count,
                last_validated=datetime.now(timezone.utc),
                created_at=datetime.now(timezone.utc),
            )
            self.storage.save_heuristic(heuristic)
            logger.info(
                f"Created heuristic for {agent}: {strategy[:50]}... "
                f"(confidence: {confidence:.0%})"
            )

    def _maybe_create_anti_pattern(
        self,
        agent: str,
        project_id: str,
        task: str,
        strategy: str,
        error: str,
    ):
        """
        Create anti-pattern if we see repeated failures with same pattern.

        Looks at up to 10 outcomes for the agent and saves an
        anti-pattern when at least two of them are failures whose error
        message is similar to ``error``.

        NOTE(review): nothing here checks for an already-stored
        anti-pattern, so repeated failures appear to save a new record
        each time - confirm against the storage backend.
        """
        similar_failures = self.storage.get_outcomes(
            project_id=project_id,
            agent=agent,
            success_only=False,
            top_k=10,
        )

        similar = [
            o for o in similar_failures
            if not o.success and o.error_message and
            self._errors_similar(o.error_message, error)
        ]

        if len(similar) >= 2:  # At least 2 similar failures
            anti_pattern = AntiPattern(
                id=f"anti_{uuid.uuid4().hex[:12]}",
                agent=agent,
                project_id=project_id,
                pattern=strategy,
                why_bad=error,
                better_alternative="[To be determined from successful outcomes]",
                occurrence_count=len(similar),
                last_seen=datetime.now(timezone.utc),
            )
            self.storage.save_anti_pattern(anti_pattern)
            logger.info(f"Created anti-pattern for {agent}: {strategy[:50]}...")

    def add_preference(
        self,
        user_id: str,
        category: str,
        preference: str,
        source: str,
    ) -> UserPreference:
        """
        Add a user preference.

        Confidence is 1.0 when ``source`` is "explicit_instruction" and
        0.7 otherwise.

        Returns:
            The saved UserPreference
        """
        pref = UserPreference(
            id=f"pref_{uuid.uuid4().hex[:12]}",
            user_id=user_id,
            category=category,
            preference=preference,
            source=source,
            confidence=1.0 if source == "explicit_instruction" else 0.7,
            timestamp=datetime.now(timezone.utc),
        )
        self.storage.save_user_preference(pref)
        return pref

    def add_domain_knowledge(
        self,
        agent: str,
        project_id: str,
        domain: str,
        fact: str,
        source: str,
    ) -> DomainKnowledge:
        """
        Add domain knowledge.

        Confidence is 1.0 when ``source`` is "user_stated" and 0.8
        otherwise.

        Returns:
            The saved DomainKnowledge
        """
        knowledge = DomainKnowledge(
            id=f"dk_{uuid.uuid4().hex[:12]}",
            agent=agent,
            project_id=project_id,
            domain=domain,
            fact=fact,
            source=source,
            confidence=1.0 if source == "user_stated" else 0.8,
            last_verified=datetime.now(timezone.utc),
        )
        self.storage.save_domain_knowledge(knowledge)
        return knowledge

    def forget(
        self,
        project_id: str,
        agent: Optional[str] = None,
        older_than_days: int = 90,
        below_confidence: float = 0.3,
    ) -> int:
        """
        Prune stale and low-confidence memories.

        Args:
            project_id: Project to prune
            agent: Specific agent, or None to pass no agent filter
            older_than_days: Outcomes older than this many days are deleted
            below_confidence: Heuristics below this confidence are deleted

        Returns:
            Total number of items pruned
        """
        cutoff = datetime.now(timezone.utc) - timedelta(days=older_than_days)

        outcomes_deleted = self.storage.delete_outcomes_older_than(
            project_id=project_id,
            older_than=cutoff,
            agent=agent,
        )

        heuristics_deleted = self.storage.delete_low_confidence_heuristics(
            project_id=project_id,
            below_confidence=below_confidence,
            agent=agent,
        )

        total = outcomes_deleted + heuristics_deleted
        logger.info(
            f"Forgot {total} items: {outcomes_deleted} outcomes, "
            f"{heuristics_deleted} heuristics"
        )
        return total

    def _infer_task_type(self, task: str) -> str:
        """
        Infer task type from description.

        Matches keywords as case-insensitive substrings, in the order
        given by ``_TASK_TYPE_RULES``; returns "general" when nothing
        matches.
        """
        task_lower = task.lower()
        for keywords, inferred in self._TASK_TYPE_RULES:
            if any(keyword in task_lower for keyword in keywords):
                return inferred
        return "general"

    def _strategies_similar(self, s1: str, s2: str) -> bool:
        """
        Check if two strategies are similar enough to count together.

        Strategies match when they share at least half of the words of
        ``s1`` (capped at 3 shared words, and never fewer than 1). If
        either strategy contains no words, they match only when the
        stripped, lower-cased strings are equal.
        """
        # Simple word overlap check - could be improved with embeddings
        words1 = set(s1.lower().split())
        words2 = set(s2.lower().split())

        if not words1 or not words2:
            return s1.strip().lower() == s2.strip().lower()

        # Floor of 1: without it, a one-word strategy would need zero
        # shared words and therefore match every other strategy.
        required = max(1, min(3, len(words1) // 2))
        return len(words1 & words2) >= required

    def _errors_similar(self, e1: str, e2: str) -> bool:
        """
        Check if two errors are similar.

        Two errors are similar when, ignoring case and surrounding
        whitespace, one is a substring of the other. Empty errors never
        match anything.
        """
        e1_lower = e1.strip().lower()
        e2_lower = e2.strip().lower()
        if not e1_lower or not e2_lower:
            return False
        return e1_lower in e2_lower or e2_lower in e1_lower