alma-memory 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. alma/__init__.py +296 -226
  2. alma/compression/__init__.py +33 -0
  3. alma/compression/pipeline.py +980 -0
  4. alma/confidence/__init__.py +47 -47
  5. alma/confidence/engine.py +540 -540
  6. alma/confidence/types.py +351 -351
  7. alma/config/loader.py +157 -157
  8. alma/consolidation/__init__.py +23 -23
  9. alma/consolidation/engine.py +678 -678
  10. alma/consolidation/prompts.py +84 -84
  11. alma/core.py +1189 -430
  12. alma/domains/__init__.py +30 -30
  13. alma/domains/factory.py +359 -359
  14. alma/domains/schemas.py +448 -448
  15. alma/domains/types.py +272 -272
  16. alma/events/__init__.py +75 -75
  17. alma/events/emitter.py +285 -284
  18. alma/events/storage_mixin.py +246 -246
  19. alma/events/types.py +126 -126
  20. alma/events/webhook.py +425 -425
  21. alma/exceptions.py +49 -49
  22. alma/extraction/__init__.py +31 -31
  23. alma/extraction/auto_learner.py +265 -265
  24. alma/extraction/extractor.py +420 -420
  25. alma/graph/__init__.py +106 -106
  26. alma/graph/backends/__init__.py +32 -32
  27. alma/graph/backends/kuzu.py +624 -624
  28. alma/graph/backends/memgraph.py +432 -432
  29. alma/graph/backends/memory.py +236 -236
  30. alma/graph/backends/neo4j.py +417 -417
  31. alma/graph/base.py +159 -159
  32. alma/graph/extraction.py +198 -198
  33. alma/graph/store.py +860 -860
  34. alma/harness/__init__.py +35 -35
  35. alma/harness/base.py +386 -386
  36. alma/harness/domains.py +705 -705
  37. alma/initializer/__init__.py +37 -37
  38. alma/initializer/initializer.py +418 -418
  39. alma/initializer/types.py +250 -250
  40. alma/integration/__init__.py +62 -62
  41. alma/integration/claude_agents.py +444 -444
  42. alma/integration/helena.py +423 -423
  43. alma/integration/victor.py +471 -471
  44. alma/learning/__init__.py +101 -86
  45. alma/learning/decay.py +878 -0
  46. alma/learning/forgetting.py +1446 -1446
  47. alma/learning/heuristic_extractor.py +390 -390
  48. alma/learning/protocols.py +374 -374
  49. alma/learning/validation.py +346 -346
  50. alma/mcp/__init__.py +123 -45
  51. alma/mcp/__main__.py +156 -156
  52. alma/mcp/resources.py +122 -122
  53. alma/mcp/server.py +955 -591
  54. alma/mcp/tools.py +3254 -509
  55. alma/observability/__init__.py +91 -84
  56. alma/observability/config.py +302 -302
  57. alma/observability/guidelines.py +170 -0
  58. alma/observability/logging.py +424 -424
  59. alma/observability/metrics.py +583 -583
  60. alma/observability/tracing.py +440 -440
  61. alma/progress/__init__.py +21 -21
  62. alma/progress/tracker.py +607 -607
  63. alma/progress/types.py +250 -250
  64. alma/retrieval/__init__.py +134 -53
  65. alma/retrieval/budget.py +525 -0
  66. alma/retrieval/cache.py +1304 -1061
  67. alma/retrieval/embeddings.py +202 -202
  68. alma/retrieval/engine.py +850 -427
  69. alma/retrieval/modes.py +365 -0
  70. alma/retrieval/progressive.py +560 -0
  71. alma/retrieval/scoring.py +344 -344
  72. alma/retrieval/trust_scoring.py +637 -0
  73. alma/retrieval/verification.py +797 -0
  74. alma/session/__init__.py +19 -19
  75. alma/session/manager.py +442 -399
  76. alma/session/types.py +288 -288
  77. alma/storage/__init__.py +101 -90
  78. alma/storage/archive.py +233 -0
  79. alma/storage/azure_cosmos.py +1259 -1259
  80. alma/storage/base.py +1083 -583
  81. alma/storage/chroma.py +1443 -1443
  82. alma/storage/constants.py +103 -103
  83. alma/storage/file_based.py +614 -614
  84. alma/storage/migrations/__init__.py +21 -21
  85. alma/storage/migrations/base.py +321 -321
  86. alma/storage/migrations/runner.py +323 -323
  87. alma/storage/migrations/version_stores.py +337 -337
  88. alma/storage/migrations/versions/__init__.py +11 -11
  89. alma/storage/migrations/versions/v1_0_0.py +373 -373
  90. alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
  91. alma/storage/pinecone.py +1080 -1080
  92. alma/storage/postgresql.py +1948 -1559
  93. alma/storage/qdrant.py +1306 -1306
  94. alma/storage/sqlite_local.py +3041 -1457
  95. alma/testing/__init__.py +46 -46
  96. alma/testing/factories.py +301 -301
  97. alma/testing/mocks.py +389 -389
  98. alma/types.py +292 -264
  99. alma/utils/__init__.py +19 -0
  100. alma/utils/tokenizer.py +521 -0
  101. alma/workflow/__init__.py +83 -0
  102. alma/workflow/artifacts.py +170 -0
  103. alma/workflow/checkpoint.py +311 -0
  104. alma/workflow/context.py +228 -0
  105. alma/workflow/outcomes.py +189 -0
  106. alma/workflow/reducers.py +393 -0
  107. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/METADATA +210 -72
  108. alma_memory-0.7.0.dist-info/RECORD +112 -0
  109. alma_memory-0.5.1.dist-info/RECORD +0 -93
  110. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
  111. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
@@ -1,390 +1,390 @@
1
- """
2
- ALMA Heuristic Extraction.
3
-
4
- Analyzes outcomes to identify patterns and create heuristics.
5
- """
6
-
7
- import logging
8
- import uuid
9
- from collections import defaultdict
10
- from dataclasses import dataclass, field
11
- from datetime import datetime, timezone
12
- from typing import Any, Dict, List, Optional, Tuple
13
-
14
- from alma.storage.base import StorageBackend
15
- from alma.types import Heuristic, MemoryScope, Outcome
16
-
17
- logger = logging.getLogger(__name__)
18
-
19
-
20
- @dataclass
21
- class PatternCandidate:
22
- """A potential pattern for heuristic creation."""
23
-
24
- task_type: str
25
- strategy: str
26
- occurrence_count: int
27
- success_count: int
28
- failure_count: int
29
- outcomes: List[Outcome] = field(default_factory=list)
30
-
31
- @property
32
- def success_rate(self) -> float:
33
- """Calculate success rate."""
34
- if self.occurrence_count == 0:
35
- return 0.0
36
- return self.success_count / self.occurrence_count
37
-
38
- @property
39
- def confidence(self) -> float:
40
- """
41
- Calculate confidence based on success rate and sample size.
42
-
43
- Confidence is lower when sample size is small (uncertainty).
44
- """
45
- if self.occurrence_count == 0:
46
- return 0.0
47
-
48
- base_confidence = self.success_rate
49
-
50
- # Apply sample size penalty (Bayesian-inspired)
51
- # More samples = higher confidence, max confidence at 20+ samples
52
- sample_factor = min(self.occurrence_count / 20.0, 1.0)
53
-
54
- return base_confidence * (0.5 + 0.5 * sample_factor)
55
-
56
-
57
- @dataclass
58
- class ExtractionResult:
59
- """Result of heuristic extraction."""
60
-
61
- heuristics_created: int = 0
62
- heuristics_updated: int = 0
63
- patterns_analyzed: int = 0
64
- patterns_rejected: int = 0
65
- rejected_reasons: Dict[str, int] = field(default_factory=dict)
66
-
67
- def to_dict(self) -> Dict[str, Any]:
68
- """Convert to dictionary."""
69
- return {
70
- "heuristics_created": self.heuristics_created,
71
- "heuristics_updated": self.heuristics_updated,
72
- "patterns_analyzed": self.patterns_analyzed,
73
- "patterns_rejected": self.patterns_rejected,
74
- "rejected_reasons": self.rejected_reasons,
75
- }
76
-
77
-
78
- class HeuristicExtractor:
79
- """
80
- Extracts heuristics from outcome patterns.
81
-
82
- Analyzes historical outcomes to identify successful strategies
83
- and creates heuristics when patterns are validated.
84
- """
85
-
86
- def __init__(
87
- self,
88
- storage: StorageBackend,
89
- scopes: Dict[str, MemoryScope],
90
- min_occurrences: int = 3,
91
- min_confidence: float = 0.5,
92
- strategy_similarity_threshold: float = 0.5,
93
- ):
94
- """
95
- Initialize extractor.
96
-
97
- Args:
98
- storage: Storage backend
99
- scopes: Agent scope definitions
100
- min_occurrences: Minimum outcomes before creating heuristic
101
- min_confidence: Minimum confidence to create heuristic
102
- strategy_similarity_threshold: How similar strategies must be to group
103
- """
104
- self.storage = storage
105
- self.scopes = scopes
106
- self.min_occurrences = min_occurrences
107
- self.min_confidence = min_confidence
108
- self.similarity_threshold = strategy_similarity_threshold
109
-
110
- def extract(
111
- self,
112
- project_id: str,
113
- agent: Optional[str] = None,
114
- ) -> ExtractionResult:
115
- """
116
- Extract heuristics from all outcomes.
117
-
118
- Args:
119
- project_id: Project to analyze
120
- agent: Specific agent or None for all
121
-
122
- Returns:
123
- ExtractionResult with summary
124
- """
125
- result = ExtractionResult()
126
-
127
- # Get all outcomes
128
- outcomes = self.storage.get_outcomes(
129
- project_id=project_id,
130
- agent=agent,
131
- top_k=10000,
132
- success_only=False,
133
- )
134
-
135
- if not outcomes:
136
- logger.info("No outcomes to analyze")
137
- return result
138
-
139
- # Group outcomes by agent and task type
140
- grouped = self._group_outcomes(outcomes)
141
-
142
- for (ag, _task_type), type_outcomes in grouped.items():
143
- # Find patterns within this group
144
- patterns = self._identify_patterns(type_outcomes)
145
- result.patterns_analyzed += len(patterns)
146
-
147
- for pattern in patterns:
148
- created, reason = self._maybe_create_heuristic(
149
- agent=ag,
150
- project_id=project_id,
151
- pattern=pattern,
152
- )
153
-
154
- if created:
155
- result.heuristics_created += 1
156
- else:
157
- result.patterns_rejected += 1
158
- result.rejected_reasons[reason] = (
159
- result.rejected_reasons.get(reason, 0) + 1
160
- )
161
-
162
- logger.info(
163
- f"Extraction complete: {result.heuristics_created} heuristics created, "
164
- f"{result.patterns_rejected} patterns rejected"
165
- )
166
-
167
- return result
168
-
169
- def _group_outcomes(
170
- self,
171
- outcomes: List[Outcome],
172
- ) -> Dict[Tuple[str, str], List[Outcome]]:
173
- """Group outcomes by agent and task type."""
174
- grouped: Dict[Tuple[str, str], List[Outcome]] = defaultdict(list)
175
- for outcome in outcomes:
176
- key = (outcome.agent, outcome.task_type)
177
- grouped[key].append(outcome)
178
- return grouped
179
-
180
- def _identify_patterns(
181
- self,
182
- outcomes: List[Outcome],
183
- ) -> List[PatternCandidate]:
184
- """
185
- Identify patterns in outcomes by grouping similar strategies.
186
-
187
- Uses fuzzy matching to group strategies that are similar.
188
- """
189
- # Group by similar strategies
190
- strategy_groups: Dict[str, List[Outcome]] = defaultdict(list)
191
-
192
- for outcome in outcomes:
193
- # Find existing group or create new one
194
- matched = False
195
- for canonical in list(strategy_groups.keys()):
196
- if self._strategies_similar(outcome.strategy_used, canonical):
197
- strategy_groups[canonical].append(outcome)
198
- matched = True
199
- break
200
-
201
- if not matched:
202
- # Create new group
203
- strategy_groups[outcome.strategy_used].append(outcome)
204
-
205
- # Convert to PatternCandidates
206
- patterns = []
207
- for strategy, group_outcomes in strategy_groups.items():
208
- success_count = sum(1 for o in group_outcomes if o.success)
209
- patterns.append(
210
- PatternCandidate(
211
- task_type=group_outcomes[0].task_type,
212
- strategy=strategy,
213
- occurrence_count=len(group_outcomes),
214
- success_count=success_count,
215
- failure_count=len(group_outcomes) - success_count,
216
- outcomes=group_outcomes,
217
- )
218
- )
219
-
220
- return patterns
221
-
222
- def _maybe_create_heuristic(
223
- self,
224
- agent: str,
225
- project_id: str,
226
- pattern: PatternCandidate,
227
- ) -> Tuple[bool, str]:
228
- """
229
- Create a heuristic if the pattern meets criteria.
230
-
231
- Returns:
232
- Tuple of (created: bool, reason: str)
233
- """
234
- # Check minimum occurrences
235
- scope = self.scopes.get(agent)
236
- min_occ = self.min_occurrences
237
- if scope:
238
- min_occ = scope.min_occurrences_for_heuristic
239
-
240
- if pattern.occurrence_count < min_occ:
241
- return False, f"insufficient_occurrences_{pattern.occurrence_count}"
242
-
243
- # Check confidence
244
- if pattern.confidence < self.min_confidence:
245
- return False, f"low_confidence_{pattern.confidence:.2f}"
246
-
247
- # Check if heuristic already exists
248
- existing = self._find_existing_heuristic(
249
- agent=agent,
250
- project_id=project_id,
251
- task_type=pattern.task_type,
252
- strategy=pattern.strategy,
253
- )
254
-
255
- if existing:
256
- # Update existing heuristic
257
- self._update_heuristic(existing, pattern)
258
- return True, "updated"
259
-
260
- # Create new heuristic
261
- heuristic = Heuristic(
262
- id=f"heur_{uuid.uuid4().hex[:12]}",
263
- agent=agent,
264
- project_id=project_id,
265
- condition=f"task type: {pattern.task_type}",
266
- strategy=pattern.strategy,
267
- confidence=pattern.confidence,
268
- occurrence_count=pattern.occurrence_count,
269
- success_count=pattern.success_count,
270
- last_validated=datetime.now(timezone.utc),
271
- created_at=datetime.now(timezone.utc),
272
- )
273
-
274
- self.storage.save_heuristic(heuristic)
275
- logger.info(
276
- f"Created heuristic for {agent}: {pattern.strategy[:50]}... "
277
- f"(confidence: {pattern.confidence:.0%})"
278
- )
279
-
280
- return True, "created"
281
-
282
- def _find_existing_heuristic(
283
- self,
284
- agent: str,
285
- project_id: str,
286
- task_type: str,
287
- strategy: str,
288
- ) -> Optional[Heuristic]:
289
- """Find an existing heuristic that matches this pattern."""
290
- heuristics = self.storage.get_heuristics(
291
- project_id=project_id,
292
- agent=agent,
293
- top_k=100,
294
- min_confidence=0.0,
295
- )
296
-
297
- for h in heuristics:
298
- if task_type in h.condition and self._strategies_similar(
299
- h.strategy, strategy
300
- ):
301
- return h
302
-
303
- return None
304
-
305
- def _update_heuristic(
306
- self,
307
- heuristic: Heuristic,
308
- pattern: PatternCandidate,
309
- ):
310
- """Update an existing heuristic with new data."""
311
- # Merge counts
312
- heuristic.occurrence_count = max(
313
- heuristic.occurrence_count, pattern.occurrence_count
314
- )
315
- heuristic.success_count = max(heuristic.success_count, pattern.success_count)
316
-
317
- # Update confidence
318
- heuristic.confidence = pattern.confidence
319
- heuristic.last_validated = datetime.now(timezone.utc)
320
-
321
- self.storage.save_heuristic(heuristic)
322
- logger.debug(f"Updated heuristic {heuristic.id}")
323
-
324
- def _strategies_similar(self, s1: str, s2: str) -> bool:
325
- """
326
- Check if two strategies are similar enough to be grouped.
327
-
328
- Uses word overlap with normalization.
329
- """
330
- # Normalize strategies
331
- words1 = set(self._normalize_strategy(s1))
332
- words2 = set(self._normalize_strategy(s2))
333
-
334
- if not words1 or not words2:
335
- return s1.lower() == s2.lower()
336
-
337
- # Jaccard similarity
338
- intersection = len(words1 & words2)
339
- union = len(words1 | words2)
340
- similarity = intersection / union if union > 0 else 0
341
-
342
- return similarity >= self.similarity_threshold
343
-
344
- def _normalize_strategy(self, strategy: str) -> List[str]:
345
- """Normalize strategy text for comparison."""
346
- # Remove common stop words and normalize
347
- stop_words = {
348
- "the",
349
- "a",
350
- "an",
351
- "and",
352
- "or",
353
- "but",
354
- "in",
355
- "on",
356
- "at",
357
- "to",
358
- "for",
359
- "of",
360
- "with",
361
- "by",
362
- "then",
363
- "first",
364
- "next",
365
- }
366
-
367
- words = strategy.lower().replace(",", " ").replace(".", " ").split()
368
- return [w for w in words if w not in stop_words and len(w) > 2]
369
-
370
-
371
- def extract_heuristics_from_outcome(
372
- outcome: Outcome,
373
- existing_heuristics: List[Heuristic],
374
- min_confidence: float = 0.5,
375
- ) -> Optional[Dict[str, Any]]:
376
- """
377
- Convenience function to check if an outcome contributes to a heuristic.
378
-
379
- Returns update details if the outcome should update a heuristic.
380
- """
381
- for h in existing_heuristics:
382
- # Check if this outcome matches an existing heuristic
383
- if h.agent == outcome.agent and outcome.task_type in h.condition:
384
- return {
385
- "heuristic_id": h.id,
386
- "action": "validate" if outcome.success else "invalidate",
387
- "current_confidence": h.confidence,
388
- }
389
-
390
- return None
1
+ """
2
+ ALMA Heuristic Extraction.
3
+
4
+ Analyzes outcomes to identify patterns and create heuristics.
5
+ """
6
+
7
+ import logging
8
+ import uuid
9
+ from collections import defaultdict
10
+ from dataclasses import dataclass, field
11
+ from datetime import datetime, timezone
12
+ from typing import Any, Dict, List, Optional, Tuple
13
+
14
+ from alma.storage.base import StorageBackend
15
+ from alma.types import Heuristic, MemoryScope, Outcome
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@dataclass
class PatternCandidate:
    """A potential pattern for heuristic creation."""

    task_type: str
    strategy: str
    occurrence_count: int
    success_count: int
    failure_count: int
    outcomes: List[Outcome] = field(default_factory=list)

    @property
    def success_rate(self) -> float:
        """Fraction of the grouped outcomes that succeeded (0.0 when empty)."""
        total = self.occurrence_count
        return self.success_count / total if total else 0.0

    @property
    def confidence(self) -> float:
        """
        Success rate discounted by sample size.

        Small samples carry uncertainty, so confidence starts at half the
        success rate and grows linearly with sample count, reaching the
        full success rate once 20 or more outcomes have been observed.
        """
        total = self.occurrence_count
        if not total:
            return 0.0
        # Saturates at 1.0 for 20+ samples (Bayesian-inspired shrinkage).
        saturation = min(total, 20) / 20.0
        return self.success_rate * (0.5 + 0.5 * saturation)
55
+
56
+
57
@dataclass
class ExtractionResult:
    """Result of heuristic extraction."""

    heuristics_created: int = 0
    heuristics_updated: int = 0
    patterns_analyzed: int = 0
    patterns_rejected: int = 0
    rejected_reasons: Dict[str, int] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize all counters (and the rejection-reason tally) to a plain dict."""
        return {
            name: getattr(self, name)
            for name in (
                "heuristics_created",
                "heuristics_updated",
                "patterns_analyzed",
                "patterns_rejected",
                "rejected_reasons",
            )
        }
76
+
77
+
78
class HeuristicExtractor:
    """
    Extracts heuristics from outcome patterns.

    Analyzes historical outcomes to identify successful strategies and
    creates (or refreshes) heuristics when a pattern meets the occurrence
    and confidence thresholds.
    """

    def __init__(
        self,
        storage: StorageBackend,
        scopes: Dict[str, MemoryScope],
        min_occurrences: int = 3,
        min_confidence: float = 0.5,
        strategy_similarity_threshold: float = 0.5,
    ):
        """
        Initialize extractor.

        Args:
            storage: Storage backend used to read outcomes and persist heuristics.
            scopes: Agent scope definitions; a scope may override min_occurrences.
            min_occurrences: Minimum outcomes before creating a heuristic.
            min_confidence: Minimum confidence to create a heuristic.
            strategy_similarity_threshold: Jaccard word-overlap threshold for
                grouping two strategy strings together.
        """
        self.storage = storage
        self.scopes = scopes
        self.min_occurrences = min_occurrences
        self.min_confidence = min_confidence
        self.similarity_threshold = strategy_similarity_threshold

    def extract(
        self,
        project_id: str,
        agent: Optional[str] = None,
    ) -> ExtractionResult:
        """
        Extract heuristics from all stored outcomes.

        Args:
            project_id: Project to analyze
            agent: Specific agent or None for all

        Returns:
            ExtractionResult with created/updated/rejected counts
        """
        result = ExtractionResult()

        # Pull the full outcome history (capped at 10k rows), failures included.
        outcomes = self.storage.get_outcomes(
            project_id=project_id,
            agent=agent,
            top_k=10000,
            success_only=False,
        )

        if not outcomes:
            logger.info("No outcomes to analyze")
            return result

        # Group by (agent, task_type) so patterns never mix agents.
        grouped = self._group_outcomes(outcomes)

        for (ag, _task_type), type_outcomes in grouped.items():
            patterns = self._identify_patterns(type_outcomes)
            result.patterns_analyzed += len(patterns)

            for pattern in patterns:
                created, reason = self._maybe_create_heuristic(
                    agent=ag,
                    project_id=project_id,
                    pattern=pattern,
                )

                if created:
                    # BUG FIX: refreshes of an existing heuristic were
                    # previously counted as "created", so heuristics_updated
                    # always stayed 0. Count them separately now.
                    if reason == "updated":
                        result.heuristics_updated += 1
                    else:
                        result.heuristics_created += 1
                else:
                    result.patterns_rejected += 1
                    result.rejected_reasons[reason] = (
                        result.rejected_reasons.get(reason, 0) + 1
                    )

        logger.info(
            f"Extraction complete: {result.heuristics_created} heuristics created, "
            f"{result.patterns_rejected} patterns rejected"
        )

        return result

    def _group_outcomes(
        self,
        outcomes: List[Outcome],
    ) -> Dict[Tuple[str, str], List[Outcome]]:
        """Group outcomes by (agent, task_type), preserving input order."""
        grouped: Dict[Tuple[str, str], List[Outcome]] = defaultdict(list)
        for outcome in outcomes:
            grouped[(outcome.agent, outcome.task_type)].append(outcome)
        return grouped

    def _identify_patterns(
        self,
        outcomes: List[Outcome],
    ) -> List[PatternCandidate]:
        """
        Identify patterns by grouping outcomes with similar strategies.

        The first strategy seen for a group becomes its canonical key; later
        outcomes join the first existing group they fuzzily match.
        """
        strategy_groups: Dict[str, List[Outcome]] = defaultdict(list)

        for outcome in outcomes:
            matched = False
            for canonical in list(strategy_groups.keys()):
                if self._strategies_similar(outcome.strategy_used, canonical):
                    strategy_groups[canonical].append(outcome)
                    matched = True
                    break

            if not matched:
                # No similar group found; this strategy starts a new one.
                strategy_groups[outcome.strategy_used].append(outcome)

        patterns = []
        for strategy, group_outcomes in strategy_groups.items():
            success_count = sum(1 for o in group_outcomes if o.success)
            patterns.append(
                PatternCandidate(
                    task_type=group_outcomes[0].task_type,
                    strategy=strategy,
                    occurrence_count=len(group_outcomes),
                    success_count=success_count,
                    failure_count=len(group_outcomes) - success_count,
                    outcomes=group_outcomes,
                )
            )

        return patterns

    def _maybe_create_heuristic(
        self,
        agent: str,
        project_id: str,
        pattern: PatternCandidate,
    ) -> Tuple[bool, str]:
        """
        Create or refresh a heuristic if the pattern meets the criteria.

        Returns:
            (True, "created") for a brand-new heuristic,
            (True, "updated") when an existing heuristic was refreshed,
            (False, reason) when the pattern was rejected.
        """
        # A per-agent scope may override the global occurrence threshold.
        scope = self.scopes.get(agent)
        min_occ = (
            scope.min_occurrences_for_heuristic if scope else self.min_occurrences
        )

        if pattern.occurrence_count < min_occ:
            return False, f"insufficient_occurrences_{pattern.occurrence_count}"

        if pattern.confidence < self.min_confidence:
            return False, f"low_confidence_{pattern.confidence:.2f}"

        existing = self._find_existing_heuristic(
            agent=agent,
            project_id=project_id,
            task_type=pattern.task_type,
            strategy=pattern.strategy,
        )
        if existing:
            self._update_heuristic(existing, pattern)
            return True, "updated"

        # Single timestamp so created_at == last_validated for new heuristics.
        now = datetime.now(timezone.utc)
        heuristic = Heuristic(
            id=f"heur_{uuid.uuid4().hex[:12]}",
            agent=agent,
            project_id=project_id,
            condition=f"task type: {pattern.task_type}",
            strategy=pattern.strategy,
            confidence=pattern.confidence,
            occurrence_count=pattern.occurrence_count,
            success_count=pattern.success_count,
            last_validated=now,
            created_at=now,
        )

        self.storage.save_heuristic(heuristic)
        logger.info(
            f"Created heuristic for {agent}: {pattern.strategy[:50]}... "
            f"(confidence: {pattern.confidence:.0%})"
        )

        return True, "created"

    def _find_existing_heuristic(
        self,
        agent: str,
        project_id: str,
        task_type: str,
        strategy: str,
    ) -> Optional[Heuristic]:
        """Return a stored heuristic matching this task type and strategy, if any."""
        heuristics = self.storage.get_heuristics(
            project_id=project_id,
            agent=agent,
            top_k=100,
            min_confidence=0.0,
        )

        for h in heuristics:
            # Conditions are written as "task type: <type>", so a substring
            # test against the stored condition is sufficient.
            if task_type in h.condition and self._strategies_similar(
                h.strategy, strategy
            ):
                return h

        return None

    def _update_heuristic(
        self,
        heuristic: Heuristic,
        pattern: PatternCandidate,
    ):
        """Refresh an existing heuristic with the latest pattern statistics."""
        # max() merge: the pattern may be computed from a subset of history,
        # so never let counts go backwards.
        heuristic.occurrence_count = max(
            heuristic.occurrence_count, pattern.occurrence_count
        )
        heuristic.success_count = max(heuristic.success_count, pattern.success_count)

        heuristic.confidence = pattern.confidence
        heuristic.last_validated = datetime.now(timezone.utc)

        self.storage.save_heuristic(heuristic)
        logger.debug(f"Updated heuristic {heuristic.id}")

    def _strategies_similar(self, s1: str, s2: str) -> bool:
        """
        Check whether two strategies are similar enough to be grouped.

        Uses Jaccard similarity over normalized word sets; falls back to a
        case-insensitive exact match when either side normalizes to nothing.
        """
        words1 = set(self._normalize_strategy(s1))
        words2 = set(self._normalize_strategy(s2))

        if not words1 or not words2:
            return s1.lower() == s2.lower()

        union = len(words1 | words2)
        similarity = len(words1 & words2) / union if union > 0 else 0

        return similarity >= self.similarity_threshold

    def _normalize_strategy(self, strategy: str) -> List[str]:
        """Lowercase, strip commas/periods and stop words, drop words of length <= 2."""
        stop_words = {
            "the",
            "a",
            "an",
            "and",
            "or",
            "but",
            "in",
            "on",
            "at",
            "to",
            "for",
            "of",
            "with",
            "by",
            "then",
            "first",
            "next",
        }

        words = strategy.lower().replace(",", " ").replace(".", " ").split()
        return [w for w in words if w not in stop_words and len(w) > 2]
369
+
370
+
371
def extract_heuristics_from_outcome(
    outcome: Outcome,
    existing_heuristics: List[Heuristic],
    min_confidence: float = 0.5,
) -> Optional[Dict[str, Any]]:
    """
    Check whether an outcome should validate or invalidate a known heuristic.

    Scans the given heuristics for one whose agent matches the outcome and
    whose condition mentions the outcome's task type, and returns the update
    details for the first match, or None when nothing matches.

    NOTE(review): min_confidence is currently unused by this function; it is
    kept for interface compatibility.
    """
    matching = next(
        (
            h
            for h in existing_heuristics
            if h.agent == outcome.agent and outcome.task_type in h.condition
        ),
        None,
    )
    if matching is None:
        return None

    return {
        "heuristic_id": matching.id,
        "action": "validate" if outcome.success else "invalidate",
        "current_confidence": matching.confidence,
    }