alma-memory 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alma/__init__.py +99 -29
- alma/confidence/__init__.py +47 -0
- alma/confidence/engine.py +540 -0
- alma/confidence/types.py +351 -0
- alma/config/loader.py +3 -2
- alma/consolidation/__init__.py +23 -0
- alma/consolidation/engine.py +678 -0
- alma/consolidation/prompts.py +84 -0
- alma/core.py +15 -15
- alma/domains/__init__.py +6 -6
- alma/domains/factory.py +12 -9
- alma/domains/schemas.py +17 -3
- alma/domains/types.py +8 -4
- alma/events/__init__.py +75 -0
- alma/events/emitter.py +284 -0
- alma/events/storage_mixin.py +246 -0
- alma/events/types.py +126 -0
- alma/events/webhook.py +425 -0
- alma/exceptions.py +49 -0
- alma/extraction/__init__.py +31 -0
- alma/extraction/auto_learner.py +264 -0
- alma/extraction/extractor.py +420 -0
- alma/graph/__init__.py +81 -0
- alma/graph/backends/__init__.py +18 -0
- alma/graph/backends/memory.py +236 -0
- alma/graph/backends/neo4j.py +417 -0
- alma/graph/base.py +159 -0
- alma/graph/extraction.py +198 -0
- alma/graph/store.py +860 -0
- alma/harness/__init__.py +4 -4
- alma/harness/base.py +18 -9
- alma/harness/domains.py +27 -11
- alma/initializer/__init__.py +37 -0
- alma/initializer/initializer.py +418 -0
- alma/initializer/types.py +250 -0
- alma/integration/__init__.py +9 -9
- alma/integration/claude_agents.py +10 -10
- alma/integration/helena.py +32 -22
- alma/integration/victor.py +57 -33
- alma/learning/__init__.py +27 -27
- alma/learning/forgetting.py +198 -148
- alma/learning/heuristic_extractor.py +40 -24
- alma/learning/protocols.py +62 -14
- alma/learning/validation.py +7 -2
- alma/mcp/__init__.py +4 -4
- alma/mcp/__main__.py +2 -1
- alma/mcp/resources.py +17 -16
- alma/mcp/server.py +102 -44
- alma/mcp/tools.py +174 -37
- alma/progress/__init__.py +3 -3
- alma/progress/tracker.py +26 -20
- alma/progress/types.py +8 -12
- alma/py.typed +0 -0
- alma/retrieval/__init__.py +11 -11
- alma/retrieval/cache.py +20 -21
- alma/retrieval/embeddings.py +4 -4
- alma/retrieval/engine.py +114 -35
- alma/retrieval/scoring.py +73 -63
- alma/session/__init__.py +2 -2
- alma/session/manager.py +5 -5
- alma/session/types.py +5 -4
- alma/storage/__init__.py +41 -0
- alma/storage/azure_cosmos.py +107 -31
- alma/storage/base.py +157 -4
- alma/storage/chroma.py +1443 -0
- alma/storage/file_based.py +56 -20
- alma/storage/pinecone.py +1080 -0
- alma/storage/postgresql.py +1452 -0
- alma/storage/qdrant.py +1306 -0
- alma/storage/sqlite_local.py +376 -31
- alma/types.py +62 -14
- alma_memory-0.5.0.dist-info/METADATA +905 -0
- alma_memory-0.5.0.dist-info/RECORD +76 -0
- {alma_memory-0.3.0.dist-info → alma_memory-0.5.0.dist-info}/WHEEL +1 -1
- alma_memory-0.3.0.dist-info/METADATA +0 -438
- alma_memory-0.3.0.dist-info/RECORD +0 -46
- {alma_memory-0.3.0.dist-info → alma_memory-0.5.0.dist-info}/top_level.txt +0 -0
alma/learning/__init__.py
CHANGED
|
@@ -4,46 +4,46 @@ ALMA Learning Protocols.
|
|
|
4
4
|
Provides learning, validation, forgetting, and heuristic extraction.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
from alma.learning.protocols import LearningProtocol
|
|
8
|
-
from alma.learning.validation import (
|
|
9
|
-
ScopeValidator,
|
|
10
|
-
ValidationResult,
|
|
11
|
-
ValidationReport,
|
|
12
|
-
TaskTypeValidator,
|
|
13
|
-
validate_learning_request,
|
|
14
|
-
)
|
|
15
7
|
from alma.learning.forgetting import (
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
8
|
+
CleanupJob,
|
|
9
|
+
CleanupResult,
|
|
10
|
+
# Cleanup Scheduling
|
|
11
|
+
CleanupScheduler,
|
|
12
|
+
# Confidence Decay
|
|
13
|
+
ConfidenceDecayer,
|
|
22
14
|
# Decay Functions
|
|
23
15
|
DecayFunction,
|
|
16
|
+
DecayResult,
|
|
24
17
|
ExponentialDecay,
|
|
18
|
+
# Forgetting Engine
|
|
19
|
+
ForgettingEngine,
|
|
20
|
+
HealthAlert,
|
|
21
|
+
HealthThresholds,
|
|
25
22
|
LinearDecay,
|
|
26
|
-
|
|
27
|
-
NoDecay,
|
|
28
|
-
# Confidence Decay
|
|
29
|
-
ConfidenceDecayer,
|
|
30
|
-
DecayResult,
|
|
23
|
+
MemoryHealthMetrics,
|
|
31
24
|
# Memory Health Monitoring
|
|
32
25
|
MemoryHealthMonitor,
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
CleanupResult,
|
|
26
|
+
NoDecay,
|
|
27
|
+
PrunePolicy,
|
|
28
|
+
PruneReason,
|
|
29
|
+
PruneResult,
|
|
30
|
+
PruneSummary,
|
|
31
|
+
StepDecay,
|
|
40
32
|
)
|
|
41
33
|
from alma.learning.heuristic_extractor import (
|
|
34
|
+
ExtractionResult,
|
|
42
35
|
HeuristicExtractor,
|
|
43
36
|
PatternCandidate,
|
|
44
|
-
ExtractionResult,
|
|
45
37
|
extract_heuristics_from_outcome,
|
|
46
38
|
)
|
|
39
|
+
from alma.learning.protocols import LearningProtocol
|
|
40
|
+
from alma.learning.validation import (
|
|
41
|
+
ScopeValidator,
|
|
42
|
+
TaskTypeValidator,
|
|
43
|
+
ValidationReport,
|
|
44
|
+
ValidationResult,
|
|
45
|
+
validate_learning_request,
|
|
46
|
+
)
|
|
47
47
|
|
|
48
48
|
__all__ = [
|
|
49
49
|
# Core Protocol
|
alma/learning/forgetting.py
CHANGED
|
@@ -11,16 +11,14 @@ Features:
|
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
13
|
import logging
|
|
14
|
-
import time
|
|
15
14
|
import threading
|
|
16
|
-
import
|
|
17
|
-
from
|
|
18
|
-
from typing import Optional, List, Dict, Any, Callable
|
|
15
|
+
import time
|
|
16
|
+
from abc import ABC, abstractmethod
|
|
19
17
|
from dataclasses import dataclass, field
|
|
18
|
+
from datetime import datetime, timedelta, timezone
|
|
20
19
|
from enum import Enum
|
|
21
|
-
from
|
|
20
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
22
21
|
|
|
23
|
-
from alma.types import Heuristic, Outcome, DomainKnowledge, AntiPattern
|
|
24
22
|
from alma.storage.base import StorageBackend
|
|
25
23
|
|
|
26
24
|
logger = logging.getLogger(__name__)
|
|
@@ -28,17 +26,19 @@ logger = logging.getLogger(__name__)
|
|
|
28
26
|
|
|
29
27
|
class PruneReason(Enum):
|
|
30
28
|
"""Reason for pruning a memory item."""
|
|
31
|
-
|
|
29
|
+
|
|
30
|
+
STALE = "stale" # Too old without validation
|
|
32
31
|
LOW_CONFIDENCE = "low_confidence" # Below confidence threshold
|
|
33
|
-
LOW_SUCCESS_RATE = "low_success"
|
|
34
|
-
SUPERSEDED = "superseded"
|
|
35
|
-
DUPLICATE = "duplicate"
|
|
36
|
-
QUOTA_EXCEEDED = "quota"
|
|
32
|
+
LOW_SUCCESS_RATE = "low_success" # Too many failures
|
|
33
|
+
SUPERSEDED = "superseded" # Replaced by better heuristic
|
|
34
|
+
DUPLICATE = "duplicate" # Duplicate of another item
|
|
35
|
+
QUOTA_EXCEEDED = "quota" # Agent memory quota exceeded
|
|
37
36
|
|
|
38
37
|
|
|
39
38
|
@dataclass
|
|
40
39
|
class PruneResult:
|
|
41
40
|
"""Result of a prune operation."""
|
|
41
|
+
|
|
42
42
|
reason: PruneReason
|
|
43
43
|
item_type: str
|
|
44
44
|
item_id: str
|
|
@@ -50,6 +50,7 @@ class PruneResult:
|
|
|
50
50
|
@dataclass
|
|
51
51
|
class PruneSummary:
|
|
52
52
|
"""Summary of a complete prune operation."""
|
|
53
|
+
|
|
53
54
|
outcomes_pruned: int = 0
|
|
54
55
|
heuristics_pruned: int = 0
|
|
55
56
|
knowledge_pruned: int = 0
|
|
@@ -77,6 +78,7 @@ class PrunePolicy:
|
|
|
77
78
|
|
|
78
79
|
Defines thresholds and quotas for different memory types.
|
|
79
80
|
"""
|
|
81
|
+
|
|
80
82
|
# Age-based pruning
|
|
81
83
|
outcome_max_age_days: int = 90
|
|
82
84
|
knowledge_max_age_days: int = 180
|
|
@@ -146,6 +148,7 @@ class ForgettingEngine:
|
|
|
146
148
|
PruneSummary with details
|
|
147
149
|
"""
|
|
148
150
|
import time
|
|
151
|
+
|
|
149
152
|
start_time = time.time()
|
|
150
153
|
|
|
151
154
|
summary = PruneSummary()
|
|
@@ -175,10 +178,10 @@ class ForgettingEngine:
|
|
|
175
178
|
summary.outcomes_pruned += quota_pruned.get("outcomes", 0)
|
|
176
179
|
|
|
177
180
|
summary.total_pruned = (
|
|
178
|
-
summary.outcomes_pruned
|
|
179
|
-
summary.heuristics_pruned
|
|
180
|
-
summary.knowledge_pruned
|
|
181
|
-
summary.anti_patterns_pruned
|
|
181
|
+
summary.outcomes_pruned
|
|
182
|
+
+ summary.heuristics_pruned
|
|
183
|
+
+ summary.knowledge_pruned
|
|
184
|
+
+ summary.anti_patterns_pruned
|
|
182
185
|
)
|
|
183
186
|
|
|
184
187
|
summary.execution_time_ms = int((time.time() - start_time) * 1000)
|
|
@@ -214,14 +217,16 @@ class ForgettingEngine:
|
|
|
214
217
|
count = sum(1 for o in outcomes if o.timestamp < cutoff)
|
|
215
218
|
for o in outcomes:
|
|
216
219
|
if o.timestamp < cutoff:
|
|
217
|
-
results.append(
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
220
|
+
results.append(
|
|
221
|
+
PruneResult(
|
|
222
|
+
reason=PruneReason.STALE,
|
|
223
|
+
item_type="outcome",
|
|
224
|
+
item_id=o.id,
|
|
225
|
+
agent=o.agent,
|
|
226
|
+
project_id=project_id,
|
|
227
|
+
details=f"Older than {self.policy.outcome_max_age_days} days",
|
|
228
|
+
)
|
|
229
|
+
)
|
|
225
230
|
return count
|
|
226
231
|
else:
|
|
227
232
|
return self.storage.delete_outcomes_older_than(
|
|
@@ -259,8 +264,10 @@ class ForgettingEngine:
|
|
|
259
264
|
details = f"Confidence {h.confidence:.2f} < {self.policy.heuristic_min_confidence}"
|
|
260
265
|
|
|
261
266
|
# Check success rate (only if enough occurrences)
|
|
262
|
-
elif (
|
|
263
|
-
|
|
267
|
+
elif (
|
|
268
|
+
h.occurrence_count >= self.policy.min_occurrences_before_prune
|
|
269
|
+
and h.success_rate < self.policy.heuristic_min_success_rate
|
|
270
|
+
):
|
|
264
271
|
reason = PruneReason.LOW_SUCCESS_RATE
|
|
265
272
|
details = f"Success rate {h.success_rate:.2f} < {self.policy.heuristic_min_success_rate}"
|
|
266
273
|
|
|
@@ -271,14 +278,16 @@ class ForgettingEngine:
|
|
|
271
278
|
|
|
272
279
|
if reason:
|
|
273
280
|
to_delete.append(h)
|
|
274
|
-
results.append(
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
281
|
+
results.append(
|
|
282
|
+
PruneResult(
|
|
283
|
+
reason=reason,
|
|
284
|
+
item_type="heuristic",
|
|
285
|
+
item_id=h.id,
|
|
286
|
+
agent=h.agent,
|
|
287
|
+
project_id=project_id,
|
|
288
|
+
details=details,
|
|
289
|
+
)
|
|
290
|
+
)
|
|
282
291
|
|
|
283
292
|
if not dry_run:
|
|
284
293
|
for h in to_delete:
|
|
@@ -326,14 +335,16 @@ class ForgettingEngine:
|
|
|
326
335
|
|
|
327
336
|
if reason:
|
|
328
337
|
to_delete.append(dk)
|
|
329
|
-
results.append(
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
338
|
+
results.append(
|
|
339
|
+
PruneResult(
|
|
340
|
+
reason=reason,
|
|
341
|
+
item_type="domain_knowledge",
|
|
342
|
+
item_id=dk.id,
|
|
343
|
+
agent=dk.agent,
|
|
344
|
+
project_id=project_id,
|
|
345
|
+
details=details,
|
|
346
|
+
)
|
|
347
|
+
)
|
|
337
348
|
|
|
338
349
|
if not dry_run:
|
|
339
350
|
for dk in to_delete:
|
|
@@ -362,14 +373,16 @@ class ForgettingEngine:
|
|
|
362
373
|
for ap in anti_patterns:
|
|
363
374
|
if ap.last_seen < age_cutoff:
|
|
364
375
|
to_delete.append(ap)
|
|
365
|
-
results.append(
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
376
|
+
results.append(
|
|
377
|
+
PruneResult(
|
|
378
|
+
reason=PruneReason.STALE,
|
|
379
|
+
item_type="anti_pattern",
|
|
380
|
+
item_id=ap.id,
|
|
381
|
+
agent=ap.agent,
|
|
382
|
+
project_id=project_id,
|
|
383
|
+
details=f"Not seen since {ap.last_seen.date()}",
|
|
384
|
+
)
|
|
385
|
+
)
|
|
373
386
|
|
|
374
387
|
if not dry_run:
|
|
375
388
|
for ap in to_delete:
|
|
@@ -409,14 +422,16 @@ class ForgettingEngine:
|
|
|
409
422
|
to_remove = len(heuristics) - self.policy.max_heuristics_per_agent
|
|
410
423
|
|
|
411
424
|
for h in sorted_h[:to_remove]:
|
|
412
|
-
results.append(
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
425
|
+
results.append(
|
|
426
|
+
PruneResult(
|
|
427
|
+
reason=PruneReason.QUOTA_EXCEEDED,
|
|
428
|
+
item_type="heuristic",
|
|
429
|
+
item_id=h.id,
|
|
430
|
+
agent=ag,
|
|
431
|
+
project_id=project_id,
|
|
432
|
+
details=f"Exceeded quota of {self.policy.max_heuristics_per_agent}",
|
|
433
|
+
)
|
|
434
|
+
)
|
|
420
435
|
if not dry_run:
|
|
421
436
|
self.storage.delete_heuristic(h.id)
|
|
422
437
|
pruned["heuristics"] += 1
|
|
@@ -435,14 +450,16 @@ class ForgettingEngine:
|
|
|
435
450
|
to_remove = len(outcomes) - self.policy.max_outcomes_per_agent
|
|
436
451
|
|
|
437
452
|
for o in sorted_o[:to_remove]:
|
|
438
|
-
results.append(
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
453
|
+
results.append(
|
|
454
|
+
PruneResult(
|
|
455
|
+
reason=PruneReason.QUOTA_EXCEEDED,
|
|
456
|
+
item_type="outcome",
|
|
457
|
+
item_id=o.id,
|
|
458
|
+
agent=ag,
|
|
459
|
+
project_id=project_id,
|
|
460
|
+
details=f"Exceeded quota of {self.policy.max_outcomes_per_agent}",
|
|
461
|
+
)
|
|
462
|
+
)
|
|
446
463
|
if not dry_run:
|
|
447
464
|
self.storage.delete_outcome(o.id)
|
|
448
465
|
pruned["outcomes"] += 1
|
|
@@ -476,10 +493,10 @@ class ForgettingEngine:
|
|
|
476
493
|
|
|
477
494
|
# Weighted combination
|
|
478
495
|
return (
|
|
479
|
-
0.3 * age_score
|
|
480
|
-
0.3 * confidence
|
|
481
|
-
0.2 * success_rate
|
|
482
|
-
0.2 * occurrence_score
|
|
496
|
+
0.3 * age_score
|
|
497
|
+
+ 0.3 * confidence
|
|
498
|
+
+ 0.2 * success_rate
|
|
499
|
+
+ 0.2 * occurrence_score
|
|
483
500
|
)
|
|
484
501
|
|
|
485
502
|
def identify_candidates(
|
|
@@ -520,15 +537,17 @@ class ForgettingEngine:
|
|
|
520
537
|
success_rate=h.success_rate,
|
|
521
538
|
occurrence_count=h.occurrence_count,
|
|
522
539
|
)
|
|
523
|
-
candidates.append(
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
540
|
+
candidates.append(
|
|
541
|
+
{
|
|
542
|
+
"type": "heuristic",
|
|
543
|
+
"id": h.id,
|
|
544
|
+
"agent": h.agent,
|
|
545
|
+
"score": score,
|
|
546
|
+
"age_days": int(age_days),
|
|
547
|
+
"confidence": h.confidence,
|
|
548
|
+
"summary": h.strategy[:50],
|
|
549
|
+
}
|
|
550
|
+
)
|
|
532
551
|
|
|
533
552
|
# Sort by score (lowest first = best candidates for pruning)
|
|
534
553
|
candidates.sort(key=lambda x: x["score"])
|
|
@@ -609,7 +628,9 @@ class LinearDecay(DecayFunction):
|
|
|
609
628
|
|
|
610
629
|
def compute_decay(self, days_since_validation: float) -> float:
|
|
611
630
|
"""Compute linear decay multiplier."""
|
|
612
|
-
decay = 1.0 - (days_since_validation / self.decay_period_days) * (
|
|
631
|
+
decay = 1.0 - (days_since_validation / self.decay_period_days) * (
|
|
632
|
+
1.0 - self.min_value
|
|
633
|
+
)
|
|
613
634
|
return max(self.min_value, decay)
|
|
614
635
|
|
|
615
636
|
def get_name(self) -> str:
|
|
@@ -673,6 +694,7 @@ class NoDecay(DecayFunction):
|
|
|
673
694
|
@dataclass
|
|
674
695
|
class DecayResult:
|
|
675
696
|
"""Result of applying confidence decay."""
|
|
697
|
+
|
|
676
698
|
items_processed: int = 0
|
|
677
699
|
items_updated: int = 0
|
|
678
700
|
items_pruned: int = 0
|
|
@@ -743,7 +765,7 @@ class ConfidenceDecayer:
|
|
|
743
765
|
decay_multiplier = self.decay_function.compute_decay(days_since)
|
|
744
766
|
|
|
745
767
|
new_confidence = h.confidence * decay_multiplier
|
|
746
|
-
total_decay +=
|
|
768
|
+
total_decay += 1.0 - decay_multiplier
|
|
747
769
|
|
|
748
770
|
if new_confidence != h.confidence:
|
|
749
771
|
if new_confidence < self.prune_below_confidence:
|
|
@@ -770,7 +792,7 @@ class ConfidenceDecayer:
|
|
|
770
792
|
decay_multiplier = self.decay_function.compute_decay(days_since)
|
|
771
793
|
|
|
772
794
|
new_confidence = dk.confidence * decay_multiplier
|
|
773
|
-
total_decay +=
|
|
795
|
+
total_decay += 1.0 - decay_multiplier
|
|
774
796
|
|
|
775
797
|
if new_confidence != dk.confidence:
|
|
776
798
|
if new_confidence < self.prune_below_confidence:
|
|
@@ -802,6 +824,7 @@ class ConfidenceDecayer:
|
|
|
802
824
|
@dataclass
|
|
803
825
|
class MemoryHealthMetrics:
|
|
804
826
|
"""Metrics about memory health and growth."""
|
|
827
|
+
|
|
805
828
|
total_items: int = 0
|
|
806
829
|
heuristic_count: int = 0
|
|
807
830
|
outcome_count: int = 0
|
|
@@ -836,6 +859,7 @@ class MemoryHealthMetrics:
|
|
|
836
859
|
@dataclass
|
|
837
860
|
class HealthAlert:
|
|
838
861
|
"""An alert about memory health issues."""
|
|
862
|
+
|
|
839
863
|
level: str # "warning", "critical"
|
|
840
864
|
category: str
|
|
841
865
|
message: str
|
|
@@ -847,6 +871,7 @@ class HealthAlert:
|
|
|
847
871
|
@dataclass
|
|
848
872
|
class HealthThresholds:
|
|
849
873
|
"""Thresholds for health monitoring alerts."""
|
|
874
|
+
|
|
850
875
|
# Warning thresholds
|
|
851
876
|
max_total_items_warning: int = 5000
|
|
852
877
|
max_stale_percentage_warning: float = 0.3
|
|
@@ -963,10 +988,10 @@ class MemoryHealthMonitor:
|
|
|
963
988
|
metrics.anti_pattern_count = len(anti_patterns)
|
|
964
989
|
|
|
965
990
|
metrics.total_items = (
|
|
966
|
-
metrics.heuristic_count
|
|
967
|
-
metrics.outcome_count
|
|
968
|
-
metrics.knowledge_count
|
|
969
|
-
metrics.anti_pattern_count
|
|
991
|
+
metrics.heuristic_count
|
|
992
|
+
+ metrics.outcome_count
|
|
993
|
+
+ metrics.knowledge_count
|
|
994
|
+
+ metrics.anti_pattern_count
|
|
970
995
|
)
|
|
971
996
|
|
|
972
997
|
# Get agent count
|
|
@@ -980,7 +1005,7 @@ class MemoryHealthMonitor:
|
|
|
980
1005
|
# Store in history
|
|
981
1006
|
self._metrics_history.append(metrics)
|
|
982
1007
|
if len(self._metrics_history) > self._max_history:
|
|
983
|
-
self._metrics_history = self._metrics_history[-self._max_history:]
|
|
1008
|
+
self._metrics_history = self._metrics_history[-self._max_history :]
|
|
984
1009
|
|
|
985
1010
|
return metrics
|
|
986
1011
|
|
|
@@ -1000,70 +1025,84 @@ class MemoryHealthMonitor:
|
|
|
1000
1025
|
|
|
1001
1026
|
# Check total items
|
|
1002
1027
|
if metrics.total_items >= t.max_total_items_critical:
|
|
1003
|
-
alerts.append(
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1028
|
+
alerts.append(
|
|
1029
|
+
HealthAlert(
|
|
1030
|
+
level="critical",
|
|
1031
|
+
category="total_items",
|
|
1032
|
+
message="Memory item count critically high",
|
|
1033
|
+
current_value=metrics.total_items,
|
|
1034
|
+
threshold=t.max_total_items_critical,
|
|
1035
|
+
)
|
|
1036
|
+
)
|
|
1010
1037
|
elif metrics.total_items >= t.max_total_items_warning:
|
|
1011
|
-
alerts.append(
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1038
|
+
alerts.append(
|
|
1039
|
+
HealthAlert(
|
|
1040
|
+
level="warning",
|
|
1041
|
+
category="total_items",
|
|
1042
|
+
message="Memory item count approaching limit",
|
|
1043
|
+
current_value=metrics.total_items,
|
|
1044
|
+
threshold=t.max_total_items_warning,
|
|
1045
|
+
)
|
|
1046
|
+
)
|
|
1018
1047
|
|
|
1019
1048
|
# Check staleness
|
|
1020
1049
|
if metrics.heuristic_count > 0:
|
|
1021
1050
|
stale_percentage = metrics.stale_heuristic_count / metrics.heuristic_count
|
|
1022
1051
|
if stale_percentage >= t.max_stale_percentage_critical:
|
|
1023
|
-
alerts.append(
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1052
|
+
alerts.append(
|
|
1053
|
+
HealthAlert(
|
|
1054
|
+
level="critical",
|
|
1055
|
+
category="staleness",
|
|
1056
|
+
message="Too many stale heuristics",
|
|
1057
|
+
current_value=f"{stale_percentage:.0%}",
|
|
1058
|
+
threshold=f"{t.max_stale_percentage_critical:.0%}",
|
|
1059
|
+
)
|
|
1060
|
+
)
|
|
1030
1061
|
elif stale_percentage >= t.max_stale_percentage_warning:
|
|
1031
|
-
alerts.append(
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1062
|
+
alerts.append(
|
|
1063
|
+
HealthAlert(
|
|
1064
|
+
level="warning",
|
|
1065
|
+
category="staleness",
|
|
1066
|
+
message="Many heuristics are stale",
|
|
1067
|
+
current_value=f"{stale_percentage:.0%}",
|
|
1068
|
+
threshold=f"{t.max_stale_percentage_warning:.0%}",
|
|
1069
|
+
)
|
|
1070
|
+
)
|
|
1038
1071
|
|
|
1039
1072
|
# Check average confidence
|
|
1040
1073
|
if metrics.heuristic_count > 0:
|
|
1041
1074
|
if metrics.avg_heuristic_confidence < t.min_avg_confidence_critical:
|
|
1042
|
-
alerts.append(
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1075
|
+
alerts.append(
|
|
1076
|
+
HealthAlert(
|
|
1077
|
+
level="critical",
|
|
1078
|
+
category="confidence",
|
|
1079
|
+
message="Average heuristic confidence critically low",
|
|
1080
|
+
current_value=f"{metrics.avg_heuristic_confidence:.2f}",
|
|
1081
|
+
threshold=f"{t.min_avg_confidence_critical:.2f}",
|
|
1082
|
+
)
|
|
1083
|
+
)
|
|
1049
1084
|
elif metrics.avg_heuristic_confidence < t.min_avg_confidence_warning:
|
|
1050
|
-
alerts.append(
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1085
|
+
alerts.append(
|
|
1086
|
+
HealthAlert(
|
|
1087
|
+
level="warning",
|
|
1088
|
+
category="confidence",
|
|
1089
|
+
message="Average heuristic confidence is low",
|
|
1090
|
+
current_value=f"{metrics.avg_heuristic_confidence:.2f}",
|
|
1091
|
+
threshold=f"{t.min_avg_confidence_warning:.2f}",
|
|
1092
|
+
)
|
|
1093
|
+
)
|
|
1057
1094
|
|
|
1058
1095
|
# Check storage size
|
|
1059
1096
|
if metrics.storage_bytes >= t.max_storage_bytes_critical:
|
|
1060
|
-
alerts.append(
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1097
|
+
alerts.append(
|
|
1098
|
+
HealthAlert(
|
|
1099
|
+
level="critical",
|
|
1100
|
+
category="storage",
|
|
1101
|
+
message="Memory storage size critically high",
|
|
1102
|
+
current_value=f"{metrics.storage_bytes / (1024 * 1024):.1f}MB",
|
|
1103
|
+
threshold=f"{t.max_storage_bytes_critical / (1024 * 1024):.1f}MB",
|
|
1104
|
+
)
|
|
1105
|
+
)
|
|
1067
1106
|
|
|
1068
1107
|
# Notify handlers
|
|
1069
1108
|
for alert in alerts:
|
|
@@ -1122,6 +1161,7 @@ class MemoryHealthMonitor:
|
|
|
1122
1161
|
@dataclass
|
|
1123
1162
|
class CleanupJob:
|
|
1124
1163
|
"""Configuration for a scheduled cleanup job."""
|
|
1164
|
+
|
|
1125
1165
|
name: str
|
|
1126
1166
|
project_id: str
|
|
1127
1167
|
interval_hours: float
|
|
@@ -1136,6 +1176,7 @@ class CleanupJob:
|
|
|
1136
1176
|
@dataclass
|
|
1137
1177
|
class CleanupResult:
|
|
1138
1178
|
"""Result of a cleanup job execution."""
|
|
1179
|
+
|
|
1139
1180
|
job_name: str
|
|
1140
1181
|
project_id: str
|
|
1141
1182
|
started_at: datetime
|
|
@@ -1198,7 +1239,9 @@ class CleanupScheduler:
|
|
|
1198
1239
|
now = datetime.now(timezone.utc)
|
|
1199
1240
|
job.next_run = now + timedelta(hours=job.interval_hours)
|
|
1200
1241
|
self._jobs[job.name] = job
|
|
1201
|
-
logger.info(
|
|
1242
|
+
logger.info(
|
|
1243
|
+
f"Registered cleanup job '{job.name}' for project {job.project_id}"
|
|
1244
|
+
)
|
|
1202
1245
|
|
|
1203
1246
|
def unregister_job(self, name: str) -> bool:
|
|
1204
1247
|
"""
|
|
@@ -1247,7 +1290,8 @@ class CleanupScheduler:
|
|
|
1247
1290
|
|
|
1248
1291
|
with self._lock:
|
|
1249
1292
|
due_jobs = [
|
|
1250
|
-
job
|
|
1293
|
+
job
|
|
1294
|
+
for job in self._jobs.values()
|
|
1251
1295
|
if job.enabled and job.next_run and job.next_run <= now
|
|
1252
1296
|
]
|
|
1253
1297
|
|
|
@@ -1257,14 +1301,16 @@ class CleanupScheduler:
|
|
|
1257
1301
|
results.append(result)
|
|
1258
1302
|
except Exception as e:
|
|
1259
1303
|
logger.error(f"Error running job '{job.name}': {e}")
|
|
1260
|
-
results.append(
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1304
|
+
results.append(
|
|
1305
|
+
CleanupResult(
|
|
1306
|
+
job_name=job.name,
|
|
1307
|
+
project_id=job.project_id,
|
|
1308
|
+
started_at=now,
|
|
1309
|
+
completed_at=datetime.now(timezone.utc),
|
|
1310
|
+
success=False,
|
|
1311
|
+
error=str(e),
|
|
1312
|
+
)
|
|
1313
|
+
)
|
|
1268
1314
|
|
|
1269
1315
|
return results
|
|
1270
1316
|
|
|
@@ -1320,7 +1366,7 @@ class CleanupScheduler:
|
|
|
1320
1366
|
with self._lock:
|
|
1321
1367
|
self._history.append(result)
|
|
1322
1368
|
if len(self._history) > self._max_history:
|
|
1323
|
-
self._history = self._history[-self._max_history:]
|
|
1369
|
+
self._history = self._history[-self._max_history :]
|
|
1324
1370
|
|
|
1325
1371
|
return result
|
|
1326
1372
|
|
|
@@ -1387,8 +1433,12 @@ class CleanupScheduler:
|
|
|
1387
1433
|
(r.completed_at - r.started_at).total_seconds() * 1000
|
|
1388
1434
|
),
|
|
1389
1435
|
"success": r.success,
|
|
1390
|
-
"items_pruned":
|
|
1391
|
-
|
|
1436
|
+
"items_pruned": (
|
|
1437
|
+
r.prune_summary.total_pruned if r.prune_summary else 0
|
|
1438
|
+
),
|
|
1439
|
+
"items_decayed": (
|
|
1440
|
+
r.decay_result.items_updated if r.decay_result else 0
|
|
1441
|
+
),
|
|
1392
1442
|
"alerts": len(r.alerts),
|
|
1393
1443
|
"error": r.error,
|
|
1394
1444
|
}
|