alma-memory 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. alma/__init__.py +33 -1
  2. alma/core.py +124 -16
  3. alma/extraction/auto_learner.py +4 -3
  4. alma/graph/__init__.py +26 -1
  5. alma/graph/backends/__init__.py +14 -0
  6. alma/graph/backends/kuzu.py +624 -0
  7. alma/graph/backends/memgraph.py +432 -0
  8. alma/integration/claude_agents.py +22 -10
  9. alma/learning/protocols.py +3 -3
  10. alma/mcp/tools.py +9 -11
  11. alma/observability/__init__.py +84 -0
  12. alma/observability/config.py +302 -0
  13. alma/observability/logging.py +424 -0
  14. alma/observability/metrics.py +583 -0
  15. alma/observability/tracing.py +440 -0
  16. alma/retrieval/engine.py +65 -4
  17. alma/storage/__init__.py +29 -0
  18. alma/storage/azure_cosmos.py +343 -132
  19. alma/storage/base.py +58 -0
  20. alma/storage/constants.py +103 -0
  21. alma/storage/file_based.py +3 -8
  22. alma/storage/migrations/__init__.py +21 -0
  23. alma/storage/migrations/base.py +321 -0
  24. alma/storage/migrations/runner.py +323 -0
  25. alma/storage/migrations/version_stores.py +337 -0
  26. alma/storage/migrations/versions/__init__.py +11 -0
  27. alma/storage/migrations/versions/v1_0_0.py +373 -0
  28. alma/storage/postgresql.py +185 -78
  29. alma/storage/sqlite_local.py +149 -50
  30. alma/testing/__init__.py +46 -0
  31. alma/testing/factories.py +301 -0
  32. alma/testing/mocks.py +389 -0
  33. {alma_memory-0.5.0.dist-info → alma_memory-0.5.1.dist-info}/METADATA +42 -8
  34. {alma_memory-0.5.0.dist-info → alma_memory-0.5.1.dist-info}/RECORD +36 -19
  35. {alma_memory-0.5.0.dist-info → alma_memory-0.5.1.dist-info}/WHEEL +0 -0
  36. {alma_memory-0.5.0.dist-info → alma_memory-0.5.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,583 @@
1
+ """
2
+ ALMA Metrics Collection.
3
+
4
+ Provides metrics collection using OpenTelemetry with fallback
5
+ to in-memory collection when OTel is not available.
6
+
7
+ Metrics tracked:
8
+ - Memory operation latency (retrieve, learn, forget)
9
+ - Embedding generation time
10
+ - Cache hit/miss rates
11
+ - Storage backend query times
12
+ - Memory counts by type
13
+ """
14
+
15
+ import threading
16
+ import time
17
+ from dataclasses import dataclass, field
18
+ from typing import Any, Dict, List, Optional
19
+
20
# OpenTelemetry is an optional dependency. Record whether its metrics API
# could be imported so the collectors below can pick between OTel and the
# in-memory fallback.
try:
    from opentelemetry import metrics
except ImportError:
    _otel_available = False
else:
    _otel_available = True

# Process-wide ALMAMetrics singleton (created lazily) and the lock that
# guards reads and writes of it.
_metrics_lock = threading.Lock()
_metrics_instance: Optional["ALMAMetrics"] = None
32
+
33
+
34
@dataclass
class MetricValue:
    """A single metric sample bundled with its descriptive metadata."""

    # Name of the metric the sample belongs to.
    name: str
    # Numeric value of the sample.
    value: float
    # When the sample was taken (presumably seconds since the epoch;
    # confirm against the code that constructs these).
    timestamp: float
    # Label key/value pairs attached to the sample. Each instance gets its
    # own empty dict when none is supplied.
    labels: Dict[str, str] = field(default_factory=dict)
42
+
43
+
44
class InMemoryMetricsCollector:
    """
    In-memory metrics collection for when OpenTelemetry is not available.

    Counters, gauges and histogram samples are stored in dictionaries keyed
    by the metric name plus a canonical rendering of its labels (for example
    ``latency{op=read}``). Every access goes through a re-entrant lock.
    """

    # Keys of a histogram statistics dictionary, in reporting order.
    _STAT_KEYS = ("count", "sum", "min", "max", "avg", "p50", "p95", "p99")

    def __init__(self, max_samples: int = 10000):
        """
        Initialize in-memory collector.

        Args:
            max_samples: Maximum number of samples retained per histogram
                series; once exceeded, the oldest samples are discarded.
        """
        self._counters: Dict[str, float] = {}
        self._histograms: Dict[str, List[float]] = {}
        self._gauges: Dict[str, float] = {}
        self._lock = threading.RLock()
        self._max_samples = max_samples

    def increment_counter(
        self,
        name: str,
        value: float = 1.0,
        labels: Optional[Dict[str, str]] = None,
    ):
        """Add *value* to the counter identified by *name* and *labels*."""
        key = self._make_key(name, labels)
        with self._lock:
            self._counters[key] = self._counters.get(key, 0.0) + value

    def record_histogram(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None,
    ):
        """Append *value* to the histogram series, dropping the oldest samples on overflow."""
        key = self._make_key(name, labels)
        with self._lock:
            samples = self._histograms.setdefault(key, [])
            samples.append(value)
            # Trim by overflow count rather than with ``[-max_samples:]``:
            # a negative-zero slice would keep every sample when
            # max_samples is 0.
            overflow = len(samples) - self._max_samples
            if overflow > 0:
                del samples[:overflow]

    def set_gauge(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None,
    ):
        """Set the gauge identified by *name* and *labels* to *value*."""
        key = self._make_key(name, labels)
        with self._lock:
            self._gauges[key] = value

    def increment_gauge(
        self,
        name: str,
        value: float = 1.0,
        labels: Optional[Dict[str, str]] = None,
    ):
        """Add *value* (which may be negative) to a gauge, starting from 0.0."""
        key = self._make_key(name, labels)
        with self._lock:
            self._gauges[key] = self._gauges.get(key, 0.0) + value

    def _make_key(
        self,
        name: str,
        labels: Optional[Dict[str, str]] = None,
    ) -> str:
        """
        Create a unique key for the metric with labels.

        Returns *name* unchanged when there are no labels, otherwise
        ``name{k1=v1,k2=v2}`` with the labels sorted by key so that the
        ordering of the input dict does not matter.
        """
        if not labels:
            return name
        label_str = ",".join(f"{k}={v}" for k, v in sorted(labels.items()))
        return f"{name}{{{label_str}}}"

    @classmethod
    def _summarize(cls, values: List[float]) -> Dict[str, float]:
        """
        Compute summary statistics for one histogram series.

        Percentiles are the element at index ``int(count * q)`` of the sorted
        samples, clamped to the last index. An empty series yields zeros for
        every statistic.
        """
        if not values:
            return dict.fromkeys(cls._STAT_KEYS, 0)

        ordered = sorted(values)
        count = len(ordered)
        last = count - 1
        total = sum(ordered)
        return {
            "count": count,
            "sum": total,
            "min": ordered[0],
            "max": ordered[-1],
            "avg": total / count,
            "p50": ordered[min(int(count * 0.5), last)],
            "p95": ordered[min(int(count * 0.95), last)],
            "p99": ordered[min(int(count * 0.99), last)],
        }

    def get_counter(self, name: str, labels: Optional[Dict[str, str]] = None) -> float:
        """Return the counter value, or 0.0 if it was never incremented."""
        key = self._make_key(name, labels)
        with self._lock:
            return self._counters.get(key, 0.0)

    def get_histogram_stats(
        self,
        name: str,
        labels: Optional[Dict[str, str]] = None,
    ) -> Dict[str, float]:
        """
        Get histogram statistics.

        Returns a dict with ``count``, ``sum``, ``min``, ``max``, ``avg``,
        ``p50``, ``p95`` and ``p99`` for the series identified by *name* and
        *labels*; all values are 0 when the series has no samples.
        """
        key = self._make_key(name, labels)
        with self._lock:
            return self._summarize(self._histograms.get(key, []))

    def get_gauge(self, name: str, labels: Optional[Dict[str, str]] = None) -> float:
        """Return the gauge value, or 0.0 if it was never set."""
        key = self._make_key(name, labels)
        with self._lock:
            return self._gauges.get(key, 0.0)

    def get_all_metrics(self) -> Dict[str, Any]:
        """
        Get all metrics as a dictionary.

        Returns a snapshot with ``counters`` and ``gauges`` (copies of the
        raw values) and ``histograms`` (summary statistics), each keyed by
        the full series key including labels.
        """
        with self._lock:
            return {
                "counters": dict(self._counters),
                # Summarize each series from its own stored samples; looking
                # the series up again by bare name would lose the labels.
                "histograms": {
                    key: self._summarize(samples)
                    for key, samples in self._histograms.items()
                },
                "gauges": dict(self._gauges),
            }

    def reset(self):
        """Discard every counter, histogram sample and gauge."""
        with self._lock:
            self._counters.clear()
            self._histograms.clear()
            self._gauges.clear()
183
+
184
+
185
class MetricsCollector:
    """
    Unified metrics collector that uses OpenTelemetry when available,
    falling back to in-memory collection otherwise.

    Counters and histograms go to OpenTelemetry when it is active and to the
    in-memory collector otherwise. Gauges are always mirrored in the
    in-memory collector, and additionally reported to OpenTelemetry through
    an up-down counter when it is active.
    """

    def __init__(
        self,
        service_name: str = "alma-memory",
        use_otel: bool = True,
    ):
        """
        Initialize metrics collector.

        Args:
            service_name: Service name for metrics
            use_otel: Whether to use OpenTelemetry (if available)
        """
        self.service_name = service_name
        self._use_otel = use_otel and _otel_available
        self._fallback = InMemoryMetricsCollector()

        # OpenTelemetry instruments, created lazily and cached by metric name
        self._otel_counters: Dict[str, Any] = {}
        self._otel_histograms: Dict[str, Any] = {}
        self._otel_gauges: Dict[str, Any] = {}

        # Serializes gauge updates so the OTel up-down counter and the
        # in-memory mirror cannot drift apart under concurrent writers.
        self._gauge_lock = threading.Lock()

        if self._use_otel:
            self._meter = metrics.get_meter(service_name)

    def counter(
        self,
        name: str,
        value: float = 1.0,
        labels: Optional[Dict[str, str]] = None,
    ):
        """Increment a counter metric by *value*."""
        if self._use_otel:
            if name not in self._otel_counters:
                self._otel_counters[name] = self._meter.create_counter(
                    name=f"alma.{name}",
                    description=f"ALMA counter: {name}",
                )
            self._otel_counters[name].add(value, labels or {})
        else:
            self._fallback.increment_counter(name, value, labels)

    def histogram(
        self,
        name: str,
        value: float,
        unit: str = "ms",
        labels: Optional[Dict[str, str]] = None,
    ):
        """
        Record a histogram value (typically latency).

        *unit* is only used when the OpenTelemetry instrument is first
        created; the in-memory fallback ignores it.
        """
        if self._use_otel:
            if name not in self._otel_histograms:
                self._otel_histograms[name] = self._meter.create_histogram(
                    name=f"alma.{name}",
                    unit=unit,
                    description=f"ALMA histogram: {name}",
                )
            self._otel_histograms[name].record(value, labels or {})
        else:
            self._fallback.record_histogram(name, value, labels)

    def _gauge_instrument(self, name: str) -> Any:
        """Return the cached OTel up-down counter backing gauge *name*, creating it on first use."""
        instrument = self._otel_gauges.get(name)
        if instrument is None:
            instrument = self._meter.create_up_down_counter(
                name=f"alma.{name}",
                description=f"ALMA gauge: {name}",
            )
            self._otel_gauges[name] = instrument
        return instrument

    def gauge(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None,
    ):
        """
        Set a gauge to an absolute value.

        The value is always stored in the in-memory collector. With
        OpenTelemetry active it is also reported through an up-down counter:
        because that instrument only accepts increments, the difference
        between *value* and the last mirrored value is added.
        """
        with self._gauge_lock:
            if self._use_otel:
                instrument = self._gauge_instrument(name)
                # The in-memory mirror holds the last absolute value (0.0 if
                # never set), which matches the up-down counter's running sum.
                delta = value - self._fallback.get_gauge(name, labels)
                if delta:
                    instrument.add(delta, labels or {})
            # Always use fallback for gauges to support get operations
            self._fallback.set_gauge(name, value, labels)

    def gauge_increment(
        self,
        name: str,
        value: float = 1.0,
        labels: Optional[Dict[str, str]] = None,
    ):
        """Increment (or decrement if negative) a gauge."""
        with self._gauge_lock:
            if self._use_otel:
                self._gauge_instrument(name).add(value, labels or {})
            self._fallback.increment_gauge(name, value, labels)

    def get_stats(self) -> Dict[str, Any]:
        """
        Get all metrics as a dictionary (from fallback collector).

        When OpenTelemetry is active, counters and histograms are sent only
        to OpenTelemetry and therefore do not appear here; gauges always do.
        """
        return self._fallback.get_all_metrics()

    def timer(self, name: str, labels: Optional[Dict[str, str]] = None) -> "Timer":
        """Create a timer context manager that records its duration as histogram *name*."""
        return Timer(self, name, labels)
294
+
295
+
296
class Timer:
    """
    Context manager for timing operations.

    On exit the elapsed time in milliseconds is stored in ``duration_ms``
    and recorded on the collector as a histogram value, with a ``success``
    label of ``"true"`` or ``"false"`` depending on whether the block raised.
    """

    def __init__(
        self,
        collector: "MetricsCollector",
        name: str,
        labels: Optional[Dict[str, str]] = None,
    ):
        """
        Args:
            collector: Object whose ``histogram`` method receives the duration.
            name: Histogram metric name.
            labels: Optional labels; copied before ``success`` is added, so
                the caller's dict is never modified.
        """
        self._collector = collector
        self._name = name
        self._labels = labels
        self._start_time: Optional[float] = None
        self.duration_ms: float = 0

    def __enter__(self) -> "Timer":
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed or go negative when the system clock is adjusted.
        self._start_time = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
        if self._start_time is not None:
            self.duration_ms = (time.perf_counter() - self._start_time) * 1000
            labels = dict(self._labels or {})
            labels["success"] = "false" if exc_type else "true"
            self._collector.histogram(self._name, self.duration_ms, "ms", labels)
        # Never suppress an exception raised inside the timed block.
        return False
322
+
323
+
324
class ALMAMetrics:
    """
    Semantic, ALMA-specific facade over a metrics collector.

    Each ``record_*`` / ``set_*`` method turns one domain event into counter,
    histogram or gauge updates with a fixed metric name and label set.
    """

    def __init__(self, collector: Optional[MetricsCollector] = None):
        """Wrap *collector*; a default ``MetricsCollector`` is created when none is given."""
        self._collector = collector if collector else MetricsCollector()

    @property
    def collector(self) -> MetricsCollector:
        """The collector that receives every update."""
        return self._collector

    # ==================== Memory Operations ====================

    def record_retrieve_latency(
        self,
        duration_ms: float,
        agent: str,
        project_id: str,
        cache_hit: bool,
        items_returned: int,
    ):
        """Record one retrieval: its latency, one call, and the number of items returned."""
        sink = self._collector
        latency_labels = {
            "agent": agent,
            "project_id": project_id,
            "cache_hit": str(cache_hit).lower(),
        }
        sink.histogram("memory.retrieve.latency", duration_ms, "ms", latency_labels)
        sink.counter(
            "memory.retrieve.count", 1, {"agent": agent, "project_id": project_id}
        )
        sink.counter(
            "memory.retrieve.items",
            items_returned,
            {"agent": agent, "project_id": project_id},
        )

    def record_learn_operation(
        self,
        duration_ms: float,
        agent: str,
        project_id: str,
        memory_type: str,
        success: bool,
    ):
        """Record one learn operation: its latency and one call, labelled by outcome."""
        sink = self._collector
        latency_labels = {
            "agent": agent,
            "project_id": project_id,
            "memory_type": memory_type,
            "success": str(success).lower(),
        }
        sink.histogram("memory.learn.latency", duration_ms, "ms", latency_labels)
        # The call counter is not labelled with project_id.
        count_labels = {
            "agent": agent,
            "memory_type": memory_type,
            "success": str(success).lower(),
        }
        sink.counter("memory.learn.count", 1, count_labels)

    def record_forget_operation(
        self,
        duration_ms: float,
        agent: Optional[str],
        project_id: str,
        items_removed: int,
    ):
        """Record one forget (pruning) operation; a missing agent is labelled ``"all"``."""
        sink = self._collector
        sink.histogram(
            "memory.forget.latency", duration_ms, "ms", {"project_id": project_id}
        )
        removed_labels = {"project_id": project_id, "agent": agent or "all"}
        sink.counter("memory.forget.items", items_removed, removed_labels)

    # ==================== Embedding Operations ====================

    def record_embedding_latency(
        self,
        duration_ms: float,
        provider: str,
        batch_size: int = 1,
    ):
        """Record embedding latency and add *batch_size* to the embedding counter."""
        sink = self._collector
        latency_labels = {"provider": provider, "batch_size": str(batch_size)}
        sink.histogram("embedding.latency", duration_ms, "ms", latency_labels)
        sink.counter("embedding.count", batch_size, {"provider": provider})

    # ==================== Cache Operations ====================

    def record_cache_hit(self, cache_type: str = "retrieval"):
        """Count one cache hit for *cache_type*."""
        labels = {"cache_type": cache_type}
        self._collector.counter("cache.hit", 1, labels)

    def record_cache_miss(self, cache_type: str = "retrieval"):
        """Count one cache miss for *cache_type*."""
        labels = {"cache_type": cache_type}
        self._collector.counter("cache.miss", 1, labels)

    def record_cache_eviction(self, cache_type: str = "retrieval", count: int = 1):
        """Count *count* cache evictions for *cache_type*."""
        labels = {"cache_type": cache_type}
        self._collector.counter("cache.eviction", count, labels)

    def set_cache_size(self, size: int, cache_type: str = "retrieval"):
        """Set the cache-size gauge for *cache_type*."""
        labels = {"cache_type": cache_type}
        self._collector.gauge("cache.size", size, labels)

    # ==================== Storage Operations ====================

    def record_storage_query_latency(
        self,
        duration_ms: float,
        operation: str,
        backend: str,
        success: bool = True,
    ):
        """Record one storage query: its latency (labelled by outcome) and one call."""
        sink = self._collector
        latency_labels = {
            "operation": operation,
            "backend": backend,
            "success": str(success).lower(),
        }
        sink.histogram("storage.query.latency", duration_ms, "ms", latency_labels)
        sink.counter(
            "storage.query.count", 1, {"operation": operation, "backend": backend}
        )

    def record_storage_error(self, backend: str, operation: str, error_type: str):
        """Count one storage error."""
        labels = {"backend": backend, "operation": operation, "error_type": error_type}
        self._collector.counter("storage.error.count", 1, labels)

    # ==================== Memory Counts ====================

    def set_memory_count(
        self,
        count: int,
        memory_type: str,
        agent: Optional[str] = None,
        project_id: Optional[str] = None,
    ):
        """Set the memory-count gauge; agent and project labels are added only when truthy."""
        labels = {"memory_type": memory_type}
        optional_labels = (("agent", agent), ("project_id", project_id))
        for label_name, label_value in optional_labels:
            if label_value:
                labels[label_name] = label_value
        self._collector.gauge("memory.count", count, labels)

    # ==================== Session Operations ====================

    def record_session_start(self, agent: str, project_id: str):
        """Count one session start."""
        labels = {"agent": agent, "project_id": project_id}
        self._collector.counter("session.start", 1, labels)

    def record_session_end(
        self,
        agent: str,
        project_id: str,
        duration_ms: float,
        outcome: str,
    ):
        """Record a session's duration and count one session end."""
        sink = self._collector
        sink.histogram(
            "session.duration",
            duration_ms,
            "ms",
            {"agent": agent, "project_id": project_id, "outcome": outcome},
        )
        sink.counter(
            "session.end",
            1,
            {"agent": agent, "project_id": project_id, "outcome": outcome},
        )

    # ==================== Utility ====================

    def get_all_metrics(self) -> Dict[str, Any]:
        """Return the collector's statistics dictionary (its ``get_stats()`` result)."""
        stats = self._collector.get_stats()
        return stats

    def timer(self, name: str, labels: Optional[Dict[str, str]] = None) -> Timer:
        """Return the collector's timer context manager for *name*."""
        timing_context = self._collector.timer(name, labels)
        return timing_context
551
+
552
+
553
def get_meter(name: str = "alma") -> Optional[Any]:
    """
    Get an OpenTelemetry meter.

    Args:
        name: Name passed to ``opentelemetry.metrics.get_meter``.

    Returns:
        The OpenTelemetry meter, or ``None`` when OpenTelemetry could not be
        imported. No no-op meter is substituted, so callers must check for
        ``None`` before using the result.
    """
    if not _otel_available:
        return None
    return metrics.get_meter(name)
562
+
563
+
564
def get_metrics() -> ALMAMetrics:
    """
    Return the process-wide ALMAMetrics instance.

    The instance is built with default settings on first use; creation and
    lookup both happen while holding the module-level lock.
    """
    global _metrics_instance

    with _metrics_lock:
        instance = _metrics_instance
        if instance is None:
            instance = ALMAMetrics()
            _metrics_instance = instance
        return instance
576
+
577
+
578
def set_metrics(metrics_instance: ALMAMetrics):
    """Install *metrics_instance* as the process-wide ALMAMetrics instance, under the module lock."""
    global _metrics_instance

    _metrics_lock.acquire()
    try:
        _metrics_instance = metrics_instance
    finally:
        _metrics_lock.release()