alma-memory 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. alma/__init__.py +33 -1
  2. alma/core.py +124 -16
  3. alma/extraction/auto_learner.py +4 -3
  4. alma/graph/__init__.py +26 -1
  5. alma/graph/backends/__init__.py +14 -0
  6. alma/graph/backends/kuzu.py +624 -0
  7. alma/graph/backends/memgraph.py +432 -0
  8. alma/integration/claude_agents.py +22 -10
  9. alma/learning/protocols.py +3 -3
  10. alma/mcp/tools.py +9 -11
  11. alma/observability/__init__.py +84 -0
  12. alma/observability/config.py +302 -0
  13. alma/observability/logging.py +424 -0
  14. alma/observability/metrics.py +583 -0
  15. alma/observability/tracing.py +440 -0
  16. alma/retrieval/engine.py +65 -4
  17. alma/storage/__init__.py +29 -0
  18. alma/storage/azure_cosmos.py +343 -132
  19. alma/storage/base.py +58 -0
  20. alma/storage/constants.py +103 -0
  21. alma/storage/file_based.py +3 -8
  22. alma/storage/migrations/__init__.py +21 -0
  23. alma/storage/migrations/base.py +321 -0
  24. alma/storage/migrations/runner.py +323 -0
  25. alma/storage/migrations/version_stores.py +337 -0
  26. alma/storage/migrations/versions/__init__.py +11 -0
  27. alma/storage/migrations/versions/v1_0_0.py +373 -0
  28. alma/storage/postgresql.py +185 -78
  29. alma/storage/sqlite_local.py +149 -50
  30. alma/testing/__init__.py +46 -0
  31. alma/testing/factories.py +301 -0
  32. alma/testing/mocks.py +389 -0
  33. {alma_memory-0.5.0.dist-info → alma_memory-0.5.1.dist-info}/METADATA +42 -8
  34. {alma_memory-0.5.0.dist-info → alma_memory-0.5.1.dist-info}/RECORD +36 -19
  35. {alma_memory-0.5.0.dist-info → alma_memory-0.5.1.dist-info}/WHEEL +0 -0
  36. {alma_memory-0.5.0.dist-info → alma_memory-0.5.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,440 @@
1
+ """
2
+ ALMA Distributed Tracing.
3
+
4
+ Provides distributed tracing using OpenTelemetry with fallback
5
+ to logging when OTel is not available.
6
+ """
7
+
8
+ import functools
9
+ import logging
10
+ from contextlib import contextmanager
11
+ from enum import Enum
12
+ from typing import Any, Callable, Dict, Optional, TypeVar, Union
13
+
14
+ # Try to import OpenTelemetry
15
+ _otel_available = False
16
+ _NoOpSpan = None
17
+ _NoOpTracer = None
18
+
19
+ try:
20
+ from opentelemetry import trace
21
+ from opentelemetry.trace import SpanKind as OTelSpanKind
22
+ from opentelemetry.trace import Status, StatusCode
23
+
24
+ _otel_available = True
25
+ except ImportError:
26
+ pass
27
+
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+ # Type variable for decorated functions
32
+ F = TypeVar("F", bound=Callable[..., Any])
33
+
34
+
35
class SpanKind(Enum):
    """Role a span plays in a trace.

    The member names match the ones ``_map_span_kind`` looks up on the
    OpenTelemetry ``SpanKind`` enum, so a value can be translated whenever
    OpenTelemetry is installed.
    """

    # Default kind used by every helper in this module.
    INTERNAL = "internal"

    # Remaining kinds, mirrored by name from OpenTelemetry.
    SERVER = "server"
    CLIENT = "client"
    PRODUCER = "producer"
    CONSUMER = "consumer"
43
+
44
+
45
class NoOpSpan:
    """Fallback span used when OpenTelemetry is not available.

    Attributes are kept in a plain dict on the instance. Events and
    exceptions are forwarded to the ``alma.trace.<name>`` logger. Status
    changes and ``end()`` do nothing.
    """

    def __init__(self, name: str, attributes: Optional[Dict[str, Any]] = None):
        """
        Create a span.

        Args:
            name: Span name; also used as the suffix of the logger name
            attributes: Initial attributes (copied, never mutated)
        """
        self.name = name
        # Copy the mapping so set_attribute()/set_attributes() cannot leak
        # changes back into a dict the caller still owns.
        self.attributes: Dict[str, Any] = dict(attributes) if attributes else {}
        self._logger = logging.getLogger(f"alma.trace.{name}")

    def set_attribute(self, key: str, value: Any) -> None:
        """Set a span attribute."""
        self.attributes[key] = value

    def set_attributes(self, attributes: Dict[str, Any]) -> None:
        """Set multiple span attributes."""
        self.attributes.update(attributes)

    def add_event(
        self, name: str, attributes: Optional[Dict[str, Any]] = None
    ) -> None:
        """Log an event at DEBUG level; its attributes travel in ``extra``."""
        self._logger.debug(f"Event: {name}", extra={"event_attributes": attributes})

    def set_status(self, status: Any, description: Optional[str] = None) -> None:
        """Accept and ignore a status (nothing is exported)."""

    def is_recording(self) -> bool:
        """Return False: nothing recorded on this span is exported.

        Provided so callers that guard expensive attribute computation with
        ``span.is_recording()`` work the same with or without OpenTelemetry.
        """
        return False

    def record_exception(
        self, exception: BaseException, attributes: Optional[Dict[str, Any]] = None
    ) -> None:
        """
        Log an exception at ERROR level together with its traceback.

        Args:
            exception: The exception to record
            attributes: Accepted for API compatibility; not used
        """
        # Pass the exception explicitly. ``exc_info=True`` would log whatever
        # exception is currently being handled, which is nothing at all when
        # this method is called outside an ``except`` block.
        self._logger.error(
            f"Exception in span {self.name}: {exception}",
            exc_info=(
                type(exception),
                exception,
                getattr(exception, "__traceback__", None),
            ),
        )

    def end(self, end_time: Optional[int] = None) -> None:
        """End the span (no-op)."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is not None:
            self.record_exception(exc_val)
        # Never suppress the exception.
        return False
86
+
87
+
88
class NoOpTracer:
    """Fallback tracer used when OpenTelemetry is not available.

    Every span it hands out is a ``NoOpSpan``. Both methods also accept the
    keyword arguments of the OpenTelemetry ``Tracer`` API, so call sites
    written against that API keep working without OpenTelemetry installed.
    """

    def __init__(self, name: str):
        self.name = name

    def start_span(
        self,
        name: str,
        context: Optional[Any] = None,
        kind: SpanKind = SpanKind.INTERNAL,
        attributes: Optional[Dict[str, Any]] = None,
        start_time: Optional[int] = None,
        *,
        links: Optional[Any] = None,
        record_exception: bool = True,
        set_status_on_exception: bool = True,
    ) -> NoOpSpan:
        """
        Start a new span.

        Args:
            name: Span name
            context: Accepted for API compatibility; not used
            kind: Accepted for API compatibility; not used
            attributes: Initial span attributes
            start_time: Accepted for API compatibility; not used
            links: Accepted for API compatibility; not used
            record_exception: Accepted for API compatibility; not used
            set_status_on_exception: Accepted for API compatibility; not used

        Returns:
            A new NoOpSpan
        """
        return NoOpSpan(name, attributes)

    @contextmanager
    def start_as_current_span(
        self,
        name: str,
        context: Optional[Any] = None,
        kind: SpanKind = SpanKind.INTERNAL,
        attributes: Optional[Dict[str, Any]] = None,
        start_time: Optional[int] = None,
        *,
        links: Optional[Any] = None,
        record_exception: bool = True,
        set_status_on_exception: bool = True,
        end_on_exit: bool = True,
    ):
        """
        Context manager that yields a new NoOpSpan.

        Args:
            name: Span name
            context: Accepted for API compatibility; not used
            kind: Accepted for API compatibility; not used
            attributes: Initial span attributes
            start_time: Accepted for API compatibility; not used
            links: Accepted for API compatibility; not used
            record_exception: Whether an exception escaping the block is
                passed to ``span.record_exception`` before it is re-raised
            set_status_on_exception: Accepted for API compatibility; not used
            end_on_exit: Whether ``span.end()`` is called on exit

        Yields:
            The created span
        """
        span = NoOpSpan(name, attributes)
        try:
            yield span
        except Exception as e:
            if record_exception:
                span.record_exception(e)
            raise
        finally:
            if end_on_exit:
                span.end()
123
+
124
+
125
class TracingContext:
    """
    Single entry point for creating spans.

    Callers use the same two methods whether or not OpenTelemetry is
    installed; the tracer behind them is obtained from ``get_tracer``.
    """

    def __init__(self, tracer_name: str = "alma"):
        """
        Initialize tracing context.

        Args:
            tracer_name: Name passed to ``get_tracer`` on first use
        """
        self._tracer = None
        self.tracer_name = tracer_name

    @property
    def tracer(self):
        """Return the tracer, creating and caching it on first access."""
        cached = self._tracer
        if cached is None:
            cached = self._tracer = get_tracer(self.tracer_name)
        return cached

    @contextmanager
    def span(
        self,
        name: str,
        kind: SpanKind = SpanKind.INTERNAL,
        attributes: Optional[Dict[str, Any]] = None,
    ):
        """
        Open a span as the current span for the duration of a ``with`` block.

        Args:
            name: Span name
            kind: Span kind
            attributes: Initial span attributes

        Yields:
            The created span
        """
        # Only an OpenTelemetry tracer needs the kind translated; the
        # fallback tracer takes this module's SpanKind as-is.
        effective_kind = _map_span_kind(kind) if _otel_available else kind
        with self.tracer.start_as_current_span(
            name, kind=effective_kind, attributes=attributes
        ) as active_span:
            yield active_span

    def create_span(
        self,
        name: str,
        kind: SpanKind = SpanKind.INTERNAL,
        attributes: Optional[Dict[str, Any]] = None,
    ):
        """
        Start a span without making it the current span.

        Args:
            name: Span name
            kind: Span kind
            attributes: Initial span attributes

        Returns:
            The created span
        """
        effective_kind = _map_span_kind(kind) if _otel_available else kind
        return self.tracer.start_span(
            name, kind=effective_kind, attributes=attributes
        )
214
+
215
+
216
def _map_span_kind(kind: SpanKind):
    """
    Translate this module's SpanKind into the OpenTelemetry SpanKind.

    Args:
        kind: Span kind to translate

    Returns:
        The matching OpenTelemetry kind, or OpenTelemetry's INTERNAL kind
        for a value that is not in the table. When OpenTelemetry is not
        available, ``kind`` is returned unchanged.
    """
    if _otel_available:
        otel_kinds = {
            SpanKind.INTERNAL: OTelSpanKind.INTERNAL,
            SpanKind.SERVER: OTelSpanKind.SERVER,
            SpanKind.CLIENT: OTelSpanKind.CLIENT,
            SpanKind.PRODUCER: OTelSpanKind.PRODUCER,
            SpanKind.CONSUMER: OTelSpanKind.CONSUMER,
        }
        return otel_kinds.get(kind, OTelSpanKind.INTERNAL)

    # Nothing to translate to without OpenTelemetry.
    return kind
229
+
230
+
231
def get_tracer(name: str = "alma") -> Union["NoOpTracer", Any]:
    """
    Return a tracer for ``name``.

    Args:
        name: Tracer name (typically module name)

    Returns:
        The OpenTelemetry tracer from ``trace.get_tracer(name)`` when
        OpenTelemetry is available, otherwise a new ``NoOpTracer``
    """
    return trace.get_tracer(name) if _otel_available else NoOpTracer(name)
247
+
248
+
249
def get_current_span():
    """
    Return the current span.

    Returns:
        The result of ``trace.get_current_span()`` when OpenTelemetry is
        available, otherwise a new ``NoOpSpan`` named "current"
    """
    if not _otel_available:
        return NoOpSpan("current")
    return trace.get_current_span()
259
+
260
+
261
def _call_attributes(func, args, kwargs, record_args: bool) -> Dict[str, Any]:
    """Build the initial span attributes for one call of a traced function."""
    attributes: Dict[str, Any] = {
        "code.function": func.__name__,
        "code.namespace": func.__module__,
    }
    if not record_args:
        return attributes

    # Callables without a code object expose no positional parameter names;
    # only their keyword arguments can be recorded.
    code = getattr(func, "__code__", None)
    arg_names = code.co_varnames[: code.co_argcount] if code is not None else ()

    # Record positional args (skip 'self'/'cls' for methods). zip() stops at
    # the shorter sequence, so extra *args without a name are ignored.
    start_idx = 1 if arg_names and arg_names[0] in ("self", "cls") else 0
    for arg_name, arg in zip(arg_names[start_idx:], args[start_idx:]):
        arg_val = _safe_attribute_value(arg)
        if arg_val is not None:
            attributes[f"arg.{arg_name}"] = arg_val

    # Record keyword args
    for key, value in kwargs.items():
        arg_val = _safe_attribute_value(value)
        if arg_val is not None:
            attributes[f"arg.{key}"] = arg_val

    return attributes


def _start_traced_span(tracer, span_name: str, kind: SpanKind, attributes):
    """Return the context manager that opens the span for a traced call."""
    if _otel_available:
        # The wrappers record the exception and set the status themselves.
        # Turn off OpenTelemetry's own on-exit handling so each failure is
        # recorded once, not twice.
        return tracer.start_as_current_span(
            span_name,
            kind=_map_span_kind(kind),
            attributes=attributes,
            record_exception=False,
            set_status_on_exception=False,
        )
    return tracer.start_as_current_span(
        span_name,
        kind=kind,
        attributes=attributes,
    )


def _mark_span_failed(span, exc: Exception) -> None:
    """Record ``exc`` on an OpenTelemetry span and set its status to ERROR."""
    # The fallback tracer's context manager records the exception itself, so
    # there is nothing to do here without OpenTelemetry.
    if _otel_available:
        span.record_exception(exc)
        span.set_status(Status(StatusCode.ERROR, str(exc)))


def _set_result_attribute(span, result: Any) -> None:
    """Store the return value on the span as the ``result`` attribute."""
    result_val = _safe_attribute_value(result)
    if result_val is not None:
        span.set_attribute("result", result_val)


def trace_method(
    name: Optional[str] = None,
    kind: SpanKind = SpanKind.INTERNAL,
    record_args: bool = True,
    record_result: bool = False,
) -> Callable[[F], F]:
    """
    Decorator to trace a synchronous method.

    Args:
        name: Span name (defaults to the function's qualified name)
        kind: Span kind
        record_args: Whether to record function arguments as attributes
        record_result: Whether to record the return value (only applied
            when OpenTelemetry is available)

    Usage:
        @trace_method(name="my_operation")
        def my_function(arg1, arg2):
            return result
    """

    def decorator(func: F) -> F:
        span_name = name or func.__qualname__

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            tracer = get_tracer(func.__module__)
            attributes = _call_attributes(func, args, kwargs, record_args)

            with _start_traced_span(tracer, span_name, kind, attributes) as span:
                try:
                    result = func(*args, **kwargs)
                    # The fallback span is discarded on exit, so the result
                    # is only worth recording on a real span.
                    if record_result and _otel_available:
                        _set_result_attribute(span, result)
                    return result
                except Exception as e:
                    _mark_span_failed(span, e)
                    raise

        return wrapper  # type: ignore

    return decorator


def trace_async(
    name: Optional[str] = None,
    kind: SpanKind = SpanKind.INTERNAL,
    record_args: bool = True,
    record_result: bool = False,
) -> Callable[[F], F]:
    """
    Decorator to trace an async method.

    The span stays open until the awaited call has finished.

    Args:
        name: Span name (defaults to the function's qualified name)
        kind: Span kind
        record_args: Whether to record function arguments as attributes
        record_result: Whether to record the return value (only applied
            when OpenTelemetry is available)

    Usage:
        @trace_async(name="my_async_operation")
        async def my_async_function(arg1, arg2):
            return result
    """

    def decorator(func: F) -> F:
        span_name = name or func.__qualname__

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            tracer = get_tracer(func.__module__)
            attributes = _call_attributes(func, args, kwargs, record_args)

            with _start_traced_span(tracer, span_name, kind, attributes) as span:
                try:
                    result = await func(*args, **kwargs)
                    if record_result and _otel_available:
                        _set_result_attribute(span, result)
                    return result
                except Exception as e:
                    _mark_span_failed(span, e)
                    raise

        return wrapper  # type: ignore

    return decorator
419
+
420
+
421
+ def _safe_attribute_value(value: Any) -> Optional[Union[str, int, float, bool]]:
422
+ """
423
+ Convert a value to a safe attribute value for tracing.
424
+
425
+ OpenTelemetry only supports certain types for attributes.
426
+ """
427
+ if value is None:
428
+ return None
429
+ if isinstance(value, (str, int, float, bool)):
430
+ return value
431
+ if isinstance(value, (list, tuple)):
432
+ if len(value) <= 10: # Limit list size
433
+ return str(value)
434
+ return f"[{len(value)} items]"
435
+ if isinstance(value, dict):
436
+ if len(value) <= 5: # Limit dict size
437
+ return str(value)
438
+ return f"{{{len(value)} items}}"
439
+ # For complex objects, return type and id
440
+ return f"<{type(value).__name__}>"
alma/retrieval/engine.py CHANGED
@@ -8,12 +8,17 @@ import logging
8
8
  import time
9
9
  from typing import Any, Dict, List, Optional
10
10
 
11
+ from alma.observability.logging import get_logger
12
+ from alma.observability.metrics import get_metrics
13
+ from alma.observability.tracing import get_tracer
11
14
  from alma.retrieval.cache import NullCache, RetrievalCache
12
15
  from alma.retrieval.scoring import MemoryScorer, ScoredItem, ScoringWeights
13
16
  from alma.storage.base import StorageBackend
14
17
  from alma.types import MemoryScope, MemorySlice
15
18
 
16
19
  logger = logging.getLogger(__name__)
20
+ structured_logger = get_logger(__name__)
21
+ tracer = get_tracer(__name__)
17
22
 
18
23
 
19
24
  class RetrievalEngine:
@@ -285,22 +290,78 @@ class RetrievalEngine:
285
290
  if self._embedder is None:
286
291
  self._embedder = self._init_embedder()
287
292
 
288
- return self._embedder.encode(text)
293
+ start_time = time.time()
294
+ embedding = self._embedder.encode(text)
295
+ duration_ms = (time.time() - start_time) * 1000
296
+
297
+ # Record embedding generation metrics
298
+ metrics = get_metrics()
299
+ metrics.record_embedding_latency(
300
+ duration_ms=duration_ms,
301
+ provider=self.embedding_provider,
302
+ batch_size=1,
303
+ )
304
+
305
+ return embedding
289
306
 
290
307
  def _init_embedder(self):
291
308
  """Initialize the embedding model based on provider config."""
292
309
  if self.embedding_provider == "azure":
293
310
  from alma.retrieval.embeddings import AzureEmbedder
294
311
 
295
- return AzureEmbedder()
312
+ embedder = AzureEmbedder()
296
313
  elif self.embedding_provider == "mock":
297
314
  from alma.retrieval.embeddings import MockEmbedder
298
315
 
299
- return MockEmbedder()
316
+ embedder = MockEmbedder()
300
317
  else:
301
318
  from alma.retrieval.embeddings import LocalEmbedder
302
319
 
303
- return LocalEmbedder()
320
+ embedder = LocalEmbedder()
321
+
322
+ # Validate embedding dimension matches storage configuration
323
+ self._validate_embedding_dimension(embedder)
324
+ return embedder
325
+
326
def _validate_embedding_dimension(self, embedder) -> None:
    """
    Check the embedder's output dimension against the storage backend.

    Validation is skipped, with a debug log, when the storage object has no
    ``embedding_dim`` attribute, when it is None, or when it is not an int.

    Args:
        embedder: Embedder exposing a ``dimension`` attribute

    Raises:
        ValueError: If the embedder and storage dimensions differ
    """
    embedder_dim = embedder.dimension
    configured_dim = getattr(self.storage, "embedding_dim", None)

    # Work out whether there is anything to compare against.
    skip_message = None
    if configured_dim is None:
        skip_message = (
            "Storage backend doesn't specify embedding_dim, skipping validation"
        )
    elif not isinstance(configured_dim, int):
        # e.g. mock objects
        skip_message = (
            f"Storage embedding_dim is not an integer ({type(configured_dim)}), "
            "skipping validation"
        )
    if skip_message is not None:
        logger.debug(skip_message)
        return

    if embedder_dim == configured_dim:
        logger.info(
            f"Embedding dimension validated: {embedder_dim} "
            f"(provider: {self.embedding_provider})"
        )
        return

    raise ValueError(
        f"Embedding dimension mismatch: provider '{self.embedding_provider}' "
        f"outputs {embedder_dim} dimensions, but storage is configured for "
        f"{configured_dim} dimensions. Update your config's embedding_dim to "
        f"match the provider, or use a different embedding provider.\n"
        f"  - local (all-MiniLM-L6-v2): 384 dimensions\n"
        f"  - azure (text-embedding-3-small): 1536 dimensions"
    )
304
365
 
305
366
  def invalidate_cache(
306
367
  self,
alma/storage/__init__.py CHANGED
@@ -1,7 +1,22 @@
1
1
  """ALMA Storage Backends."""
2
2
 
3
3
  from alma.storage.base import StorageBackend
4
+ from alma.storage.constants import (
5
+ AZURE_COSMOS_CONTAINER_NAMES,
6
+ POSTGRESQL_TABLE_NAMES,
7
+ SQLITE_TABLE_NAMES,
8
+ MemoryType,
9
+ get_table_name,
10
+ get_table_names,
11
+ )
4
12
  from alma.storage.file_based import FileBasedStorage
13
+ from alma.storage.migrations import (
14
+ Migration,
15
+ MigrationError,
16
+ MigrationRegistry,
17
+ MigrationRunner,
18
+ SchemaVersion,
19
+ )
5
20
  from alma.storage.sqlite_local import SQLiteStorage
6
21
 
7
22
  # Azure Cosmos DB is optional - requires azure-cosmos package
@@ -50,6 +65,7 @@ except ImportError:
50
65
  _HAS_PINECONE = False
51
66
 
52
67
  __all__ = [
68
+ # Storage backends
53
69
  "StorageBackend",
54
70
  "FileBasedStorage",
55
71
  "SQLiteStorage",
@@ -58,4 +74,17 @@ __all__ = [
58
74
  "QdrantStorage",
59
75
  "ChromaStorage",
60
76
  "PineconeStorage",
77
+ # Migration framework
78
+ "Migration",
79
+ "MigrationError",
80
+ "MigrationRegistry",
81
+ "MigrationRunner",
82
+ "SchemaVersion",
83
+ # Constants for consistent naming
84
+ "MemoryType",
85
+ "get_table_name",
86
+ "get_table_names",
87
+ "POSTGRESQL_TABLE_NAMES",
88
+ "SQLITE_TABLE_NAMES",
89
+ "AZURE_COSMOS_CONTAINER_NAMES",
61
90
  ]