alma-memory 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alma/__init__.py +121 -45
- alma/confidence/__init__.py +1 -1
- alma/confidence/engine.py +92 -58
- alma/confidence/types.py +34 -14
- alma/config/loader.py +3 -2
- alma/consolidation/__init__.py +23 -0
- alma/consolidation/engine.py +678 -0
- alma/consolidation/prompts.py +84 -0
- alma/core.py +136 -28
- alma/domains/__init__.py +6 -6
- alma/domains/factory.py +12 -9
- alma/domains/schemas.py +17 -3
- alma/domains/types.py +8 -4
- alma/events/__init__.py +75 -0
- alma/events/emitter.py +284 -0
- alma/events/storage_mixin.py +246 -0
- alma/events/types.py +126 -0
- alma/events/webhook.py +425 -0
- alma/exceptions.py +49 -0
- alma/extraction/__init__.py +31 -0
- alma/extraction/auto_learner.py +265 -0
- alma/extraction/extractor.py +420 -0
- alma/graph/__init__.py +106 -0
- alma/graph/backends/__init__.py +32 -0
- alma/graph/backends/kuzu.py +624 -0
- alma/graph/backends/memgraph.py +432 -0
- alma/graph/backends/memory.py +236 -0
- alma/graph/backends/neo4j.py +417 -0
- alma/graph/base.py +159 -0
- alma/graph/extraction.py +198 -0
- alma/graph/store.py +860 -0
- alma/harness/__init__.py +4 -4
- alma/harness/base.py +18 -9
- alma/harness/domains.py +27 -11
- alma/initializer/__init__.py +1 -1
- alma/initializer/initializer.py +51 -43
- alma/initializer/types.py +25 -17
- alma/integration/__init__.py +9 -9
- alma/integration/claude_agents.py +32 -20
- alma/integration/helena.py +32 -22
- alma/integration/victor.py +57 -33
- alma/learning/__init__.py +27 -27
- alma/learning/forgetting.py +198 -148
- alma/learning/heuristic_extractor.py +40 -24
- alma/learning/protocols.py +65 -17
- alma/learning/validation.py +7 -2
- alma/mcp/__init__.py +4 -4
- alma/mcp/__main__.py +2 -1
- alma/mcp/resources.py +17 -16
- alma/mcp/server.py +102 -44
- alma/mcp/tools.py +180 -45
- alma/observability/__init__.py +84 -0
- alma/observability/config.py +302 -0
- alma/observability/logging.py +424 -0
- alma/observability/metrics.py +583 -0
- alma/observability/tracing.py +440 -0
- alma/progress/__init__.py +3 -3
- alma/progress/tracker.py +26 -20
- alma/progress/types.py +8 -12
- alma/py.typed +0 -0
- alma/retrieval/__init__.py +11 -11
- alma/retrieval/cache.py +20 -21
- alma/retrieval/embeddings.py +4 -4
- alma/retrieval/engine.py +179 -39
- alma/retrieval/scoring.py +73 -63
- alma/session/__init__.py +2 -2
- alma/session/manager.py +5 -5
- alma/session/types.py +5 -4
- alma/storage/__init__.py +70 -0
- alma/storage/azure_cosmos.py +414 -133
- alma/storage/base.py +215 -4
- alma/storage/chroma.py +1443 -0
- alma/storage/constants.py +103 -0
- alma/storage/file_based.py +59 -28
- alma/storage/migrations/__init__.py +21 -0
- alma/storage/migrations/base.py +321 -0
- alma/storage/migrations/runner.py +323 -0
- alma/storage/migrations/version_stores.py +337 -0
- alma/storage/migrations/versions/__init__.py +11 -0
- alma/storage/migrations/versions/v1_0_0.py +373 -0
- alma/storage/pinecone.py +1080 -0
- alma/storage/postgresql.py +1559 -0
- alma/storage/qdrant.py +1306 -0
- alma/storage/sqlite_local.py +504 -60
- alma/testing/__init__.py +46 -0
- alma/testing/factories.py +301 -0
- alma/testing/mocks.py +389 -0
- alma/types.py +62 -14
- alma_memory-0.5.1.dist-info/METADATA +939 -0
- alma_memory-0.5.1.dist-info/RECORD +93 -0
- {alma_memory-0.4.0.dist-info → alma_memory-0.5.1.dist-info}/WHEEL +1 -1
- alma_memory-0.4.0.dist-info/METADATA +0 -488
- alma_memory-0.4.0.dist-info/RECORD +0 -52
- {alma_memory-0.4.0.dist-info → alma_memory-0.5.1.dist-info}/top_level.txt +0 -0
alma/mcp/tools.py
CHANGED
|
@@ -5,11 +5,9 @@ Provides the tool functions that can be called via MCP protocol.
|
|
|
5
5
|
Each tool corresponds to an ALMA operation.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
import json
|
|
9
8
|
import logging
|
|
10
|
-
from typing import Dict, Any, Optional, List
|
|
11
9
|
from datetime import datetime, timezone
|
|
12
|
-
from
|
|
10
|
+
from typing import Any, Dict, Optional
|
|
13
11
|
|
|
14
12
|
from alma import ALMA
|
|
15
13
|
from alma.types import MemorySlice
|
|
@@ -32,47 +30,57 @@ def _serialize_memory_slice(memory_slice: MemorySlice) -> Dict[str, Any]:
|
|
|
32
30
|
}
|
|
33
31
|
|
|
34
32
|
for h in memory_slice.heuristics:
|
|
35
|
-
result["heuristics"].append(
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
33
|
+
result["heuristics"].append(
|
|
34
|
+
{
|
|
35
|
+
"id": h.id,
|
|
36
|
+
"condition": h.condition,
|
|
37
|
+
"strategy": h.strategy,
|
|
38
|
+
"confidence": h.confidence,
|
|
39
|
+
"occurrence_count": h.occurrence_count,
|
|
40
|
+
"success_rate": h.success_rate,
|
|
41
|
+
}
|
|
42
|
+
)
|
|
43
43
|
|
|
44
44
|
for o in memory_slice.outcomes:
|
|
45
|
-
result["outcomes"].append(
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
45
|
+
result["outcomes"].append(
|
|
46
|
+
{
|
|
47
|
+
"id": o.id,
|
|
48
|
+
"task_type": o.task_type,
|
|
49
|
+
"task_description": o.task_description,
|
|
50
|
+
"success": o.success,
|
|
51
|
+
"strategy_used": o.strategy_used,
|
|
52
|
+
"duration_ms": o.duration_ms,
|
|
53
|
+
}
|
|
54
|
+
)
|
|
53
55
|
|
|
54
56
|
for dk in memory_slice.domain_knowledge:
|
|
55
|
-
result["domain_knowledge"].append(
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
57
|
+
result["domain_knowledge"].append(
|
|
58
|
+
{
|
|
59
|
+
"id": dk.id,
|
|
60
|
+
"domain": dk.domain,
|
|
61
|
+
"fact": dk.fact,
|
|
62
|
+
"confidence": dk.confidence,
|
|
63
|
+
}
|
|
64
|
+
)
|
|
61
65
|
|
|
62
66
|
for ap in memory_slice.anti_patterns:
|
|
63
|
-
result["anti_patterns"].append(
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
67
|
+
result["anti_patterns"].append(
|
|
68
|
+
{
|
|
69
|
+
"id": ap.id,
|
|
70
|
+
"pattern": ap.pattern,
|
|
71
|
+
"why_bad": ap.why_bad,
|
|
72
|
+
"better_alternative": ap.better_alternative,
|
|
73
|
+
}
|
|
74
|
+
)
|
|
69
75
|
|
|
70
76
|
for p in memory_slice.preferences:
|
|
71
|
-
result["preferences"].append(
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
77
|
+
result["preferences"].append(
|
|
78
|
+
{
|
|
79
|
+
"id": p.id,
|
|
80
|
+
"category": p.category,
|
|
81
|
+
"preference": p.preference,
|
|
82
|
+
}
|
|
83
|
+
)
|
|
76
84
|
|
|
77
85
|
return result
|
|
78
86
|
|
|
@@ -97,6 +105,12 @@ def alma_retrieve(
|
|
|
97
105
|
Returns:
|
|
98
106
|
Dict containing the memory slice with relevant memories
|
|
99
107
|
"""
|
|
108
|
+
# Input validation
|
|
109
|
+
if not task or not task.strip():
|
|
110
|
+
return {"success": False, "error": "task cannot be empty"}
|
|
111
|
+
if not agent or not agent.strip():
|
|
112
|
+
return {"success": False, "error": "agent cannot be empty"}
|
|
113
|
+
|
|
100
114
|
try:
|
|
101
115
|
memories = alma.retrieve(
|
|
102
116
|
task=task,
|
|
@@ -147,8 +161,18 @@ def alma_learn(
|
|
|
147
161
|
Returns:
|
|
148
162
|
Dict with learning result
|
|
149
163
|
"""
|
|
164
|
+
# Input validation
|
|
165
|
+
if not agent or not agent.strip():
|
|
166
|
+
return {"success": False, "error": "agent cannot be empty"}
|
|
167
|
+
if not task or not task.strip():
|
|
168
|
+
return {"success": False, "error": "task cannot be empty"}
|
|
169
|
+
if not outcome or not outcome.strip():
|
|
170
|
+
return {"success": False, "error": "outcome cannot be empty"}
|
|
171
|
+
if not strategy_used or not strategy_used.strip():
|
|
172
|
+
return {"success": False, "error": "strategy_used cannot be empty"}
|
|
173
|
+
|
|
150
174
|
try:
|
|
151
|
-
|
|
175
|
+
outcome_record = alma.learn(
|
|
152
176
|
agent=agent,
|
|
153
177
|
task=task,
|
|
154
178
|
outcome=outcome,
|
|
@@ -161,8 +185,14 @@ def alma_learn(
|
|
|
161
185
|
|
|
162
186
|
return {
|
|
163
187
|
"success": True,
|
|
164
|
-
"learned":
|
|
165
|
-
"
|
|
188
|
+
"learned": True,
|
|
189
|
+
"outcome": {
|
|
190
|
+
"id": outcome_record.id,
|
|
191
|
+
"agent": outcome_record.agent,
|
|
192
|
+
"task_type": outcome_record.task_type,
|
|
193
|
+
"success": outcome_record.success,
|
|
194
|
+
},
|
|
195
|
+
"message": "Outcome recorded successfully",
|
|
166
196
|
}
|
|
167
197
|
|
|
168
198
|
except Exception as e:
|
|
@@ -193,6 +223,14 @@ def alma_add_preference(
|
|
|
193
223
|
Returns:
|
|
194
224
|
Dict with the created preference
|
|
195
225
|
"""
|
|
226
|
+
# Input validation
|
|
227
|
+
if not user_id or not user_id.strip():
|
|
228
|
+
return {"success": False, "error": "user_id cannot be empty"}
|
|
229
|
+
if not category or not category.strip():
|
|
230
|
+
return {"success": False, "error": "category cannot be empty"}
|
|
231
|
+
if not preference or not preference.strip():
|
|
232
|
+
return {"success": False, "error": "preference cannot be empty"}
|
|
233
|
+
|
|
196
234
|
try:
|
|
197
235
|
pref = alma.add_user_preference(
|
|
198
236
|
user_id=user_id,
|
|
@@ -240,6 +278,14 @@ def alma_add_knowledge(
|
|
|
240
278
|
Returns:
|
|
241
279
|
Dict with the created knowledge or rejection reason
|
|
242
280
|
"""
|
|
281
|
+
# Input validation
|
|
282
|
+
if not agent or not agent.strip():
|
|
283
|
+
return {"success": False, "error": "agent cannot be empty"}
|
|
284
|
+
if not domain or not domain.strip():
|
|
285
|
+
return {"success": False, "error": "domain cannot be empty"}
|
|
286
|
+
if not fact or not fact.strip():
|
|
287
|
+
return {"success": False, "error": "fact cannot be empty"}
|
|
288
|
+
|
|
243
289
|
try:
|
|
244
290
|
knowledge = alma.add_domain_knowledge(
|
|
245
291
|
agent=agent,
|
|
@@ -248,12 +294,6 @@ def alma_add_knowledge(
|
|
|
248
294
|
source=source,
|
|
249
295
|
)
|
|
250
296
|
|
|
251
|
-
if knowledge is None:
|
|
252
|
-
return {
|
|
253
|
-
"success": False,
|
|
254
|
-
"error": f"Agent '{agent}' not allowed to learn in domain '{domain}'",
|
|
255
|
-
}
|
|
256
|
-
|
|
257
297
|
return {
|
|
258
298
|
"success": True,
|
|
259
299
|
"knowledge": {
|
|
@@ -372,3 +412,98 @@ def alma_health(alma: ALMA) -> Dict[str, Any]:
|
|
|
372
412
|
"status": "unhealthy",
|
|
373
413
|
"error": str(e),
|
|
374
414
|
}
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
async def alma_consolidate(
    alma: ALMA,
    agent: str,
    memory_type: str = "heuristics",
    similarity_threshold: float = 0.85,
    dry_run: bool = True,
) -> Dict[str, Any]:
    """
    Consolidate similar memories to reduce redundancy.

    Builds a ``ConsolidationEngine`` on ``alma.storage`` and runs one
    consolidation pass for ``agent`` within ``alma.project_id``. This tool
    always runs the engine with ``llm_client=None`` and ``use_llm=False``,
    so no LLM-based merging is performed from here.

    Every failure is reported through the returned dict; the function does
    not raise for invalid input.

    Args:
        alma: ALMA instance
        agent: Agent whose memories to consolidate
        memory_type: Type of memory to consolidate
            ("heuristics", "outcomes", "domain_knowledge", "anti_patterns")
        similarity_threshold: Minimum cosine similarity to group (0.0 to 1.0)
            Higher values are more conservative (fewer merges)
        dry_run: If True, report what would be merged without actually
            modifying storage. Recommended for first run to preview changes

    Returns:
        On validation or runtime failure: ``{"success": False, "error": ...}``.
        Otherwise a dict with:
        - success: The engine result's success flag
        - dry_run: Echo of the ``dry_run`` argument
        - merged_count: Number of memories merged
        - groups_found: Number of similar memory groups identified
        - memories_processed: Total memories analyzed
        - merge_details: List of merge operations (or planned operations if dry_run)
        - errors: Any errors encountered
        - message: Human-readable summary of the counts above
    """
    # Input validation
    if not agent or not agent.strip():
        return {"success": False, "error": "agent cannot be empty"}

    valid_types = ["heuristics", "outcomes", "domain_knowledge", "anti_patterns"]
    if memory_type not in valid_types:
        return {
            "success": False,
            "error": f"memory_type must be one of: {', '.join(valid_types)}",
        }

    # A non-numeric threshold (for example a string) makes the chained
    # comparison raise TypeError. Treat that as a validation failure so the
    # tool keeps returning an error dict instead of raising.
    try:
        threshold_in_range = 0.0 <= similarity_threshold <= 1.0
    except TypeError:
        threshold_in_range = False
    if not threshold_in_range:
        return {
            "success": False,
            "error": "similarity_threshold must be between 0.0 and 1.0",
        }

    try:
        # Imported lazily so the consolidation package is only loaded when
        # this tool is actually invoked.
        from alma.consolidation import ConsolidationEngine

        # Create consolidation engine
        engine = ConsolidationEngine(
            storage=alma.storage,
            embedder=None,  # Will use default LocalEmbedder
            llm_client=None,  # LLM merging disabled by default
        )

        # Run consolidation
        result = await engine.consolidate(
            agent=agent,
            project_id=alma.project_id,
            memory_type=memory_type,
            similarity_threshold=similarity_threshold,
            use_llm=False,  # LLM disabled - uses highest confidence merge
            dry_run=dry_run,
        )

        # Invalidate cache after consolidation (if not dry run)
        if not dry_run and result.merged_count > 0:
            alma.retrieval.invalidate_cache(agent=agent, project_id=alma.project_id)

        return {
            "success": result.success,
            "dry_run": dry_run,
            "merged_count": result.merged_count,
            "groups_found": result.groups_found,
            "memories_processed": result.memories_processed,
            "merge_details": result.merge_details,
            "errors": result.errors,
            "message": (
                f"{'Would merge' if dry_run else 'Merged'} {result.merged_count} memories "
                f"from {result.groups_found} similar groups "
                f"(processed {result.memories_processed} total)"
            ),
        }

    except Exception as e:
        # Tool boundary: log with traceback and report the failure as data.
        logger.exception(f"Error in alma_consolidate: {e}")
        return {
            "success": False,
            "error": str(e),
        }
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ALMA Observability Module.
|
|
3
|
+
|
|
4
|
+
Provides comprehensive observability features including:
|
|
5
|
+
- OpenTelemetry integration for distributed tracing
|
|
6
|
+
- Structured JSON logging
|
|
7
|
+
- Metrics collection (counters, histograms, gauges)
|
|
8
|
+
- Performance monitoring
|
|
9
|
+
|
|
10
|
+
This module follows the OpenTelemetry specification and supports
|
|
11
|
+
integration with common observability backends (Jaeger, Prometheus,
|
|
12
|
+
DataDog, etc.).
|
|
13
|
+
|
|
14
|
+
Usage:
|
|
15
|
+
from alma.observability import (
|
|
16
|
+
get_tracer,
|
|
17
|
+
get_meter,
|
|
18
|
+
get_logger,
|
|
19
|
+
configure_observability,
|
|
20
|
+
ALMAMetrics,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Initialize observability (typically at app startup)
|
|
24
|
+
configure_observability(
|
|
25
|
+
service_name="alma-memory",
|
|
26
|
+
enable_tracing=True,
|
|
27
|
+
enable_metrics=True,
|
|
28
|
+
log_format="json",
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Use in code
|
|
32
|
+
tracer = get_tracer(__name__)
|
|
33
|
+
with tracer.start_as_current_span("my_operation"):
|
|
34
|
+
# ... your code
|
|
35
|
+
pass
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
from alma.observability.config import (
|
|
39
|
+
ObservabilityConfig,
|
|
40
|
+
configure_observability,
|
|
41
|
+
shutdown_observability,
|
|
42
|
+
)
|
|
43
|
+
from alma.observability.logging import (
|
|
44
|
+
JSONFormatter,
|
|
45
|
+
StructuredLogger,
|
|
46
|
+
get_logger,
|
|
47
|
+
setup_logging,
|
|
48
|
+
)
|
|
49
|
+
from alma.observability.metrics import (
|
|
50
|
+
ALMAMetrics,
|
|
51
|
+
MetricsCollector,
|
|
52
|
+
get_meter,
|
|
53
|
+
get_metrics,
|
|
54
|
+
)
|
|
55
|
+
from alma.observability.tracing import (
|
|
56
|
+
SpanKind,
|
|
57
|
+
TracingContext,
|
|
58
|
+
get_tracer,
|
|
59
|
+
trace_async,
|
|
60
|
+
trace_method,
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
__all__ = [
|
|
64
|
+
# Configuration
|
|
65
|
+
"ObservabilityConfig",
|
|
66
|
+
"configure_observability",
|
|
67
|
+
"shutdown_observability",
|
|
68
|
+
# Logging
|
|
69
|
+
"JSONFormatter",
|
|
70
|
+
"StructuredLogger",
|
|
71
|
+
"get_logger",
|
|
72
|
+
"setup_logging",
|
|
73
|
+
# Metrics
|
|
74
|
+
"ALMAMetrics",
|
|
75
|
+
"MetricsCollector",
|
|
76
|
+
"get_meter",
|
|
77
|
+
"get_metrics",
|
|
78
|
+
# Tracing
|
|
79
|
+
"SpanKind",
|
|
80
|
+
"TracingContext",
|
|
81
|
+
"get_tracer",
|
|
82
|
+
"trace_method",
|
|
83
|
+
"trace_async",
|
|
84
|
+
]
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ALMA Observability Configuration.
|
|
3
|
+
|
|
4
|
+
Centralized configuration for observability features including
|
|
5
|
+
tracing, metrics, and logging setup.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from typing import Any, Dict, Optional
|
|
12
|
+
|
|
13
|
+
# Module-level observability state. configure_observability() fills these in
# and shutdown_observability() resets them to the values below.
_observability_initialized: bool = False
_tracer_provider: Optional[Any] = None
_meter_provider: Optional[Any] = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _env_default(name: str, fallback: Optional[str] = None):
    """Return a zero-argument factory that reads *name* from the environment."""
    return lambda: os.environ.get(name, fallback)


@dataclass
class ObservabilityConfig:
    """
    Settings bundle for ALMA tracing, metrics, and logging.

    Environment-backed defaults are resolved when an instance is created,
    not when the module is imported.

    Attributes:
        service_name: Name of the service for tracing/metrics
        service_version: Version of the service
        environment: Deployment environment; defaults to the
            ALMA_ENVIRONMENT variable, else "development"
        enable_tracing: Whether to enable distributed tracing
        enable_metrics: Whether to enable metrics collection
        enable_logging: Whether to enable structured logging
        log_level: Logging level; defaults to ALMA_LOG_LEVEL, else "INFO"
        log_format: Log format ("json" or "text"); defaults to
            ALMA_LOG_FORMAT, else "json"
        otlp_endpoint: OpenTelemetry collector endpoint; defaults to
            OTEL_EXPORTER_OTLP_ENDPOINT, else None
        otlp_headers: Headers for OTLP exporter
        trace_sample_rate: Sampling rate for traces (0.0-1.0)
        metric_export_interval_ms: How often to export metrics
        resource_attributes: Additional resource attributes
    """

    service_name: str = "alma-memory"
    service_version: str = "0.5.1"
    environment: str = field(
        default_factory=_env_default("ALMA_ENVIRONMENT", "development")
    )
    enable_tracing: bool = True
    enable_metrics: bool = True
    enable_logging: bool = True
    log_level: str = field(default_factory=_env_default("ALMA_LOG_LEVEL", "INFO"))
    log_format: str = field(default_factory=_env_default("ALMA_LOG_FORMAT", "json"))
    otlp_endpoint: Optional[str] = field(
        default_factory=_env_default("OTEL_EXPORTER_OTLP_ENDPOINT")
    )
    otlp_headers: Dict[str, str] = field(default_factory=dict)
    trace_sample_rate: float = 1.0
    metric_export_interval_ms: int = 60000
    resource_attributes: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """
        Return the scalar settings as a plain dictionary.

        ``otlp_headers`` and ``resource_attributes`` are not included.
        """
        exported = (
            "service_name",
            "service_version",
            "environment",
            "enable_tracing",
            "enable_metrics",
            "enable_logging",
            "log_level",
            "log_format",
            "otlp_endpoint",
            "trace_sample_rate",
            "metric_export_interval_ms",
        )
        return {name: getattr(self, name) for name in exported}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def configure_observability(
    service_name: str = "alma-memory",
    service_version: str = "0.5.1",
    environment: Optional[str] = None,
    enable_tracing: bool = True,
    enable_metrics: bool = True,
    enable_logging: bool = True,
    log_level: str = "INFO",
    log_format: str = "json",
    otlp_endpoint: Optional[str] = None,
    trace_sample_rate: float = 1.0,
    resource_attributes: Optional[Dict[str, str]] = None,
) -> ObservabilityConfig:
    """
    Configure ALMA observability features.

    This function should be called once at application startup to initialize
    tracing, metrics, and logging. It builds an ``ObservabilityConfig``, sets
    up logging first, then tracing, then metrics, stores the resulting
    providers in module state, and logs a summary line.

    Args:
        service_name: Name of the service
        service_version: Version of the service
        environment: Deployment environment. When omitted or empty, falls
            back to the ALMA_ENVIRONMENT variable, else "development"
        enable_tracing: Enable distributed tracing
        enable_metrics: Enable metrics collection
        enable_logging: Enable structured logging
        log_level: Logging level. The value given here is always used; the
            ALMA_LOG_LEVEL fallback only applies when ``ObservabilityConfig``
            is constructed directly
        log_format: Log format ("json" or "text"). As with ``log_level``,
            ALMA_LOG_FORMAT is not consulted by this function
        otlp_endpoint: OpenTelemetry collector endpoint. When omitted or
            empty, falls back to the OTEL_EXPORTER_OTLP_ENDPOINT variable
        trace_sample_rate: Sampling rate for traces
        resource_attributes: Additional resource attributes

    Returns:
        ObservabilityConfig with applied settings
    """
    global _observability_initialized, _tracer_provider, _meter_provider

    config = ObservabilityConfig(
        service_name=service_name,
        service_version=service_version,
        environment=environment or os.environ.get("ALMA_ENVIRONMENT", "development"),
        enable_tracing=enable_tracing,
        enable_metrics=enable_metrics,
        enable_logging=enable_logging,
        log_level=log_level,
        log_format=log_format,
        # Passing None explicitly would override the dataclass's environment
        # default, so resolve the fallback here the same way as `environment`.
        otlp_endpoint=otlp_endpoint or os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"),
        trace_sample_rate=trace_sample_rate,
        resource_attributes=resource_attributes or {},
    )

    # Setup logging first
    if config.enable_logging:
        from alma.observability.logging import setup_logging

        setup_logging(
            level=config.log_level,
            format_type=config.log_format,
            service_name=config.service_name,
        )

    # Setup tracing
    if config.enable_tracing:
        _tracer_provider = _setup_tracing(config)

    # Setup metrics
    if config.enable_metrics:
        _meter_provider = _setup_metrics(config)

    _observability_initialized = True

    logger = logging.getLogger(__name__)
    logger.info(
        "ALMA observability configured",
        extra={
            "service_name": config.service_name,
            "environment": config.environment,
            "tracing_enabled": config.enable_tracing,
            "metrics_enabled": config.enable_metrics,
        },
    )

    return config
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _setup_tracing(config: ObservabilityConfig):
    """
    Build a tracer provider from *config* and register it globally.

    Args:
        config: Settings supplying the service identity, extra resource
            attributes, sample rate, and optional OTLP endpoint/headers.

    Returns:
        The provider passed to ``trace.set_tracer_provider``, or None when
        the OpenTelemetry imports fail (a warning is logged in that case).
    """
    try:
        from opentelemetry import trace
        from opentelemetry.sdk.resources import Resource
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.sdk.trace.sampling import TraceIdRatioBased

        # Service identity first; caller-supplied attributes may override it.
        attributes = dict(
            [
                ("service.name", config.service_name),
                ("service.version", config.service_version),
                ("deployment.environment", config.environment),
            ]
        )
        attributes.update(config.resource_attributes)

        tracer_provider = TracerProvider(
            resource=Resource.create(attributes),
            sampler=TraceIdRatioBased(config.trace_sample_rate),
        )

        # Spans are only exported when an OTLP endpoint is configured.
        if config.otlp_endpoint:
            try:
                from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
                    OTLPSpanExporter,
                )
                from opentelemetry.sdk.trace.export import BatchSpanProcessor

                span_exporter = OTLPSpanExporter(
                    endpoint=config.otlp_endpoint,
                    headers=config.otlp_headers or {},
                )
                tracer_provider.add_span_processor(BatchSpanProcessor(span_exporter))
            except ImportError:
                # Missing exporter package: keep the provider, skip export.
                logging.getLogger(__name__).warning(
                    "OTLP exporter not available. Install with: "
                    "pip install opentelemetry-exporter-otlp-proto-grpc"
                )

        trace.set_tracer_provider(tracer_provider)
        return tracer_provider

    except ImportError:
        logging.getLogger(__name__).warning(
            "OpenTelemetry SDK not available. Tracing disabled. "
            "Install with: pip install opentelemetry-sdk"
        )
        return None
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _setup_metrics(config: ObservabilityConfig):
    """
    Setup OpenTelemetry metrics.

    Builds a single ``MeterProvider`` for *config* and registers it with
    ``metrics.set_meter_provider``. When ``config.otlp_endpoint`` is set and
    the OTLP gRPC metric exporter can be imported, a periodic exporting
    reader using ``config.metric_export_interval_ms`` is attached.

    Args:
        config: Settings supplying the service identity, extra resource
            attributes, and optional OTLP endpoint/headers/export interval.

    Returns:
        The registered provider, or None when the OpenTelemetry imports
        fail (a warning is logged in that case).
    """
    try:
        from opentelemetry import metrics
        from opentelemetry.sdk.metrics import MeterProvider
        from opentelemetry.sdk.resources import Resource

        # Build resource attributes
        resource_attrs = {
            "service.name": config.service_name,
            "service.version": config.service_version,
            "deployment.environment": config.environment,
        }
        resource_attrs.update(config.resource_attributes)

        resource = Resource.create(resource_attrs)

        # Collect readers before constructing the provider so that exactly
        # one MeterProvider is created, with or without an OTLP endpoint.
        metric_readers = []

        # Add OTLP exporter if endpoint is configured
        if config.otlp_endpoint:
            try:
                from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
                    OTLPMetricExporter,
                )
                from opentelemetry.sdk.metrics.export import (
                    PeriodicExportingMetricReader,
                )

                otlp_exporter = OTLPMetricExporter(
                    endpoint=config.otlp_endpoint,
                    headers=config.otlp_headers or {},
                )
                metric_readers.append(
                    PeriodicExportingMetricReader(
                        otlp_exporter,
                        export_interval_millis=config.metric_export_interval_ms,
                    )
                )
            except ImportError:
                # Missing exporter package: fall through with no readers.
                logging.getLogger(__name__).warning(
                    "OTLP metric exporter not available. Install with: "
                    "pip install opentelemetry-exporter-otlp-proto-grpc"
                )

        # Create meter provider
        provider = MeterProvider(resource=resource, metric_readers=metric_readers)

        metrics.set_meter_provider(provider)
        return provider

    except ImportError:
        logging.getLogger(__name__).warning(
            "OpenTelemetry SDK not available. Metrics disabled. "
            "Install with: pip install opentelemetry-sdk"
        )
        return None
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def shutdown_observability():
    """
    Shut down the stored tracer and meter providers and reset module state.

    Intended for application shutdown so pending telemetry can be exported.
    Each provider is shut down independently: an exception from one is
    logged as an error and does not prevent the other from being shut down
    or the module state from being cleared.
    """
    global _observability_initialized, _tracer_provider, _meter_provider

    # Tracer first, then meter, matching the order they were configured in.
    for label, provider in (("tracer", _tracer_provider), ("meter", _meter_provider)):
        if provider is None:
            continue
        try:
            provider.shutdown()
        except Exception as e:
            logging.getLogger(__name__).error(f"Error shutting down {label}: {e}")

    _observability_initialized = False
    _tracer_provider = None
    _meter_provider = None
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def is_observability_initialized() -> bool:
    """
    Report the current value of the module's initialization flag.

    Returns:
        The ``_observability_initialized`` flag, which
        ``configure_observability`` sets to True and
        ``shutdown_observability`` resets to False.
    """
    return _observability_initialized
|