alma-memory 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. alma/__init__.py +121 -45
  2. alma/confidence/__init__.py +1 -1
  3. alma/confidence/engine.py +92 -58
  4. alma/confidence/types.py +34 -14
  5. alma/config/loader.py +3 -2
  6. alma/consolidation/__init__.py +23 -0
  7. alma/consolidation/engine.py +678 -0
  8. alma/consolidation/prompts.py +84 -0
  9. alma/core.py +136 -28
  10. alma/domains/__init__.py +6 -6
  11. alma/domains/factory.py +12 -9
  12. alma/domains/schemas.py +17 -3
  13. alma/domains/types.py +8 -4
  14. alma/events/__init__.py +75 -0
  15. alma/events/emitter.py +284 -0
  16. alma/events/storage_mixin.py +246 -0
  17. alma/events/types.py +126 -0
  18. alma/events/webhook.py +425 -0
  19. alma/exceptions.py +49 -0
  20. alma/extraction/__init__.py +31 -0
  21. alma/extraction/auto_learner.py +265 -0
  22. alma/extraction/extractor.py +420 -0
  23. alma/graph/__init__.py +106 -0
  24. alma/graph/backends/__init__.py +32 -0
  25. alma/graph/backends/kuzu.py +624 -0
  26. alma/graph/backends/memgraph.py +432 -0
  27. alma/graph/backends/memory.py +236 -0
  28. alma/graph/backends/neo4j.py +417 -0
  29. alma/graph/base.py +159 -0
  30. alma/graph/extraction.py +198 -0
  31. alma/graph/store.py +860 -0
  32. alma/harness/__init__.py +4 -4
  33. alma/harness/base.py +18 -9
  34. alma/harness/domains.py +27 -11
  35. alma/initializer/__init__.py +1 -1
  36. alma/initializer/initializer.py +51 -43
  37. alma/initializer/types.py +25 -17
  38. alma/integration/__init__.py +9 -9
  39. alma/integration/claude_agents.py +32 -20
  40. alma/integration/helena.py +32 -22
  41. alma/integration/victor.py +57 -33
  42. alma/learning/__init__.py +27 -27
  43. alma/learning/forgetting.py +198 -148
  44. alma/learning/heuristic_extractor.py +40 -24
  45. alma/learning/protocols.py +65 -17
  46. alma/learning/validation.py +7 -2
  47. alma/mcp/__init__.py +4 -4
  48. alma/mcp/__main__.py +2 -1
  49. alma/mcp/resources.py +17 -16
  50. alma/mcp/server.py +102 -44
  51. alma/mcp/tools.py +180 -45
  52. alma/observability/__init__.py +84 -0
  53. alma/observability/config.py +302 -0
  54. alma/observability/logging.py +424 -0
  55. alma/observability/metrics.py +583 -0
  56. alma/observability/tracing.py +440 -0
  57. alma/progress/__init__.py +3 -3
  58. alma/progress/tracker.py +26 -20
  59. alma/progress/types.py +8 -12
  60. alma/py.typed +0 -0
  61. alma/retrieval/__init__.py +11 -11
  62. alma/retrieval/cache.py +20 -21
  63. alma/retrieval/embeddings.py +4 -4
  64. alma/retrieval/engine.py +179 -39
  65. alma/retrieval/scoring.py +73 -63
  66. alma/session/__init__.py +2 -2
  67. alma/session/manager.py +5 -5
  68. alma/session/types.py +5 -4
  69. alma/storage/__init__.py +70 -0
  70. alma/storage/azure_cosmos.py +414 -133
  71. alma/storage/base.py +215 -4
  72. alma/storage/chroma.py +1443 -0
  73. alma/storage/constants.py +103 -0
  74. alma/storage/file_based.py +59 -28
  75. alma/storage/migrations/__init__.py +21 -0
  76. alma/storage/migrations/base.py +321 -0
  77. alma/storage/migrations/runner.py +323 -0
  78. alma/storage/migrations/version_stores.py +337 -0
  79. alma/storage/migrations/versions/__init__.py +11 -0
  80. alma/storage/migrations/versions/v1_0_0.py +373 -0
  81. alma/storage/pinecone.py +1080 -0
  82. alma/storage/postgresql.py +1559 -0
  83. alma/storage/qdrant.py +1306 -0
  84. alma/storage/sqlite_local.py +504 -60
  85. alma/testing/__init__.py +46 -0
  86. alma/testing/factories.py +301 -0
  87. alma/testing/mocks.py +389 -0
  88. alma/types.py +62 -14
  89. alma_memory-0.5.1.dist-info/METADATA +939 -0
  90. alma_memory-0.5.1.dist-info/RECORD +93 -0
  91. {alma_memory-0.4.0.dist-info → alma_memory-0.5.1.dist-info}/WHEEL +1 -1
  92. alma_memory-0.4.0.dist-info/METADATA +0 -488
  93. alma_memory-0.4.0.dist-info/RECORD +0 -52
  94. {alma_memory-0.4.0.dist-info → alma_memory-0.5.1.dist-info}/top_level.txt +0 -0
alma/mcp/tools.py CHANGED
@@ -5,11 +5,9 @@ Provides the tool functions that can be called via MCP protocol.
  Each tool corresponds to an ALMA operation.
  """
 
- import json
  import logging
- from typing import Dict, Any, Optional, List
  from datetime import datetime, timezone
- from dataclasses import asdict
+ from typing import Any, Dict, Optional
 
  from alma import ALMA
  from alma.types import MemorySlice
@@ -32,47 +30,57 @@ def _serialize_memory_slice(memory_slice: MemorySlice) -> Dict[str, Any]:
      }
 
      for h in memory_slice.heuristics:
-         result["heuristics"].append({
-             "id": h.id,
-             "condition": h.condition,
-             "strategy": h.strategy,
-             "confidence": h.confidence,
-             "occurrence_count": h.occurrence_count,
-             "success_rate": h.success_rate,
-         })
+         result["heuristics"].append(
+             {
+                 "id": h.id,
+                 "condition": h.condition,
+                 "strategy": h.strategy,
+                 "confidence": h.confidence,
+                 "occurrence_count": h.occurrence_count,
+                 "success_rate": h.success_rate,
+             }
+         )
 
      for o in memory_slice.outcomes:
-         result["outcomes"].append({
-             "id": o.id,
-             "task_type": o.task_type,
-             "task_description": o.task_description,
-             "success": o.success,
-             "strategy_used": o.strategy_used,
-             "duration_ms": o.duration_ms,
-         })
+         result["outcomes"].append(
+             {
+                 "id": o.id,
+                 "task_type": o.task_type,
+                 "task_description": o.task_description,
+                 "success": o.success,
+                 "strategy_used": o.strategy_used,
+                 "duration_ms": o.duration_ms,
+             }
+         )
 
      for dk in memory_slice.domain_knowledge:
-         result["domain_knowledge"].append({
-             "id": dk.id,
-             "domain": dk.domain,
-             "fact": dk.fact,
-             "confidence": dk.confidence,
-         })
+         result["domain_knowledge"].append(
+             {
+                 "id": dk.id,
+                 "domain": dk.domain,
+                 "fact": dk.fact,
+                 "confidence": dk.confidence,
+             }
+         )
 
      for ap in memory_slice.anti_patterns:
-         result["anti_patterns"].append({
-             "id": ap.id,
-             "pattern": ap.pattern,
-             "why_bad": ap.why_bad,
-             "better_alternative": ap.better_alternative,
-         })
+         result["anti_patterns"].append(
+             {
+                 "id": ap.id,
+                 "pattern": ap.pattern,
+                 "why_bad": ap.why_bad,
+                 "better_alternative": ap.better_alternative,
+             }
+         )
 
      for p in memory_slice.preferences:
-         result["preferences"].append({
-             "id": p.id,
-             "category": p.category,
-             "preference": p.preference,
-         })
+         result["preferences"].append(
+             {
+                 "id": p.id,
+                 "category": p.category,
+                 "preference": p.preference,
+             }
+         )
 
      return result
 
@@ -97,6 +105,12 @@ def alma_retrieve(
      Returns:
          Dict containing the memory slice with relevant memories
      """
+     # Input validation
+     if not task or not task.strip():
+         return {"success": False, "error": "task cannot be empty"}
+     if not agent or not agent.strip():
+         return {"success": False, "error": "agent cannot be empty"}
+
      try:
          memories = alma.retrieve(
              task=task,
@@ -147,8 +161,18 @@ def alma_learn(
      Returns:
          Dict with learning result
      """
+     # Input validation
+     if not agent or not agent.strip():
+         return {"success": False, "error": "agent cannot be empty"}
+     if not task or not task.strip():
+         return {"success": False, "error": "task cannot be empty"}
+     if not outcome or not outcome.strip():
+         return {"success": False, "error": "outcome cannot be empty"}
+     if not strategy_used or not strategy_used.strip():
+         return {"success": False, "error": "strategy_used cannot be empty"}
+
      try:
-         result = alma.learn(
+         outcome_record = alma.learn(
              agent=agent,
              task=task,
              outcome=outcome,
@@ -161,8 +185,14 @@
 
          return {
              "success": True,
-             "learned": result,
-             "message": "Outcome recorded" if result else "Learning rejected (scope violation)",
+             "learned": True,
+             "outcome": {
+                 "id": outcome_record.id,
+                 "agent": outcome_record.agent,
+                 "task_type": outcome_record.task_type,
+                 "success": outcome_record.success,
+             },
+             "message": "Outcome recorded successfully",
          }
 
      except Exception as e:
@@ -193,6 +223,14 @@ def alma_add_preference(
      Returns:
          Dict with the created preference
      """
+     # Input validation
+     if not user_id or not user_id.strip():
+         return {"success": False, "error": "user_id cannot be empty"}
+     if not category or not category.strip():
+         return {"success": False, "error": "category cannot be empty"}
+     if not preference or not preference.strip():
+         return {"success": False, "error": "preference cannot be empty"}
+
      try:
          pref = alma.add_user_preference(
              user_id=user_id,
@@ -240,6 +278,14 @@ def alma_add_knowledge(
      Returns:
          Dict with the created knowledge or rejection reason
      """
+     # Input validation
+     if not agent or not agent.strip():
+         return {"success": False, "error": "agent cannot be empty"}
+     if not domain or not domain.strip():
+         return {"success": False, "error": "domain cannot be empty"}
+     if not fact or not fact.strip():
+         return {"success": False, "error": "fact cannot be empty"}
+
      try:
          knowledge = alma.add_domain_knowledge(
              agent=agent,
@@ -248,12 +294,6 @@
              source=source,
          )
 
-         if knowledge is None:
-             return {
-                 "success": False,
-                 "error": f"Agent '{agent}' not allowed to learn in domain '{domain}'",
-             }
-
          return {
              "success": True,
              "knowledge": {
@@ -372,3 +412,98 @@ def alma_health(alma: ALMA) -> Dict[str, Any]:
              "status": "unhealthy",
              "error": str(e),
          }
+
+
+ async def alma_consolidate(
+     alma: ALMA,
+     agent: str,
+     memory_type: str = "heuristics",
+     similarity_threshold: float = 0.85,
+     dry_run: bool = True,
+ ) -> Dict[str, Any]:
+     """
+     Consolidate similar memories to reduce redundancy.
+
+     This is ALMA's implementation of Mem0's core innovation - LLM-powered
+     deduplication that merges similar memories intelligently.
+
+     Args:
+         alma: ALMA instance
+         agent: Agent whose memories to consolidate
+         memory_type: Type of memory to consolidate
+             ("heuristics", "outcomes", "domain_knowledge", "anti_patterns")
+         similarity_threshold: Minimum cosine similarity to group (0.0 to 1.0)
+             Higher values are more conservative (fewer merges)
+         dry_run: If True, report what would be merged without actually modifying storage
+             Recommended for first run to preview changes
+
+     Returns:
+         Dict with consolidation results including:
+         - merged_count: Number of memories merged
+         - groups_found: Number of similar memory groups identified
+         - memories_processed: Total memories analyzed
+         - merge_details: List of merge operations (or planned operations if dry_run)
+         - errors: Any errors encountered
+     """
+     # Input validation
+     if not agent or not agent.strip():
+         return {"success": False, "error": "agent cannot be empty"}
+
+     valid_types = ["heuristics", "outcomes", "domain_knowledge", "anti_patterns"]
+     if memory_type not in valid_types:
+         return {
+             "success": False,
+             "error": f"memory_type must be one of: {', '.join(valid_types)}",
+         }
+
+     if not 0.0 <= similarity_threshold <= 1.0:
+         return {
+             "success": False,
+             "error": "similarity_threshold must be between 0.0 and 1.0",
+         }
+
+     try:
+         from alma.consolidation import ConsolidationEngine
+
+         # Create consolidation engine
+         engine = ConsolidationEngine(
+             storage=alma.storage,
+             embedder=None,  # Will use default LocalEmbedder
+             llm_client=None,  # LLM merging disabled by default
+         )
+
+         # Run consolidation
+         result = await engine.consolidate(
+             agent=agent,
+             project_id=alma.project_id,
+             memory_type=memory_type,
+             similarity_threshold=similarity_threshold,
+             use_llm=False,  # LLM disabled - uses highest confidence merge
+             dry_run=dry_run,
+         )
+
+         # Invalidate cache after consolidation (if not dry run)
+         if not dry_run and result.merged_count > 0:
+             alma.retrieval.invalidate_cache(agent=agent, project_id=alma.project_id)
+
+         return {
+             "success": result.success,
+             "dry_run": dry_run,
+             "merged_count": result.merged_count,
+             "groups_found": result.groups_found,
+             "memories_processed": result.memories_processed,
+             "merge_details": result.merge_details,
+             "errors": result.errors,
+             "message": (
+                 f"{'Would merge' if dry_run else 'Merged'} {result.merged_count} memories "
+                 f"from {result.groups_found} similar groups "
+                 f"(processed {result.memories_processed} total)"
+             ),
+         }
+
+     except Exception as e:
+         logger.exception(f"Error in alma_consolidate: {e}")
+         return {
+             "success": False,
+             "error": str(e),
+         }
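
The new alma_consolidate tool is a coroutine and defaults to dry_run=True. A minimal sketch of calling it directly, assuming `alma` is an already-initialized ALMA instance; the agent name and threshold here are illustrative placeholders, not values taken from this diff:

import asyncio

from alma.mcp.tools import alma_consolidate


async def preview_consolidation(alma) -> None:
    # Dry run: reports what would be merged without modifying storage.
    report = await alma_consolidate(
        alma,
        agent="research-agent",        # hypothetical agent name
        memory_type="heuristics",
        similarity_threshold=0.9,      # stricter than the 0.85 default
        dry_run=True,
    )
    if not report.get("success"):
        print("Consolidation failed:", report.get("error") or report.get("errors"))
        return
    print(report["message"])
    for merge in report.get("merge_details", []):
        print("  -", merge)


# `alma` must be a configured ALMA instance (construction not shown here):
# asyncio.run(preview_consolidation(alma))

Running with dry_run=False would apply the merges and, per the tool code above, invalidate the retrieval cache for that agent.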
alma/observability/__init__.py ADDED
@@ -0,0 +1,84 @@
+ """
+ ALMA Observability Module.
+
+ Provides comprehensive observability features including:
+ - OpenTelemetry integration for distributed tracing
+ - Structured JSON logging
+ - Metrics collection (counters, histograms, gauges)
+ - Performance monitoring
+
+ This module follows the OpenTelemetry specification and supports
+ integration with common observability backends (Jaeger, Prometheus,
+ DataDog, etc.).
+
+ Usage:
+     from alma.observability import (
+         get_tracer,
+         get_meter,
+         get_logger,
+         configure_observability,
+         ALMAMetrics,
+     )
+
+     # Initialize observability (typically at app startup)
+     configure_observability(
+         service_name="alma-memory",
+         enable_tracing=True,
+         enable_metrics=True,
+         log_format="json",
+     )
+
+     # Use in code
+     tracer = get_tracer(__name__)
+     with tracer.start_as_current_span("my_operation"):
+         # ... your code
+         pass
+ """
+
+ from alma.observability.config import (
+     ObservabilityConfig,
+     configure_observability,
+     shutdown_observability,
+ )
+ from alma.observability.logging import (
+     JSONFormatter,
+     StructuredLogger,
+     get_logger,
+     setup_logging,
+ )
+ from alma.observability.metrics import (
+     ALMAMetrics,
+     MetricsCollector,
+     get_meter,
+     get_metrics,
+ )
+ from alma.observability.tracing import (
+     SpanKind,
+     TracingContext,
+     get_tracer,
+     trace_async,
+     trace_method,
+ )
+
+ __all__ = [
+     # Configuration
+     "ObservabilityConfig",
+     "configure_observability",
+     "shutdown_observability",
+     # Logging
+     "JSONFormatter",
+     "StructuredLogger",
+     "get_logger",
+     "setup_logging",
+     # Metrics
+     "ALMAMetrics",
+     "MetricsCollector",
+     "get_meter",
+     "get_metrics",
+     # Tracing
+     "SpanKind",
+     "TracingContext",
+     "get_tracer",
+     "trace_method",
+     "trace_async",
+ ]
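
Since shutdown_observability is also exported, a startup/shutdown pairing keeps telemetry from being lost on exit. A minimal sketch under the assumption that the StructuredLogger returned by get_logger exposes stdlib-style methods such as .info() (not confirmed by this diff):

from alma.observability import (
    configure_observability,
    get_logger,
    shutdown_observability,
)


def main() -> None:
    # One-time setup at startup; OTLP export stays off unless an endpoint is given.
    configure_observability(service_name="alma-memory", log_format="json")
    log = get_logger(__name__)  # assumed stdlib-compatible logging interface
    try:
        log.info("alma service starting")
        # ... application work ...
    finally:
        # Flush any buffered spans/metrics before the process exits.
        shutdown_observability()


if __name__ == "__main__":
    main()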
alma/observability/config.py ADDED
@@ -0,0 +1,302 @@
+ """
+ ALMA Observability Configuration.
+
+ Centralized configuration for observability features including
+ tracing, metrics, and logging setup.
+ """
+
+ import logging
+ import os
+ from dataclasses import dataclass, field
+ from typing import Any, Dict, Optional
+
+ # Global state for observability configuration
+ _observability_initialized = False
+ _tracer_provider = None
+ _meter_provider = None
+
+
+ @dataclass
+ class ObservabilityConfig:
+     """
+     Configuration for ALMA observability features.
+
+     Attributes:
+         service_name: Name of the service for tracing/metrics
+         service_version: Version of the service
+         environment: Deployment environment (dev, staging, prod)
+         enable_tracing: Whether to enable distributed tracing
+         enable_metrics: Whether to enable metrics collection
+         enable_logging: Whether to enable structured logging
+         log_level: Logging level (DEBUG, INFO, WARNING, ERROR)
+         log_format: Log format ("json" or "text")
+         otlp_endpoint: OpenTelemetry collector endpoint
+         otlp_headers: Headers for OTLP exporter
+         trace_sample_rate: Sampling rate for traces (0.0-1.0)
+         metric_export_interval_ms: How often to export metrics
+         resource_attributes: Additional resource attributes
+     """
+
+     service_name: str = "alma-memory"
+     service_version: str = "0.5.1"
+     environment: str = field(
+         default_factory=lambda: os.environ.get("ALMA_ENVIRONMENT", "development")
+     )
+     enable_tracing: bool = True
+     enable_metrics: bool = True
+     enable_logging: bool = True
+     log_level: str = field(
+         default_factory=lambda: os.environ.get("ALMA_LOG_LEVEL", "INFO")
+     )
+     log_format: str = field(
+         default_factory=lambda: os.environ.get("ALMA_LOG_FORMAT", "json")
+     )
+     otlp_endpoint: Optional[str] = field(
+         default_factory=lambda: os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
+     )
+     otlp_headers: Dict[str, str] = field(default_factory=dict)
+     trace_sample_rate: float = 1.0
+     metric_export_interval_ms: int = 60000
+     resource_attributes: Dict[str, str] = field(default_factory=dict)
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert config to dictionary."""
+         return {
+             "service_name": self.service_name,
+             "service_version": self.service_version,
+             "environment": self.environment,
+             "enable_tracing": self.enable_tracing,
+             "enable_metrics": self.enable_metrics,
+             "enable_logging": self.enable_logging,
+             "log_level": self.log_level,
+             "log_format": self.log_format,
+             "otlp_endpoint": self.otlp_endpoint,
+             "trace_sample_rate": self.trace_sample_rate,
+             "metric_export_interval_ms": self.metric_export_interval_ms,
+         }
+
+
+ def configure_observability(
+     service_name: str = "alma-memory",
+     service_version: str = "0.5.1",
+     environment: Optional[str] = None,
+     enable_tracing: bool = True,
+     enable_metrics: bool = True,
+     enable_logging: bool = True,
+     log_level: str = "INFO",
+     log_format: str = "json",
+     otlp_endpoint: Optional[str] = None,
+     trace_sample_rate: float = 1.0,
+     resource_attributes: Optional[Dict[str, str]] = None,
+ ) -> ObservabilityConfig:
+     """
+     Configure ALMA observability features.
+
+     This function should be called once at application startup to initialize
+     tracing, metrics, and logging.
+
+     Args:
+         service_name: Name of the service
+         service_version: Version of the service
+         environment: Deployment environment
+         enable_tracing: Enable distributed tracing
+         enable_metrics: Enable metrics collection
+         enable_logging: Enable structured logging
+         log_level: Logging level
+         log_format: Log format ("json" or "text")
+         otlp_endpoint: OpenTelemetry collector endpoint
+         trace_sample_rate: Sampling rate for traces
+         resource_attributes: Additional resource attributes
+
+     Returns:
+         ObservabilityConfig with applied settings
+     """
+     global _observability_initialized, _tracer_provider, _meter_provider
+
+     config = ObservabilityConfig(
+         service_name=service_name,
+         service_version=service_version,
+         environment=environment or os.environ.get("ALMA_ENVIRONMENT", "development"),
+         enable_tracing=enable_tracing,
+         enable_metrics=enable_metrics,
+         enable_logging=enable_logging,
+         log_level=log_level,
+         log_format=log_format,
+         otlp_endpoint=otlp_endpoint,
+         trace_sample_rate=trace_sample_rate,
+         resource_attributes=resource_attributes or {},
+     )
+
+     # Setup logging first
+     if config.enable_logging:
+         from alma.observability.logging import setup_logging
+
+         setup_logging(
+             level=config.log_level,
+             format_type=config.log_format,
+             service_name=config.service_name,
+         )
+
+     # Setup tracing
+     if config.enable_tracing:
+         _tracer_provider = _setup_tracing(config)
+
+     # Setup metrics
+     if config.enable_metrics:
+         _meter_provider = _setup_metrics(config)
+
+     _observability_initialized = True
+
+     logger = logging.getLogger(__name__)
+     logger.info(
+         "ALMA observability configured",
+         extra={
+             "service_name": config.service_name,
+             "environment": config.environment,
+             "tracing_enabled": config.enable_tracing,
+             "metrics_enabled": config.enable_metrics,
+         },
+     )
+
+     return config
+
+
+ def _setup_tracing(config: ObservabilityConfig):
+     """Setup OpenTelemetry tracing."""
+     try:
+         from opentelemetry import trace
+         from opentelemetry.sdk.resources import Resource
+         from opentelemetry.sdk.trace import TracerProvider
+         from opentelemetry.sdk.trace.sampling import TraceIdRatioBased
+
+         # Build resource attributes
+         resource_attrs = {
+             "service.name": config.service_name,
+             "service.version": config.service_version,
+             "deployment.environment": config.environment,
+         }
+         resource_attrs.update(config.resource_attributes)
+
+         resource = Resource.create(resource_attrs)
+
+         # Create sampler
+         sampler = TraceIdRatioBased(config.trace_sample_rate)
+
+         # Create and set tracer provider
+         provider = TracerProvider(resource=resource, sampler=sampler)
+
+         # Add OTLP exporter if endpoint is configured
+         if config.otlp_endpoint:
+             try:
+                 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
+                     OTLPSpanExporter,
+                 )
+                 from opentelemetry.sdk.trace.export import BatchSpanProcessor
+
+                 otlp_exporter = OTLPSpanExporter(
+                     endpoint=config.otlp_endpoint,
+                     headers=config.otlp_headers or {},
+                 )
+                 provider.add_span_processor(BatchSpanProcessor(otlp_exporter))
+             except ImportError:
+                 logging.getLogger(__name__).warning(
+                     "OTLP exporter not available. Install with: "
+                     "pip install opentelemetry-exporter-otlp-proto-grpc"
+                 )
+
+         trace.set_tracer_provider(provider)
+         return provider
+
+     except ImportError:
+         logging.getLogger(__name__).warning(
+             "OpenTelemetry SDK not available. Tracing disabled. "
+             "Install with: pip install opentelemetry-sdk"
+         )
+         return None
+
+
+ def _setup_metrics(config: ObservabilityConfig):
+     """Setup OpenTelemetry metrics."""
+     try:
+         from opentelemetry import metrics
+         from opentelemetry.sdk.metrics import MeterProvider
+         from opentelemetry.sdk.resources import Resource
+
+         # Build resource attributes
+         resource_attrs = {
+             "service.name": config.service_name,
+             "service.version": config.service_version,
+             "deployment.environment": config.environment,
+         }
+         resource_attrs.update(config.resource_attributes)
+
+         resource = Resource.create(resource_attrs)
+
+         # Create meter provider
+         provider = MeterProvider(resource=resource)
+
+         # Add OTLP exporter if endpoint is configured
+         if config.otlp_endpoint:
+             try:
+                 from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
+                     OTLPMetricExporter,
+                 )
+                 from opentelemetry.sdk.metrics.export import (
+                     PeriodicExportingMetricReader,
+                 )
+
+                 otlp_exporter = OTLPMetricExporter(
+                     endpoint=config.otlp_endpoint,
+                     headers=config.otlp_headers or {},
+                 )
+                 reader = PeriodicExportingMetricReader(
+                     otlp_exporter,
+                     export_interval_millis=config.metric_export_interval_ms,
+                 )
+                 provider = MeterProvider(resource=resource, metric_readers=[reader])
+             except ImportError:
+                 logging.getLogger(__name__).warning(
+                     "OTLP metric exporter not available. Install with: "
+                     "pip install opentelemetry-exporter-otlp-proto-grpc"
+                 )
+
+         metrics.set_meter_provider(provider)
+         return provider
+
+     except ImportError:
+         logging.getLogger(__name__).warning(
+             "OpenTelemetry SDK not available. Metrics disabled. "
+             "Install with: pip install opentelemetry-sdk"
+         )
+         return None
+
+
+ def shutdown_observability():
+     """
+     Shutdown observability providers.
+
+     Should be called at application shutdown to ensure all telemetry
+     data is exported.
+     """
+     global _observability_initialized, _tracer_provider, _meter_provider
+
+     if _tracer_provider is not None:
+         try:
+             _tracer_provider.shutdown()
+         except Exception as e:
+             logging.getLogger(__name__).error(f"Error shutting down tracer: {e}")
+
+     if _meter_provider is not None:
+         try:
+             _meter_provider.shutdown()
+         except Exception as e:
+             logging.getLogger(__name__).error(f"Error shutting down meter: {e}")
+
+     _observability_initialized = False
+     _tracer_provider = None
+     _meter_provider = None
+
+
+ def is_observability_initialized() -> bool:
+     """Check if observability has been initialized."""
+     return _observability_initialized
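
Because ObservabilityConfig's field defaults read ALMA_ENVIRONMENT, ALMA_LOG_LEVEL, ALMA_LOG_FORMAT, and OTEL_EXPORTER_OTLP_ENDPOINT, configuration can be driven from the environment. A minimal sketch of environment-driven setup; the endpoint and sample rate below are illustrative values, and tracing/metrics degrade to a logged warning if the OpenTelemetry SDK or OTLP exporter is not installed:

import os

from alma.observability.config import configure_observability

# Illustrative values only; these variables are read by ObservabilityConfig defaults.
os.environ.setdefault("ALMA_ENVIRONMENT", "staging")
os.environ.setdefault("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317")

config = configure_observability(
    service_name="alma-memory",
    environment=os.environ.get("ALMA_ENVIRONMENT"),
    otlp_endpoint=os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"),
    trace_sample_rate=0.25,  # sample 25% of traces outside development
)
print(config.to_dict())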