alma-memory 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alma/__init__.py +296 -194
- alma/compression/__init__.py +33 -0
- alma/compression/pipeline.py +980 -0
- alma/confidence/__init__.py +47 -47
- alma/confidence/engine.py +540 -540
- alma/confidence/types.py +351 -351
- alma/config/loader.py +157 -157
- alma/consolidation/__init__.py +23 -23
- alma/consolidation/engine.py +678 -678
- alma/consolidation/prompts.py +84 -84
- alma/core.py +1189 -322
- alma/domains/__init__.py +30 -30
- alma/domains/factory.py +359 -359
- alma/domains/schemas.py +448 -448
- alma/domains/types.py +272 -272
- alma/events/__init__.py +75 -75
- alma/events/emitter.py +285 -284
- alma/events/storage_mixin.py +246 -246
- alma/events/types.py +126 -126
- alma/events/webhook.py +425 -425
- alma/exceptions.py +49 -49
- alma/extraction/__init__.py +31 -31
- alma/extraction/auto_learner.py +265 -264
- alma/extraction/extractor.py +420 -420
- alma/graph/__init__.py +106 -81
- alma/graph/backends/__init__.py +32 -18
- alma/graph/backends/kuzu.py +624 -0
- alma/graph/backends/memgraph.py +432 -0
- alma/graph/backends/memory.py +236 -236
- alma/graph/backends/neo4j.py +417 -417
- alma/graph/base.py +159 -159
- alma/graph/extraction.py +198 -198
- alma/graph/store.py +860 -860
- alma/harness/__init__.py +35 -35
- alma/harness/base.py +386 -386
- alma/harness/domains.py +705 -705
- alma/initializer/__init__.py +37 -37
- alma/initializer/initializer.py +418 -418
- alma/initializer/types.py +250 -250
- alma/integration/__init__.py +62 -62
- alma/integration/claude_agents.py +444 -432
- alma/integration/helena.py +423 -423
- alma/integration/victor.py +471 -471
- alma/learning/__init__.py +101 -86
- alma/learning/decay.py +878 -0
- alma/learning/forgetting.py +1446 -1446
- alma/learning/heuristic_extractor.py +390 -390
- alma/learning/protocols.py +374 -374
- alma/learning/validation.py +346 -346
- alma/mcp/__init__.py +123 -45
- alma/mcp/__main__.py +156 -156
- alma/mcp/resources.py +122 -122
- alma/mcp/server.py +955 -591
- alma/mcp/tools.py +3254 -511
- alma/observability/__init__.py +91 -0
- alma/observability/config.py +302 -0
- alma/observability/guidelines.py +170 -0
- alma/observability/logging.py +424 -0
- alma/observability/metrics.py +583 -0
- alma/observability/tracing.py +440 -0
- alma/progress/__init__.py +21 -21
- alma/progress/tracker.py +607 -607
- alma/progress/types.py +250 -250
- alma/retrieval/__init__.py +134 -53
- alma/retrieval/budget.py +525 -0
- alma/retrieval/cache.py +1304 -1061
- alma/retrieval/embeddings.py +202 -202
- alma/retrieval/engine.py +850 -366
- alma/retrieval/modes.py +365 -0
- alma/retrieval/progressive.py +560 -0
- alma/retrieval/scoring.py +344 -344
- alma/retrieval/trust_scoring.py +637 -0
- alma/retrieval/verification.py +797 -0
- alma/session/__init__.py +19 -19
- alma/session/manager.py +442 -399
- alma/session/types.py +288 -288
- alma/storage/__init__.py +101 -61
- alma/storage/archive.py +233 -0
- alma/storage/azure_cosmos.py +1259 -1048
- alma/storage/base.py +1083 -525
- alma/storage/chroma.py +1443 -1443
- alma/storage/constants.py +103 -0
- alma/storage/file_based.py +614 -619
- alma/storage/migrations/__init__.py +21 -0
- alma/storage/migrations/base.py +321 -0
- alma/storage/migrations/runner.py +323 -0
- alma/storage/migrations/version_stores.py +337 -0
- alma/storage/migrations/versions/__init__.py +11 -0
- alma/storage/migrations/versions/v1_0_0.py +373 -0
- alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
- alma/storage/pinecone.py +1080 -1080
- alma/storage/postgresql.py +1948 -1452
- alma/storage/qdrant.py +1306 -1306
- alma/storage/sqlite_local.py +3041 -1358
- alma/testing/__init__.py +46 -0
- alma/testing/factories.py +301 -0
- alma/testing/mocks.py +389 -0
- alma/types.py +292 -264
- alma/utils/__init__.py +19 -0
- alma/utils/tokenizer.py +521 -0
- alma/workflow/__init__.py +83 -0
- alma/workflow/artifacts.py +170 -0
- alma/workflow/checkpoint.py +311 -0
- alma/workflow/context.py +228 -0
- alma/workflow/outcomes.py +189 -0
- alma/workflow/reducers.py +393 -0
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/METADATA +244 -72
- alma_memory-0.7.0.dist-info/RECORD +112 -0
- alma_memory-0.5.0.dist-info/RECORD +0 -76
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ALMA Observability Module.
|
|
3
|
+
|
|
4
|
+
Provides comprehensive observability features including:
|
|
5
|
+
- OpenTelemetry integration for distributed tracing
|
|
6
|
+
- Structured JSON logging
|
|
7
|
+
- Metrics collection (counters, histograms, gauges)
|
|
8
|
+
- Performance monitoring
|
|
9
|
+
|
|
10
|
+
This module follows the OpenTelemetry specification and supports
|
|
11
|
+
integration with common observability backends (Jaeger, Prometheus,
|
|
12
|
+
DataDog, etc.).
|
|
13
|
+
|
|
14
|
+
Usage:
|
|
15
|
+
from alma.observability import (
|
|
16
|
+
get_tracer,
|
|
17
|
+
get_meter,
|
|
18
|
+
get_logger,
|
|
19
|
+
configure_observability,
|
|
20
|
+
ALMAMetrics,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Initialize observability (typically at app startup)
|
|
24
|
+
configure_observability(
|
|
25
|
+
service_name="alma-memory",
|
|
26
|
+
enable_tracing=True,
|
|
27
|
+
enable_metrics=True,
|
|
28
|
+
log_format="json",
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Use in code
|
|
32
|
+
tracer = get_tracer(__name__)
|
|
33
|
+
with tracer.start_as_current_span("my_operation"):
|
|
34
|
+
# ... your code
|
|
35
|
+
pass
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
from alma.observability.config import (
|
|
39
|
+
ObservabilityConfig,
|
|
40
|
+
configure_observability,
|
|
41
|
+
shutdown_observability,
|
|
42
|
+
)
|
|
43
|
+
from alma.observability.guidelines import (
|
|
44
|
+
OPERATION_LOG_LEVELS,
|
|
45
|
+
get_recommended_level,
|
|
46
|
+
)
|
|
47
|
+
from alma.observability.logging import (
|
|
48
|
+
JSONFormatter,
|
|
49
|
+
StructuredLogger,
|
|
50
|
+
get_logger,
|
|
51
|
+
setup_logging,
|
|
52
|
+
)
|
|
53
|
+
from alma.observability.metrics import (
|
|
54
|
+
ALMAMetrics,
|
|
55
|
+
MetricsCollector,
|
|
56
|
+
get_meter,
|
|
57
|
+
get_metrics,
|
|
58
|
+
)
|
|
59
|
+
from alma.observability.tracing import (
|
|
60
|
+
SpanKind,
|
|
61
|
+
TracingContext,
|
|
62
|
+
get_tracer,
|
|
63
|
+
trace_async,
|
|
64
|
+
trace_method,
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# Public API of ``alma.observability``. Entries are grouped by the submodule
# each name is imported from (config, logging, metrics, tracing, guidelines).
__all__ = [
    # Configuration
    "ObservabilityConfig",
    "configure_observability",
    "shutdown_observability",
    # Logging
    "JSONFormatter",
    "StructuredLogger",
    "get_logger",
    "setup_logging",
    # Metrics
    "ALMAMetrics",
    "MetricsCollector",
    "get_meter",
    "get_metrics",
    # Tracing
    "SpanKind",
    "TracingContext",
    "get_tracer",
    "trace_method",
    "trace_async",
    # Guidelines
    "OPERATION_LOG_LEVELS",
    "get_recommended_level",
]
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ALMA Observability Configuration.
|
|
3
|
+
|
|
4
|
+
Centralized configuration for observability features including
|
|
5
|
+
tracing, metrics, and logging setup.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from typing import Any, Dict, Optional
|
|
12
|
+
|
|
13
|
+
# Global state for observability configuration
|
|
14
|
+
_observability_initialized = False
|
|
15
|
+
_tracer_provider = None
|
|
16
|
+
_meter_provider = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ObservabilityConfig:
    """
    Configuration for ALMA observability features.

    Fields declared with ``default_factory`` read their default from the
    process environment each time an instance is created without an explicit
    value: ``ALMA_ENVIRONMENT`` (fallback ``"development"``),
    ``ALMA_LOG_LEVEL`` (fallback ``"INFO"``), ``ALMA_LOG_FORMAT`` (fallback
    ``"json"``) and ``OTEL_EXPORTER_OTLP_ENDPOINT`` (fallback ``None``).

    Attributes:
        service_name: Name of the service for tracing/metrics
        service_version: Version of the service
        environment: Deployment environment (dev, staging, prod)
        enable_tracing: Whether to enable distributed tracing
        enable_metrics: Whether to enable metrics collection
        enable_logging: Whether to enable structured logging
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR)
        log_format: Log format ("json" or "text")
        otlp_endpoint: OpenTelemetry collector endpoint
        otlp_headers: Headers for OTLP exporter
        trace_sample_rate: Sampling rate for traces (0.0-1.0)
        metric_export_interval_ms: How often to export metrics
        resource_attributes: Additional resource attributes
    """

    service_name: str = "alma-memory"
    # NOTE(review): hard-coded default; this module is new in the 0.7.0 wheel,
    # so "0.5.1" looks stale - confirm before relying on it in telemetry.
    service_version: str = "0.5.1"
    environment: str = field(
        default_factory=lambda: os.environ.get("ALMA_ENVIRONMENT", "development")
    )
    enable_tracing: bool = True
    enable_metrics: bool = True
    enable_logging: bool = True
    log_level: str = field(
        default_factory=lambda: os.environ.get("ALMA_LOG_LEVEL", "INFO")
    )
    log_format: str = field(
        default_factory=lambda: os.environ.get("ALMA_LOG_FORMAT", "json")
    )
    otlp_endpoint: Optional[str] = field(
        default_factory=lambda: os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
    )
    otlp_headers: Dict[str, str] = field(default_factory=dict)
    trace_sample_rate: float = 1.0
    metric_export_interval_ms: int = 60000
    resource_attributes: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert config to dictionary.

        Returns:
            A new dict keyed by field name. ``resource_attributes`` is
            returned as a shallow copy so mutating the result does not alter
            this config. ``otlp_headers`` is not included.
        """
        return {
            "service_name": self.service_name,
            "service_version": self.service_version,
            "environment": self.environment,
            "enable_tracing": self.enable_tracing,
            "enable_metrics": self.enable_metrics,
            "enable_logging": self.enable_logging,
            "log_level": self.log_level,
            "log_format": self.log_format,
            "otlp_endpoint": self.otlp_endpoint,
            "trace_sample_rate": self.trace_sample_rate,
            "metric_export_interval_ms": self.metric_export_interval_ms,
            # Previously dropped, which lost user-supplied attributes when a
            # config was serialized and rebuilt from the result.
            "resource_attributes": dict(self.resource_attributes),
            # NOTE(review): otlp_headers stays omitted, as before; presumably
            # because exporter headers can carry credentials - confirm.
        }
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def configure_observability(
    service_name: str = "alma-memory",
    service_version: str = "0.5.1",
    environment: Optional[str] = None,
    enable_tracing: bool = True,
    enable_metrics: bool = True,
    enable_logging: bool = True,
    log_level: str = "INFO",
    log_format: str = "json",
    otlp_endpoint: Optional[str] = None,
    trace_sample_rate: float = 1.0,
    resource_attributes: Optional[Dict[str, str]] = None,
) -> ObservabilityConfig:
    """
    Configure ALMA observability features.

    This function should be called once at application startup to initialize
    tracing, metrics, and logging. Logging is set up first, then tracing,
    then metrics; the resulting providers are stored in module-level state
    and the module is marked as initialized.

    Args:
        service_name: Name of the service
        service_version: Version of the service
        environment: Deployment environment. When falsy, falls back to the
            ``ALMA_ENVIRONMENT`` environment variable, then "development".
        enable_tracing: Enable distributed tracing
        enable_metrics: Enable metrics collection
        enable_logging: Enable structured logging
        log_level: Logging level. The value given here (default "INFO") is
            always passed to the config, so ``ALMA_LOG_LEVEL`` is not
            consulted by this function.
        log_format: Log format ("json" or "text"). As with ``log_level``,
            ``ALMA_LOG_FORMAT`` is not consulted by this function.
        otlp_endpoint: OpenTelemetry collector endpoint. When falsy, falls
            back to the ``OTEL_EXPORTER_OTLP_ENDPOINT`` environment variable.
        trace_sample_rate: Sampling rate for traces
        resource_attributes: Additional resource attributes

    Returns:
        ObservabilityConfig with applied settings
    """
    global _observability_initialized, _tracer_provider, _meter_provider

    config = ObservabilityConfig(
        service_name=service_name,
        service_version=service_version,
        environment=environment or os.environ.get("ALMA_ENVIRONMENT", "development"),
        enable_tracing=enable_tracing,
        enable_metrics=enable_metrics,
        enable_logging=enable_logging,
        log_level=log_level,
        log_format=log_format,
        # Passing None straight through would bypass the dataclass'
        # environment-based default, so resolve the same variable here.
        otlp_endpoint=otlp_endpoint or os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"),
        trace_sample_rate=trace_sample_rate,
        resource_attributes=resource_attributes or {},
    )

    # Setup logging first so the setup steps below log through it
    if config.enable_logging:
        # Imported lazily, only when logging is actually enabled
        from alma.observability.logging import setup_logging

        setup_logging(
            level=config.log_level,
            format_type=config.log_format,
            service_name=config.service_name,
        )

    # Setup tracing
    if config.enable_tracing:
        _tracer_provider = _setup_tracing(config)

    # Setup metrics
    if config.enable_metrics:
        _meter_provider = _setup_metrics(config)

    # Set unconditionally: the flag records that configuration ran, not that
    # every provider was created.
    _observability_initialized = True

    logger = logging.getLogger(__name__)
    logger.info(
        "ALMA observability configured",
        extra={
            "service_name": config.service_name,
            "environment": config.environment,
            "tracing_enabled": config.enable_tracing,
            "metrics_enabled": config.enable_metrics,
        },
    )

    return config
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _setup_tracing(config: ObservabilityConfig):
    """
    Create an OpenTelemetry tracer provider from ``config`` and install it.

    The provider is built with a resource describing the service (name,
    version, deployment environment, plus ``config.resource_attributes``) and
    a ``TraceIdRatioBased`` sampler using ``config.trace_sample_rate``. When
    ``config.otlp_endpoint`` is set, a batching OTLP/gRPC span exporter is
    attached; if that exporter package cannot be imported a warning is logged
    and the provider is still installed without it.

    Args:
        config: Observability settings to apply.

    Returns:
        The provider passed to ``trace.set_tracer_provider``, or ``None``
        (after logging a warning) when an ImportError escapes the setup.
    """
    try:
        from opentelemetry import trace
        from opentelemetry.sdk.resources import Resource
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.sdk.trace.sampling import TraceIdRatioBased

        # Standard attributes first; caller-supplied ones override on clashes
        attributes = {
            "service.name": config.service_name,
            "service.version": config.service_version,
            "deployment.environment": config.environment,
        }
        attributes.update(config.resource_attributes)

        # Resource and sampler are built inline, in that order
        tracer_provider = TracerProvider(
            resource=Resource.create(attributes),
            sampler=TraceIdRatioBased(config.trace_sample_rate),
        )

        # Export spans only when a collector endpoint is configured
        endpoint = config.otlp_endpoint
        if endpoint:
            try:
                from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
                    OTLPSpanExporter,
                )
                from opentelemetry.sdk.trace.export import BatchSpanProcessor

                span_exporter = OTLPSpanExporter(
                    endpoint=endpoint,
                    headers=config.otlp_headers or {},
                )
                span_processor = BatchSpanProcessor(span_exporter)
                tracer_provider.add_span_processor(span_processor)
            except ImportError:
                logging.getLogger(__name__).warning(
                    "OTLP exporter not available. Install with: "
                    "pip install opentelemetry-exporter-otlp-proto-grpc"
                )

        trace.set_tracer_provider(tracer_provider)
        return tracer_provider

    except ImportError:
        logging.getLogger(__name__).warning(
            "OpenTelemetry SDK not available. Tracing disabled. "
            "Install with: pip install opentelemetry-sdk"
        )
        return None
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _setup_metrics(config: ObservabilityConfig):
    """
    Setup OpenTelemetry metrics.

    Builds a resource describing the service (name, version, deployment
    environment, plus ``config.resource_attributes``), creates a single
    ``MeterProvider`` for it and installs it via ``metrics.set_meter_provider``.
    When ``config.otlp_endpoint`` is set, a periodic OTLP/gRPC metric reader
    exporting every ``config.metric_export_interval_ms`` milliseconds is
    attached; if that exporter package cannot be imported a warning is logged
    and the provider is installed without a reader.

    Args:
        config: Observability settings to apply.

    Returns:
        The installed provider, or ``None`` (after logging a warning) when an
        ImportError escapes the setup.
    """
    try:
        from opentelemetry import metrics
        from opentelemetry.sdk.metrics import MeterProvider
        from opentelemetry.sdk.resources import Resource

        # Build resource attributes; caller-supplied ones override on clashes
        resource_attrs = {
            "service.name": config.service_name,
            "service.version": config.service_version,
            "deployment.environment": config.environment,
        }
        resource_attrs.update(config.resource_attributes)

        resource = Resource.create(resource_attrs)

        # Collect readers first so exactly one provider is constructed.
        # Previously a reader-less provider was created and then discarded
        # (never shut down) whenever the OTLP reader was available.
        metric_readers = []

        # Add OTLP exporter if endpoint is configured
        if config.otlp_endpoint:
            try:
                from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
                    OTLPMetricExporter,
                )
                from opentelemetry.sdk.metrics.export import (
                    PeriodicExportingMetricReader,
                )

                otlp_exporter = OTLPMetricExporter(
                    endpoint=config.otlp_endpoint,
                    headers=config.otlp_headers or {},
                )
                metric_readers.append(
                    PeriodicExportingMetricReader(
                        otlp_exporter,
                        export_interval_millis=config.metric_export_interval_ms,
                    )
                )
            except ImportError:
                logging.getLogger(__name__).warning(
                    "OTLP metric exporter not available. Install with: "
                    "pip install opentelemetry-exporter-otlp-proto-grpc"
                )

        # Create meter provider (with the OTLP reader when one was built)
        provider = MeterProvider(resource=resource, metric_readers=metric_readers)

        metrics.set_meter_provider(provider)
        return provider

    except ImportError:
        logging.getLogger(__name__).warning(
            "OpenTelemetry SDK not available. Metrics disabled. "
            "Install with: pip install opentelemetry-sdk"
        )
        return None
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def shutdown_observability() -> None:
    """
    Shutdown observability providers.

    Should be called at application shutdown to ensure all telemetry
    data is exported. Each stored provider's ``shutdown()`` is attempted
    independently; a failure is logged (with traceback) and does not prevent
    the other provider from being shut down. Afterwards the module-level
    provider references are cleared and the initialized flag is reset.
    """
    global _observability_initialized, _tracer_provider, _meter_provider

    logger = logging.getLogger(__name__)

    # Tracer first, then meter - same order as before.
    for label, provider in (
        ("tracer", _tracer_provider),
        ("meter", _meter_provider),
    ):
        if provider is None:
            continue
        try:
            provider.shutdown()
        except Exception as e:
            # Best-effort at a shutdown boundary: record the failure with its
            # traceback and keep going so the remaining state is still reset.
            logger.error(f"Error shutting down {label}: {e}", exc_info=True)

    _observability_initialized = False
    _tracer_provider = None
    _meter_provider = None
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def is_observability_initialized() -> bool:
    """
    Report whether observability is currently marked as initialized.

    Returns:
        The module-level flag that ``configure_observability`` sets to True
        and ``shutdown_observability`` resets to False.
    """
    initialized = _observability_initialized
    return initialized
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ALMA Logging Level Guidelines.
|
|
3
|
+
|
|
4
|
+
This module documents the standardized logging levels for ALMA.
|
|
5
|
+
All modules should follow these guidelines for consistent logging.
|
|
6
|
+
|
|
7
|
+
Logging Level Standards
|
|
8
|
+
=======================
|
|
9
|
+
|
|
10
|
+
DEBUG
|
|
11
|
+
-----
|
|
12
|
+
Use for detailed diagnostic information useful during development
|
|
13
|
+
and debugging. This level should NOT be enabled in production under
|
|
14
|
+
normal circumstances.
|
|
15
|
+
|
|
16
|
+
Examples:
|
|
17
|
+
- Cache hits/misses for individual queries
|
|
18
|
+
- Internal state changes (e.g., "Updated work item: {item_id}")
|
|
19
|
+
- Function entry/exit with parameters
|
|
20
|
+
- Detailed timing breakdowns
|
|
21
|
+
- Query/embedding details
|
|
22
|
+
|
|
23
|
+
INFO
|
|
24
|
+
----
|
|
25
|
+
Use for high-level operation completion and significant lifecycle events.
|
|
26
|
+
These logs confirm that things are working as expected.
|
|
27
|
+
|
|
28
|
+
Examples:
|
|
29
|
+
- Service startup/shutdown
|
|
30
|
+
- Memory retrieval/learning operations completed (with summary metrics)
|
|
31
|
+
- Scheduled job execution completed
|
|
32
|
+
- Configuration loaded successfully
|
|
33
|
+
- Database/storage connections established
|
|
34
|
+
- Batch operations completed with counts
|
|
35
|
+
|
|
36
|
+
WARNING
|
|
37
|
+
-------
|
|
38
|
+
Use for recoverable issues that may indicate problems but don't prevent
|
|
39
|
+
operation. The system continues to function but something unexpected happened.
|
|
40
|
+
|
|
41
|
+
Examples:
|
|
42
|
+
- Agent has no defined scope, using defaults
|
|
43
|
+
- Optional feature unavailable (e.g., "aiohttp not installed")
|
|
44
|
+
- Failed to retrieve optional data (e.g., git commands timeout)
|
|
45
|
+
- Near-quota conditions
|
|
46
|
+
- Deprecated feature usage
|
|
47
|
+
- Retry attempts
|
|
48
|
+
- Missing optional configuration
|
|
49
|
+
|
|
50
|
+
ERROR
|
|
51
|
+
-----
|
|
52
|
+
Use for failures that need attention. These indicate that an operation
|
|
53
|
+
failed and likely requires investigation or intervention.
|
|
54
|
+
|
|
55
|
+
Examples:
|
|
56
|
+
- Storage operation failures
|
|
57
|
+
- Failed to process required data
|
|
58
|
+
- Authentication/authorization failures
|
|
59
|
+
- Configuration errors preventing operation
|
|
60
|
+
- Unrecoverable API errors
|
|
61
|
+
- Data corruption detected
|
|
62
|
+
- Resource exhaustion
|
|
63
|
+
|
|
64
|
+
CRITICAL
|
|
65
|
+
--------
|
|
66
|
+
Use for severe failures that may cause application shutdown or
|
|
67
|
+
complete loss of functionality. These require immediate attention.
|
|
68
|
+
|
|
69
|
+
Examples:
|
|
70
|
+
- Database connection permanently lost
|
|
71
|
+
- Critical configuration missing
|
|
72
|
+
- Unrecoverable system state
|
|
73
|
+
- Security breach detected
|
|
74
|
+
|
|
75
|
+
Implementation Notes
|
|
76
|
+
====================
|
|
77
|
+
|
|
78
|
+
1. Logger Initialization:
|
|
79
|
+
- Use standard logging: logger = logging.getLogger(__name__)
|
|
80
|
+
- For structured logging features, also use: structured_logger = get_logger(__name__)
|
|
81
|
+
|
|
82
|
+
2. Message Format:
|
|
83
|
+
- Start with action or subject (e.g., "Memory retrieval completed", "Failed to save heuristic")
|
|
84
|
+
- Include relevant context as structured fields
|
|
85
|
+
- Keep messages concise but informative
|
|
86
|
+
|
|
87
|
+
3. Structured Logging:
|
|
88
|
+
- Use structured_logger for operations that benefit from structured fields
|
|
89
|
+
- Pass context as keyword arguments for JSON serialization
|
|
90
|
+
- Example: structured_logger.info("Retrieved memories", agent=agent, count=count)
|
|
91
|
+
|
|
92
|
+
4. Exception Logging:
|
|
93
|
+
- Use logger.error(..., exc_info=True) or logger.exception() for exceptions
|
|
94
|
+
- Include context about what operation was being attempted
|
|
95
|
+
|
|
96
|
+
5. Performance Logging:
|
|
97
|
+
- Log duration for operations > 100ms at INFO level
|
|
98
|
+
- Log internal operation timing at DEBUG level
|
|
99
|
+
|
|
100
|
+
Examples
|
|
101
|
+
========
|
|
102
|
+
|
|
103
|
+
# DEBUG - Internal diagnostics
|
|
104
|
+
logger.debug(f"Cache hit for query: {query[:50]}...")
|
|
105
|
+
logger.debug(f"Updated heuristic: {heuristic_id}")
|
|
106
|
+
logger.debug(f"Saved outcome: {outcome.id}")
|
|
107
|
+
|
|
108
|
+
# INFO - Operation completions
|
|
109
|
+
logger.info(f"Memory retrieval completed: {count} items in {duration_ms}ms")
|
|
110
|
+
logger.info(f"Cleanup scheduler started (interval: {interval}s)")
|
|
111
|
+
logger.info("ChromaDB storage closed")
|
|
112
|
+
|
|
113
|
+
# WARNING - Recoverable issues
|
|
114
|
+
logger.warning(f"Agent '{agent}' has no defined scope, using defaults")
|
|
115
|
+
logger.warning("aiohttp not installed - webhook delivery unavailable")
|
|
116
|
+
logger.warning(f"Failed to retrieve memories: {e}")
|
|
117
|
+
|
|
118
|
+
# ERROR - Failures requiring attention
|
|
119
|
+
logger.error(f"Failed to save heuristic {heuristic_id}: {e}")
|
|
120
|
+
logger.error(f"Redis connection error: {e}")
|
|
121
|
+
logger.error("Config file not found, cannot proceed")
|
|
122
|
+
"""
|
|
123
|
+
|
|
124
|
+
# Log level constants for programmatic use
|
|
125
|
+
import logging
|
|
126
|
+
|
|
127
|
+
# Map of operation types to recommended log levels
|
|
128
|
+
# Lookup table from an operation-type key to the ``logging`` level constant
# recommended for it. Read by ``get_recommended_level``; keys not listed here
# are treated as INFO by that function.
OPERATION_LOG_LEVELS = {
    # Storage operations: successes are DEBUG, failures are ERROR
    "save_success": logging.DEBUG,
    "save_failure": logging.ERROR,
    "batch_save_success": logging.DEBUG,
    "delete_success": logging.DEBUG,
    "delete_failure": logging.ERROR,
    # Retrieval operations: per-query cache events are DEBUG, completion is INFO
    "cache_hit": logging.DEBUG,
    "cache_miss": logging.DEBUG,
    "cache_invalidate": logging.DEBUG,
    "retrieval_complete": logging.INFO,
    # Learning operations
    "learn_complete": logging.INFO,
    "heuristic_updated": logging.DEBUG,
    # Lifecycle events
    "service_start": logging.INFO,
    "service_stop": logging.INFO,
    "connection_established": logging.INFO,
    "config_loaded": logging.INFO,
    # Warnings
    "missing_scope": logging.WARNING,
    "optional_feature_unavailable": logging.WARNING,
    "retry_attempt": logging.WARNING,
    "deprecation": logging.WARNING,
    # Errors
    "operation_failure": logging.ERROR,
    "connection_failure": logging.ERROR,
    "validation_failure": logging.ERROR,
}
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def get_recommended_level(operation: str) -> int:
    """
    Look up the log level recommended for an operation type.

    Args:
        operation: Operation-type key such as "save_success" or "cache_hit".

    Returns:
        The ``logging`` level constant registered for ``operation`` in
        ``OPERATION_LOG_LEVELS``, or ``logging.INFO`` when the key is not
        registered.
    """
    try:
        return OPERATION_LOG_LEVELS[operation]
    except KeyError:
        # Unregistered operation types fall back to INFO
        return logging.INFO
|