asap-protocol 0.1.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- asap/__init__.py +7 -0
- asap/cli.py +220 -0
- asap/errors.py +150 -0
- asap/examples/README.md +25 -0
- asap/examples/__init__.py +1 -0
- asap/examples/coordinator.py +184 -0
- asap/examples/echo_agent.py +100 -0
- asap/examples/run_demo.py +120 -0
- asap/models/__init__.py +146 -0
- asap/models/base.py +55 -0
- asap/models/constants.py +14 -0
- asap/models/entities.py +410 -0
- asap/models/enums.py +71 -0
- asap/models/envelope.py +94 -0
- asap/models/ids.py +55 -0
- asap/models/parts.py +207 -0
- asap/models/payloads.py +423 -0
- asap/models/types.py +39 -0
- asap/observability/__init__.py +43 -0
- asap/observability/logging.py +216 -0
- asap/observability/metrics.py +399 -0
- asap/schemas.py +203 -0
- asap/state/__init__.py +22 -0
- asap/state/machine.py +86 -0
- asap/state/snapshot.py +265 -0
- asap/transport/__init__.py +84 -0
- asap/transport/client.py +399 -0
- asap/transport/handlers.py +444 -0
- asap/transport/jsonrpc.py +190 -0
- asap/transport/middleware.py +359 -0
- asap/transport/server.py +739 -0
- asap_protocol-0.1.0.dist-info/METADATA +251 -0
- asap_protocol-0.1.0.dist-info/RECORD +36 -0
- asap_protocol-0.1.0.dist-info/WHEEL +4 -0
- asap_protocol-0.1.0.dist-info/entry_points.txt +2 -0
- asap_protocol-0.1.0.dist-info/licenses/LICENSE +190 -0
asap/observability/__init__.py
@@ -0,0 +1,43 @@
+"""Observability module for ASAP protocol.
+
+This module provides structured logging, metrics, and observability utilities
+for the ASAP protocol transport layer.
+
+Features:
+- Structured logging with JSON output for production
+- Console output with colors for development
+- Automatic trace_id and correlation_id propagation
+- Logger factory with common context binding
+- Prometheus-compatible metrics collection
+
+Example:
+    >>> from asap.observability import get_logger, get_metrics
+    >>>
+    >>> logger = get_logger(__name__)
+    >>> logger.info("asap.request.received", envelope_id="env_123", payload_type="task.request")
+    >>>
+    >>> metrics = get_metrics()
+    >>> metrics.increment_counter("asap_requests_total", {"payload_type": "task.request"})
+"""
+
+from asap.observability.logging import (
+    bind_context,
+    clear_context,
+    configure_logging,
+    get_logger,
+)
+from asap.observability.metrics import (
+    MetricsCollector,
+    get_metrics,
+    reset_metrics,
+)
+
+__all__ = [
+    "bind_context",
+    "clear_context",
+    "configure_logging",
+    "get_logger",
+    "get_metrics",
+    "reset_metrics",
+    "MetricsCollector",
+]
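
Taken together, these re-exports make `asap.observability` the single import surface for both logging and metrics. A minimal end-to-end sketch built only from the names in `__all__` above (the event names, trace id, and label values are illustrative, not defined by the package):

```python
from asap.observability import (
    bind_context,
    clear_context,
    configure_logging,
    get_logger,
    get_metrics,
)

# One-time setup at process start; "json" would be the production choice.
configure_logging(log_format="json", log_level="INFO")

logger = get_logger(__name__)
metrics = get_metrics()

# Bind request-scoped context so every log line below carries the trace_id.
bind_context(trace_id="trace_123")  # illustrative value
try:
    logger.info("asap.request.received", envelope_id="env_123")
    metrics.increment_counter("asap_requests_total", {"payload_type": "task.request"})
finally:
    clear_context()  # drop the bound context once the request is done
```
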
asap/observability/logging.py
@@ -0,0 +1,216 @@
+"""Structured logging configuration for ASAP protocol.
+
+This module configures structlog for structured logging with support for
+both development (console) and production (JSON) output formats.
+
+The logging configuration includes:
+- JSON renderer for production environments
+- Console renderer with colors for development
+- Automatic timestamp and log level injection
+- Context binding for trace_id, correlation_id, etc.
+
+Environment Variables:
+    ASAP_LOG_FORMAT: Set to "json" for JSON output, "console" for colored output
+    ASAP_LOG_LEVEL: Set log level (DEBUG, INFO, WARNING, ERROR)
+    ASAP_SERVICE_NAME: Service name to include in logs
+
+Example:
+    >>> from asap.observability.logging import get_logger, configure_logging
+    >>>
+    >>> # Configure logging (typically done once at startup)
+    >>> configure_logging(log_format="json", log_level="INFO")
+    >>>
+    >>> # Get a logger and use it
+    >>> logger = get_logger("asap.transport.server")
+    >>> logger.info("request.received", envelope_id="env_123")
+"""
+
+import logging
+import os
+import sys
+from typing import Any
+
+import structlog
+from structlog.typing import Processor
+
+# Default configuration
+DEFAULT_LOG_LEVEL = "INFO"
+DEFAULT_LOG_FORMAT = "console"
+DEFAULT_SERVICE_NAME = "asap-protocol"
+
+# Environment variable names
+ENV_LOG_FORMAT = "ASAP_LOG_FORMAT"
+ENV_LOG_LEVEL = "ASAP_LOG_LEVEL"
+ENV_SERVICE_NAME = "ASAP_SERVICE_NAME"
+
+# Module-level flag to track if logging has been configured
+_logging_configured = False
+
+
+def _get_log_level() -> str:
+    """Get log level from environment or use default."""
+    return os.environ.get(ENV_LOG_LEVEL, DEFAULT_LOG_LEVEL).upper()
+
+
+def _get_log_format() -> str:
+    """Get log format from environment or use default."""
+    return os.environ.get(ENV_LOG_FORMAT, DEFAULT_LOG_FORMAT).lower()
+
+
+def _get_service_name() -> str:
+    """Get service name from environment or use default."""
+    return os.environ.get(ENV_SERVICE_NAME, DEFAULT_SERVICE_NAME)
+
+
+def _get_shared_processors() -> list[Processor]:
+    """Get shared processors for all log formats."""
+    return [
+        structlog.contextvars.merge_contextvars,
+        structlog.stdlib.add_log_level,
+        structlog.stdlib.add_logger_name,
+        structlog.stdlib.PositionalArgumentsFormatter(),
+        structlog.processors.TimeStamper(fmt="iso"),
+        structlog.processors.StackInfoRenderer(),
+        structlog.processors.UnicodeDecoder(),
+    ]
+
+
+def _get_console_renderer() -> Processor:
+    """Get console renderer for development."""
+    return structlog.dev.ConsoleRenderer(
+        colors=True,
+        exception_formatter=structlog.dev.plain_traceback,
+    )
+
+
+def _get_json_renderer() -> Processor:
+    """Get JSON renderer for production."""
+    return structlog.processors.JSONRenderer()
+
+
+def configure_logging(
+    log_format: str | None = None,
+    log_level: str | None = None,
+    service_name: str | None = None,
+    force: bool = False,
+) -> None:
+    """Configure structured logging for the application.
+
+    This function sets up structlog with the appropriate processors and
+    renderers based on the specified format.
+
+    Args:
+        log_format: Output format - "json" or "console". Defaults to env var or "console"
+        log_level: Minimum log level. Defaults to env var or "INFO"
+        service_name: Service name for log context. Defaults to env var or "asap-protocol"
+        force: If True, reconfigure even if already configured
+
+    Example:
+        >>> # Configure for production
+        >>> configure_logging(log_format="json", log_level="INFO")
+        >>>
+        >>> # Configure for development
+        >>> configure_logging(log_format="console", log_level="DEBUG")
+    """
+    global _logging_configured
+
+    if _logging_configured and not force:
+        return
+
+    # Get configuration values
+    log_format = log_format or _get_log_format()
+    log_level = log_level or _get_log_level()
+    service_name = service_name or _get_service_name()
+
+    # Get shared processors
+    shared_processors = _get_shared_processors()
+
+    # Add format-specific renderer
+    if log_format == "json":
+        renderer: Processor = _get_json_renderer()
+    else:
+        renderer = _get_console_renderer()
+
+    # Configure structlog
+    structlog.configure(
+        processors=[
+            *shared_processors,
+            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
+        ],
+        logger_factory=structlog.stdlib.LoggerFactory(),
+        wrapper_class=structlog.stdlib.BoundLogger,
+        cache_logger_on_first_use=True,
+    )
+
+    # Configure standard logging
+    formatter = structlog.stdlib.ProcessorFormatter(
+        foreign_pre_chain=shared_processors,
+        processors=[
+            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
+            renderer,
+        ],
+    )
+
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(formatter)
+
+    root_logger = logging.getLogger()
+    root_logger.handlers.clear()
+    root_logger.addHandler(handler)
+    root_logger.setLevel(getattr(logging, log_level))
+
+    # Set service name in context
+    structlog.contextvars.bind_contextvars(service=service_name)
+
+    _logging_configured = True
+
+
+def get_logger(name: str) -> structlog.stdlib.BoundLogger:
+    """Get a structured logger for the given name.
+
+    Creates a bound logger with the given name. If logging has not been
+    configured, it will be configured with default settings.
+
+    Args:
+        name: Logger name (typically __name__ of the module)
+
+    Returns:
+        Bound structlog logger
+
+    Example:
+        >>> logger = get_logger(__name__)
+        >>> logger.info("event.happened", key="value")
+        >>>
+        >>> # With bound context
+        >>> logger = logger.bind(trace_id="trace_123")
+        >>> logger.info("request.processed")  # trace_id automatically included
+    """
+    # Ensure logging is configured
+    if not _logging_configured:
+        configure_logging()
+
+    return structlog.stdlib.get_logger(name)
+
+
+def bind_context(**kwargs: Any) -> None:
+    """Bind context variables that will be included in all subsequent logs.
+
+    This is useful for setting trace_id, correlation_id, or other context
+    that should be included in all logs within the current context.
+
+    Args:
+        **kwargs: Key-value pairs to bind to the log context
+
+    Example:
+        >>> bind_context(trace_id="trace_123", user_id="user_456")
+        >>> logger.info("event")  # Will include trace_id and user_id
+    """
+    structlog.contextvars.bind_contextvars(**kwargs)
+
+
+def clear_context() -> None:
+    """Clear all bound context variables.
+
+    Useful for cleaning up context at the end of a request.
+    """
+    structlog.contextvars.clear_contextvars()
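
Because `configure_logging` falls back to `ASAP_LOG_FORMAT`, `ASAP_LOG_LEVEL`, and `ASAP_SERVICE_NAME` when its arguments are omitted, and returns early once configured unless `force=True`, a deployment can switch output formats without code changes. A short sketch of that behavior (the service name, event fields, and shown output are illustrative):

```python
import os

from asap.observability.logging import configure_logging, get_logger

# Simulate a production deployment that configures logging via the environment.
os.environ["ASAP_LOG_FORMAT"] = "json"
os.environ["ASAP_LOG_LEVEL"] = "WARNING"
os.environ["ASAP_SERVICE_NAME"] = "asap-gateway"  # illustrative name

configure_logging()                               # picks up the three env vars above
configure_logging(log_level="DEBUG")              # no-op: already configured
configure_logging(log_level="DEBUG", force=True)  # explicit reconfiguration

logger = get_logger("asap.transport.server")
logger.warning("request.rejected", envelope_id="env_123")
# Emits one JSON object per line to stdout, roughly:
# {"event": "request.rejected", "envelope_id": "env_123", "level": "warning",
#  "logger": "asap.transport.server", "service": "asap-gateway",
#  "timestamp": "2025-01-01T00:00:00.000000Z"}
```
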
asap/observability/metrics.py
@@ -0,0 +1,399 @@
+"""ASAP Protocol Metrics Collection.
+
+This module provides Prometheus-compatible metrics collection for ASAP servers.
+Metrics are collected during request processing and exposed via the /asap/metrics endpoint.
+
+Supported metric types:
+- Counter: Monotonically increasing values (e.g., total requests)
+- Histogram: Distribution of values with configurable buckets (e.g., latency)
+
+Example:
+    >>> from asap.observability.metrics import MetricsCollector, get_metrics
+    >>> collector = MetricsCollector()
+    >>> collector.increment_counter("asap_requests_total", {"payload_type": "task.request"})
+    >>> collector.observe_histogram("asap_request_duration_seconds", 0.125, {"status": "success"})
+    >>> print(collector.export_prometheus())
+"""
+
+import threading
+import time
+from dataclasses import dataclass, field
+from typing import ClassVar
+
+
+@dataclass
+class Counter:
+    """A monotonically increasing counter metric.
+
+    Attributes:
+        name: Metric name
+        help_text: Human-readable description
+        values: Dictionary mapping label combinations to counts
+    """
+
+    name: str
+    help_text: str
+    values: dict[tuple[tuple[str, str], ...], float] = field(default_factory=dict)
+    _lock: threading.Lock = field(default_factory=threading.Lock, repr=False)
+
+    def increment(self, labels: dict[str, str] | None = None, value: float = 1.0) -> None:
+        """Increment the counter by value.
+
+        Args:
+            labels: Optional label key-value pairs
+            value: Amount to increment (default: 1.0)
+        """
+        label_key = tuple(sorted((labels or {}).items()))
+        with self._lock:
+            self.values[label_key] = self.values.get(label_key, 0.0) + value
+
+    def get(self, labels: dict[str, str] | None = None) -> float:
+        """Get current counter value for labels.
+
+        Args:
+            labels: Optional label key-value pairs
+
+        Returns:
+            Current counter value
+        """
+        label_key = tuple(sorted((labels or {}).items()))
+        with self._lock:
+            return self.values.get(label_key, 0.0)
+
+
+# Default histogram buckets for latency (in seconds)
+DEFAULT_LATENCY_BUCKETS = (0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0)
+
+
+@dataclass
+class Histogram:
+    """A histogram metric for measuring distributions.
+
+    Attributes:
+        name: Metric name
+        help_text: Human-readable description
+        buckets: Upper bounds for histogram buckets
+        values: Dictionary mapping label combinations to bucket counts and sum
+    """
+
+    name: str
+    help_text: str
+    buckets: tuple[float, ...] = DEFAULT_LATENCY_BUCKETS
+    # values[label_key] = {"buckets": {bound: count}, "sum": total, "count": n}
+    values: dict[tuple[tuple[str, str], ...], dict[str, float | dict[float, float]]] = field(
+        default_factory=dict
+    )
+    _lock: threading.Lock = field(default_factory=threading.Lock, repr=False)
+
+    def observe(self, value: float, labels: dict[str, str] | None = None) -> None:
+        """Record an observation in the histogram.
+
+        Args:
+            value: The observed value
+            labels: Optional label key-value pairs
+        """
+        label_key = tuple(sorted((labels or {}).items()))
+        with self._lock:
+            if label_key not in self.values:
+                self.values[label_key] = {
+                    "buckets": dict.fromkeys(self.buckets, 0.0),
+                    "sum": 0.0,
+                    "count": 0.0,
+                }
+
+            data = self.values[label_key]
+            buckets = data["buckets"]
+            if isinstance(buckets, dict):
+                for bound in self.buckets:
+                    if value <= bound:
+                        buckets[bound] += 1.0
+
+            if isinstance(data["sum"], float):
+                data["sum"] += value
+            if isinstance(data["count"], float):
+                data["count"] += 1.0
+
+    def get_count(self, labels: dict[str, str] | None = None) -> float:
+        """Get total observation count for labels.
+
+        Args:
+            labels: Optional label key-value pairs
+
+        Returns:
+            Total observation count
+        """
+        label_key = tuple(sorted((labels or {}).items()))
+        with self._lock:
+            if label_key not in self.values:
+                return 0.0
+            count = self.values[label_key].get("count", 0.0)
+            return count if isinstance(count, float) else 0.0
+
+
+class MetricsCollector:
+    """Collects and exports metrics in Prometheus format.
+
+    This class provides thread-safe metric collection with support for
+    counters and histograms. Metrics are exported in Prometheus text format.
+
+    Example:
+        >>> collector = MetricsCollector()
+        >>> collector.increment_counter(
+        ...     "asap_requests_total",
+        ...     {"payload_type": "task.request", "status": "success"}
+        ... )
+        >>> collector.observe_histogram(
+        ...     "asap_request_duration_seconds",
+        ...     0.125,
+        ...     {"payload_type": "task.request"}
+        ... )
+        >>> print(collector.export_prometheus())
+    """
+
+    # Default metric definitions
+    DEFAULT_COUNTERS: ClassVar[dict[str, str]] = {
+        "asap_requests_total": "Total number of ASAP requests received",
+        "asap_requests_success_total": "Total number of successful ASAP requests",
+        "asap_requests_error_total": "Total number of failed ASAP requests",
+    }
+
+    DEFAULT_HISTOGRAMS: ClassVar[dict[str, str]] = {
+        "asap_request_duration_seconds": "Request processing duration in seconds",
+    }
+
+    def __init__(self) -> None:
+        """Initialize the metrics collector with default metrics."""
+        self._lock = threading.Lock()
+        self._counters: dict[str, Counter] = {}
+        self._histograms: dict[str, Histogram] = {}
+        self._start_time = time.time()
+
+        # Initialize default metrics
+        for name, help_text in self.DEFAULT_COUNTERS.items():
+            self._counters[name] = Counter(name=name, help_text=help_text)
+
+        for name, help_text in self.DEFAULT_HISTOGRAMS.items():
+            self._histograms[name] = Histogram(name=name, help_text=help_text)
+
+    def register_counter(self, name: str, help_text: str) -> None:
+        """Register a new counter metric.
+
+        Args:
+            name: Metric name (should follow Prometheus naming conventions)
+            help_text: Human-readable description
+        """
+        with self._lock:
+            if name not in self._counters:
+                self._counters[name] = Counter(name=name, help_text=help_text)
+
+    def register_histogram(
+        self, name: str, help_text: str, buckets: tuple[float, ...] = DEFAULT_LATENCY_BUCKETS
+    ) -> None:
+        """Register a new histogram metric.
+
+        Args:
+            name: Metric name (should follow Prometheus naming conventions)
+            help_text: Human-readable description
+            buckets: Upper bounds for histogram buckets
+        """
+        with self._lock:
+            if name not in self._histograms:
+                self._histograms[name] = Histogram(name=name, help_text=help_text, buckets=buckets)
+
+    def increment_counter(
+        self, name: str, labels: dict[str, str] | None = None, value: float = 1.0
+    ) -> None:
+        """Increment a counter metric.
+
+        Args:
+            name: Metric name
+            labels: Optional label key-value pairs
+            value: Amount to increment (default: 1.0)
+        """
+        with self._lock:
+            if name in self._counters:
+                self._counters[name].increment(labels, value)
+
+    def observe_histogram(
+        self, name: str, value: float, labels: dict[str, str] | None = None
+    ) -> None:
+        """Record an observation in a histogram.
+
+        Args:
+            name: Metric name
+            value: The observed value
+            labels: Optional label key-value pairs
+        """
+        with self._lock:
+            if name in self._histograms:
+                self._histograms[name].observe(value, labels)
+
+    def get_counter(self, name: str, labels: dict[str, str] | None = None) -> float:
+        """Get current counter value.
+
+        Args:
+            name: Metric name
+            labels: Optional label key-value pairs
+
+        Returns:
+            Current counter value or 0.0 if not found
+        """
+        with self._lock:
+            if name in self._counters:
+                return self._counters[name].get(labels)
+            return 0.0
+
+    def get_histogram_count(self, name: str, labels: dict[str, str] | None = None) -> float:
+        """Get histogram observation count.
+
+        Args:
+            name: Metric name
+            labels: Optional label key-value pairs
+
+        Returns:
+            Total observation count or 0.0 if not found
+        """
+        with self._lock:
+            if name in self._histograms:
+                return self._histograms[name].get_count(labels)
+            return 0.0
+
+    def _format_labels(self, labels: tuple[tuple[str, str], ...]) -> str:
+        """Format labels for Prometheus output.
+
+        Prometheus label values must escape backslashes and double quotes.
+        Backslashes are escaped as \\ and double quotes are escaped as \\".
+
+        Args:
+            labels: Sorted tuple of label key-value pairs
+
+        Returns:
+            Formatted label string (e.g., '{key="value",key2="value2"}')
+        """
+        if not labels:
+            return ""
+
+        def escape_label_value(value: str) -> str:
+            """Escape label value per Prometheus specification."""
+            # Escape backslashes first (to avoid double-escaping)
+            value = value.replace("\\", "\\\\")
+            # Escape double quotes
+            return value.replace('"', '\\"')
+
+        parts = [f'{k}="{escape_label_value(v)}"' for k, v in labels]
+        return "{" + ",".join(parts) + "}"
+
+    def export_prometheus(self) -> str:
+        """Export all metrics in Prometheus text format.
+
+        Returns:
+            Metrics in Prometheus exposition format
+
+        Example:
+            >>> collector = MetricsCollector()
+            >>> collector.increment_counter("asap_requests_total", {"status": "success"})
+            >>> output = collector.export_prometheus()
+            >>> "asap_requests_total" in output
+            True
+        """
+        lines: list[str] = []
+
+        with self._lock:
+            # Export counters
+            for counter in self._counters.values():
+                lines.append(f"# HELP {counter.name} {counter.help_text}")
+                lines.append(f"# TYPE {counter.name} counter")
+                if not counter.values:
+                    # Export zero value if no data
+                    lines.append(f"{counter.name} 0")
+                else:
+                    for label_key, value in counter.values.items():
+                        label_str = self._format_labels(label_key)
+                        lines.append(f"{counter.name}{label_str} {value}")
+
+            # Export histograms
+            for histogram in self._histograms.values():
+                lines.append(f"# HELP {histogram.name} {histogram.help_text}")
+                lines.append(f"# TYPE {histogram.name} histogram")
+                if not histogram.values:
+                    # Export zero values if no data
+                    for bound in histogram.buckets:
+                        lines.append(f'{histogram.name}_bucket{{le="{bound}"}} 0')
+                    lines.append(f'{histogram.name}_bucket{{le="+Inf"}} 0')
+                    lines.append(f"{histogram.name}_sum 0")
+                    lines.append(f"{histogram.name}_count 0")
+                else:
+                    for label_key, data in histogram.values.items():
+                        base_labels = self._format_labels(label_key)
+
+                        # Bucket counts are stored cumulatively by observe(); read directly
+                        buckets = data["buckets"]
+                        cumulative = 0.0
+                        if isinstance(buckets, dict):
+                            for bound in histogram.buckets:
+                                cumulative = buckets.get(bound, 0.0)
+                                if base_labels:
+                                    # Insert le before closing brace
+                                    label_str = base_labels[:-1] + f',le="{bound}"' + "}"
+                                else:
+                                    label_str = f'{{le="{bound}"}}'
+                                lines.append(f"{histogram.name}_bucket{label_str} {cumulative}")
+
+                        # +Inf bucket (total count)
+                        count = data.get("count", 0.0)
+                        if base_labels:
+                            label_str = base_labels[:-1] + ',le="+Inf"}'
+                        else:
+                            label_str = '{le="+Inf"}'
+                        lines.append(f"{histogram.name}_bucket{label_str} {count}")
+
+                        # Sum and count
+                        sum_val = data.get("sum", 0.0)
+                        lines.append(f"{histogram.name}_sum{base_labels} {sum_val}")
+                        lines.append(f"{histogram.name}_count{base_labels} {count}")
+
+        # Add process uptime
+        uptime = time.time() - self._start_time
+        lines.append("# HELP asap_process_uptime_seconds Time since server start")
+        lines.append("# TYPE asap_process_uptime_seconds gauge")
+        lines.append(f"asap_process_uptime_seconds {uptime:.3f}")
+
+        return "\n".join(lines) + "\n"
+
+    def reset(self) -> None:
+        """Reset all metrics to zero. Useful for testing."""
+        with self._lock:
+            for counter in self._counters.values():
+                counter.values.clear()
+            for histogram in self._histograms.values():
+                histogram.values.clear()
+
+
+# Global metrics collector instance
+_metrics_collector: MetricsCollector | None = None
+_collector_lock = threading.Lock()
+
+
+def get_metrics() -> MetricsCollector:
+    """Get the global metrics collector instance.
+
+    Returns:
+        The global MetricsCollector singleton
+
+    Example:
+        >>> metrics = get_metrics()
+        >>> metrics.increment_counter("asap_requests_total")
+    """
+    global _metrics_collector
+    with _collector_lock:
+        if _metrics_collector is None:
+            _metrics_collector = MetricsCollector()
+        return _metrics_collector
+
+
+def reset_metrics() -> None:
+    """Reset the global metrics collector. Useful for testing."""
+    global _metrics_collector
+    with _collector_lock:
+        if _metrics_collector is not None:
+            _metrics_collector.reset()
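
Note on the histogram export: as published, `observe` increments every bucket whose bound is at least the observed value, so the stored per-bucket counts are already cumulative; the export loop above has been adjusted to read them directly rather than summing them a second time, which would double-count observations.

The exposition format can be exercised without running a server, using only the methods defined above. A minimal sketch (the custom metric name, bucket bounds, and label values are illustrative, not part of the package's defaults):

```python
from asap.observability.metrics import MetricsCollector

collector = MetricsCollector()

# Record traffic against the built-in default metrics.
collector.increment_counter("asap_requests_total", {"payload_type": "task.request"})
collector.increment_counter("asap_requests_success_total", {"payload_type": "task.request"})
collector.observe_histogram(
    "asap_request_duration_seconds", 0.042, {"payload_type": "task.request"}
)

# Register and use a custom histogram with byte-sized buckets (illustrative).
collector.register_histogram(
    "asap_snapshot_bytes",
    "Size of persisted state snapshots in bytes",
    buckets=(1_000.0, 10_000.0, 100_000.0, 1_000_000.0),
)
collector.observe_histogram("asap_snapshot_bytes", 2048.0, {"agent": "echo"})

text = collector.export_prometheus()
assert 'asap_requests_total{payload_type="task.request"} 1.0' in text
assert "asap_request_duration_seconds_bucket" in text
print(text)  # Prometheus text format, ready to serve from /asap/metrics
```
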