mantisdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mantisdk might be problematic. Click here for more details.

Files changed (190)
  1. mantisdk/__init__.py +22 -0
  2. mantisdk/adapter/__init__.py +15 -0
  3. mantisdk/adapter/base.py +94 -0
  4. mantisdk/adapter/messages.py +270 -0
  5. mantisdk/adapter/triplet.py +1028 -0
  6. mantisdk/algorithm/__init__.py +39 -0
  7. mantisdk/algorithm/apo/__init__.py +5 -0
  8. mantisdk/algorithm/apo/apo.py +889 -0
  9. mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
  10. mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
  11. mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
  12. mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
  13. mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
  14. mantisdk/algorithm/base.py +162 -0
  15. mantisdk/algorithm/decorator.py +264 -0
  16. mantisdk/algorithm/fast.py +250 -0
  17. mantisdk/algorithm/gepa/__init__.py +59 -0
  18. mantisdk/algorithm/gepa/adapter.py +459 -0
  19. mantisdk/algorithm/gepa/gepa.py +364 -0
  20. mantisdk/algorithm/gepa/lib/__init__.py +18 -0
  21. mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
  22. mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
  23. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
  24. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
  25. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
  26. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
  27. mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
  28. mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
  29. mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
  30. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
  31. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
  32. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
  33. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
  34. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
  35. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
  36. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
  37. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
  38. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
  39. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
  40. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
  41. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
  42. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
  43. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
  44. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
  45. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
  46. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
  47. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
  48. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
  49. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
  50. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
  51. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
  52. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
  53. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
  54. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
  55. mantisdk/algorithm/gepa/lib/api.py +375 -0
  56. mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
  57. mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
  58. mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
  59. mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
  60. mantisdk/algorithm/gepa/lib/core/result.py +233 -0
  61. mantisdk/algorithm/gepa/lib/core/state.py +636 -0
  62. mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
  63. mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
  64. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
  65. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
  66. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
  67. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
  68. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
  69. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
  70. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
  71. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
  72. mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
  73. mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
  74. mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
  75. mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
  76. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
  77. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
  78. mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
  79. mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
  80. mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
  81. mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
  82. mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
  83. mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
  84. mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
  85. mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
  86. mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
  87. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
  88. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
  89. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
  90. mantisdk/algorithm/gepa/lib/py.typed +0 -0
  91. mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
  92. mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
  93. mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
  94. mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
  95. mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
  96. mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
  97. mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
  98. mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
  99. mantisdk/algorithm/gepa/tracing.py +105 -0
  100. mantisdk/algorithm/utils.py +177 -0
  101. mantisdk/algorithm/verl/__init__.py +5 -0
  102. mantisdk/algorithm/verl/interface.py +202 -0
  103. mantisdk/cli/__init__.py +56 -0
  104. mantisdk/cli/prometheus.py +115 -0
  105. mantisdk/cli/store.py +131 -0
  106. mantisdk/cli/vllm.py +29 -0
  107. mantisdk/client.py +408 -0
  108. mantisdk/config.py +348 -0
  109. mantisdk/emitter/__init__.py +43 -0
  110. mantisdk/emitter/annotation.py +370 -0
  111. mantisdk/emitter/exception.py +54 -0
  112. mantisdk/emitter/message.py +61 -0
  113. mantisdk/emitter/object.py +117 -0
  114. mantisdk/emitter/reward.py +320 -0
  115. mantisdk/env_var.py +156 -0
  116. mantisdk/execution/__init__.py +15 -0
  117. mantisdk/execution/base.py +64 -0
  118. mantisdk/execution/client_server.py +443 -0
  119. mantisdk/execution/events.py +69 -0
  120. mantisdk/execution/inter_process.py +16 -0
  121. mantisdk/execution/shared_memory.py +282 -0
  122. mantisdk/instrumentation/__init__.py +119 -0
  123. mantisdk/instrumentation/agentops.py +314 -0
  124. mantisdk/instrumentation/agentops_langchain.py +45 -0
  125. mantisdk/instrumentation/litellm.py +83 -0
  126. mantisdk/instrumentation/vllm.py +81 -0
  127. mantisdk/instrumentation/weave.py +500 -0
  128. mantisdk/litagent/__init__.py +11 -0
  129. mantisdk/litagent/decorator.py +536 -0
  130. mantisdk/litagent/litagent.py +252 -0
  131. mantisdk/llm_proxy.py +1890 -0
  132. mantisdk/logging.py +370 -0
  133. mantisdk/reward.py +7 -0
  134. mantisdk/runner/__init__.py +11 -0
  135. mantisdk/runner/agent.py +845 -0
  136. mantisdk/runner/base.py +182 -0
  137. mantisdk/runner/legacy.py +309 -0
  138. mantisdk/semconv.py +170 -0
  139. mantisdk/server.py +401 -0
  140. mantisdk/store/__init__.py +23 -0
  141. mantisdk/store/base.py +897 -0
  142. mantisdk/store/client_server.py +2092 -0
  143. mantisdk/store/collection/__init__.py +30 -0
  144. mantisdk/store/collection/base.py +587 -0
  145. mantisdk/store/collection/memory.py +970 -0
  146. mantisdk/store/collection/mongo.py +1412 -0
  147. mantisdk/store/collection_based.py +1823 -0
  148. mantisdk/store/insight.py +648 -0
  149. mantisdk/store/listener.py +58 -0
  150. mantisdk/store/memory.py +396 -0
  151. mantisdk/store/mongo.py +165 -0
  152. mantisdk/store/sqlite.py +3 -0
  153. mantisdk/store/threading.py +357 -0
  154. mantisdk/store/utils.py +142 -0
  155. mantisdk/tracer/__init__.py +16 -0
  156. mantisdk/tracer/agentops.py +242 -0
  157. mantisdk/tracer/base.py +287 -0
  158. mantisdk/tracer/dummy.py +106 -0
  159. mantisdk/tracer/otel.py +555 -0
  160. mantisdk/tracer/weave.py +677 -0
  161. mantisdk/trainer/__init__.py +6 -0
  162. mantisdk/trainer/init_utils.py +263 -0
  163. mantisdk/trainer/legacy.py +367 -0
  164. mantisdk/trainer/registry.py +12 -0
  165. mantisdk/trainer/trainer.py +618 -0
  166. mantisdk/types/__init__.py +6 -0
  167. mantisdk/types/core.py +553 -0
  168. mantisdk/types/resources.py +204 -0
  169. mantisdk/types/tracer.py +515 -0
  170. mantisdk/types/tracing.py +218 -0
  171. mantisdk/utils/__init__.py +1 -0
  172. mantisdk/utils/id.py +18 -0
  173. mantisdk/utils/metrics.py +1025 -0
  174. mantisdk/utils/otel.py +578 -0
  175. mantisdk/utils/otlp.py +536 -0
  176. mantisdk/utils/server_launcher.py +1045 -0
  177. mantisdk/utils/system_snapshot.py +81 -0
  178. mantisdk/verl/__init__.py +8 -0
  179. mantisdk/verl/__main__.py +6 -0
  180. mantisdk/verl/async_server.py +46 -0
  181. mantisdk/verl/config.yaml +27 -0
  182. mantisdk/verl/daemon.py +1154 -0
  183. mantisdk/verl/dataset.py +44 -0
  184. mantisdk/verl/entrypoint.py +248 -0
  185. mantisdk/verl/trainer.py +549 -0
  186. mantisdk-0.1.0.dist-info/METADATA +119 -0
  187. mantisdk-0.1.0.dist-info/RECORD +190 -0
  188. mantisdk-0.1.0.dist-info/WHEEL +4 -0
  189. mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
  190. mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
@@ -0,0 +1,1025 @@
1
+ # Copyright (c) Microsoft. All rights reserved.
2
+
3
+ """Metrics abstraction with explicit registration and several backends.
4
+
5
+ It provides:
6
+
7
+ - MetricsBackend: Abstract interface for registering and recording metrics.
8
+ - ConsoleMetricsBackend: In-process backend with sliding-window
9
+ aggregations (rate, P50, P95, P99) logged to stdout.
10
+ - PrometheusMetricsBackend: Thin wrapper around prometheus_client.
11
+ - MultiMetricsBackend: Fan-out backend that forwards calls to multiple underlying backends.
12
+ """
13
+
14
from __future__ import annotations

import bisect
import logging
import os
import re
import tempfile
import time
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple

import aiologic
24
+
25
+ if TYPE_CHECKING:
26
+ from prometheus_client import CollectorRegistry
27
+
28
+ LabelDict = Dict[str, str]
29
+ # Label metadata
30
+ LabelKey = Tuple[Tuple[str, str], ...] # normalized (key, value) pairs in registration order
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
+ def _validate_labels(
36
+ kind: str,
37
+ metric_name: str,
38
+ labels: LabelDict,
39
+ expected_names: Tuple[str, ...],
40
+ ) -> LabelKey:
41
+ """Validates label keys against the metric definition.
42
+
43
+ Args:
44
+ kind: Metric kind for error messages ("counter" or "histogram").
45
+ metric_name: Metric name.
46
+ labels: Provided label dictionary.
47
+ expected_names: Expected label names as a tuple.
48
+
49
+ Returns:
50
+ A tuple of (key, value) pairs honoring the registered label order.
51
+
52
+ Raises:
53
+ ValueError: If label keys do not match expected_names.
54
+ """
55
+
56
+ label_items: List[Tuple[str, str]] = []
57
+ for label_name in expected_names:
58
+ if label_name not in labels:
59
+ raise ValueError(f"Label '{label_name}' is required for {kind.capitalize()} '{metric_name}'.")
60
+ label_items.append((label_name, labels[label_name]))
61
+
62
+ return tuple(label_items)
63
+
64
+
65
+ def _normalize_label_names(label_names: Optional[Sequence[str]]) -> Tuple[str, ...]:
66
+ """Normalizes label names into a canonical tuple.
67
+
68
+ Args:
69
+ label_names: Iterable of label names or None.
70
+
71
+ Returns:
72
+ A tuple of label names preserving their original order.
73
+ """
74
+ if not label_names:
75
+ return ()
76
+ return tuple(label_names)
77
+
78
+
79
+ def _normalize_prometheus_metric_name(metric_name: str) -> str:
80
+ """Normalizes Prometheus metric names by replacing unsupported characters."""
81
+
82
+ return metric_name.replace(".", "_")
83
+
84
+
85
@dataclass(frozen=True)
class _CounterDef:
    """Definition of a registered counter metric."""

    # Metric name used as the registry key.
    name: str
    # Registered label names; order matters — it drives label truncation
    # when grouping for console logging.
    label_names: Tuple[str, ...]
    # Per-metric grouping depth for console logging; None means no
    # per-metric override.
    group_level: Optional[int] = None
92
+
93
+
94
@dataclass(frozen=True)
class _HistogramDef:
    """Definition of a registered histogram metric."""

    # Metric name used as the registry key.
    name: str
    # Registered label names; order determines truncation priority when
    # grouping for console logging.
    label_names: Tuple[str, ...]
    # Bucket boundaries (exclusive upper bounds, per register_histogram docs).
    buckets: Tuple[float, ...]
    # Per-metric grouping depth for console logging; None means no
    # per-metric override.
    group_level: Optional[int] = None
102
+
103
+
104
@dataclass
class _CounterState:
    """Runtime state of a counter metric group (for console backend)."""

    # Event timestamps in ascending append order.
    timestamps: List[float]
    # Increment amounts, parallel to `timestamps`.
    amounts: List[float]
110
+
111
+
112
@dataclass
class _HistogramState:
    """Runtime state of a histogram metric group (for console backend)."""

    # Observation timestamps in ascending append order.
    timestamps: List[float]
    # Observed values, parallel to `timestamps`.
    values: List[float]
118
+
119
+
120
class MetricsBackend:
    """Abstract base class for metrics backends.

    Concrete backends implement explicit registration (register_counter /
    register_histogram) followed by asynchronous recording (inc_counter /
    observe_histogram).
    """

    def has_prometheus(self) -> bool:
        """Reports whether this backend exposes Prometheus metrics.

        Returns:
            False by default; Prometheus-capable backends override this.
        """
        return False

    def register_counter(
        self,
        name: str,
        label_names: Optional[Sequence[str]] = None,
        group_level: Optional[int] = None,
    ) -> None:
        """Registers a counter metric.

        Args:
            name: Metric name.
            label_names: Label names; their order sets the truncation
                priority used by group-level logging.
            group_level: Optional per-metric grouping depth for backends
                that support label grouping (Console). A global backend
                setting, when present, takes precedence.

        Raises:
            ValueError: If the metric is already registered with a
                different type or label set.
        """
        raise NotImplementedError()

    def register_histogram(
        self,
        name: str,
        label_names: Optional[Sequence[str]] = None,
        buckets: Optional[Sequence[float]] = None,
        group_level: Optional[int] = None,
    ) -> None:
        """Registers a histogram metric.

        Args:
            name: Metric name.
            label_names: Label names; their order sets the truncation
                priority used by group-level logging.
            buckets: Bucket boundaries (exclusive upper bounds); backends
                may pick defaults when None.
            group_level: Optional per-metric grouping depth for backends
                that support label grouping (Console). A global backend
                setting, when present, takes precedence.

        Raises:
            ValueError: If the metric is already registered with a
                different type or label set.
        """
        raise NotImplementedError()

    async def inc_counter(
        self,
        name: str,
        amount: float = 1.0,
        labels: Optional[LabelDict] = None,
    ) -> None:
        """Increments a previously registered counter.

        Args:
            name: Metric name (must be registered as a counter).
            amount: Amount to add.
            labels: Label values for this increment.

        Raises:
            ValueError: If the metric is unknown, has the wrong type, or
                the label keys do not match the registered label names.
        """
        raise NotImplementedError()

    async def observe_histogram(
        self,
        name: str,
        value: float,
        labels: Optional[LabelDict] = None,
    ) -> None:
        """Records a single observation on a registered histogram.

        Args:
            name: Metric name (must be registered as a histogram).
            value: Observed value.
            labels: Label values for this observation.

        Raises:
            ValueError: If the metric is unknown, has the wrong type, or
                the label keys do not match the registered label names.
        """
        raise NotImplementedError()
211
+
212
+
213
+ class ConsoleMetricsBackend(MetricsBackend):
214
+ """Console backend with sliding-window aggregations and label grouping.
215
+
216
+ This backend:
217
+
218
+ * Requires explicit metric registration.
219
+ * Stores timestamped events per (metric_name, labels) key.
220
+ * Computes rate and percentiles (P50, P95, P99) over a sliding time window.
221
+ * Uses a single global logging decision: when logging is triggered, it
222
+ logs all metric groups, not just the one being updated.
223
+
224
+ Rate is always per second.
225
+
226
+ Label grouping: When logging, label dictionaries are truncated to the first
227
+ `group_level` label pairs (following the registered label order) and metrics
228
+ with identical truncated labels are aggregated together. For example:
229
+
230
+ ```python
231
+ labels = {"method": "GET", "path": "/", "status": "200"}
232
+ group_level = 2 # aggregated labels {"method": "GET", "path": "/"}
233
+ ```
234
+
235
+ If `group_level` is None or < 1, all label combinations for a metric are
236
+ merged into a single log entry (equivalent to grouping by zero labels).
237
+ Individual counters or histograms can set their own `group_level` during
238
+ registration; those values apply only when the backend-level `group_level`
239
+ is unset, allowing selective overrides.
240
+
241
+ Thread-safety: Runtime updates and snapshotting use two aiologic locks: one for mutating
242
+ shared state and another that serializes the global logging decision/snapshot capture so
243
+ other tasks can continue writing. Metric registration happens during initialization,
244
+ so it is intentionally left lock-free; this assumption is documented here to avoid
245
+ blocking writes unnecessarily.
246
+ """
247
+
248
+ def __init__(
249
+ self,
250
+ window_seconds: Optional[float] = 60.0,
251
+ log_interval_seconds: float = 10.0,
252
+ group_level: Optional[int] = None,
253
+ ) -> None:
254
+ """Initializes ConsoleMetricsBackend.
255
+
256
+ Args:
257
+ window_seconds: Sliding window size (in seconds) used when computing
258
+ rate and percentiles. If None, all in-memory events are used.
259
+ log_interval_seconds: Minimum time (in seconds) between log bursts.
260
+ When the interval elapses, the next metric event triggers a
261
+ snapshot and logging of all metrics.
262
+ group_level: Label grouping depth. When logging, only the first
263
+ `group_level` labels (following registered order) are retained and metric
264
+ events sharing those labels are aggregated. If None or < 1,
265
+ all label combinations collapse into a single group per metric.
266
+ """
267
+ self.window_seconds = window_seconds
268
+ self.log_interval_seconds = log_interval_seconds
269
+ self.group_level = group_level
270
+
271
+ self._counters: Dict[str, _CounterDef] = {}
272
+ self._histograms: Dict[str, _HistogramDef] = {}
273
+
274
+ # Runtime state keyed by (metric_name, label_key)
275
+ self._counter_state: Dict[Tuple[str, LabelKey], _CounterState] = {}
276
+ self._hist_state: Dict[Tuple[str, LabelKey], _HistogramState] = {}
277
+
278
+ # Global last log time (for all metrics)
279
+ self._last_log_time: Optional[float] = None
280
+
281
+ self._write_lock = aiologic.Lock()
282
+ self._snapshot_lock = aiologic.Lock()
283
+
284
+ def register_counter(
285
+ self,
286
+ name: str,
287
+ label_names: Optional[Sequence[str]] = None,
288
+ group_level: Optional[int] = None,
289
+ ) -> None:
290
+ """Registers a counter metric.
291
+
292
+ See base class for argument documentation.
293
+ """
294
+ label_tuple = _normalize_label_names(label_names)
295
+ existing_counter = self._counters.get(name)
296
+ existing_hist = self._histograms.get(name)
297
+
298
+ if existing_hist is not None:
299
+ raise ValueError(f"Metric '{name}' already registered as histogram.")
300
+
301
+ if existing_counter is not None:
302
+ if existing_counter.label_names != label_tuple:
303
+ raise ValueError(
304
+ f"Counter '{name}' already registered with labels "
305
+ f"{existing_counter.label_names}, got {label_tuple}."
306
+ )
307
+ return
308
+
309
+ self._counters[name] = _CounterDef(name=name, label_names=label_tuple, group_level=group_level)
310
+
311
+ def register_histogram(
312
+ self,
313
+ name: str,
314
+ label_names: Optional[Sequence[str]] = None,
315
+ buckets: Optional[Sequence[float]] = None,
316
+ group_level: Optional[int] = None,
317
+ ) -> None:
318
+ """Registers a histogram metric.
319
+
320
+ See base class for argument documentation.
321
+ """
322
+ label_tuple = _normalize_label_names(label_names)
323
+ if buckets is None:
324
+ bucket_tuple: Tuple[float, ...] = (0.1, 0.2, 0.5, 1.0, 2.0)
325
+ else:
326
+ bucket_tuple = tuple(buckets)
327
+
328
+ existing_counter = self._counters.get(name)
329
+ existing_hist = self._histograms.get(name)
330
+
331
+ if existing_counter is not None:
332
+ raise ValueError(f"Metric '{name}' already registered as counter.")
333
+
334
+ if existing_hist is not None:
335
+ if existing_hist.label_names != label_tuple or existing_hist.buckets != bucket_tuple:
336
+ raise ValueError(
337
+ f"Histogram '{name}' already registered with "
338
+ f"labels={existing_hist.label_names}, "
339
+ f"buckets={existing_hist.buckets}."
340
+ )
341
+ return
342
+
343
+ self._histograms[name] = _HistogramDef(
344
+ name=name,
345
+ label_names=label_tuple,
346
+ buckets=bucket_tuple,
347
+ group_level=group_level,
348
+ )
349
+
350
+ async def inc_counter(
351
+ self,
352
+ name: str,
353
+ amount: float = 1.0,
354
+ labels: Optional[LabelDict] = None,
355
+ ) -> None:
356
+ """Increments a registered counter metric.
357
+
358
+ See base class for behavior and error conditions.
359
+ """
360
+ now = time.time()
361
+ labels = labels or {}
362
+
363
+ definition = self._counters.get(name)
364
+ if definition is None:
365
+ raise ValueError(f"Counter '{name}' is not registered.")
366
+
367
+ label_key = _validate_labels("counter", name, labels, definition.label_names)
368
+ state_key = (name, label_key)
369
+
370
+ async with self._write_lock:
371
+ state = self._counter_state.get(state_key)
372
+ if state is None:
373
+ state = _CounterState(timestamps=[], amounts=[])
374
+ self._counter_state[state_key] = state
375
+
376
+ state.timestamps.append(now)
377
+ state.amounts.append(amount)
378
+ self._prune_events(state.timestamps, state.amounts, now)
379
+
380
+ counter_snaps: List[Tuple[str, LabelDict, List[float], List[float]]] = []
381
+ hist_snaps: List[Tuple[str, LabelDict, List[float], Tuple[float, ...]]] = []
382
+ should_log = False
383
+ snapshot_time = now
384
+
385
+ async with self._snapshot_lock:
386
+ should_log = self._should_log_locked(now)
387
+ if should_log:
388
+ async with self._write_lock:
389
+ counter_snaps, hist_snaps = self._snapshot_locked(now)
390
+ self._log_snapshot(counter_snaps, hist_snaps, snapshot_time)
391
+
392
+ async def observe_histogram(
393
+ self,
394
+ name: str,
395
+ value: float,
396
+ labels: Optional[LabelDict] = None,
397
+ ) -> None:
398
+ """Records an observation for a registered histogram metric.
399
+
400
+ See base class for behavior and error conditions.
401
+ """
402
+ now = time.time()
403
+ labels = labels or {}
404
+
405
+ definition = self._histograms.get(name)
406
+ if definition is None:
407
+ raise ValueError(f"Histogram '{name}' is not registered.")
408
+
409
+ label_key = _validate_labels("histogram", name, labels, definition.label_names)
410
+ state_key = (name, label_key)
411
+
412
+ async with self._write_lock:
413
+ state = self._hist_state.get(state_key)
414
+ if state is None:
415
+ state = _HistogramState(timestamps=[], values=[])
416
+ self._hist_state[state_key] = state
417
+
418
+ state.timestamps.append(now)
419
+ state.values.append(value)
420
+ self._prune_events(state.timestamps, state.values, now)
421
+
422
+ counter_snaps: List[Tuple[str, LabelDict, List[float], List[float]]] = []
423
+ hist_snaps: List[Tuple[str, LabelDict, List[float], Tuple[float, ...]]] = []
424
+ should_log = False
425
+ snapshot_time = now
426
+
427
+ async with self._snapshot_lock:
428
+ should_log = self._should_log_locked(now)
429
+
430
+ if should_log:
431
+ async with self._write_lock:
432
+ counter_snaps, hist_snaps = self._snapshot_locked(now)
433
+ self._log_snapshot(counter_snaps, hist_snaps, snapshot_time)
434
+
435
+ def _prune_events(
436
+ self,
437
+ timestamps: List[float],
438
+ values: List[float],
439
+ now: float,
440
+ ) -> None:
441
+ """Prunes events older than the sliding window.
442
+
443
+ Args:
444
+ timestamps: List of event timestamps (ascending).
445
+ values: List of corresponding values or amounts.
446
+ now: Current time.
447
+ """
448
+ if self.window_seconds is None or not timestamps:
449
+ return
450
+ cutoff = now - self.window_seconds
451
+ idx = 0
452
+ for i, ts in enumerate(timestamps):
453
+ if ts >= cutoff:
454
+ idx = i
455
+ break
456
+ else:
457
+ idx = len(timestamps)
458
+ if idx > 0:
459
+ del timestamps[:idx]
460
+ del values[:idx]
461
+
462
+ def _should_log_locked(self, now: float) -> bool:
463
+ """Determines whether to emit a log snapshot (lock must be held).
464
+
465
+ This decision is global: if it returns True, all metrics will be
466
+ logged based on a snapshot taken at this time.
467
+
468
+ Args:
469
+ now: Current timestamp.
470
+
471
+ Returns:
472
+ True if enough time has elapsed since the last log; False otherwise.
473
+ """
474
+ last = self._last_log_time
475
+ if last is None or now - last >= self.log_interval_seconds:
476
+ self._last_log_time = now
477
+ return True
478
+ return False
479
+
480
+ def _snapshot_locked(
481
+ self,
482
+ now: float,
483
+ ) -> Tuple[
484
+ List[Tuple[str, LabelDict, List[float], List[float]]],
485
+ List[Tuple[str, LabelDict, List[float], Tuple[float, ...]]],
486
+ ]:
487
+ """Creates a snapshot of all metric state (lock must be held).
488
+
489
+ Args:
490
+ now: Current timestamp.
491
+
492
+ Returns:
493
+ A tuple (counter_snapshots, histogram_snapshots) where:
494
+ - counter_snapshots: list of (metric_name, labels, timestamps, amounts)
495
+ - histogram_snapshots: list of (metric_name, labels, values, buckets)
496
+ """
497
+ counter_snaps: List[Tuple[str, LabelDict, List[float], List[float]]] = []
498
+ hist_snaps: List[Tuple[str, LabelDict, List[float], Tuple[float, ...]]] = []
499
+
500
+ # Prune and snapshot counters.
501
+ for (name, label_key), state in self._counter_state.items():
502
+ self._prune_events(state.timestamps, state.amounts, now)
503
+ if not state.timestamps:
504
+ continue
505
+ labels = dict(label_key)
506
+ counter_snaps.append(
507
+ (
508
+ name,
509
+ labels,
510
+ list(state.timestamps),
511
+ list(state.amounts),
512
+ )
513
+ )
514
+
515
+ # Prune and snapshot histograms.
516
+ for (name, label_key), state in self._hist_state.items():
517
+ self._prune_events(state.timestamps, state.values, now)
518
+ if not state.values:
519
+ continue
520
+ labels = dict(label_key)
521
+ buckets = self._histograms[name].buckets
522
+ hist_snaps.append(
523
+ (
524
+ name,
525
+ labels,
526
+ list(state.values),
527
+ buckets,
528
+ )
529
+ )
530
+
531
+ return counter_snaps, hist_snaps
532
+
533
+ def _truncate_labels_for_logging(self, labels: LabelDict, group_level: Optional[int]) -> LabelDict:
534
+ """Returns a label dict truncated to the configured group depth.
535
+
536
+ Args:
537
+ labels: Original label dictionary.
538
+ group_level: Effective grouping depth for this metric.
539
+
540
+ Returns:
541
+ A new dictionary containing at most `group_level` label pairs,
542
+ chosen by registered label order. If group_level is None or < 1,
543
+ returns an empty dict so that all label combinations collapse together.
544
+ """
545
+ if group_level is None or group_level < 1:
546
+ return {}
547
+ items = list(labels.items())
548
+ return dict(items[:group_level])
549
+
550
    def _log(self, message: str) -> None:
        """Logs a message via the module logger.

        Args:
            message: Pre-formatted metrics line, emitted at INFO level.
        """
        logger.info(message)
553
+
554
+ def _log_snapshot(
555
+ self,
556
+ counter_snaps: List[Tuple[str, LabelDict, List[float], List[float]]],
557
+ hist_snaps: List[Tuple[str, LabelDict, List[float], Tuple[float, ...]]],
558
+ snapshot_time: float,
559
+ ) -> None:
560
+ """Logs all metrics from a snapshot.
561
+
562
+ Args:
563
+ counter_snaps: Counter snapshot list.
564
+ hist_snaps: Histogram snapshot list.
565
+ """
566
+ entries: List[str] = []
567
+ for name, labels, timestamps, amounts in self._group_counter_snapshots(counter_snaps):
568
+ line = self._log_counter(name, labels, timestamps, amounts, snapshot_time)
569
+ if line:
570
+ entries.append(line)
571
+
572
+ for name, labels, values, buckets in self._group_histogram_snapshots(hist_snaps):
573
+ line = self._log_histogram(name, labels, values, buckets, snapshot_time)
574
+ if line:
575
+ entries.append(line)
576
+
577
+ if entries:
578
+ entries.sort()
579
+ self._log(" ".join(entries))
580
+
581
+ def _effective_group_level(self, metric_name: str, *, is_histogram: bool) -> Optional[int]:
582
+ """Returns the active group level for a metric, honoring per-metric overrides."""
583
+ if self.group_level is not None:
584
+ return self.group_level
585
+ if is_histogram:
586
+ definition = self._histograms.get(metric_name)
587
+ else:
588
+ definition = self._counters.get(metric_name)
589
+ if definition is None:
590
+ return None
591
+ return definition.group_level
592
+
593
+ def _group_counter_snapshots(
594
+ self,
595
+ counter_snaps: List[Tuple[str, LabelDict, List[float], List[float]]],
596
+ ) -> List[Tuple[str, LabelDict, List[float], List[float]]]:
597
+ grouped: Dict[Tuple[str, Tuple[Tuple[str, str], ...]], Dict[str, Any]] = {}
598
+ for name, labels, timestamps, amounts in counter_snaps:
599
+ group_level = self._effective_group_level(name, is_histogram=False)
600
+ truncated_labels = self._truncate_labels_for_logging(labels, group_level)
601
+ key = (name, tuple(truncated_labels.items()))
602
+ entry = grouped.setdefault(
603
+ key,
604
+ {"name": name, "labels": truncated_labels, "timestamps": [], "amounts": []},
605
+ )
606
+ entry["timestamps"].extend(timestamps)
607
+ entry["amounts"].extend(amounts)
608
+
609
+ grouped_snaps: List[Tuple[str, LabelDict, List[float], List[float]]] = []
610
+ for entry in grouped.values():
611
+ timestamps = entry["timestamps"]
612
+ amounts = entry["amounts"]
613
+ if not timestamps:
614
+ continue
615
+ combined = sorted(zip(timestamps, amounts), key=lambda item: item[0])
616
+ ordered_timestamps = [ts for ts, _ in combined]
617
+ ordered_amounts = [amt for _, amt in combined]
618
+ grouped_snaps.append(
619
+ (
620
+ entry["name"],
621
+ entry["labels"],
622
+ ordered_timestamps,
623
+ ordered_amounts,
624
+ )
625
+ )
626
+
627
+ return grouped_snaps
628
+
629
+ def _group_histogram_snapshots(
630
+ self,
631
+ hist_snaps: List[Tuple[str, LabelDict, List[float], Tuple[float, ...]]],
632
+ ) -> List[Tuple[str, LabelDict, List[float], Tuple[float, ...]]]:
633
+ grouped: Dict[Tuple[str, Tuple[Tuple[str, str], ...]], Dict[str, Any]] = {}
634
+ for name, labels, values, buckets in hist_snaps:
635
+ group_level = self._effective_group_level(name, is_histogram=True)
636
+ truncated_labels = self._truncate_labels_for_logging(labels, group_level)
637
+ key = (name, tuple(truncated_labels.items()))
638
+ entry = grouped.setdefault(
639
+ key,
640
+ {"name": name, "labels": truncated_labels, "values": [], "buckets": buckets},
641
+ )
642
+ if entry["buckets"] != buckets:
643
+ raise ValueError(f"Histogram buckets mismatch for metric '{name}'.")
644
+ entry["values"].extend(values)
645
+
646
+ grouped_snaps: List[Tuple[str, LabelDict, List[float], Tuple[float, ...]]] = []
647
+ for entry in grouped.values():
648
+ values = entry["values"]
649
+ if not values:
650
+ continue
651
+ grouped_snaps.append(
652
+ (
653
+ entry["name"],
654
+ entry["labels"],
655
+ list(values),
656
+ entry["buckets"],
657
+ )
658
+ )
659
+
660
+ return grouped_snaps
661
+
662
+ def _log_counter(
663
+ self,
664
+ name: str,
665
+ labels: LabelDict,
666
+ timestamps: List[float],
667
+ amounts: List[float],
668
+ snapshot_time: float,
669
+ ) -> Optional[str]:
670
+ """Computes counter stats and returns formatted line."""
671
+ if not timestamps:
672
+ return None
673
+
674
+ total = sum(amounts)
675
+ window_start = timestamps[0]
676
+ if self.window_seconds is not None:
677
+ window_start = max(window_start, snapshot_time - self.window_seconds)
678
+ min_duration = self.log_interval_seconds if self.log_interval_seconds > 0 else 1e-3
679
+ duration = max(snapshot_time - window_start, min_duration)
680
+ rate = total / duration
681
+
682
+ label_str = _format_label_string(labels)
683
+ return f"{name}{label_str}={rate:.2f}/s"
684
+
685
+ def _log_histogram(
686
+ self,
687
+ name: str,
688
+ labels: LabelDict,
689
+ values: List[float],
690
+ buckets: Tuple[float, ...],
691
+ snapshot_time: float,
692
+ ) -> Optional[str]:
693
+ """Computes histogram stats and returns formatted line."""
694
+ if not values:
695
+ return None
696
+
697
+ sorted_vals = sorted(values)
698
+ n = len(sorted_vals)
699
+
700
+ def percentile(p: float) -> float:
701
+ if n == 1:
702
+ return sorted_vals[0]
703
+ pos = (p / 100.0) * (n - 1)
704
+ lo = int(pos)
705
+ hi = min(lo + 1, n - 1)
706
+ if lo == hi:
707
+ return sorted_vals[lo]
708
+ w = pos - lo
709
+ return sorted_vals[lo] * (1 - w) + sorted_vals[hi] * w
710
+
711
+ p50 = percentile(50.0)
712
+ p95 = percentile(95.0)
713
+ p99 = percentile(99.0)
714
+
715
+ label_str = _format_label_string(labels)
716
+ formatted = ",".join([_format_duration(p50), _format_duration(p95), _format_duration(p99)])
717
+ return f"{name}{label_str}={formatted}"
718
+
719
+
720
def _format_label_string(labels: LabelDict) -> str:
    """Render labels as ``{k1=v1,k2=v2}`` in insertion order, or "" when empty."""
    if not labels:
        return ""
    body = ",".join(f"{key}={value}" for key, value in labels.items())
    return "{" + body + "}"
726
+
727
+ def _format_duration(value: float) -> str:
728
+ abs_value = abs(value)
729
+ if abs_value >= 1.0:
730
+ return f"{value:.2f}s"
731
+ if abs_value >= 1e-3:
732
+ return f"{value * 1_000:.2f}ms"
733
+ if abs_value >= 1e-6:
734
+ return f"{value * 1_000_000:.2f}µs"
735
+ return f"{value * 1_000_000_000:.2f}ns"
736
+
737
+
738
class PrometheusMetricsBackend(MetricsBackend):
    """Metrics backend that forwards events to prometheus_client.

    All metrics must be registered before use. This backend does not compute
    any aggregations; it only updates Prometheus metrics.

    Thread-safety: registration performs no locking here — register all
    metrics during (single-threaded) initialization and keep the metric set
    stable afterwards.

    Due to the nature of Prometheus, this backend is only suitable for recording high-volume metrics.
    Low-volume metrics might be lost if the event has only appeared once.
    """

    def __init__(self) -> None:
        """Initializes PrometheusMetricsBackend.

        Raises:
            ImportError: If prometheus_client is not installed.
        """
        try:
            import prometheus_client  # type: ignore  # availability probe only
        except ImportError as exc:
            # Chain the original error so the root cause stays in the traceback.
            raise ImportError(
                "prometheus_client is not installed. Please either install it or use ConsoleMetricsBackend instead."
            ) from exc

        # Metric definitions keyed by the user-facing metric name.
        self._counters: Dict[str, _CounterDef] = {}
        self._histograms: Dict[str, _HistogramDef] = {}
        # Underlying prometheus_client objects, same keys.
        self._prom_counters: Dict[str, Any] = {}
        self._prom_histograms: Dict[str, Any] = {}
        # normalized Prometheus name -> original name, used to detect
        # collisions introduced by name normalization.
        self._prom_metric_names: Dict[str, str] = {}

    def has_prometheus(self) -> bool:
        """Check if the backend has prometheus support."""
        return True

    def register_counter(
        self,
        name: str,
        label_names: Optional[Sequence[str]] = None,
        group_level: Optional[int] = None,
    ) -> None:
        """Registers a Prometheus counter metric.

        Re-registering the same counter with identical labels is a no-op.

        Args:
            name: User-facing metric name.
            label_names: Optional label names for the counter.
            group_level: Optional per-metric label grouping level.

        Raises:
            ValueError: If the name is already used by a histogram, or the
                counter was previously registered with different labels.
        """
        from prometheus_client import Counter as PromCounter

        label_tuple = _normalize_label_names(label_names)

        if name in self._histograms:
            raise ValueError(f"Metric '{name}' already registered as histogram.")

        existing = self._counters.get(name)
        if existing is not None:
            if existing.label_names != label_tuple:
                raise ValueError(
                    f"Counter '{name}' already registered with labels " f"{existing.label_names}, got {label_tuple}."
                )
            # Identical re-registration: nothing to do.
            return

        prom_name = self._register_prometheus_metric_name(name)
        self._counters[name] = _CounterDef(name=name, label_names=label_tuple, group_level=group_level)

        prom_counter = PromCounter(
            prom_name,
            f"Counter {name}",
            labelnames=label_tuple,
        )
        self._prom_counters[name] = prom_counter

    def register_histogram(
        self,
        name: str,
        label_names: Optional[Sequence[str]] = None,
        buckets: Optional[Sequence[float]] = None,
        group_level: Optional[int] = None,
    ) -> None:
        """Registers a Prometheus histogram metric.

        Re-registering the same histogram with identical labels and buckets
        is a no-op.

        Args:
            name: User-facing metric name.
            label_names: Optional label names for the histogram.
            buckets: Optional bucket upper bounds; prometheus_client's
                default buckets are used when omitted.
            group_level: Optional per-metric label grouping level.

        Raises:
            ValueError: If the name is already used by a counter, or the
                histogram was previously registered with a different shape.
        """
        from prometheus_client import Histogram as PromHistogram

        label_tuple = _normalize_label_names(label_names)
        bucket_tuple = tuple(buckets) if buckets is not None else ()

        if name in self._counters:
            raise ValueError(f"Metric '{name}' already registered as counter.")

        existing = self._histograms.get(name)
        if existing is not None:
            if existing.label_names != label_tuple or existing.buckets != bucket_tuple:
                raise ValueError(
                    f"Histogram '{name}' already registered with "
                    f"labels={existing.label_names}, "
                    f"buckets={existing.buckets}."
                )
            return

        prom_name = self._register_prometheus_metric_name(name)
        self._histograms[name] = _HistogramDef(
            name=name,
            label_names=label_tuple,
            buckets=bucket_tuple,
            group_level=group_level,
        )

        # Pass buckets only when explicitly provided so prometheus_client's
        # defaults apply otherwise; this replaces the previously duplicated
        # constructor calls.
        hist_kwargs: Dict[str, Any] = {"labelnames": label_tuple}
        if bucket_tuple:
            hist_kwargs["buckets"] = bucket_tuple
        self._prom_histograms[name] = PromHistogram(
            prom_name,
            f"Histogram {name}",
            **hist_kwargs,
        )

    async def inc_counter(
        self,
        name: str,
        amount: float = 1.0,
        labels: Optional[LabelDict] = None,
    ) -> None:
        """Increments a registered Prometheus counter.

        Raises:
            ValueError: If the counter is not registered, or the supplied
                labels do not match its declared label names.
        """
        labels = labels or {}
        definition = self._counters.get(name)
        if definition is None:
            raise ValueError(f"Counter '{name}' is not registered.")

        prom_counter = self._prom_counters[name]
        if definition.label_names:
            label_key = _validate_labels("counter", name, labels, definition.label_names)
            prom_counter.labels(**dict(label_key)).inc(amount)
        else:
            prom_counter.inc(amount)

    async def observe_histogram(
        self,
        name: str,
        value: float,
        labels: Optional[LabelDict] = None,
    ) -> None:
        """Records an observation for a registered Prometheus histogram.

        Raises:
            ValueError: If the histogram is not registered, or the supplied
                labels do not match its declared label names.
        """
        labels = labels or {}
        definition = self._histograms.get(name)
        if definition is None:
            raise ValueError(f"Histogram '{name}' is not registered.")

        prom_hist = self._prom_histograms[name]
        if definition.label_names:
            label_key = _validate_labels("histogram", name, labels, definition.label_names)
            prom_hist.labels(**dict(label_key)).observe(value)
        else:
            prom_hist.observe(value)

    def _register_prometheus_metric_name(self, name: str) -> str:
        """Registers the normalized Prometheus metric name and ensures uniqueness.

        Raises:
            ValueError: If two distinct metric names normalize to the same
                Prometheus metric name.
        """
        normalized = _normalize_prometheus_metric_name(name)
        existing = self._prom_metric_names.get(normalized)
        if existing is not None and existing != name:
            raise ValueError(
                f"Prometheus metric name conflict: '{name}' normalizes to '{normalized}', "
                f"which is already used by '{existing}'. Consider renaming one of the metrics."
            )
        self._prom_metric_names.setdefault(normalized, name)
        return normalized
907
+
908
class MultiMetricsBackend(MetricsBackend):
    """Metrics backend that forwards calls to multiple underlying backends."""

    def __init__(self, backends: Sequence[MetricsBackend]) -> None:
        """Initializes MultiMetricsBackend.

        Args:
            backends: Sequence of underlying backends (at least one).

        Raises:
            ValueError: If no backends are provided.
        """
        if not backends:
            raise ValueError("MultiMetricsBackend requires at least one backend.")
        # Copy defensively so later mutation of the caller's sequence is ignored.
        self._backends = list(backends)

    def has_prometheus(self) -> bool:
        """Check if the backend has prometheus support (true if any delegate does)."""
        return any(delegate.has_prometheus() for delegate in self._backends)

    def register_counter(
        self,
        name: str,
        label_names: Optional[Sequence[str]] = None,
        group_level: Optional[int] = None,
    ) -> None:
        """Registers a counter metric in all underlying backends."""
        for delegate in self._backends:
            delegate.register_counter(name, label_names=label_names, group_level=group_level)

    def register_histogram(
        self,
        name: str,
        label_names: Optional[Sequence[str]] = None,
        buckets: Optional[Sequence[float]] = None,
        group_level: Optional[int] = None,
    ) -> None:
        """Registers a histogram metric in all underlying backends."""
        for delegate in self._backends:
            delegate.register_histogram(
                name,
                label_names=label_names,
                buckets=buckets,
                group_level=group_level,
            )

    async def inc_counter(
        self,
        name: str,
        amount: float = 1.0,
        labels: Optional[LabelDict] = None,
    ) -> None:
        """Increments a counter metric in all underlying backends."""
        for delegate in self._backends:
            await delegate.inc_counter(name, amount=amount, labels=labels)

    async def observe_histogram(
        self,
        name: str,
        value: float,
        labels: Optional[LabelDict] = None,
    ) -> None:
        """Records a histogram observation in all underlying backends."""
        for delegate in self._backends:
            await delegate.observe_histogram(name, value=value, labels=labels)
974
+
975
# This variable should be carried into forked processes.
# Holds the auto-created multiprocess metrics directory so the module-level
# reference keeps it alive for the process lifetime; TemporaryDirectory
# removes itself automatically at interpreter exit.
_prometheus_multiproc_dir: tempfile.TemporaryDirectory[str] | None = None
977
+
978
+
979
def setup_multiprocess_prometheus():
    """Set up prometheus multiprocessing directory if not already configured."""

    global _prometheus_multiproc_dir

    if "PROMETHEUS_MULTIPROC_DIR" in os.environ:
        # Respect a user-provided directory, but warn: stale per-process
        # metric files must be cleared between runs.
        logger.warning(
            "Found PROMETHEUS_MULTIPROC_DIR was set by user. This directory must be wiped between multiple runs."
        )
        return

    # Create a TemporaryDirectory for prometheus multiprocessing. The global
    # reference keeps it alive; it is automatically cleaned up upon exit.
    _prometheus_multiproc_dir = tempfile.TemporaryDirectory()
    os.environ["PROMETHEUS_MULTIPROC_DIR"] = _prometheus_multiproc_dir.name
    logger.debug("Created PROMETHEUS_MULTIPROC_DIR at %s", _prometheus_multiproc_dir.name)
996
+
997
def get_prometheus_registry() -> "CollectorRegistry":
    """Get the appropriate prometheus registry based on multiprocessing configuration.

    Returns:
        A fresh ``CollectorRegistry`` wired to a ``MultiProcessCollector``
        when PROMETHEUS_MULTIPROC_DIR is set, otherwise the default global
        ``REGISTRY``.

    Note:
        The return annotation is a string on purpose: ``CollectorRegistry``
        is imported inside the function body, so an eagerly-evaluated
        annotation would raise NameError at definition time if the name is
        not also imported at module level.
    """
    from prometheus_client import REGISTRY, CollectorRegistry, multiprocess

    multiproc_dir = os.getenv("PROMETHEUS_MULTIPROC_DIR")
    if multiproc_dir is not None:
        logger.info("Using multiprocess registry for prometheus metrics: %s", multiproc_dir)
        registry = CollectorRegistry()
        # The collector aggregates the per-process metric files found in
        # the multiproc directory into this registry.
        multiprocess.MultiProcessCollector(registry)
        return registry

    return REGISTRY
1009
+
1010
def shutdown_metrics(server: Any = None, worker: Any = None, *args: Any, **kwargs: Any) -> None:
    """Shutdown prometheus metrics.

    Marks the current (or given worker's) process as dead in the prometheus
    multiprocess directory, but only when this module created that directory
    itself. Best-effort: failures are logged, never raised.

    The ``(server, worker)`` signature looks like a gunicorn-style worker
    exit hook — NOTE(review): confirm against the caller.
    """

    # Skip entirely when the multiproc dir was user-provided (or never set up).
    if _prometheus_multiproc_dir is not None:
        from prometheus_client import multiprocess

        path = _prometheus_multiproc_dir
        try:
            # Prefer the worker's pid when the hook supplies one; otherwise
            # assume we are cleaning up after ourselves.
            if hasattr(worker, "pid"):
                pid = worker.pid
            else:
                pid = os.getpid()
            multiprocess.mark_process_dead(pid, path.name)  # type: ignore
            logger.debug("Marked Prometheus metrics for process %d as dead", pid)
        except Exception as e:
            logger.error("Error during metrics cleanup: %s", str(e))