hindsight-api 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/admin/__init__.py +1 -0
- hindsight_api/admin/cli.py +252 -0
- hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
- hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
- hindsight_api/api/http.py +282 -20
- hindsight_api/api/mcp.py +47 -52
- hindsight_api/config.py +238 -6
- hindsight_api/engine/cross_encoder.py +599 -86
- hindsight_api/engine/db_budget.py +284 -0
- hindsight_api/engine/db_utils.py +11 -0
- hindsight_api/engine/embeddings.py +453 -26
- hindsight_api/engine/entity_resolver.py +8 -5
- hindsight_api/engine/interface.py +8 -4
- hindsight_api/engine/llm_wrapper.py +241 -27
- hindsight_api/engine/memory_engine.py +609 -122
- hindsight_api/engine/query_analyzer.py +4 -3
- hindsight_api/engine/response_models.py +38 -0
- hindsight_api/engine/retain/fact_extraction.py +388 -192
- hindsight_api/engine/retain/fact_storage.py +34 -8
- hindsight_api/engine/retain/link_utils.py +24 -16
- hindsight_api/engine/retain/orchestrator.py +52 -17
- hindsight_api/engine/retain/types.py +9 -0
- hindsight_api/engine/search/graph_retrieval.py +42 -13
- hindsight_api/engine/search/link_expansion_retrieval.py +256 -0
- hindsight_api/engine/search/mpfp_retrieval.py +362 -117
- hindsight_api/engine/search/reranking.py +2 -2
- hindsight_api/engine/search/retrieval.py +847 -200
- hindsight_api/engine/search/tags.py +172 -0
- hindsight_api/engine/search/think_utils.py +1 -1
- hindsight_api/engine/search/trace.py +12 -0
- hindsight_api/engine/search/tracer.py +24 -1
- hindsight_api/engine/search/types.py +21 -0
- hindsight_api/engine/task_backend.py +109 -18
- hindsight_api/engine/utils.py +1 -1
- hindsight_api/extensions/context.py +10 -1
- hindsight_api/main.py +56 -4
- hindsight_api/metrics.py +433 -48
- hindsight_api/migrations.py +141 -1
- hindsight_api/models.py +3 -1
- hindsight_api/pg0.py +53 -0
- hindsight_api/server.py +39 -2
- {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/METADATA +5 -1
- hindsight_api-0.3.0.dist-info/RECORD +82 -0
- {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/entry_points.txt +1 -0
- hindsight_api-0.2.0.dist-info/RECORD +0 -75
- {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/WHEEL +0 -0
hindsight_api/metrics.py
CHANGED
|
@@ -5,17 +5,86 @@ This module provides metrics for:
|
|
|
5
5
|
- Operation latency (retain, recall, reflect) with percentiles
|
|
6
6
|
- Token usage (input/output) per operation
|
|
7
7
|
- Per-bank granularity via labels
|
|
8
|
+
- LLM call latency and token usage with scope dimension
|
|
9
|
+
- HTTP request metrics (latency, count by endpoint/method/status)
|
|
10
|
+
- Process metrics (CPU, memory, file descriptors, threads)
|
|
11
|
+
- Database connection pool metrics
|
|
8
12
|
"""
|
|
9
13
|
|
|
10
14
|
import logging
|
|
15
|
+
import os
|
|
16
|
+
import resource
|
|
17
|
+
import threading
|
|
11
18
|
import time
|
|
12
19
|
from contextlib import contextmanager
|
|
20
|
+
from typing import TYPE_CHECKING, Callable
|
|
13
21
|
|
|
14
22
|
from opentelemetry import metrics
|
|
15
23
|
from opentelemetry.exporter.prometheus import PrometheusMetricReader
|
|
16
24
|
from opentelemetry.sdk.metrics import MeterProvider
|
|
25
|
+
from opentelemetry.sdk.metrics.view import ExplicitBucketHistogramAggregation, View
|
|
17
26
|
from opentelemetry.sdk.resources import Resource
|
|
18
27
|
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
import asyncpg
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _get_tenant() -> str:
|
|
33
|
+
"""Get current tenant (schema) from context for metrics labeling."""
|
|
34
|
+
# Import here to avoid circular imports
|
|
35
|
+
from hindsight_api.engine.memory_engine import get_current_schema
|
|
36
|
+
|
|
37
|
+
return get_current_schema()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Custom bucket boundaries for operation duration (in seconds)
|
|
41
|
+
# Fine granularity in 0-30s range where most operations complete
|
|
42
|
+
DURATION_BUCKETS = (0.1, 0.25, 0.5, 0.75, 1.0, 2.0, 3.0, 5.0, 7.5, 10.0, 15.0, 20.0, 30.0, 60.0, 120.0)
|
|
43
|
+
|
|
44
|
+
# LLM duration buckets (finer granularity for faster LLM calls)
|
|
45
|
+
LLM_DURATION_BUCKETS = (0.1, 0.25, 0.5, 1.0, 2.0, 3.0, 5.0, 10.0, 15.0, 30.0, 60.0, 120.0)
|
|
46
|
+
|
|
47
|
+
# HTTP request duration buckets (millisecond-level for fast endpoints)
|
|
48
|
+
HTTP_DURATION_BUCKETS = (0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def get_token_bucket(token_count: int) -> str:
|
|
52
|
+
"""
|
|
53
|
+
Convert a token count to a bucket label for use as a dimension.
|
|
54
|
+
|
|
55
|
+
This allows analyzing token usage patterns without high-cardinality issues.
|
|
56
|
+
|
|
57
|
+
Buckets:
|
|
58
|
+
- "0-100": Very small requests/responses
|
|
59
|
+
- "100-500": Small requests/responses
|
|
60
|
+
- "500-1k": Medium requests/responses
|
|
61
|
+
- "1k-5k": Large requests/responses
|
|
62
|
+
- "5k-10k": Very large requests/responses
|
|
63
|
+
- "10k-50k": Huge requests/responses
|
|
64
|
+
- "50k+": Extremely large requests/responses
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
token_count: Number of tokens
|
|
68
|
+
|
|
69
|
+
Returns:
|
|
70
|
+
Bucket label string
|
|
71
|
+
"""
|
|
72
|
+
if token_count < 100:
|
|
73
|
+
return "0-100"
|
|
74
|
+
elif token_count < 500:
|
|
75
|
+
return "100-500"
|
|
76
|
+
elif token_count < 1000:
|
|
77
|
+
return "500-1k"
|
|
78
|
+
elif token_count < 5000:
|
|
79
|
+
return "1k-5k"
|
|
80
|
+
elif token_count < 10000:
|
|
81
|
+
return "5k-10k"
|
|
82
|
+
elif token_count < 50000:
|
|
83
|
+
return "10k-50k"
|
|
84
|
+
else:
|
|
85
|
+
return "50k+"
|
|
86
|
+
|
|
87
|
+
|
|
19
88
|
logger = logging.getLogger(__name__)
|
|
20
89
|
|
|
21
90
|
# Global meter instance
|
|
@@ -48,8 +117,30 @@ def initialize_metrics(service_name: str = "hindsight-api", service_version: str
|
|
|
48
117
|
# Create Prometheus metric reader
|
|
49
118
|
prometheus_reader = PrometheusMetricReader()
|
|
50
119
|
|
|
51
|
-
# Create
|
|
52
|
-
|
|
120
|
+
# Create view with custom bucket boundaries for duration histogram
|
|
121
|
+
duration_view = View(
|
|
122
|
+
instrument_name="hindsight.operation.duration",
|
|
123
|
+
aggregation=ExplicitBucketHistogramAggregation(boundaries=DURATION_BUCKETS),
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
# Create view with custom bucket boundaries for LLM duration histogram
|
|
127
|
+
llm_duration_view = View(
|
|
128
|
+
instrument_name="hindsight.llm.duration",
|
|
129
|
+
aggregation=ExplicitBucketHistogramAggregation(boundaries=LLM_DURATION_BUCKETS),
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
# Create view with custom bucket boundaries for HTTP request duration histogram
|
|
133
|
+
http_duration_view = View(
|
|
134
|
+
instrument_name="hindsight.http.duration",
|
|
135
|
+
aggregation=ExplicitBucketHistogramAggregation(boundaries=HTTP_DURATION_BUCKETS),
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
# Create meter provider with Prometheus exporter and custom views
|
|
139
|
+
provider = MeterProvider(
|
|
140
|
+
resource=resource,
|
|
141
|
+
metric_readers=[prometheus_reader],
|
|
142
|
+
views=[duration_view, llm_duration_view, http_duration_view],
|
|
143
|
+
)
|
|
53
144
|
|
|
54
145
|
# Set the global meter provider
|
|
55
146
|
metrics.set_meter_provider(provider)
|
|
@@ -71,43 +162,84 @@ class MetricsCollectorBase:
|
|
|
71
162
|
"""Base class for metrics collectors."""
|
|
72
163
|
|
|
73
164
|
@contextmanager
|
|
74
|
-
def record_operation(
|
|
165
|
+
def record_operation(
|
|
166
|
+
self,
|
|
167
|
+
operation: str,
|
|
168
|
+
bank_id: str,
|
|
169
|
+
source: str = "api",
|
|
170
|
+
budget: str | None = None,
|
|
171
|
+
max_tokens: int | None = None,
|
|
172
|
+
):
|
|
75
173
|
"""Context manager to record operation duration and status."""
|
|
76
174
|
raise NotImplementedError
|
|
77
175
|
|
|
78
|
-
def
|
|
176
|
+
def record_llm_call(
|
|
79
177
|
self,
|
|
80
|
-
|
|
81
|
-
|
|
178
|
+
provider: str,
|
|
179
|
+
model: str,
|
|
180
|
+
scope: str,
|
|
181
|
+
duration: float,
|
|
82
182
|
input_tokens: int = 0,
|
|
83
183
|
output_tokens: int = 0,
|
|
84
|
-
|
|
85
|
-
max_tokens: int | None = None,
|
|
184
|
+
success: bool = True,
|
|
86
185
|
):
|
|
87
|
-
"""
|
|
186
|
+
"""
|
|
187
|
+
Record metrics for an LLM call.
|
|
188
|
+
|
|
189
|
+
Args:
|
|
190
|
+
provider: LLM provider name (openai, anthropic, gemini, groq, ollama, lmstudio)
|
|
191
|
+
model: Model name
|
|
192
|
+
scope: Scope identifier (e.g., "memory", "reflect", "entity_observation")
|
|
193
|
+
duration: Call duration in seconds
|
|
194
|
+
input_tokens: Number of input/prompt tokens
|
|
195
|
+
output_tokens: Number of output/completion tokens
|
|
196
|
+
success: Whether the call was successful
|
|
197
|
+
"""
|
|
88
198
|
raise NotImplementedError
|
|
89
199
|
|
|
200
|
+
@contextmanager
|
|
201
|
+
def record_http_request(self, method: str, endpoint: str, status_code_getter: Callable[[], int]):
|
|
202
|
+
"""Context manager to record HTTP request metrics."""
|
|
203
|
+
raise NotImplementedError
|
|
204
|
+
|
|
205
|
+
def set_db_pool(self, pool: "asyncpg.Pool"):
|
|
206
|
+
"""Set the database pool for metrics collection."""
|
|
207
|
+
pass
|
|
208
|
+
|
|
90
209
|
|
|
91
210
|
class NoOpMetricsCollector(MetricsCollectorBase):
|
|
92
211
|
"""No-op metrics collector that does nothing. Used when metrics are disabled."""
|
|
93
212
|
|
|
94
213
|
@contextmanager
|
|
95
|
-
def record_operation(
|
|
214
|
+
def record_operation(
|
|
215
|
+
self,
|
|
216
|
+
operation: str,
|
|
217
|
+
bank_id: str,
|
|
218
|
+
source: str = "api",
|
|
219
|
+
budget: str | None = None,
|
|
220
|
+
max_tokens: int | None = None,
|
|
221
|
+
):
|
|
96
222
|
"""No-op context manager."""
|
|
97
223
|
yield
|
|
98
224
|
|
|
99
|
-
def
|
|
225
|
+
def record_llm_call(
|
|
100
226
|
self,
|
|
101
|
-
|
|
102
|
-
|
|
227
|
+
provider: str,
|
|
228
|
+
model: str,
|
|
229
|
+
scope: str,
|
|
230
|
+
duration: float,
|
|
103
231
|
input_tokens: int = 0,
|
|
104
232
|
output_tokens: int = 0,
|
|
105
|
-
|
|
106
|
-
max_tokens: int | None = None,
|
|
233
|
+
success: bool = True,
|
|
107
234
|
):
|
|
108
|
-
"""No-op
|
|
235
|
+
"""No-op LLM call recording."""
|
|
109
236
|
pass
|
|
110
237
|
|
|
238
|
+
@contextmanager
|
|
239
|
+
def record_http_request(self, method: str, endpoint: str, status_code_getter: Callable[[], int]):
|
|
240
|
+
"""No-op HTTP request recording."""
|
|
241
|
+
yield
|
|
242
|
+
|
|
111
243
|
|
|
112
244
|
class MetricsCollector(MetricsCollectorBase):
|
|
113
245
|
"""
|
|
@@ -125,33 +257,73 @@ class MetricsCollector(MetricsCollectorBase):
|
|
|
125
257
|
name="hindsight.operation.duration", description="Duration of Hindsight operations in seconds", unit="s"
|
|
126
258
|
)
|
|
127
259
|
|
|
128
|
-
#
|
|
129
|
-
self.
|
|
130
|
-
name="hindsight.
|
|
260
|
+
# Operation counter (success/failure)
|
|
261
|
+
self.operation_total = self.meter.create_counter(
|
|
262
|
+
name="hindsight.operation.total", description="Total number of operations executed", unit="operations"
|
|
131
263
|
)
|
|
132
264
|
|
|
133
|
-
|
|
134
|
-
|
|
265
|
+
# LLM call latency histogram (in seconds)
|
|
266
|
+
# Records duration of LLM API calls with provider, model, and scope dimensions
|
|
267
|
+
self.llm_duration = self.meter.create_histogram(
|
|
268
|
+
name="hindsight.llm.duration", description="Duration of LLM API calls in seconds", unit="s"
|
|
135
269
|
)
|
|
136
270
|
|
|
137
|
-
#
|
|
138
|
-
self.
|
|
139
|
-
name="hindsight.
|
|
271
|
+
# LLM token usage counters with bucket labels
|
|
272
|
+
self.llm_tokens_input = self.meter.create_counter(
|
|
273
|
+
name="hindsight.llm.tokens.input", description="Number of input tokens for LLM calls", unit="tokens"
|
|
274
|
+
)
|
|
275
|
+
|
|
276
|
+
self.llm_tokens_output = self.meter.create_counter(
|
|
277
|
+
name="hindsight.llm.tokens.output", description="Number of output tokens from LLM calls", unit="tokens"
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
# LLM call counter (success/failure)
|
|
281
|
+
self.llm_calls_total = self.meter.create_counter(
|
|
282
|
+
name="hindsight.llm.calls.total", description="Total number of LLM API calls", unit="calls"
|
|
283
|
+
)
|
|
284
|
+
|
|
285
|
+
# HTTP request metrics
|
|
286
|
+
self.http_request_duration = self.meter.create_histogram(
|
|
287
|
+
name="hindsight.http.duration", description="Duration of HTTP requests in seconds", unit="s"
|
|
140
288
|
)
|
|
141
289
|
|
|
290
|
+
self.http_requests_total = self.meter.create_counter(
|
|
291
|
+
name="hindsight.http.requests.total", description="Total number of HTTP requests", unit="requests"
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
self.http_requests_in_progress = self.meter.create_up_down_counter(
|
|
295
|
+
name="hindsight.http.requests.in_progress",
|
|
296
|
+
description="Number of HTTP requests in progress",
|
|
297
|
+
unit="requests",
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
# Process metrics (observable gauges - collected on scrape)
|
|
301
|
+
self._setup_process_metrics()
|
|
302
|
+
|
|
303
|
+
# DB pool metrics holder (set via set_db_pool)
|
|
304
|
+
self._db_pool: "asyncpg.Pool | None" = None
|
|
305
|
+
|
|
142
306
|
@contextmanager
|
|
143
|
-
def record_operation(
|
|
307
|
+
def record_operation(
|
|
308
|
+
self,
|
|
309
|
+
operation: str,
|
|
310
|
+
bank_id: str,
|
|
311
|
+
source: str = "api",
|
|
312
|
+
budget: str | None = None,
|
|
313
|
+
max_tokens: int | None = None,
|
|
314
|
+
):
|
|
144
315
|
"""
|
|
145
316
|
Context manager to record operation duration and status.
|
|
146
317
|
|
|
147
318
|
Usage:
|
|
148
|
-
with metrics.record_operation("recall", bank_id="user123", budget="mid", max_tokens=4096):
|
|
319
|
+
with metrics.record_operation("recall", bank_id="user123", source="api", budget="mid", max_tokens=4096):
|
|
149
320
|
# ... perform operation
|
|
150
321
|
pass
|
|
151
322
|
|
|
152
323
|
Args:
|
|
153
|
-
operation: Operation name (retain, recall, reflect)
|
|
324
|
+
operation: Operation name (retain, recall, reflect, entity_observation)
|
|
154
325
|
bank_id: Memory bank ID
|
|
326
|
+
source: Source of the operation (api, reflect, internal)
|
|
155
327
|
budget: Optional budget level (low, mid, high)
|
|
156
328
|
max_tokens: Optional max tokens for the operation
|
|
157
329
|
"""
|
|
@@ -159,6 +331,8 @@ class MetricsCollector(MetricsCollectorBase):
|
|
|
159
331
|
attributes = {
|
|
160
332
|
"operation": operation,
|
|
161
333
|
"bank_id": bank_id,
|
|
334
|
+
"source": source,
|
|
335
|
+
"tenant": _get_tenant(),
|
|
162
336
|
}
|
|
163
337
|
if budget:
|
|
164
338
|
attributes["budget"] = budget
|
|
@@ -181,40 +355,251 @@ class MetricsCollector(MetricsCollectorBase):
|
|
|
181
355
|
# Record operation count
|
|
182
356
|
self.operation_total.add(1, attributes)
|
|
183
357
|
|
|
184
|
-
def
|
|
358
|
+
def record_llm_call(
|
|
185
359
|
self,
|
|
186
|
-
|
|
187
|
-
|
|
360
|
+
provider: str,
|
|
361
|
+
model: str,
|
|
362
|
+
scope: str,
|
|
363
|
+
duration: float,
|
|
188
364
|
input_tokens: int = 0,
|
|
189
365
|
output_tokens: int = 0,
|
|
190
|
-
|
|
191
|
-
max_tokens: int | None = None,
|
|
366
|
+
success: bool = True,
|
|
192
367
|
):
|
|
193
368
|
"""
|
|
194
|
-
Record
|
|
369
|
+
Record metrics for an LLM call.
|
|
195
370
|
|
|
196
371
|
Args:
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
372
|
+
provider: LLM provider name (openai, anthropic, gemini, groq, ollama, lmstudio)
|
|
373
|
+
model: Model name
|
|
374
|
+
scope: Scope identifier (e.g., "memory", "reflect", "entity_observation")
|
|
375
|
+
duration: Call duration in seconds
|
|
376
|
+
input_tokens: Number of input/prompt tokens
|
|
377
|
+
output_tokens: Number of output/completion tokens
|
|
378
|
+
success: Whether the call was successful
|
|
203
379
|
"""
|
|
204
|
-
attributes
|
|
205
|
-
|
|
206
|
-
"
|
|
380
|
+
# Base attributes for all metrics
|
|
381
|
+
base_attributes = {
|
|
382
|
+
"provider": provider,
|
|
383
|
+
"model": model,
|
|
384
|
+
"scope": scope,
|
|
385
|
+
"success": str(success).lower(),
|
|
386
|
+
"tenant": _get_tenant(),
|
|
207
387
|
}
|
|
208
|
-
if budget:
|
|
209
|
-
attributes["budget"] = budget
|
|
210
|
-
if max_tokens:
|
|
211
|
-
attributes["max_tokens"] = str(max_tokens)
|
|
212
388
|
|
|
389
|
+
# Record duration
|
|
390
|
+
self.llm_duration.record(duration, base_attributes)
|
|
391
|
+
|
|
392
|
+
# Record call count
|
|
393
|
+
self.llm_calls_total.add(1, base_attributes)
|
|
394
|
+
|
|
395
|
+
# Record tokens with bucket labels for cardinality control
|
|
213
396
|
if input_tokens > 0:
|
|
214
|
-
|
|
397
|
+
input_attributes = {
|
|
398
|
+
**base_attributes,
|
|
399
|
+
"token_bucket": get_token_bucket(input_tokens),
|
|
400
|
+
}
|
|
401
|
+
self.llm_tokens_input.add(input_tokens, input_attributes)
|
|
215
402
|
|
|
216
403
|
if output_tokens > 0:
|
|
217
|
-
|
|
404
|
+
output_attributes = {
|
|
405
|
+
**base_attributes,
|
|
406
|
+
"token_bucket": get_token_bucket(output_tokens),
|
|
407
|
+
}
|
|
408
|
+
self.llm_tokens_output.add(output_tokens, output_attributes)
|
|
409
|
+
|
|
410
|
+
@contextmanager
|
|
411
|
+
def record_http_request(self, method: str, endpoint: str, status_code_getter: Callable[[], int]):
|
|
412
|
+
"""
|
|
413
|
+
Context manager to record HTTP request metrics.
|
|
414
|
+
|
|
415
|
+
Usage:
|
|
416
|
+
status_code = [200] # Use list for mutability
|
|
417
|
+
with metrics.record_http_request("GET", "/api/banks", lambda: status_code[0]):
|
|
418
|
+
# ... handle request
|
|
419
|
+
status_code[0] = response.status_code
|
|
420
|
+
|
|
421
|
+
Args:
|
|
422
|
+
method: HTTP method (GET, POST, etc.)
|
|
423
|
+
endpoint: Request endpoint path
|
|
424
|
+
status_code_getter: Callable that returns the status code after request completes
|
|
425
|
+
"""
|
|
426
|
+
start_time = time.time()
|
|
427
|
+
base_attributes = {"method": method, "endpoint": endpoint}
|
|
428
|
+
|
|
429
|
+
# Track in-progress
|
|
430
|
+
self.http_requests_in_progress.add(1, base_attributes)
|
|
431
|
+
|
|
432
|
+
try:
|
|
433
|
+
yield
|
|
434
|
+
finally:
|
|
435
|
+
duration = time.time() - start_time
|
|
436
|
+
status_code = status_code_getter()
|
|
437
|
+
status_class = f"{status_code // 100}xx"
|
|
438
|
+
|
|
439
|
+
# Get tenant from context (may be set during request processing)
|
|
440
|
+
tenant = _get_tenant()
|
|
441
|
+
|
|
442
|
+
attributes = {
|
|
443
|
+
**base_attributes,
|
|
444
|
+
"status_code": str(status_code),
|
|
445
|
+
"status_class": status_class,
|
|
446
|
+
"tenant": tenant,
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
# Record duration and count
|
|
450
|
+
self.http_request_duration.record(duration, attributes)
|
|
451
|
+
self.http_requests_total.add(1, attributes)
|
|
452
|
+
|
|
453
|
+
# Decrement in-progress
|
|
454
|
+
self.http_requests_in_progress.add(-1, base_attributes)
|
|
455
|
+
|
|
456
|
+
def _setup_process_metrics(self):
|
|
457
|
+
"""Set up observable gauges for process metrics."""
|
|
458
|
+
|
|
459
|
+
def get_cpu_times(_options):
|
|
460
|
+
"""Get process CPU times."""
|
|
461
|
+
try:
|
|
462
|
+
rusage = resource.getrusage(resource.RUSAGE_SELF)
|
|
463
|
+
yield metrics.Observation(rusage.ru_utime, {"type": "user"})
|
|
464
|
+
yield metrics.Observation(rusage.ru_stime, {"type": "system"})
|
|
465
|
+
except Exception:
|
|
466
|
+
pass
|
|
467
|
+
|
|
468
|
+
def get_memory_usage(_options):
|
|
469
|
+
"""Get process memory usage in bytes."""
|
|
470
|
+
try:
|
|
471
|
+
rusage = resource.getrusage(resource.RUSAGE_SELF)
|
|
472
|
+
# ru_maxrss is in kilobytes on Linux, bytes on macOS
|
|
473
|
+
max_rss = rusage.ru_maxrss
|
|
474
|
+
if os.uname().sysname == "Linux":
|
|
475
|
+
max_rss *= 1024 # Convert KB to bytes
|
|
476
|
+
yield metrics.Observation(max_rss, {"type": "rss_max"})
|
|
477
|
+
except Exception:
|
|
478
|
+
pass
|
|
479
|
+
|
|
480
|
+
def get_open_file_descriptors(_options):
|
|
481
|
+
"""Get number of open file descriptors."""
|
|
482
|
+
try:
|
|
483
|
+
# Try to count open FDs by checking /proc on Linux
|
|
484
|
+
if os.path.exists("/proc/self/fd"):
|
|
485
|
+
count = len(os.listdir("/proc/self/fd"))
|
|
486
|
+
yield metrics.Observation(count)
|
|
487
|
+
else:
|
|
488
|
+
# Fallback: use resource limits
|
|
489
|
+
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
|
|
490
|
+
yield metrics.Observation(soft, {"limit": "soft"})
|
|
491
|
+
except Exception:
|
|
492
|
+
pass
|
|
493
|
+
|
|
494
|
+
def get_thread_count(_options):
|
|
495
|
+
"""Get number of active threads."""
|
|
496
|
+
try:
|
|
497
|
+
yield metrics.Observation(threading.active_count())
|
|
498
|
+
except Exception:
|
|
499
|
+
pass
|
|
500
|
+
|
|
501
|
+
# Create observable gauges
|
|
502
|
+
self.meter.create_observable_gauge(
|
|
503
|
+
name="hindsight.process.cpu.seconds",
|
|
504
|
+
callbacks=[get_cpu_times],
|
|
505
|
+
description="Process CPU time in seconds",
|
|
506
|
+
unit="s",
|
|
507
|
+
)
|
|
508
|
+
|
|
509
|
+
self.meter.create_observable_gauge(
|
|
510
|
+
name="hindsight.process.memory.bytes",
|
|
511
|
+
callbacks=[get_memory_usage],
|
|
512
|
+
description="Process memory usage in bytes",
|
|
513
|
+
unit="By",
|
|
514
|
+
)
|
|
515
|
+
|
|
516
|
+
self.meter.create_observable_gauge(
|
|
517
|
+
name="hindsight.process.open_fds",
|
|
518
|
+
callbacks=[get_open_file_descriptors],
|
|
519
|
+
description="Number of open file descriptors",
|
|
520
|
+
unit="{fds}",
|
|
521
|
+
)
|
|
522
|
+
|
|
523
|
+
self.meter.create_observable_gauge(
|
|
524
|
+
name="hindsight.process.threads",
|
|
525
|
+
callbacks=[get_thread_count],
|
|
526
|
+
description="Number of active threads",
|
|
527
|
+
unit="{threads}",
|
|
528
|
+
)
|
|
529
|
+
|
|
530
|
+
def set_db_pool(self, pool: "asyncpg.Pool"):
|
|
531
|
+
"""
|
|
532
|
+
Set the database pool for metrics collection.
|
|
533
|
+
|
|
534
|
+
Args:
|
|
535
|
+
pool: asyncpg connection pool instance
|
|
536
|
+
"""
|
|
537
|
+
self._db_pool = pool
|
|
538
|
+
self._setup_db_pool_metrics()
|
|
539
|
+
|
|
540
|
+
def _setup_db_pool_metrics(self):
|
|
541
|
+
"""Set up observable gauges for database pool metrics."""
|
|
542
|
+
|
|
543
|
+
def get_pool_size(_options):
|
|
544
|
+
"""Get current pool size."""
|
|
545
|
+
if self._db_pool is not None:
|
|
546
|
+
try:
|
|
547
|
+
yield metrics.Observation(self._db_pool.get_size())
|
|
548
|
+
except Exception:
|
|
549
|
+
pass
|
|
550
|
+
|
|
551
|
+
def get_pool_free_size(_options):
|
|
552
|
+
"""Get number of free connections in pool."""
|
|
553
|
+
if self._db_pool is not None:
|
|
554
|
+
try:
|
|
555
|
+
yield metrics.Observation(self._db_pool.get_idle_size())
|
|
556
|
+
except Exception:
|
|
557
|
+
pass
|
|
558
|
+
|
|
559
|
+
def get_pool_min_size(_options):
|
|
560
|
+
"""Get pool minimum size."""
|
|
561
|
+
if self._db_pool is not None:
|
|
562
|
+
try:
|
|
563
|
+
yield metrics.Observation(self._db_pool.get_min_size())
|
|
564
|
+
except Exception:
|
|
565
|
+
pass
|
|
566
|
+
|
|
567
|
+
def get_pool_max_size(_options):
|
|
568
|
+
"""Get pool maximum size."""
|
|
569
|
+
if self._db_pool is not None:
|
|
570
|
+
try:
|
|
571
|
+
yield metrics.Observation(self._db_pool.get_max_size())
|
|
572
|
+
except Exception:
|
|
573
|
+
pass
|
|
574
|
+
|
|
575
|
+
# Create observable gauges for pool metrics
|
|
576
|
+
self.meter.create_observable_gauge(
|
|
577
|
+
name="hindsight.db.pool.size",
|
|
578
|
+
callbacks=[get_pool_size],
|
|
579
|
+
description="Current number of connections in the pool",
|
|
580
|
+
unit="{connections}",
|
|
581
|
+
)
|
|
582
|
+
|
|
583
|
+
self.meter.create_observable_gauge(
|
|
584
|
+
name="hindsight.db.pool.idle",
|
|
585
|
+
callbacks=[get_pool_free_size],
|
|
586
|
+
description="Number of idle connections in the pool",
|
|
587
|
+
unit="{connections}",
|
|
588
|
+
)
|
|
589
|
+
|
|
590
|
+
self.meter.create_observable_gauge(
|
|
591
|
+
name="hindsight.db.pool.min",
|
|
592
|
+
callbacks=[get_pool_min_size],
|
|
593
|
+
description="Minimum pool size",
|
|
594
|
+
unit="{connections}",
|
|
595
|
+
)
|
|
596
|
+
|
|
597
|
+
self.meter.create_observable_gauge(
|
|
598
|
+
name="hindsight.db.pool.max",
|
|
599
|
+
callbacks=[get_pool_max_size],
|
|
600
|
+
description="Maximum pool size",
|
|
601
|
+
unit="{connections}",
|
|
602
|
+
)
|
|
218
603
|
|
|
219
604
|
|
|
220
605
|
# Global metrics collector instance (defaults to no-op)
|