hindsight-api 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. hindsight_api/admin/__init__.py +1 -0
  2. hindsight_api/admin/cli.py +311 -0
  3. hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
  4. hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
  5. hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
  6. hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
  7. hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
  8. hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
  9. hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
  10. hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
  11. hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
  12. hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
  13. hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
  14. hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
  15. hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
  16. hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
  17. hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
  18. hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
  19. hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
  20. hindsight_api/api/http.py +1406 -118
  21. hindsight_api/api/mcp.py +11 -196
  22. hindsight_api/config.py +359 -27
  23. hindsight_api/engine/consolidation/__init__.py +5 -0
  24. hindsight_api/engine/consolidation/consolidator.py +859 -0
  25. hindsight_api/engine/consolidation/prompts.py +69 -0
  26. hindsight_api/engine/cross_encoder.py +706 -88
  27. hindsight_api/engine/db_budget.py +284 -0
  28. hindsight_api/engine/db_utils.py +11 -0
  29. hindsight_api/engine/directives/__init__.py +5 -0
  30. hindsight_api/engine/directives/models.py +37 -0
  31. hindsight_api/engine/embeddings.py +553 -29
  32. hindsight_api/engine/entity_resolver.py +8 -5
  33. hindsight_api/engine/interface.py +40 -17
  34. hindsight_api/engine/llm_wrapper.py +744 -68
  35. hindsight_api/engine/memory_engine.py +2505 -1017
  36. hindsight_api/engine/mental_models/__init__.py +14 -0
  37. hindsight_api/engine/mental_models/models.py +53 -0
  38. hindsight_api/engine/query_analyzer.py +4 -3
  39. hindsight_api/engine/reflect/__init__.py +18 -0
  40. hindsight_api/engine/reflect/agent.py +933 -0
  41. hindsight_api/engine/reflect/models.py +109 -0
  42. hindsight_api/engine/reflect/observations.py +186 -0
  43. hindsight_api/engine/reflect/prompts.py +483 -0
  44. hindsight_api/engine/reflect/tools.py +437 -0
  45. hindsight_api/engine/reflect/tools_schema.py +250 -0
  46. hindsight_api/engine/response_models.py +168 -4
  47. hindsight_api/engine/retain/bank_utils.py +79 -201
  48. hindsight_api/engine/retain/fact_extraction.py +424 -195
  49. hindsight_api/engine/retain/fact_storage.py +35 -12
  50. hindsight_api/engine/retain/link_utils.py +29 -24
  51. hindsight_api/engine/retain/orchestrator.py +24 -43
  52. hindsight_api/engine/retain/types.py +11 -2
  53. hindsight_api/engine/search/graph_retrieval.py +43 -14
  54. hindsight_api/engine/search/link_expansion_retrieval.py +391 -0
  55. hindsight_api/engine/search/mpfp_retrieval.py +362 -117
  56. hindsight_api/engine/search/reranking.py +2 -2
  57. hindsight_api/engine/search/retrieval.py +848 -201
  58. hindsight_api/engine/search/tags.py +172 -0
  59. hindsight_api/engine/search/think_utils.py +42 -141
  60. hindsight_api/engine/search/trace.py +12 -1
  61. hindsight_api/engine/search/tracer.py +26 -6
  62. hindsight_api/engine/search/types.py +21 -3
  63. hindsight_api/engine/task_backend.py +113 -106
  64. hindsight_api/engine/utils.py +1 -152
  65. hindsight_api/extensions/__init__.py +10 -1
  66. hindsight_api/extensions/builtin/tenant.py +5 -1
  67. hindsight_api/extensions/context.py +10 -1
  68. hindsight_api/extensions/operation_validator.py +81 -4
  69. hindsight_api/extensions/tenant.py +26 -0
  70. hindsight_api/main.py +69 -6
  71. hindsight_api/mcp_local.py +12 -53
  72. hindsight_api/mcp_tools.py +494 -0
  73. hindsight_api/metrics.py +433 -48
  74. hindsight_api/migrations.py +141 -1
  75. hindsight_api/models.py +3 -3
  76. hindsight_api/pg0.py +53 -0
  77. hindsight_api/server.py +39 -2
  78. hindsight_api/worker/__init__.py +11 -0
  79. hindsight_api/worker/main.py +296 -0
  80. hindsight_api/worker/poller.py +486 -0
  81. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +16 -6
  82. hindsight_api-0.4.0.dist-info/RECORD +112 -0
  83. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +2 -0
  84. hindsight_api/engine/retain/observation_regeneration.py +0 -254
  85. hindsight_api/engine/search/observation_utils.py +0 -125
  86. hindsight_api/engine/search/scoring.py +0 -159
  87. hindsight_api-0.2.1.dist-info/RECORD +0 -75
  88. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
hindsight_api/metrics.py CHANGED
@@ -5,17 +5,86 @@ This module provides metrics for:
5
5
  - Operation latency (retain, recall, reflect) with percentiles
6
6
  - Token usage (input/output) per operation
7
7
  - Per-bank granularity via labels
8
+ - LLM call latency and token usage with scope dimension
9
+ - HTTP request metrics (latency, count by endpoint/method/status)
10
+ - Process metrics (CPU, memory, file descriptors, threads)
11
+ - Database connection pool metrics
8
12
  """
9
13
 
10
14
  import logging
15
+ import os
16
+ import resource
17
+ import threading
11
18
  import time
12
19
  from contextlib import contextmanager
20
+ from typing import TYPE_CHECKING, Callable
13
21
 
14
22
  from opentelemetry import metrics
15
23
  from opentelemetry.exporter.prometheus import PrometheusMetricReader
16
24
  from opentelemetry.sdk.metrics import MeterProvider
25
+ from opentelemetry.sdk.metrics.view import ExplicitBucketHistogramAggregation, View
17
26
  from opentelemetry.sdk.resources import Resource
18
27
 
28
+ if TYPE_CHECKING:
29
+ import asyncpg
30
+
31
+
32
def _get_tenant() -> str:
    """Return the current tenant (schema) from context, for metrics labeling.

    Returns:
        The active schema name as reported by the engine's context.
    """
    # Import here to avoid circular imports between this metrics module
    # and the memory engine.
    from hindsight_api.engine.memory_engine import get_current_schema

    return get_current_schema()
38
+
39
+
40
+ # Custom bucket boundaries for operation duration (in seconds)
41
+ # Fine granularity in 0-30s range where most operations complete
42
+ DURATION_BUCKETS = (0.1, 0.25, 0.5, 0.75, 1.0, 2.0, 3.0, 5.0, 7.5, 10.0, 15.0, 20.0, 30.0, 60.0, 120.0)
43
+
44
+ # LLM duration buckets (finer granularity for faster LLM calls)
45
+ LLM_DURATION_BUCKETS = (0.1, 0.25, 0.5, 1.0, 2.0, 3.0, 5.0, 10.0, 15.0, 30.0, 60.0, 120.0)
46
+
47
+ # HTTP request duration buckets (millisecond-level for fast endpoints)
48
+ HTTP_DURATION_BUCKETS = (0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0)
49
+
50
+
51
def get_token_bucket(token_count: int) -> str:
    """
    Map a token count onto a coarse bucket label usable as a metric dimension.

    Bucketing keeps label cardinality bounded while still allowing analysis
    of token-usage patterns.

    Buckets (half-open, lower bound inclusive):
      - "0-100": Very small requests/responses
      - "100-500": Small requests/responses
      - "500-1k": Medium requests/responses
      - "1k-5k": Large requests/responses
      - "5k-10k": Very large requests/responses
      - "10k-50k": Huge requests/responses
      - "50k+": Extremely large requests/responses

    Args:
        token_count: Number of tokens

    Returns:
        Bucket label string
    """
    # Ascending (exclusive upper bound, label) pairs; first match wins.
    thresholds = (
        (100, "0-100"),
        (500, "100-500"),
        (1000, "500-1k"),
        (5000, "1k-5k"),
        (10000, "5k-10k"),
        (50000, "10k-50k"),
    )
    for upper_bound, label in thresholds:
        if token_count < upper_bound:
            return label
    return "50k+"
86
+
87
+
19
88
  logger = logging.getLogger(__name__)
20
89
 
21
90
  # Global meter instance
@@ -48,8 +117,30 @@ def initialize_metrics(service_name: str = "hindsight-api", service_version: str
48
117
  # Create Prometheus metric reader
49
118
  prometheus_reader = PrometheusMetricReader()
50
119
 
51
- # Create meter provider with Prometheus exporter
52
- provider = MeterProvider(resource=resource, metric_readers=[prometheus_reader])
120
+ # Create view with custom bucket boundaries for duration histogram
121
+ duration_view = View(
122
+ instrument_name="hindsight.operation.duration",
123
+ aggregation=ExplicitBucketHistogramAggregation(boundaries=DURATION_BUCKETS),
124
+ )
125
+
126
+ # Create view with custom bucket boundaries for LLM duration histogram
127
+ llm_duration_view = View(
128
+ instrument_name="hindsight.llm.duration",
129
+ aggregation=ExplicitBucketHistogramAggregation(boundaries=LLM_DURATION_BUCKETS),
130
+ )
131
+
132
+ # Create view with custom bucket boundaries for HTTP request duration histogram
133
+ http_duration_view = View(
134
+ instrument_name="hindsight.http.duration",
135
+ aggregation=ExplicitBucketHistogramAggregation(boundaries=HTTP_DURATION_BUCKETS),
136
+ )
137
+
138
+ # Create meter provider with Prometheus exporter and custom views
139
+ provider = MeterProvider(
140
+ resource=resource,
141
+ metric_readers=[prometheus_reader],
142
+ views=[duration_view, llm_duration_view, http_duration_view],
143
+ )
53
144
 
54
145
  # Set the global meter provider
55
146
  metrics.set_meter_provider(provider)
@@ -71,43 +162,84 @@ class MetricsCollectorBase:
71
162
  """Base class for metrics collectors."""
72
163
 
73
164
  @contextmanager
74
- def record_operation(self, operation: str, bank_id: str, budget: str | None = None, max_tokens: int | None = None):
165
+ def record_operation(
166
+ self,
167
+ operation: str,
168
+ bank_id: str,
169
+ source: str = "api",
170
+ budget: str | None = None,
171
+ max_tokens: int | None = None,
172
+ ):
75
173
  """Context manager to record operation duration and status."""
76
174
  raise NotImplementedError
77
175
 
78
- def record_tokens(
176
+ def record_llm_call(
79
177
  self,
80
- operation: str,
81
- bank_id: str,
178
+ provider: str,
179
+ model: str,
180
+ scope: str,
181
+ duration: float,
82
182
  input_tokens: int = 0,
83
183
  output_tokens: int = 0,
84
- budget: str | None = None,
85
- max_tokens: int | None = None,
184
+ success: bool = True,
86
185
  ):
87
- """Record token usage for an operation."""
186
+ """
187
+ Record metrics for an LLM call.
188
+
189
+ Args:
190
+ provider: LLM provider name (openai, anthropic, gemini, groq, ollama, lmstudio)
191
+ model: Model name
192
+ scope: Scope identifier (e.g., "memory", "reflect", "entity_observation")
193
+ duration: Call duration in seconds
194
+ input_tokens: Number of input/prompt tokens
195
+ output_tokens: Number of output/completion tokens
196
+ success: Whether the call was successful
197
+ """
88
198
  raise NotImplementedError
89
199
 
200
+ @contextmanager
201
+ def record_http_request(self, method: str, endpoint: str, status_code_getter: Callable[[], int]):
202
+ """Context manager to record HTTP request metrics."""
203
+ raise NotImplementedError
204
+
205
+ def set_db_pool(self, pool: "asyncpg.Pool"):
206
+ """Set the database pool for metrics collection."""
207
+ pass
208
+
90
209
 
91
210
  class NoOpMetricsCollector(MetricsCollectorBase):
92
211
  """No-op metrics collector that does nothing. Used when metrics are disabled."""
93
212
 
94
213
  @contextmanager
95
- def record_operation(self, operation: str, bank_id: str, budget: str | None = None, max_tokens: int | None = None):
214
+ def record_operation(
215
+ self,
216
+ operation: str,
217
+ bank_id: str,
218
+ source: str = "api",
219
+ budget: str | None = None,
220
+ max_tokens: int | None = None,
221
+ ):
96
222
  """No-op context manager."""
97
223
  yield
98
224
 
99
- def record_tokens(
225
+ def record_llm_call(
100
226
  self,
101
- operation: str,
102
- bank_id: str,
227
+ provider: str,
228
+ model: str,
229
+ scope: str,
230
+ duration: float,
103
231
  input_tokens: int = 0,
104
232
  output_tokens: int = 0,
105
- budget: str | None = None,
106
- max_tokens: int | None = None,
233
+ success: bool = True,
107
234
  ):
108
- """No-op token recording."""
235
+ """No-op LLM call recording."""
109
236
  pass
110
237
 
238
+ @contextmanager
239
+ def record_http_request(self, method: str, endpoint: str, status_code_getter: Callable[[], int]):
240
+ """No-op HTTP request recording."""
241
+ yield
242
+
111
243
 
112
244
  class MetricsCollector(MetricsCollectorBase):
113
245
  """
@@ -125,33 +257,73 @@ class MetricsCollector(MetricsCollectorBase):
125
257
  name="hindsight.operation.duration", description="Duration of Hindsight operations in seconds", unit="s"
126
258
  )
127
259
 
128
- # Token usage counters
129
- self.tokens_input = self.meter.create_counter(
130
- name="hindsight.tokens.input", description="Number of input tokens consumed", unit="tokens"
260
+ # Operation counter (success/failure)
261
+ self.operation_total = self.meter.create_counter(
262
+ name="hindsight.operation.total", description="Total number of operations executed", unit="operations"
131
263
  )
132
264
 
133
- self.tokens_output = self.meter.create_counter(
134
- name="hindsight.tokens.output", description="Number of output tokens generated", unit="tokens"
265
+ # LLM call latency histogram (in seconds)
266
+ # Records duration of LLM API calls with provider, model, and scope dimensions
267
+ self.llm_duration = self.meter.create_histogram(
268
+ name="hindsight.llm.duration", description="Duration of LLM API calls in seconds", unit="s"
135
269
  )
136
270
 
137
- # Operation counter (success/failure)
138
- self.operation_total = self.meter.create_counter(
139
- name="hindsight.operation.total", description="Total number of operations executed", unit="operations"
271
+ # LLM token usage counters with bucket labels
272
+ self.llm_tokens_input = self.meter.create_counter(
273
+ name="hindsight.llm.tokens.input", description="Number of input tokens for LLM calls", unit="tokens"
274
+ )
275
+
276
+ self.llm_tokens_output = self.meter.create_counter(
277
+ name="hindsight.llm.tokens.output", description="Number of output tokens from LLM calls", unit="tokens"
278
+ )
279
+
280
+ # LLM call counter (success/failure)
281
+ self.llm_calls_total = self.meter.create_counter(
282
+ name="hindsight.llm.calls.total", description="Total number of LLM API calls", unit="calls"
283
+ )
284
+
285
+ # HTTP request metrics
286
+ self.http_request_duration = self.meter.create_histogram(
287
+ name="hindsight.http.duration", description="Duration of HTTP requests in seconds", unit="s"
140
288
  )
141
289
 
290
+ self.http_requests_total = self.meter.create_counter(
291
+ name="hindsight.http.requests.total", description="Total number of HTTP requests", unit="requests"
292
+ )
293
+
294
+ self.http_requests_in_progress = self.meter.create_up_down_counter(
295
+ name="hindsight.http.requests.in_progress",
296
+ description="Number of HTTP requests in progress",
297
+ unit="requests",
298
+ )
299
+
300
+ # Process metrics (observable gauges - collected on scrape)
301
+ self._setup_process_metrics()
302
+
303
+ # DB pool metrics holder (set via set_db_pool)
304
+ self._db_pool: "asyncpg.Pool | None" = None
305
+
142
306
  @contextmanager
143
- def record_operation(self, operation: str, bank_id: str, budget: str | None = None, max_tokens: int | None = None):
307
+ def record_operation(
308
+ self,
309
+ operation: str,
310
+ bank_id: str,
311
+ source: str = "api",
312
+ budget: str | None = None,
313
+ max_tokens: int | None = None,
314
+ ):
144
315
  """
145
316
  Context manager to record operation duration and status.
146
317
 
147
318
  Usage:
148
- with metrics.record_operation("recall", bank_id="user123", budget="mid", max_tokens=4096):
319
+ with metrics.record_operation("recall", bank_id="user123", source="api", budget="mid", max_tokens=4096):
149
320
  # ... perform operation
150
321
  pass
151
322
 
152
323
  Args:
153
- operation: Operation name (retain, recall, reflect)
324
+ operation: Operation name (retain, recall, reflect, entity_observation)
154
325
  bank_id: Memory bank ID
326
+ source: Source of the operation (api, reflect, internal)
155
327
  budget: Optional budget level (low, mid, high)
156
328
  max_tokens: Optional max tokens for the operation
157
329
  """
@@ -159,6 +331,8 @@ class MetricsCollector(MetricsCollectorBase):
159
331
  attributes = {
160
332
  "operation": operation,
161
333
  "bank_id": bank_id,
334
+ "source": source,
335
+ "tenant": _get_tenant(),
162
336
  }
163
337
  if budget:
164
338
  attributes["budget"] = budget
@@ -181,40 +355,251 @@ class MetricsCollector(MetricsCollectorBase):
181
355
  # Record operation count
182
356
  self.operation_total.add(1, attributes)
183
357
 
184
- def record_tokens(
358
+ def record_llm_call(
185
359
  self,
186
- operation: str,
187
- bank_id: str,
360
+ provider: str,
361
+ model: str,
362
+ scope: str,
363
+ duration: float,
188
364
  input_tokens: int = 0,
189
365
  output_tokens: int = 0,
190
- budget: str | None = None,
191
- max_tokens: int | None = None,
366
+ success: bool = True,
192
367
  ):
193
368
  """
194
- Record token usage for an operation.
369
+ Record metrics for an LLM call.
195
370
 
196
371
  Args:
197
- operation: Operation name (retain, recall, reflect)
198
- bank_id: Memory bank ID
199
- input_tokens: Number of input tokens
200
- output_tokens: Number of output tokens
201
- budget: Optional budget level
202
- max_tokens: Optional max tokens for the operation
372
+ provider: LLM provider name (openai, anthropic, gemini, groq, ollama, lmstudio)
373
+ model: Model name
374
+ scope: Scope identifier (e.g., "memory", "reflect", "entity_observation")
375
+ duration: Call duration in seconds
376
+ input_tokens: Number of input/prompt tokens
377
+ output_tokens: Number of output/completion tokens
378
+ success: Whether the call was successful
203
379
  """
204
- attributes = {
205
- "operation": operation,
206
- "bank_id": bank_id,
380
+ # Base attributes for all metrics
381
+ base_attributes = {
382
+ "provider": provider,
383
+ "model": model,
384
+ "scope": scope,
385
+ "success": str(success).lower(),
386
+ "tenant": _get_tenant(),
207
387
  }
208
- if budget:
209
- attributes["budget"] = budget
210
- if max_tokens:
211
- attributes["max_tokens"] = str(max_tokens)
212
388
 
389
+ # Record duration
390
+ self.llm_duration.record(duration, base_attributes)
391
+
392
+ # Record call count
393
+ self.llm_calls_total.add(1, base_attributes)
394
+
395
+ # Record tokens with bucket labels for cardinality control
213
396
  if input_tokens > 0:
214
- self.tokens_input.add(input_tokens, attributes)
397
+ input_attributes = {
398
+ **base_attributes,
399
+ "token_bucket": get_token_bucket(input_tokens),
400
+ }
401
+ self.llm_tokens_input.add(input_tokens, input_attributes)
215
402
 
216
403
  if output_tokens > 0:
217
- self.tokens_output.add(output_tokens, attributes)
404
+ output_attributes = {
405
+ **base_attributes,
406
+ "token_bucket": get_token_bucket(output_tokens),
407
+ }
408
+ self.llm_tokens_output.add(output_tokens, output_attributes)
409
+
410
+ @contextmanager
411
+ def record_http_request(self, method: str, endpoint: str, status_code_getter: Callable[[], int]):
412
+ """
413
+ Context manager to record HTTP request metrics.
414
+
415
+ Usage:
416
+ status_code = [200] # Use list for mutability
417
+ with metrics.record_http_request("GET", "/api/banks", lambda: status_code[0]):
418
+ # ... handle request
419
+ status_code[0] = response.status_code
420
+
421
+ Args:
422
+ method: HTTP method (GET, POST, etc.)
423
+ endpoint: Request endpoint path
424
+ status_code_getter: Callable that returns the status code after request completes
425
+ """
426
+ start_time = time.time()
427
+ base_attributes = {"method": method, "endpoint": endpoint}
428
+
429
+ # Track in-progress
430
+ self.http_requests_in_progress.add(1, base_attributes)
431
+
432
+ try:
433
+ yield
434
+ finally:
435
+ duration = time.time() - start_time
436
+ status_code = status_code_getter()
437
+ status_class = f"{status_code // 100}xx"
438
+
439
+ # Get tenant from context (may be set during request processing)
440
+ tenant = _get_tenant()
441
+
442
+ attributes = {
443
+ **base_attributes,
444
+ "status_code": str(status_code),
445
+ "status_class": status_class,
446
+ "tenant": tenant,
447
+ }
448
+
449
+ # Record duration and count
450
+ self.http_request_duration.record(duration, attributes)
451
+ self.http_requests_total.add(1, attributes)
452
+
453
+ # Decrement in-progress
454
+ self.http_requests_in_progress.add(-1, base_attributes)
455
+
456
    def _setup_process_metrics(self):
        """Register observable gauges for process-level metrics.

        The gauges are evaluated lazily via the callbacks below (collected on
        scrape); nothing is sampled between scrapes. Every callback swallows
        exceptions so a platform quirk can never break a metrics scrape.
        """

        def get_cpu_times(_options):
            """Yield cumulative user/system CPU time for this process."""
            try:
                rusage = resource.getrusage(resource.RUSAGE_SELF)
                yield metrics.Observation(rusage.ru_utime, {"type": "user"})
                yield metrics.Observation(rusage.ru_stime, {"type": "system"})
            except Exception:
                pass

        def get_memory_usage(_options):
            """Yield this process's peak RSS in bytes."""
            try:
                rusage = resource.getrusage(resource.RUSAGE_SELF)
                # ru_maxrss is in kilobytes on Linux, bytes on macOS
                max_rss = rusage.ru_maxrss
                if os.uname().sysname == "Linux":
                    max_rss *= 1024  # Convert KB to bytes
                yield metrics.Observation(max_rss, {"type": "rss_max"})
            except Exception:
                pass

        def get_open_file_descriptors(_options):
            """Yield the number of open file descriptors."""
            try:
                # Try to count open FDs by checking /proc on Linux
                if os.path.exists("/proc/self/fd"):
                    count = len(os.listdir("/proc/self/fd"))
                    yield metrics.Observation(count)
                else:
                    # Fallback: use resource limits
                    # NOTE(review): this reports the soft RLIMIT_NOFILE *limit*,
                    # not an actual FD count; the {"limit": "soft"} label marks it
                    # as such — confirm dashboards distinguish the two series.
                    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
                    yield metrics.Observation(soft, {"limit": "soft"})
            except Exception:
                pass

        def get_thread_count(_options):
            """Yield the number of live Python threads."""
            try:
                yield metrics.Observation(threading.active_count())
            except Exception:
                pass

        # Create observable gauges
        self.meter.create_observable_gauge(
            name="hindsight.process.cpu.seconds",
            callbacks=[get_cpu_times],
            description="Process CPU time in seconds",
            unit="s",
        )

        self.meter.create_observable_gauge(
            name="hindsight.process.memory.bytes",
            callbacks=[get_memory_usage],
            description="Process memory usage in bytes",
            unit="By",
        )

        self.meter.create_observable_gauge(
            name="hindsight.process.open_fds",
            callbacks=[get_open_file_descriptors],
            description="Number of open file descriptors",
            unit="{fds}",
        )

        self.meter.create_observable_gauge(
            name="hindsight.process.threads",
            callbacks=[get_thread_count],
            description="Number of active threads",
            unit="{threads}",
        )
530
+ def set_db_pool(self, pool: "asyncpg.Pool"):
531
+ """
532
+ Set the database pool for metrics collection.
533
+
534
+ Args:
535
+ pool: asyncpg connection pool instance
536
+ """
537
+ self._db_pool = pool
538
+ self._setup_db_pool_metrics()
539
+
540
    def _setup_db_pool_metrics(self):
        """Register observable gauges for asyncpg connection-pool statistics.

        Each callback reads ``self._db_pool`` at scrape time, so the gauges
        pick up a pool assigned (or replaced) after registration. While no
        pool is set, the callbacks yield nothing and the series are simply
        absent from the scrape. Exceptions are swallowed so a pool in a bad
        state cannot break a scrape.
        """

        def get_pool_size(_options):
            """Yield the current number of connections in the pool."""
            if self._db_pool is not None:
                try:
                    yield metrics.Observation(self._db_pool.get_size())
                except Exception:
                    pass

        def get_pool_free_size(_options):
            """Yield the number of idle (free) connections in the pool."""
            if self._db_pool is not None:
                try:
                    yield metrics.Observation(self._db_pool.get_idle_size())
                except Exception:
                    pass

        def get_pool_min_size(_options):
            """Yield the pool's configured minimum size."""
            if self._db_pool is not None:
                try:
                    yield metrics.Observation(self._db_pool.get_min_size())
                except Exception:
                    pass

        def get_pool_max_size(_options):
            """Yield the pool's configured maximum size."""
            if self._db_pool is not None:
                try:
                    yield metrics.Observation(self._db_pool.get_max_size())
                except Exception:
                    pass

        # Create observable gauges for pool metrics
        self.meter.create_observable_gauge(
            name="hindsight.db.pool.size",
            callbacks=[get_pool_size],
            description="Current number of connections in the pool",
            unit="{connections}",
        )

        self.meter.create_observable_gauge(
            name="hindsight.db.pool.idle",
            callbacks=[get_pool_free_size],
            description="Number of idle connections in the pool",
            unit="{connections}",
        )

        self.meter.create_observable_gauge(
            name="hindsight.db.pool.min",
            callbacks=[get_pool_min_size],
            description="Minimum pool size",
            unit="{connections}",
        )

        self.meter.create_observable_gauge(
            name="hindsight.db.pool.max",
            callbacks=[get_pool_max_size],
            description="Maximum pool size",
            unit="{connections}",
        )
218
603
 
219
604
 
220
605
  # Global metrics collector instance (defaults to no-op)