hindsight_api-0.0.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. hindsight_api/__init__.py +38 -0
  2. hindsight_api/api/__init__.py +105 -0
  3. hindsight_api/api/http.py +1872 -0
  4. hindsight_api/api/mcp.py +157 -0
  5. hindsight_api/engine/__init__.py +47 -0
  6. hindsight_api/engine/cross_encoder.py +97 -0
  7. hindsight_api/engine/db_utils.py +93 -0
  8. hindsight_api/engine/embeddings.py +113 -0
  9. hindsight_api/engine/entity_resolver.py +575 -0
  10. hindsight_api/engine/llm_wrapper.py +269 -0
  11. hindsight_api/engine/memory_engine.py +3095 -0
  12. hindsight_api/engine/query_analyzer.py +519 -0
  13. hindsight_api/engine/response_models.py +222 -0
  14. hindsight_api/engine/retain/__init__.py +50 -0
  15. hindsight_api/engine/retain/bank_utils.py +423 -0
  16. hindsight_api/engine/retain/chunk_storage.py +82 -0
  17. hindsight_api/engine/retain/deduplication.py +104 -0
  18. hindsight_api/engine/retain/embedding_processing.py +62 -0
  19. hindsight_api/engine/retain/embedding_utils.py +54 -0
  20. hindsight_api/engine/retain/entity_processing.py +90 -0
  21. hindsight_api/engine/retain/fact_extraction.py +1027 -0
  22. hindsight_api/engine/retain/fact_storage.py +176 -0
  23. hindsight_api/engine/retain/link_creation.py +121 -0
  24. hindsight_api/engine/retain/link_utils.py +651 -0
  25. hindsight_api/engine/retain/orchestrator.py +405 -0
  26. hindsight_api/engine/retain/types.py +206 -0
  27. hindsight_api/engine/search/__init__.py +15 -0
  28. hindsight_api/engine/search/fusion.py +122 -0
  29. hindsight_api/engine/search/observation_utils.py +132 -0
  30. hindsight_api/engine/search/reranking.py +103 -0
  31. hindsight_api/engine/search/retrieval.py +503 -0
  32. hindsight_api/engine/search/scoring.py +161 -0
  33. hindsight_api/engine/search/temporal_extraction.py +64 -0
  34. hindsight_api/engine/search/think_utils.py +255 -0
  35. hindsight_api/engine/search/trace.py +215 -0
  36. hindsight_api/engine/search/tracer.py +447 -0
  37. hindsight_api/engine/search/types.py +160 -0
  38. hindsight_api/engine/task_backend.py +223 -0
  39. hindsight_api/engine/utils.py +203 -0
  40. hindsight_api/metrics.py +227 -0
  41. hindsight_api/migrations.py +163 -0
  42. hindsight_api/models.py +309 -0
  43. hindsight_api/pg0.py +425 -0
  44. hindsight_api/web/__init__.py +12 -0
  45. hindsight_api/web/server.py +143 -0
  46. hindsight_api-0.0.13.dist-info/METADATA +41 -0
  47. hindsight_api-0.0.13.dist-info/RECORD +48 -0
  48. hindsight_api-0.0.13.dist-info/WHEEL +4 -0
hindsight_api/engine/task_backend.py
@@ -0,0 +1,223 @@
+ """
+ Abstract task backend for running async tasks.
+
+ This provides an abstraction that can be adapted to different execution models:
+ - AsyncIO queue (default implementation)
+ - Pub/Sub architectures (future)
+ - Message brokers (future)
+ """
+ from abc import ABC, abstractmethod
+ from typing import Any, Dict, Optional, Callable, Awaitable
+ import asyncio
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ class TaskBackend(ABC):
+     """
+     Abstract base class for task execution backends.
+
+     Implementations must:
+     1. Store/publish task events (as serializable dicts)
+     2. Execute tasks through a provided executor callback
+
+     The backend treats tasks as pure dictionaries that can be serialized
+     and sent over the network. The executor (typically MemoryEngine.execute_task)
+     receives the dict and routes it to the appropriate handler.
+     """
+
+     def __init__(self):
+         """Initialize the task backend."""
+         self._executor: Optional[Callable[[Dict[str, Any]], Awaitable[None]]] = None
+         self._initialized = False
+
+     def set_executor(self, executor: Callable[[Dict[str, Any]], Awaitable[None]]):
+         """
+         Set the executor callback for processing tasks.
+
+         Args:
+             executor: Async function that takes a task dict and executes it
+         """
+         self._executor = executor
+
+     @abstractmethod
+     async def initialize(self):
+         """
+         Initialize the backend (e.g., start workers, connect to broker).
+         """
+         pass
+
+     @abstractmethod
+     async def submit_task(self, task_dict: Dict[str, Any]):
+         """
+         Submit a task for execution.
+
+         Args:
+             task_dict: Task as a dictionary (must be serializable)
+         """
+         pass
+
+     @abstractmethod
+     async def shutdown(self):
+         """
+         Shutdown the backend gracefully (e.g., stop workers, close connections).
+         """
+         pass
+
+     async def _execute_task(self, task_dict: Dict[str, Any]):
+         """
+         Execute a task through the registered executor.
+
+         Args:
+             task_dict: Task dictionary to execute
+         """
+         if self._executor is None:
+             task_type = task_dict.get('type', 'unknown')
+             logger.warning(f"No executor registered, skipping task {task_type}")
+             return
+
+         try:
+             await self._executor(task_dict)
+         except Exception as e:
+             task_type = task_dict.get('type', 'unknown')
+             logger.error(f"Error executing task {task_type}: {e}")
+             import traceback
+             traceback.print_exc()
+
+
+ class AsyncIOQueueBackend(TaskBackend):
+     """
+     Task backend implementation using asyncio queues.
+
+     This is the default implementation that uses in-process asyncio queues
+     and a periodic consumer worker.
+     """
+
+     def __init__(
+         self,
+         batch_size: int = 100,
+         batch_interval: float = 1.0
+     ):
+         """
+         Initialize AsyncIO queue backend.
+
+         Args:
+             batch_size: Maximum number of tasks to process in one batch
+             batch_interval: Maximum time (seconds) to wait before processing batch
+         """
+         super().__init__()
+         self._queue: Optional[asyncio.Queue] = None
+         self._worker_task: Optional[asyncio.Task] = None
+         self._shutdown_event: Optional[asyncio.Event] = None
+         self._batch_size = batch_size
+         self._batch_interval = batch_interval
+
+     async def initialize(self):
+         """Initialize the queue and start the worker."""
+         if self._initialized:
+             return
+
+         self._queue = asyncio.Queue()
+         self._shutdown_event = asyncio.Event()
+         self._worker_task = asyncio.create_task(self._worker())
+         self._initialized = True
+         logger.info("AsyncIOQueueBackend initialized")
+
+     async def submit_task(self, task_dict: Dict[str, Any]):
+         """
+         Submit a task by putting it in the queue.
+
+         Args:
+             task_dict: Task dictionary to execute
+         """
+         if not self._initialized:
+             await self.initialize()
+
+         await self._queue.put(task_dict)
+         task_type = task_dict.get('type', 'unknown')
+         task_id = task_dict.get('id')
+
+     async def wait_for_pending_tasks(self, timeout: float = 5.0):
+         """
+         Wait for all pending tasks in the queue to be processed.
+
+         This is useful in tests to ensure background tasks complete before assertions.
+
+         Args:
+             timeout: Maximum time to wait in seconds
+         """
+         if not self._initialized or self._queue is None:
+             return
+
+         # Wait for queue to be empty and give worker time to process
+         start_time = asyncio.get_event_loop().time()
+         while asyncio.get_event_loop().time() - start_time < timeout:
+             if self._queue.empty():
+                 # Queue is empty, give worker a bit more time to finish any in-flight task
+                 await asyncio.sleep(0.3)
+                 # Check again - if still empty, we're done
+                 if self._queue.empty():
+                     return
+             else:
+                 # Queue not empty, wait a bit
+                 await asyncio.sleep(0.1)
+
+     async def shutdown(self):
+         """Shutdown the worker and drain the queue."""
+         if not self._initialized:
+             return
+
+         logger.info("Shutting down AsyncIOQueueBackend...")
+
+         # Signal shutdown
+         self._shutdown_event.set()
+
+         # Cancel worker
+         if self._worker_task is not None:
+             self._worker_task.cancel()
+             try:
+                 await self._worker_task
+             except asyncio.CancelledError:
+                 pass  # Worker cancelled successfully
+
+         self._initialized = False
+         logger.info("AsyncIOQueueBackend shutdown complete")
+
+     async def _worker(self):
+         """
+         Background worker that processes tasks in batches.
+
+         Collects tasks for up to batch_interval seconds or batch_size items,
+         then processes them.
+         """
+         while not self._shutdown_event.is_set():
+             try:
+                 # Collect tasks for batching
+                 tasks = []
+                 deadline = asyncio.get_event_loop().time() + self._batch_interval
+
+                 while len(tasks) < self._batch_size and asyncio.get_event_loop().time() < deadline:
+                     try:
+                         remaining_time = max(0.1, deadline - asyncio.get_event_loop().time())
+                         task_dict = await asyncio.wait_for(
+                             self._queue.get(),
+                             timeout=remaining_time
+                         )
+                         tasks.append(task_dict)
+                     except asyncio.TimeoutError:
+                         break
+
+                 # Process batch
+                 if tasks:
+                     # Execute tasks concurrently
+                     await asyncio.gather(
+                         *[self._execute_task(task_dict) for task_dict in tasks],
+                         return_exceptions=True
+                     )
+
+             except asyncio.CancelledError:
+                 break
+             except Exception as e:
+                 logger.error(f"Worker error: {e}")
+                 await asyncio.sleep(1)  # Backoff on error
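For orientation (this is not part of the package diff), the hunk above suggests the following flow: register an executor coroutine, submit serializable task dicts, and let the background worker batch them. A minimal sketch, assuming the module is importable as hindsight_api.engine.task_backend and using a stand-in executor in place of the engine's real one:

    import asyncio
    from hindsight_api.engine.task_backend import AsyncIOQueueBackend

    async def main():
        backend = AsyncIOQueueBackend(batch_size=10, batch_interval=0.5)

        # Stand-in executor; in the package this role is described as
        # MemoryEngine.execute_task in the docstring above.
        async def executor(task_dict):
            print(f"executing {task_dict.get('type')} -> {task_dict.get('id')}")

        backend.set_executor(executor)
        await backend.initialize()

        await backend.submit_task({"type": "retain", "id": "task-1"})
        await backend.wait_for_pending_tasks(timeout=2.0)
        await backend.shutdown()

    asyncio.run(main())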
hindsight_api/engine/utils.py
@@ -0,0 +1,203 @@
+ """
+ Utility functions for memory system.
+ """
+ import logging
+ from datetime import datetime
+ from typing import List, Dict, TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     from .llm_wrapper import LLMConfig
+     from .retain.fact_extraction import Fact
+
+ from .retain.fact_extraction import extract_facts_from_text
+
+
+ async def extract_facts(text: str, event_date: datetime, context: str = "", llm_config: 'LLMConfig' = None, agent_name: str = None, extract_opinions: bool = False) -> tuple[List['Fact'], List[tuple[str, int]]]:
+     """
+     Extract semantic facts from text using LLM.
+
+     Uses LLM for intelligent fact extraction that:
+     - Filters out social pleasantries and filler words
+     - Creates self-contained statements with absolute dates
+     - Handles conversational text well
+     - Resolves relative time expressions to absolute dates
+
+     Args:
+         text: Input text (conversation, article, etc.)
+         event_date: Reference date for resolving relative times
+         context: Context about the conversation/document
+         llm_config: LLM configuration to use
+         agent_name: Optional agent name to help identify agent-related facts
+         extract_opinions: If True, extract ONLY opinions. If False, extract world and agent facts (no opinions)
+
+     Returns:
+         Tuple of (facts, chunks) where:
+         - facts: List of Fact model instances
+         - chunks: List of tuples (chunk_text, fact_count) for each chunk
+
+     Raises:
+         Exception: If LLM fact extraction fails
+     """
+     if not text or not text.strip():
+         return [], []
+
+     facts, chunks = await extract_facts_from_text(text, event_date, context=context, llm_config=llm_config, agent_name=agent_name, extract_opinions=extract_opinions)
+
+     if not facts:
+         logging.warning(f"LLM extracted 0 facts from text of length {len(text)}. This may indicate the text contains no meaningful information, or the LLM failed to extract facts. Full text: {text}")
+         return [], chunks
+
+     return facts, chunks
+
+
+ def cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
+     """
+     Calculate cosine similarity between two vectors.
+
+     Args:
+         vec1: First vector
+         vec2: Second vector
+
+     Returns:
+         Similarity score between 0 and 1
+     """
+     if len(vec1) != len(vec2):
+         raise ValueError("Vectors must have same dimension")
+
+     dot_product = sum(a * b for a, b in zip(vec1, vec2))
+     magnitude1 = sum(a * a for a in vec1) ** 0.5
+     magnitude2 = sum(b * b for b in vec2) ** 0.5
+
+     if magnitude1 == 0 or magnitude2 == 0:
+         return 0.0
+
+     return dot_product / (magnitude1 * magnitude2)
+
+
+ def calculate_recency_weight(days_since: float, half_life_days: float = 365.0) -> float:
+     """
+     Calculate recency weight using logarithmic decay.
+
+     This provides much better differentiation over long time periods compared to
+     exponential decay. Uses a log-based decay where the half-life parameter controls
+     when memories reach 50% weight.
+
+     Examples:
+     - Today (0 days): 1.0
+     - 1 year (365 days): ~0.5 (with default half_life=365)
+     - 2 years (730 days): ~0.33
+     - 5 years (1825 days): ~0.17
+     - 10 years (3650 days): ~0.09
+
+     This ensures that 2-year-old and 5-year-old memories have meaningfully
+     different weights, unlike exponential decay which makes them both ~0.
+
+     Args:
+         days_since: Number of days since the memory was created
+         half_life_days: Number of days for weight to reach 0.5 (default: 1 year)
+
+     Returns:
+         Weight between 0 and 1
+     """
+     import math
+     # Logarithmic decay: 1 / (1 + log(1 + days_since/half_life))
+     # This decays much slower than exponential, giving better long-term differentiation
+     normalized_age = days_since / half_life_days
+     return 1.0 / (1.0 + math.log1p(normalized_age))
+
+
+ def calculate_frequency_weight(access_count: int, max_boost: float = 2.0) -> float:
+     """
+     Calculate frequency weight based on access count.
+
+     Frequently accessed memories are weighted higher.
+     Uses logarithmic scaling to avoid over-weighting.
+
+     Args:
+         access_count: Number of times the memory was accessed
+         max_boost: Maximum multiplier for frequently accessed memories
+
+     Returns:
+         Weight between 1.0 and max_boost
+     """
+     import math
+     if access_count <= 0:
+         return 1.0
+
+     # Logarithmic scaling: log(access_count + 1) / log(10)
+     # This gives: 0 accesses = 1.0, 9 accesses ~= 1.5, 99 accesses ~= 2.0
+     normalized = math.log(access_count + 1) / math.log(10)
+     return 1.0 + min(normalized, max_boost - 1.0)
+
+
+ def calculate_temporal_anchor(occurred_start: datetime, occurred_end: datetime) -> datetime:
+     """
+     Calculate a single temporal anchor point from a temporal range.
+
+     Used for spreading activation - we need a single representative date
+     to calculate temporal proximity between facts. This simplifies the
+     range-to-range distance problem.
+
+     Strategy: Use midpoint of the range for balanced representation.
+
+     Args:
+         occurred_start: Start of temporal range
+         occurred_end: End of temporal range
+
+     Returns:
+         Single datetime representing the temporal anchor (midpoint)
+
+     Examples:
+     - Point event (July 14): start=July 14, end=July 14 → anchor=July 14
+     - Month range (February): start=Feb 1, end=Feb 28 → anchor=Feb 14
+     - Year range (2023): start=Jan 1, end=Dec 31 → anchor=July 1
+     """
+     # Calculate midpoint
+     time_delta = occurred_end - occurred_start
+     midpoint = occurred_start + (time_delta / 2)
+     return midpoint
+
+
+ def calculate_temporal_proximity(
+     anchor_a: datetime,
+     anchor_b: datetime,
+     half_life_days: float = 30.0
+ ) -> float:
+     """
+     Calculate temporal proximity between two temporal anchors.
+
+     Used for spreading activation to determine how "close" two facts are
+     in time. Uses logarithmic decay so that temporal similarity doesn't
+     drop off too quickly.
+
+     Args:
+         anchor_a: Temporal anchor of first fact
+         anchor_b: Temporal anchor of second fact
+         half_life_days: Number of days for proximity to reach 0.5
+             (default: 30 days = 1 month)
+
+     Returns:
+         Proximity score in [0, 1] where:
+         - 1.0 = same day
+         - 0.5 = ~half_life days apart
+         - 0.0 = very distant in time
+
+     Examples:
+     - Same day: 1.0
+     - 1 week apart (half_life=30): ~0.7
+     - 1 month apart (half_life=30): ~0.5
+     - 1 year apart (half_life=30): ~0.2
+     """
+     import math
+
+     days_apart = abs((anchor_a - anchor_b).days)
+
+     if days_apart == 0:
+         return 1.0
+
+     # Logarithmic decay: 1 / (1 + log(1 + days_apart/half_life))
+     # Similar to calculate_recency_weight but for proximity between events
+     normalized_distance = days_apart / half_life_days
+     proximity = 1.0 / (1.0 + math.log1p(normalized_distance))
+
+     return proximity
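The utilities in this hunk are pure functions, so they can be exercised in isolation. A small sketch (not from the package) showing how the scoring helpers compose, assuming the module is importable as hindsight_api.engine.utils; the final combined score is illustrative only and is not the engine's actual fusion formula:

    from datetime import datetime
    from hindsight_api.engine.utils import (
        cosine_similarity,
        calculate_recency_weight,
        calculate_frequency_weight,
        calculate_temporal_anchor,
        calculate_temporal_proximity,
    )

    # Cosine similarity: parallel vectors score ~1.0, orthogonal vectors 0.0.
    assert abs(cosine_similarity([1.0, 2.0], [2.0, 4.0]) - 1.0) < 1e-9
    assert cosine_similarity([1.0, 0.0], [0.0, 1.0]) == 0.0

    # Recency and frequency weights for a memory from ~2 years ago, accessed 9 times.
    recency = calculate_recency_weight(days_since=730)      # log decay, in (0, 1]
    frequency = calculate_frequency_weight(access_count=9)  # in [1.0, 2.0]

    # Temporal anchor of a month-long range, and its proximity to a nearby fact.
    anchor = calculate_temporal_anchor(datetime(2023, 2, 1), datetime(2023, 2, 28))
    proximity = calculate_temporal_proximity(anchor, datetime(2023, 2, 21))

    # Illustrative combination only; see memory_engine.py / search/ for the real scoring.
    score = recency * frequency * proximity
    print(recency, frequency, proximity, score)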
hindsight_api/metrics.py
@@ -0,0 +1,227 @@
+ """
+ OpenTelemetry metrics instrumentation for Hindsight API.
+
+ This module provides metrics for:
+ - Operation latency (retain, recall, reflect) with percentiles
+ - Token usage (input/output) per operation
+ - Per-bank granularity via labels
+ """
+ import logging
+ from typing import Dict, Any, Optional
+ from contextlib import contextmanager
+ import time
+
+ from opentelemetry import metrics
+ from opentelemetry.sdk.metrics import MeterProvider
+ from opentelemetry.sdk.resources import Resource
+ from opentelemetry.exporter.prometheus import PrometheusMetricReader
+ from prometheus_client import REGISTRY
+
+ logger = logging.getLogger(__name__)
+
+ # Global meter instance
+ _meter = None
+
+
+ def initialize_metrics(service_name: str = "hindsight-api", service_version: str = "1.0.0"):
+     """
+     Initialize OpenTelemetry metrics with Prometheus exporter.
+
+     This should be called once during application startup.
+
+     Args:
+         service_name: Name of the service for resource attributes
+         service_version: Version of the service
+
+     Returns:
+         PrometheusMetricReader instance (for accessing metrics endpoint)
+     """
+     global _meter
+
+     # Create resource with service information
+     resource = Resource.create({
+         "service.name": service_name,
+         "service.version": service_version,
+     })
+
+     # Create Prometheus metric reader
+     prometheus_reader = PrometheusMetricReader()
+
+     # Create meter provider with Prometheus exporter
+     provider = MeterProvider(
+         resource=resource,
+         metric_readers=[prometheus_reader]
+     )
+
+     # Set the global meter provider
+     metrics.set_meter_provider(provider)
+
+     # Get meter for this application
+     _meter = metrics.get_meter(__name__)
+
+     return prometheus_reader
+
+
+ def get_meter():
+     """Get the global meter instance."""
+     if _meter is None:
+         raise RuntimeError("Metrics not initialized. Call initialize_metrics() first.")
+     return _meter
+
+
+ class MetricsCollectorBase:
+     """Base class for metrics collectors."""
+
+     @contextmanager
+     def record_operation(self, operation: str, bank_id: str, budget: Optional[str] = None, max_tokens: Optional[int] = None):
+         """Context manager to record operation duration and status."""
+         raise NotImplementedError
+
+     def record_tokens(self, operation: str, bank_id: str, input_tokens: int = 0, output_tokens: int = 0, budget: Optional[str] = None, max_tokens: Optional[int] = None):
+         """Record token usage for an operation."""
+         raise NotImplementedError
+
+
+ class NoOpMetricsCollector(MetricsCollectorBase):
+     """No-op metrics collector that does nothing. Used when metrics are disabled."""
+
+     @contextmanager
+     def record_operation(self, operation: str, bank_id: str, budget: Optional[str] = None, max_tokens: Optional[int] = None):
+         """No-op context manager."""
+         yield
+
+     def record_tokens(self, operation: str, bank_id: str, input_tokens: int = 0, output_tokens: int = 0, budget: Optional[str] = None, max_tokens: Optional[int] = None):
+         """No-op token recording."""
+         pass
+
+
+ class MetricsCollector(MetricsCollectorBase):
+     """
+     Collector for Hindsight API metrics.
+
+     Provides methods to record latency and token usage for operations.
+     """
+
+     def __init__(self):
+         self.meter = get_meter()
+
+         # Operation latency histogram (in seconds)
+         # Records duration of retain, recall, reflect operations
+         self.operation_duration = self.meter.create_histogram(
+             name="hindsight.operation.duration",
+             description="Duration of Hindsight operations in seconds",
+             unit="s"
+         )
+
+         # Token usage counters
+         self.tokens_input = self.meter.create_counter(
+             name="hindsight.tokens.input",
+             description="Number of input tokens consumed",
+             unit="tokens"
+         )
+
+         self.tokens_output = self.meter.create_counter(
+             name="hindsight.tokens.output",
+             description="Number of output tokens generated",
+             unit="tokens"
+         )
+
+         # Operation counter (success/failure)
+         self.operation_total = self.meter.create_counter(
+             name="hindsight.operation.total",
+             description="Total number of operations executed",
+             unit="operations"
+         )
+
+     @contextmanager
+     def record_operation(self, operation: str, bank_id: str, budget: Optional[str] = None, max_tokens: Optional[int] = None):
+         """
+         Context manager to record operation duration and status.
+
+         Usage:
+             with metrics.record_operation("recall", bank_id="user123", budget="mid", max_tokens=4096):
+                 # ... perform operation
+                 pass
+
+         Args:
+             operation: Operation name (retain, recall, reflect)
+             bank_id: Memory bank ID
+             budget: Optional budget level (low, mid, high)
+             max_tokens: Optional max tokens for the operation
+         """
+         start_time = time.time()
+         attributes = {
+             "operation": operation,
+             "bank_id": bank_id,
+         }
+         if budget:
+             attributes["budget"] = budget
+         if max_tokens:
+             attributes["max_tokens"] = str(max_tokens)
+
+         success = True
+         try:
+             yield
+         except Exception:
+             success = False
+             raise
+         finally:
+             duration = time.time() - start_time
+             attributes["success"] = str(success).lower()
+
+             # Record duration
+             self.operation_duration.record(duration, attributes)
+
+             # Record operation count
+             self.operation_total.add(1, attributes)
+
+     def record_tokens(self, operation: str, bank_id: str, input_tokens: int = 0, output_tokens: int = 0, budget: Optional[str] = None, max_tokens: Optional[int] = None):
+         """
+         Record token usage for an operation.
+
+         Args:
+             operation: Operation name (retain, recall, reflect)
+             bank_id: Memory bank ID
+             input_tokens: Number of input tokens
+             output_tokens: Number of output tokens
+             budget: Optional budget level
+             max_tokens: Optional max tokens for the operation
+         """
+         attributes = {
+             "operation": operation,
+             "bank_id": bank_id,
+         }
+         if budget:
+             attributes["budget"] = budget
+         if max_tokens:
+             attributes["max_tokens"] = str(max_tokens)
+
+         if input_tokens > 0:
+             self.tokens_input.add(input_tokens, attributes)
+
+         if output_tokens > 0:
+             self.tokens_output.add(output_tokens, attributes)
+
+
+ # Global metrics collector instance (defaults to no-op)
+ _metrics_collector: MetricsCollectorBase = NoOpMetricsCollector()
+
+
+ def get_metrics_collector() -> MetricsCollectorBase:
+     """
+     Get the global metrics collector instance.
+
+     Returns a no-op collector if metrics are not initialized.
+     """
+     return _metrics_collector
+
+
+ def create_metrics_collector() -> MetricsCollector:
+     """
+     Create and set the global metrics collector.
+
+     Should be called after initialize_metrics().
+     """
+     global _metrics_collector
+     _metrics_collector = MetricsCollector()
+     return _metrics_collector
+
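As a rough usage sketch (not part of the package diff), the setup order implied by the hunk above is initialize_metrics() followed by create_metrics_collector(), after which the global collector can be used anywhere. Assuming the module is importable as hindsight_api.metrics and that the PrometheusMetricReader registers with the default prometheus_client registry (consistent with the REGISTRY import above); the sample values are made up:

    from hindsight_api.metrics import (
        initialize_metrics,
        create_metrics_collector,
        get_metrics_collector,
    )
    from prometheus_client import REGISTRY, generate_latest

    # One-time setup at application startup.
    initialize_metrics(service_name="hindsight-api", service_version="0.0.13")
    create_metrics_collector()

    # Anywhere else, fetch the global collector (a no-op if setup was skipped).
    collector = get_metrics_collector()

    with collector.record_operation("recall", bank_id="user123", budget="mid", max_tokens=4096):
        pass  # the operation being timed would run here

    collector.record_tokens("recall", bank_id="user123", input_tokens=1200, output_tokens=350)

    # Render the current metrics in Prometheus exposition format.
    print(generate_latest(REGISTRY).decode()[:200])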