hindsight-api 0.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +38 -0
- hindsight_api/api/__init__.py +105 -0
- hindsight_api/api/http.py +1872 -0
- hindsight_api/api/mcp.py +157 -0
- hindsight_api/engine/__init__.py +47 -0
- hindsight_api/engine/cross_encoder.py +97 -0
- hindsight_api/engine/db_utils.py +93 -0
- hindsight_api/engine/embeddings.py +113 -0
- hindsight_api/engine/entity_resolver.py +575 -0
- hindsight_api/engine/llm_wrapper.py +269 -0
- hindsight_api/engine/memory_engine.py +3095 -0
- hindsight_api/engine/query_analyzer.py +519 -0
- hindsight_api/engine/response_models.py +222 -0
- hindsight_api/engine/retain/__init__.py +50 -0
- hindsight_api/engine/retain/bank_utils.py +423 -0
- hindsight_api/engine/retain/chunk_storage.py +82 -0
- hindsight_api/engine/retain/deduplication.py +104 -0
- hindsight_api/engine/retain/embedding_processing.py +62 -0
- hindsight_api/engine/retain/embedding_utils.py +54 -0
- hindsight_api/engine/retain/entity_processing.py +90 -0
- hindsight_api/engine/retain/fact_extraction.py +1027 -0
- hindsight_api/engine/retain/fact_storage.py +176 -0
- hindsight_api/engine/retain/link_creation.py +121 -0
- hindsight_api/engine/retain/link_utils.py +651 -0
- hindsight_api/engine/retain/orchestrator.py +405 -0
- hindsight_api/engine/retain/types.py +206 -0
- hindsight_api/engine/search/__init__.py +15 -0
- hindsight_api/engine/search/fusion.py +122 -0
- hindsight_api/engine/search/observation_utils.py +132 -0
- hindsight_api/engine/search/reranking.py +103 -0
- hindsight_api/engine/search/retrieval.py +503 -0
- hindsight_api/engine/search/scoring.py +161 -0
- hindsight_api/engine/search/temporal_extraction.py +64 -0
- hindsight_api/engine/search/think_utils.py +255 -0
- hindsight_api/engine/search/trace.py +215 -0
- hindsight_api/engine/search/tracer.py +447 -0
- hindsight_api/engine/search/types.py +160 -0
- hindsight_api/engine/task_backend.py +223 -0
- hindsight_api/engine/utils.py +203 -0
- hindsight_api/metrics.py +227 -0
- hindsight_api/migrations.py +163 -0
- hindsight_api/models.py +309 -0
- hindsight_api/pg0.py +425 -0
- hindsight_api/web/__init__.py +12 -0
- hindsight_api/web/server.py +143 -0
- hindsight_api-0.0.13.dist-info/METADATA +41 -0
- hindsight_api-0.0.13.dist-info/RECORD +48 -0
- hindsight_api-0.0.13.dist-info/WHEEL +4 -0
hindsight_api/engine/task_backend.py
ADDED

@@ -0,0 +1,223 @@
"""
Abstract task backend for running async tasks.

This provides an abstraction that can be adapted to different execution models:
- AsyncIO queue (default implementation)
- Pub/Sub architectures (future)
- Message brokers (future)
"""
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional, Callable, Awaitable
import asyncio
import logging

logger = logging.getLogger(__name__)


class TaskBackend(ABC):
    """
    Abstract base class for task execution backends.

    Implementations must:
    1. Store/publish task events (as serializable dicts)
    2. Execute tasks through a provided executor callback

    The backend treats tasks as pure dictionaries that can be serialized
    and sent over the network. The executor (typically MemoryEngine.execute_task)
    receives the dict and routes it to the appropriate handler.
    """

    def __init__(self):
        """Initialize the task backend."""
        self._executor: Optional[Callable[[Dict[str, Any]], Awaitable[None]]] = None
        self._initialized = False

    def set_executor(self, executor: Callable[[Dict[str, Any]], Awaitable[None]]):
        """
        Set the executor callback for processing tasks.

        Args:
            executor: Async function that takes a task dict and executes it
        """
        self._executor = executor

    @abstractmethod
    async def initialize(self):
        """
        Initialize the backend (e.g., start workers, connect to broker).
        """
        pass

    @abstractmethod
    async def submit_task(self, task_dict: Dict[str, Any]):
        """
        Submit a task for execution.

        Args:
            task_dict: Task as a dictionary (must be serializable)
        """
        pass

    @abstractmethod
    async def shutdown(self):
        """
        Shutdown the backend gracefully (e.g., stop workers, close connections).
        """
        pass

    async def _execute_task(self, task_dict: Dict[str, Any]):
        """
        Execute a task through the registered executor.

        Args:
            task_dict: Task dictionary to execute
        """
        if self._executor is None:
            task_type = task_dict.get('type', 'unknown')
            logger.warning(f"No executor registered, skipping task {task_type}")
            return

        try:
            await self._executor(task_dict)
        except Exception as e:
            task_type = task_dict.get('type', 'unknown')
            logger.error(f"Error executing task {task_type}: {e}")
            import traceback
            traceback.print_exc()


class AsyncIOQueueBackend(TaskBackend):
    """
    Task backend implementation using asyncio queues.

    This is the default implementation that uses in-process asyncio queues
    and a periodic consumer worker.
    """

    def __init__(
        self,
        batch_size: int = 100,
        batch_interval: float = 1.0
    ):
        """
        Initialize AsyncIO queue backend.

        Args:
            batch_size: Maximum number of tasks to process in one batch
            batch_interval: Maximum time (seconds) to wait before processing batch
        """
        super().__init__()
        self._queue: Optional[asyncio.Queue] = None
        self._worker_task: Optional[asyncio.Task] = None
        self._shutdown_event: Optional[asyncio.Event] = None
        self._batch_size = batch_size
        self._batch_interval = batch_interval

    async def initialize(self):
        """Initialize the queue and start the worker."""
        if self._initialized:
            return

        self._queue = asyncio.Queue()
        self._shutdown_event = asyncio.Event()
        self._worker_task = asyncio.create_task(self._worker())
        self._initialized = True
        logger.info("AsyncIOQueueBackend initialized")

    async def submit_task(self, task_dict: Dict[str, Any]):
        """
        Submit a task by putting it in the queue.

        Args:
            task_dict: Task dictionary to execute
        """
        if not self._initialized:
            await self.initialize()

        await self._queue.put(task_dict)
        task_type = task_dict.get('type', 'unknown')
        task_id = task_dict.get('id')

    async def wait_for_pending_tasks(self, timeout: float = 5.0):
        """
        Wait for all pending tasks in the queue to be processed.

        This is useful in tests to ensure background tasks complete before assertions.

        Args:
            timeout: Maximum time to wait in seconds
        """
        if not self._initialized or self._queue is None:
            return

        # Wait for queue to be empty and give worker time to process
        start_time = asyncio.get_event_loop().time()
        while asyncio.get_event_loop().time() - start_time < timeout:
            if self._queue.empty():
                # Queue is empty, give worker a bit more time to finish any in-flight task
                await asyncio.sleep(0.3)
                # Check again - if still empty, we're done
                if self._queue.empty():
                    return
            else:
                # Queue not empty, wait a bit
                await asyncio.sleep(0.1)

    async def shutdown(self):
        """Shutdown the worker and drain the queue."""
        if not self._initialized:
            return

        logger.info("Shutting down AsyncIOQueueBackend...")

        # Signal shutdown
        self._shutdown_event.set()

        # Cancel worker
        if self._worker_task is not None:
            self._worker_task.cancel()
            try:
                await self._worker_task
            except asyncio.CancelledError:
                pass  # Worker cancelled successfully

        self._initialized = False
        logger.info("AsyncIOQueueBackend shutdown complete")

    async def _worker(self):
        """
        Background worker that processes tasks in batches.

        Collects tasks for up to batch_interval seconds or batch_size items,
        then processes them.
        """
        while not self._shutdown_event.is_set():
            try:
                # Collect tasks for batching
                tasks = []
                deadline = asyncio.get_event_loop().time() + self._batch_interval

                while len(tasks) < self._batch_size and asyncio.get_event_loop().time() < deadline:
                    try:
                        remaining_time = max(0.1, deadline - asyncio.get_event_loop().time())
                        task_dict = await asyncio.wait_for(
                            self._queue.get(),
                            timeout=remaining_time
                        )
                        tasks.append(task_dict)
                    except asyncio.TimeoutError:
                        break

                # Process batch
                if tasks:
                    # Execute tasks concurrently
                    await asyncio.gather(
                        *[self._execute_task(task_dict) for task_dict in tasks],
                        return_exceptions=True
                    )

            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Worker error: {e}")
                await asyncio.sleep(1)  # Backoff on error
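The AsyncIOQueueBackend above is driven entirely through set_executor, submit_task, and shutdown. Below is a minimal usage sketch, assuming the caller supplies its own executor coroutine (in the real service the executor is typically MemoryEngine.execute_task); the task dict contents are illustrative.

# Illustrative wiring of AsyncIOQueueBackend; the executor and task payload are assumptions.
import asyncio
from hindsight_api.engine.task_backend import AsyncIOQueueBackend

async def main():
    backend = AsyncIOQueueBackend(batch_size=10, batch_interval=0.5)

    async def executor(task_dict):
        # Stand-in for MemoryEngine.execute_task: route the dict to a handler.
        print(f"executing task type={task_dict.get('type')} id={task_dict.get('id')}")

    backend.set_executor(executor)
    await backend.initialize()
    await backend.submit_task({"type": "retain", "id": "task-1"})
    await backend.wait_for_pending_tasks(timeout=2.0)
    await backend.shutdown()

asyncio.run(main())

Because submit_task lazily calls initialize(), the explicit initialize() call is optional; wait_for_pending_tasks is mainly useful in tests, as its docstring notes.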
hindsight_api/engine/utils.py
ADDED

@@ -0,0 +1,203 @@
"""
Utility functions for memory system.
"""
import logging
from datetime import datetime
from typing import List, Dict, TYPE_CHECKING

if TYPE_CHECKING:
    from .llm_wrapper import LLMConfig
    from .retain.fact_extraction import Fact

from .retain.fact_extraction import extract_facts_from_text


async def extract_facts(text: str, event_date: datetime, context: str = "", llm_config: 'LLMConfig' = None, agent_name: str = None, extract_opinions: bool = False) -> tuple[List['Fact'], List[tuple[str, int]]]:
    """
    Extract semantic facts from text using LLM.

    Uses LLM for intelligent fact extraction that:
    - Filters out social pleasantries and filler words
    - Creates self-contained statements with absolute dates
    - Handles conversational text well
    - Resolves relative time expressions to absolute dates

    Args:
        text: Input text (conversation, article, etc.)
        event_date: Reference date for resolving relative times
        context: Context about the conversation/document
        llm_config: LLM configuration to use
        agent_name: Optional agent name to help identify agent-related facts
        extract_opinions: If True, extract ONLY opinions. If False, extract world and agent facts (no opinions)

    Returns:
        Tuple of (facts, chunks) where:
        - facts: List of Fact model instances
        - chunks: List of tuples (chunk_text, fact_count) for each chunk

    Raises:
        Exception: If LLM fact extraction fails
    """
    if not text or not text.strip():
        return [], []

    facts, chunks = await extract_facts_from_text(text, event_date, context=context, llm_config=llm_config, agent_name=agent_name, extract_opinions=extract_opinions)

    if not facts:
        logging.warning(f"LLM extracted 0 facts from text of length {len(text)}. This may indicate the text contains no meaningful information, or the LLM failed to extract facts. Full text: {text}")
        return [], chunks

    return facts, chunks


def cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
    """
    Calculate cosine similarity between two vectors.

    Args:
        vec1: First vector
        vec2: Second vector

    Returns:
        Similarity score between 0 and 1
    """
    if len(vec1) != len(vec2):
        raise ValueError("Vectors must have same dimension")

    dot_product = sum(a * b for a, b in zip(vec1, vec2))
    magnitude1 = sum(a * a for a in vec1) ** 0.5
    magnitude2 = sum(b * b for b in vec2) ** 0.5

    if magnitude1 == 0 or magnitude2 == 0:
        return 0.0

    return dot_product / (magnitude1 * magnitude2)


def calculate_recency_weight(days_since: float, half_life_days: float = 365.0) -> float:
    """
    Calculate recency weight using logarithmic decay.

    This provides much better differentiation over long time periods compared to
    exponential decay. Uses a log-based decay where the half-life parameter controls
    when memories reach 50% weight.

    Examples:
        - Today (0 days): 1.0
        - 1 year (365 days): ~0.5 (with default half_life=365)
        - 2 years (730 days): ~0.33
        - 5 years (1825 days): ~0.17
        - 10 years (3650 days): ~0.09

    This ensures that 2-year-old and 5-year-old memories have meaningfully
    different weights, unlike exponential decay which makes them both ~0.

    Args:
        days_since: Number of days since the memory was created
        half_life_days: Number of days for weight to reach 0.5 (default: 1 year)

    Returns:
        Weight between 0 and 1
    """
    import math
    # Logarithmic decay: 1 / (1 + log(1 + days_since/half_life))
    # This decays much slower than exponential, giving better long-term differentiation
    normalized_age = days_since / half_life_days
    return 1.0 / (1.0 + math.log1p(normalized_age))


def calculate_frequency_weight(access_count: int, max_boost: float = 2.0) -> float:
    """
    Calculate frequency weight based on access count.

    Frequently accessed memories are weighted higher.
    Uses logarithmic scaling to avoid over-weighting.

    Args:
        access_count: Number of times the memory was accessed
        max_boost: Maximum multiplier for frequently accessed memories

    Returns:
        Weight between 1.0 and max_boost
    """
    import math
    if access_count <= 0:
        return 1.0

    # Logarithmic scaling: log(access_count + 1) / log(10)
    # This gives: 0 accesses = 1.0, 9 accesses ~= 1.5, 99 accesses ~= 2.0
    normalized = math.log(access_count + 1) / math.log(10)
    return 1.0 + min(normalized, max_boost - 1.0)


def calculate_temporal_anchor(occurred_start: datetime, occurred_end: datetime) -> datetime:
    """
    Calculate a single temporal anchor point from a temporal range.

    Used for spreading activation - we need a single representative date
    to calculate temporal proximity between facts. This simplifies the
    range-to-range distance problem.

    Strategy: Use midpoint of the range for balanced representation.

    Args:
        occurred_start: Start of temporal range
        occurred_end: End of temporal range

    Returns:
        Single datetime representing the temporal anchor (midpoint)

    Examples:
        - Point event (July 14): start=July 14, end=July 14 → anchor=July 14
        - Month range (February): start=Feb 1, end=Feb 28 → anchor=Feb 14
        - Year range (2023): start=Jan 1, end=Dec 31 → anchor=July 1
    """
    # Calculate midpoint
    time_delta = occurred_end - occurred_start
    midpoint = occurred_start + (time_delta / 2)
    return midpoint


def calculate_temporal_proximity(
    anchor_a: datetime,
    anchor_b: datetime,
    half_life_days: float = 30.0
) -> float:
    """
    Calculate temporal proximity between two temporal anchors.

    Used for spreading activation to determine how "close" two facts are
    in time. Uses logarithmic decay so that temporal similarity doesn't
    drop off too quickly.

    Args:
        anchor_a: Temporal anchor of first fact
        anchor_b: Temporal anchor of second fact
        half_life_days: Number of days for proximity to reach 0.5
            (default: 30 days = 1 month)

    Returns:
        Proximity score in [0, 1] where:
        - 1.0 = same day
        - 0.5 = ~half_life days apart
        - 0.0 = very distant in time

    Examples:
        - Same day: 1.0
        - 1 week apart (half_life=30): ~0.7
        - 1 month apart (half_life=30): ~0.5
        - 1 year apart (half_life=30): ~0.2
    """
    import math

    days_apart = abs((anchor_a - anchor_b).days)

    if days_apart == 0:
        return 1.0

    # Logarithmic decay: 1 / (1 + log(1 + days_apart/half_life))
    # Similar to calculate_recency_weight but for proximity between events
    normalized_distance = days_apart / half_life_days
    proximity = 1.0 / (1.0 + math.log1p(normalized_distance))

    return proximity
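The scoring helpers above are small deterministic functions, so their behavior is easiest to see with concrete inputs. The sketch below uses only the functions and defaults defined in this file; the specific inputs and rounded outputs are illustrative, computed directly from the formulas shown.

# Illustrative use of the scoring helpers (outputs rounded; inputs chosen for demonstration).
from datetime import datetime

from hindsight_api.engine.utils import (
    calculate_frequency_weight,
    calculate_recency_weight,
    calculate_temporal_anchor,
    calculate_temporal_proximity,
)

# Midpoint of a year-long range: 2023-07-02 00:00:00
print(calculate_temporal_anchor(datetime(2023, 1, 1), datetime(2023, 12, 31)))

# One half-life (365 days) old: 1 / (1 + ln 2) ~= 0.59
print(calculate_recency_weight(365.0))

# Three prior accesses: 1 + log10(4) ~= 1.60
print(calculate_frequency_weight(3))

# Facts 30 days apart with the default 30-day half-life: ~= 0.59
print(calculate_temporal_proximity(datetime(2024, 6, 1), datetime(2024, 7, 1)))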
hindsight_api/metrics.py
ADDED
@@ -0,0 +1,227 @@
"""
OpenTelemetry metrics instrumentation for Hindsight API.

This module provides metrics for:
- Operation latency (retain, recall, reflect) with percentiles
- Token usage (input/output) per operation
- Per-bank granularity via labels
"""
import logging
from typing import Dict, Any, Optional
from contextlib import contextmanager
import time

from opentelemetry import metrics
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.resources import Resource
from opentelemetry.exporter.prometheus import PrometheusMetricReader
from prometheus_client import REGISTRY

logger = logging.getLogger(__name__)

# Global meter instance
_meter = None


def initialize_metrics(service_name: str = "hindsight-api", service_version: str = "1.0.0"):
    """
    Initialize OpenTelemetry metrics with Prometheus exporter.

    This should be called once during application startup.

    Args:
        service_name: Name of the service for resource attributes
        service_version: Version of the service

    Returns:
        PrometheusMetricReader instance (for accessing metrics endpoint)
    """
    global _meter

    # Create resource with service information
    resource = Resource.create({
        "service.name": service_name,
        "service.version": service_version,
    })

    # Create Prometheus metric reader
    prometheus_reader = PrometheusMetricReader()

    # Create meter provider with Prometheus exporter
    provider = MeterProvider(
        resource=resource,
        metric_readers=[prometheus_reader]
    )

    # Set the global meter provider
    metrics.set_meter_provider(provider)

    # Get meter for this application
    _meter = metrics.get_meter(__name__)

    return prometheus_reader


def get_meter():
    """Get the global meter instance."""
    if _meter is None:
        raise RuntimeError("Metrics not initialized. Call initialize_metrics() first.")
    return _meter


class MetricsCollectorBase:
    """Base class for metrics collectors."""

    @contextmanager
    def record_operation(self, operation: str, bank_id: str, budget: Optional[str] = None, max_tokens: Optional[int] = None):
        """Context manager to record operation duration and status."""
        raise NotImplementedError

    def record_tokens(self, operation: str, bank_id: str, input_tokens: int = 0, output_tokens: int = 0, budget: Optional[str] = None, max_tokens: Optional[int] = None):
        """Record token usage for an operation."""
        raise NotImplementedError


class NoOpMetricsCollector(MetricsCollectorBase):
    """No-op metrics collector that does nothing. Used when metrics are disabled."""

    @contextmanager
    def record_operation(self, operation: str, bank_id: str, budget: Optional[str] = None, max_tokens: Optional[int] = None):
        """No-op context manager."""
        yield

    def record_tokens(self, operation: str, bank_id: str, input_tokens: int = 0, output_tokens: int = 0, budget: Optional[str] = None, max_tokens: Optional[int] = None):
        """No-op token recording."""
        pass


class MetricsCollector(MetricsCollectorBase):
    """
    Collector for Hindsight API metrics.

    Provides methods to record latency and token usage for operations.
    """

    def __init__(self):
        self.meter = get_meter()

        # Operation latency histogram (in seconds)
        # Records duration of retain, recall, reflect operations
        self.operation_duration = self.meter.create_histogram(
            name="hindsight.operation.duration",
            description="Duration of Hindsight operations in seconds",
            unit="s"
        )

        # Token usage counters
        self.tokens_input = self.meter.create_counter(
            name="hindsight.tokens.input",
            description="Number of input tokens consumed",
            unit="tokens"
        )

        self.tokens_output = self.meter.create_counter(
            name="hindsight.tokens.output",
            description="Number of output tokens generated",
            unit="tokens"
        )

        # Operation counter (success/failure)
        self.operation_total = self.meter.create_counter(
            name="hindsight.operation.total",
            description="Total number of operations executed",
            unit="operations"
        )

    @contextmanager
    def record_operation(self, operation: str, bank_id: str, budget: Optional[str] = None, max_tokens: Optional[int] = None):
        """
        Context manager to record operation duration and status.

        Usage:
            with metrics.record_operation("recall", bank_id="user123", budget="mid", max_tokens=4096):
                # ... perform operation
                pass

        Args:
            operation: Operation name (retain, recall, reflect)
            bank_id: Memory bank ID
            budget: Optional budget level (low, mid, high)
            max_tokens: Optional max tokens for the operation
        """
        start_time = time.time()
        attributes = {
            "operation": operation,
            "bank_id": bank_id,
        }
        if budget:
            attributes["budget"] = budget
        if max_tokens:
            attributes["max_tokens"] = str(max_tokens)

        success = True
        try:
            yield
        except Exception:
            success = False
            raise
        finally:
            duration = time.time() - start_time
            attributes["success"] = str(success).lower()

            # Record duration
            self.operation_duration.record(duration, attributes)

            # Record operation count
            self.operation_total.add(1, attributes)

    def record_tokens(self, operation: str, bank_id: str, input_tokens: int = 0, output_tokens: int = 0, budget: Optional[str] = None, max_tokens: Optional[int] = None):
        """
        Record token usage for an operation.

        Args:
            operation: Operation name (retain, recall, reflect)
            bank_id: Memory bank ID
            input_tokens: Number of input tokens
            output_tokens: Number of output tokens
            budget: Optional budget level
            max_tokens: Optional max tokens for the operation
        """
        attributes = {
            "operation": operation,
            "bank_id": bank_id,
        }
        if budget:
            attributes["budget"] = budget
        if max_tokens:
            attributes["max_tokens"] = str(max_tokens)

        if input_tokens > 0:
            self.tokens_input.add(input_tokens, attributes)

        if output_tokens > 0:
            self.tokens_output.add(output_tokens, attributes)


# Global metrics collector instance (defaults to no-op)
_metrics_collector: MetricsCollectorBase = NoOpMetricsCollector()


def get_metrics_collector() -> MetricsCollectorBase:
    """
    Get the global metrics collector instance.

    Returns a no-op collector if metrics are not initialized.
    """
    return _metrics_collector


def create_metrics_collector() -> MetricsCollector:
    """
    Create and set the global metrics collector.

    Should be called after initialize_metrics().
    """
    global _metrics_collector
    _metrics_collector = MetricsCollector()
    return _metrics_collector