hindsight-api 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/admin/__init__.py +1 -0
- hindsight_api/admin/cli.py +311 -0
- hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
- hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
- hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
- hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
- hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
- hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
- hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
- hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
- hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
- hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
- hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
- hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
- hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
- hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
- hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
- hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
- hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
- hindsight_api/api/http.py +1406 -118
- hindsight_api/api/mcp.py +11 -196
- hindsight_api/config.py +359 -27
- hindsight_api/engine/consolidation/__init__.py +5 -0
- hindsight_api/engine/consolidation/consolidator.py +859 -0
- hindsight_api/engine/consolidation/prompts.py +69 -0
- hindsight_api/engine/cross_encoder.py +706 -88
- hindsight_api/engine/db_budget.py +284 -0
- hindsight_api/engine/db_utils.py +11 -0
- hindsight_api/engine/directives/__init__.py +5 -0
- hindsight_api/engine/directives/models.py +37 -0
- hindsight_api/engine/embeddings.py +553 -29
- hindsight_api/engine/entity_resolver.py +8 -5
- hindsight_api/engine/interface.py +40 -17
- hindsight_api/engine/llm_wrapper.py +744 -68
- hindsight_api/engine/memory_engine.py +2505 -1017
- hindsight_api/engine/mental_models/__init__.py +14 -0
- hindsight_api/engine/mental_models/models.py +53 -0
- hindsight_api/engine/query_analyzer.py +4 -3
- hindsight_api/engine/reflect/__init__.py +18 -0
- hindsight_api/engine/reflect/agent.py +933 -0
- hindsight_api/engine/reflect/models.py +109 -0
- hindsight_api/engine/reflect/observations.py +186 -0
- hindsight_api/engine/reflect/prompts.py +483 -0
- hindsight_api/engine/reflect/tools.py +437 -0
- hindsight_api/engine/reflect/tools_schema.py +250 -0
- hindsight_api/engine/response_models.py +168 -4
- hindsight_api/engine/retain/bank_utils.py +79 -201
- hindsight_api/engine/retain/fact_extraction.py +424 -195
- hindsight_api/engine/retain/fact_storage.py +35 -12
- hindsight_api/engine/retain/link_utils.py +29 -24
- hindsight_api/engine/retain/orchestrator.py +24 -43
- hindsight_api/engine/retain/types.py +11 -2
- hindsight_api/engine/search/graph_retrieval.py +43 -14
- hindsight_api/engine/search/link_expansion_retrieval.py +391 -0
- hindsight_api/engine/search/mpfp_retrieval.py +362 -117
- hindsight_api/engine/search/reranking.py +2 -2
- hindsight_api/engine/search/retrieval.py +848 -201
- hindsight_api/engine/search/tags.py +172 -0
- hindsight_api/engine/search/think_utils.py +42 -141
- hindsight_api/engine/search/trace.py +12 -1
- hindsight_api/engine/search/tracer.py +26 -6
- hindsight_api/engine/search/types.py +21 -3
- hindsight_api/engine/task_backend.py +113 -106
- hindsight_api/engine/utils.py +1 -152
- hindsight_api/extensions/__init__.py +10 -1
- hindsight_api/extensions/builtin/tenant.py +5 -1
- hindsight_api/extensions/context.py +10 -1
- hindsight_api/extensions/operation_validator.py +81 -4
- hindsight_api/extensions/tenant.py +26 -0
- hindsight_api/main.py +69 -6
- hindsight_api/mcp_local.py +12 -53
- hindsight_api/mcp_tools.py +494 -0
- hindsight_api/metrics.py +433 -48
- hindsight_api/migrations.py +141 -1
- hindsight_api/models.py +3 -3
- hindsight_api/pg0.py +53 -0
- hindsight_api/server.py +39 -2
- hindsight_api/worker/__init__.py +11 -0
- hindsight_api/worker/main.py +296 -0
- hindsight_api/worker/poller.py +486 -0
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +16 -6
- hindsight_api-0.4.0.dist-info/RECORD +112 -0
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +2 -0
- hindsight_api/engine/retain/observation_regeneration.py +0 -254
- hindsight_api/engine/search/observation_utils.py +0 -125
- hindsight_api/engine/search/scoring.py +0 -159
- hindsight_api-0.2.1.dist-info/RECORD +0 -75
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
hindsight_api/engine/task_backend.py CHANGED

@@ -1,31 +1,40 @@
 """
-
+Task backend for distributed task processing.
 
-This provides an abstraction
-
-
-- Message brokers (future)
+This provides an abstraction for task storage and execution:
+- BrokerTaskBackend: Uses PostgreSQL as broker (production)
+- SyncTaskBackend: Executes tasks immediately (testing/embedded)
 """
 
-import
+import json
 import logging
 from abc import ABC, abstractmethod
 from collections.abc import Awaitable, Callable
-from typing import Any
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    import asyncpg
 
 logger = logging.getLogger(__name__)
 
 
+def fq_table(table: str, schema: str | None = None) -> str:
+    """Get fully-qualified table name with optional schema prefix."""
+    if schema:
+        return f'"{schema}".{table}'
+    return table
+
+
 class TaskBackend(ABC):
     """
     Abstract base class for task execution backends.
 
     Implementations must:
     1. Store/publish task events (as serializable dicts)
-    2. Execute tasks through a provided executor callback
+    2. Execute tasks through a provided executor callback (optional)
 
     The backend treats tasks as pure dictionaries that can be serialized
-    and
+    and stored in the database. The executor (typically MemoryEngine.execute_task)
     receives the dict and routes it to the appropriate handler.
     """
 

@@ -46,7 +55,7 @@ class TaskBackend(ABC):
     @abstractmethod
     async def initialize(self):
         """
-        Initialize the backend (e.g.,
+        Initialize the backend (e.g., connect to database).
         """
         pass
 

@@ -63,7 +72,7 @@ class TaskBackend(ABC):
     @abstractmethod
     async def shutdown(self):
         """
-        Shutdown the backend gracefully
+        Shutdown the backend gracefully.
         """
         pass
 

@@ -93,9 +102,8 @@ class SyncTaskBackend(TaskBackend):
     """
     Synchronous task backend that executes tasks immediately.
 
-    This is useful for embedded/CLI usage where we don't want
-    workers
-    being queued.
+    This is useful for tests and embedded/CLI usage where we don't want
+    background workers. Tasks are executed inline rather than being queued.
     """
 
     async def initialize(self):

@@ -121,130 +129,129 @@ class SyncTaskBackend(TaskBackend):
         logger.debug("SyncTaskBackend shutdown")
 
 
-class
+class BrokerTaskBackend(TaskBackend):
     """
-    Task backend
+    Task backend using PostgreSQL as broker.
 
-
-    and
+    submit_task() stores task_payload in async_operations table.
+    Actual polling and execution is handled separately by WorkerPoller.
+
+    This backend is used by the API to store tasks. Workers poll
+    the database separately to claim and execute tasks.
     """
 
-    def __init__(
+    def __init__(
+        self,
+        pool_getter: Callable[[], "asyncpg.Pool"],
+        schema: str | None = None,
+        schema_getter: Callable[[], str | None] | None = None,
+    ):
         """
-        Initialize
+        Initialize the broker task backend.
 
         Args:
-
-
+            pool_getter: Callable that returns the asyncpg connection pool
+            schema: Database schema for multi-tenant support (optional, static)
+            schema_getter: Callable that returns current schema dynamically (optional).
+                If set, takes precedence over static schema for submit_task.
         """
         super().__init__()
-        self.
-        self.
-        self.
-        self._batch_size = batch_size
-        self._batch_interval = batch_interval
+        self._pool_getter = pool_getter
+        self._schema = schema
+        self._schema_getter = schema_getter
 
     async def initialize(self):
-        """Initialize the
-        if self._initialized:
-            return
-
-        self._queue = asyncio.Queue()
-        self._shutdown_event = asyncio.Event()
-        self._worker_task = asyncio.create_task(self._worker())
+        """Initialize the backend."""
         self._initialized = True
-        logger.info("
+        logger.info("BrokerTaskBackend initialized")
 
     async def submit_task(self, task_dict: dict[str, Any]):
         """
-
+        Store task payload in async_operations table.
+
+        The task_dict should contain an 'operation_id' if updating an existing
+        operation record, otherwise a new operation will be created.
 
         Args:
-            task_dict: Task dictionary to
+            task_dict: Task dictionary to store (must be JSON serializable)
         """
         if not self._initialized:
             await self.initialize()
 
-
+        pool = self._pool_getter()
+        operation_id = task_dict.get("operation_id")
         task_type = task_dict.get("type", "unknown")
-
+        bank_id = task_dict.get("bank_id")
+        payload_json = json.dumps(task_dict)
+
+        schema = self._schema_getter() if self._schema_getter else self._schema
+        table = fq_table("async_operations", schema)
+
+        if operation_id:
+            # Update existing operation with task payload
+            await pool.execute(
+                f"""
+                UPDATE {table}
+                SET task_payload = $1::jsonb, updated_at = now()
+                WHERE operation_id = $2
+                """,
+                payload_json,
+                operation_id,
+            )
+            logger.debug(f"Updated task payload for operation {operation_id}")
+        else:
+            # Insert new operation (for tasks without pre-created records)
+            # e.g., access_count_update tasks
+            import uuid
+
+            new_id = uuid.uuid4()
+            await pool.execute(
+                f"""
+                INSERT INTO {table} (operation_id, bank_id, operation_type, status, task_payload)
+                VALUES ($1, $2, $3, 'pending', $4::jsonb)
+                """,
+                new_id,
+                bank_id,
+                task_type,
+                payload_json,
+            )
+            logger.debug(f"Created new operation {new_id} for task type {task_type}")
 
-    async def
+    async def shutdown(self):
+        """Shutdown the backend."""
+        self._initialized = False
+        logger.info("BrokerTaskBackend shutdown")
+
+    async def wait_for_pending_tasks(self, timeout: float = 120.0):
         """
-        Wait for
+        Wait for pending tasks to be processed.
 
-
+        In the broker model, this polls the database to check if tasks
+        for this process have been completed. This is useful in tests
+        when worker_enabled=True (API processes its own tasks).
 
         Args:
             timeout: Maximum time to wait in seconds
         """
-
-
+        import asyncio
+
+        pool = self._pool_getter()
+        schema = self._schema_getter() if self._schema_getter else self._schema
+        table = fq_table("async_operations", schema)
 
-        # Wait for queue to be empty and give worker time to process
         start_time = asyncio.get_event_loop().time()
         while asyncio.get_event_loop().time() - start_time < timeout:
-            if
-
-
-
-
-
-
-            # Queue not empty, wait a bit
-            await asyncio.sleep(0.1)
+            # Check if there are any pending tasks with payloads
+            count = await pool.fetchval(
+                f"""
+                SELECT COUNT(*) FROM {table}
+                WHERE status = 'pending' AND task_payload IS NOT NULL
+                """
+            )
 
-
-
-        if not self._initialized:
-            return
-
-        logger.info("Shutting down AsyncIOQueueBackend...")
+            if count == 0:
+                return
 
-
-        self._shutdown_event.set()
+            await asyncio.sleep(0.5)
 
-
-        if self._worker_task is not None:
-            self._worker_task.cancel()
-            try:
-                await self._worker_task
-            except asyncio.CancelledError:
-                pass  # Worker cancelled successfully
-
-        self._initialized = False
-        logger.info("AsyncIOQueueBackend shutdown complete")
-
-    async def _worker(self):
-        """
-        Background worker that processes tasks in batches.
-
-        Collects tasks for up to batch_interval seconds or batch_size items,
-        then processes them.
-        """
-        while not self._shutdown_event.is_set():
-            try:
-                # Collect tasks for batching
-                tasks = []
-                deadline = asyncio.get_event_loop().time() + self._batch_interval
-
-                while len(tasks) < self._batch_size and asyncio.get_event_loop().time() < deadline:
-                    try:
-                        remaining_time = max(0.1, deadline - asyncio.get_event_loop().time())
-                        task_dict = await asyncio.wait_for(self._queue.get(), timeout=remaining_time)
-                        tasks.append(task_dict)
-                    except TimeoutError:
-                        break
-
-                # Process batch
-                if tasks:
-                    # Execute tasks concurrently
-                    await asyncio.gather(
-                        *[self._execute_task(task_dict) for task_dict in tasks], return_exceptions=True
-                    )
-
-            except asyncio.CancelledError:
-                break
-            except Exception as e:
-                logger.error(f"Worker error: {e}")
-                await asyncio.sleep(1)  # Backoff on error
+        logger.warning(f"Timeout waiting for pending tasks after {timeout}s")
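For orientation, a minimal sketch of how the new broker-style backend could be wired up. This is an illustration only: the DSN, bank id, and task fields are placeholders, and it assumes the async_operations table has already been created by the migrations listed above.

import asyncio

import asyncpg

from hindsight_api.engine.task_backend import BrokerTaskBackend


async def main() -> None:
    # The caller owns the pool; the backend only receives a getter callable.
    pool = await asyncpg.create_pool("postgresql://localhost/hindsight")  # placeholder DSN

    backend = BrokerTaskBackend(pool_getter=lambda: pool, schema="public")
    await backend.initialize()

    # Task dicts are plain JSON-serializable payloads. Without an
    # 'operation_id' key, submit_task inserts a new 'pending' row into
    # async_operations for a worker to claim later.
    await backend.submit_task({"type": "access_count_update", "bank_id": "example-bank"})

    # Useful in tests: poll until pending task payloads have been drained.
    await backend.wait_for_pending_tasks(timeout=30.0)

    await backend.shutdown()
    await pool.close()


asyncio.run(main())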
hindsight_api/engine/utils.py CHANGED

@@ -49,7 +49,7 @@ async def extract_facts(
     if not text or not text.strip():
         return [], []
 
-    facts, chunks = await extract_facts_from_text(
+    facts, chunks, _ = await extract_facts_from_text(
         text,
         event_date,
         context=context,

@@ -65,154 +65,3 @@ async def extract_facts(
         return [], chunks
 
     return facts, chunks
-
-
-def cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
-    """
-    Calculate cosine similarity between two vectors.
-
-    Args:
-        vec1: First vector
-        vec2: Second vector
-
-    Returns:
-        Similarity score between 0 and 1
-    """
-    if len(vec1) != len(vec2):
-        raise ValueError("Vectors must have same dimension")
-
-    dot_product = sum(a * b for a, b in zip(vec1, vec2))
-    magnitude1 = sum(a * a for a in vec1) ** 0.5
-    magnitude2 = sum(b * b for b in vec2) ** 0.5
-
-    if magnitude1 == 0 or magnitude2 == 0:
-        return 0.0
-
-    return dot_product / (magnitude1 * magnitude2)
-
-
-def calculate_recency_weight(days_since: float, half_life_days: float = 365.0) -> float:
-    """
-    Calculate recency weight using logarithmic decay.
-
-    This provides much better differentiation over long time periods compared to
-    exponential decay. Uses a log-based decay where the half-life parameter controls
-    when memories reach 50% weight.
-
-    Examples:
-        - Today (0 days): 1.0
-        - 1 year (365 days): ~0.5 (with default half_life=365)
-        - 2 years (730 days): ~0.33
-        - 5 years (1825 days): ~0.17
-        - 10 years (3650 days): ~0.09
-
-    This ensures that 2-year-old and 5-year-old memories have meaningfully
-    different weights, unlike exponential decay which makes them both ~0.
-
-    Args:
-        days_since: Number of days since the memory was created
-        half_life_days: Number of days for weight to reach 0.5 (default: 1 year)
-
-    Returns:
-        Weight between 0 and 1
-    """
-    import math
-
-    # Logarithmic decay: 1 / (1 + log(1 + days_since/half_life))
-    # This decays much slower than exponential, giving better long-term differentiation
-    normalized_age = days_since / half_life_days
-    return 1.0 / (1.0 + math.log1p(normalized_age))
-
-
-def calculate_frequency_weight(access_count: int, max_boost: float = 2.0) -> float:
-    """
-    Calculate frequency weight based on access count.
-
-    Frequently accessed memories are weighted higher.
-    Uses logarithmic scaling to avoid over-weighting.
-
-    Args:
-        access_count: Number of times the memory was accessed
-        max_boost: Maximum multiplier for frequently accessed memories
-
-    Returns:
-        Weight between 1.0 and max_boost
-    """
-    import math
-
-    if access_count <= 0:
-        return 1.0
-
-    # Logarithmic scaling: log(access_count + 1) / log(10)
-    # This gives: 0 accesses = 1.0, 9 accesses ~= 1.5, 99 accesses ~= 2.0
-    normalized = math.log(access_count + 1) / math.log(10)
-    return 1.0 + min(normalized, max_boost - 1.0)
-
-
-def calculate_temporal_anchor(occurred_start: datetime, occurred_end: datetime) -> datetime:
-    """
-    Calculate a single temporal anchor point from a temporal range.
-
-    Used for spreading activation - we need a single representative date
-    to calculate temporal proximity between facts. This simplifies the
-    range-to-range distance problem.
-
-    Strategy: Use midpoint of the range for balanced representation.
-
-    Args:
-        occurred_start: Start of temporal range
-        occurred_end: End of temporal range
-
-    Returns:
-        Single datetime representing the temporal anchor (midpoint)
-
-    Examples:
-        - Point event (July 14): start=July 14, end=July 14 → anchor=July 14
-        - Month range (February): start=Feb 1, end=Feb 28 → anchor=Feb 14
-        - Year range (2023): start=Jan 1, end=Dec 31 → anchor=July 1
-    """
-    # Calculate midpoint
-    time_delta = occurred_end - occurred_start
-    midpoint = occurred_start + (time_delta / 2)
-    return midpoint
-
-
-def calculate_temporal_proximity(anchor_a: datetime, anchor_b: datetime, half_life_days: float = 30.0) -> float:
-    """
-    Calculate temporal proximity between two temporal anchors.
-
-    Used for spreading activation to determine how "close" two facts are
-    in time. Uses logarithmic decay so that temporal similarity doesn't
-    drop off too quickly.
-
-    Args:
-        anchor_a: Temporal anchor of first fact
-        anchor_b: Temporal anchor of second fact
-        half_life_days: Number of days for proximity to reach 0.5
-            (default: 30 days = 1 month)
-
-    Returns:
-        Proximity score in [0, 1] where:
-        - 1.0 = same day
-        - 0.5 = ~half_life days apart
-        - 0.0 = very distant in time
-
-    Examples:
-        - Same day: 1.0
-        - 1 week apart (half_life=30): ~0.7
-        - 1 month apart (half_life=30): ~0.5
-        - 1 year apart (half_life=30): ~0.2
-    """
-    import math
-
-    days_apart = abs((anchor_a - anchor_b).days)
-
-    if days_apart == 0:
-        return 1.0
-
-    # Logarithmic decay: 1 / (1 + log(1 + days_apart/half_life))
-    # Similar to calculate_recency_weight but for proximity between events
-    normalized_distance = days_apart / half_life_days
-    proximity = 1.0 / (1.0 + math.log1p(normalized_distance))
-
-    return proximity
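The scoring helpers removed above (cosine_similarity, the recency/frequency weights, and the temporal proximity functions) all used a logarithmic decay of the form 1 / (1 + ln(1 + days / half_life)). The standalone sketch below re-derives that curve without importing anything from the package; note that plugging the defaults into the formula gives values somewhat higher than the rounded figures quoted in the removed docstring.

import math


def recency_weight(days_since: float, half_life_days: float = 365.0) -> float:
    # Same shape as the removed calculate_recency_weight:
    # 1 / (1 + ln(1 + days_since / half_life_days))
    return 1.0 / (1.0 + math.log1p(days_since / half_life_days))


for days in (0, 365, 730, 1825, 3650):
    print(f"{days:>5} days -> {recency_weight(days):.2f}")
# Prints 1.00, 0.59, 0.48, 0.36, 0.29 respectively.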
hindsight_api/extensions/__init__.py CHANGED

@@ -21,6 +21,10 @@ from hindsight_api.extensions.context import DefaultExtensionContext, ExtensionC
 from hindsight_api.extensions.http import HttpExtension
 from hindsight_api.extensions.loader import load_extension
 from hindsight_api.extensions.operation_validator import (
+    # Consolidation operation
+    ConsolidateContext,
+    ConsolidateResult,
+    # Core operations
     OperationValidationError,
     OperationValidatorExtension,
     RecallContext,

@@ -33,6 +37,7 @@ from hindsight_api.extensions.operation_validator import (
 )
 from hindsight_api.extensions.tenant import (
     AuthenticationError,
+    Tenant,
     TenantContext,
     TenantExtension,
 )

@@ -47,7 +52,7 @@ __all__ = [
     "DefaultExtensionContext",
     # HTTP Extension
     "HttpExtension",
-    # Operation Validator
+    # Operation Validator - Core
     "OperationValidationError",
     "OperationValidatorExtension",
     "RecallContext",

@@ -57,10 +62,14 @@ __all__ = [
     "RetainContext",
     "RetainResult",
     "ValidationResult",
+    # Operation Validator - Consolidation
+    "ConsolidateContext",
+    "ConsolidateResult",
     # Tenant/Auth
     "ApiKeyTenantExtension",
     "AuthenticationError",
     "RequestContext",
+    "Tenant",
     "TenantContext",
     "TenantExtension",
 ]
hindsight_api/extensions/builtin/tenant.py CHANGED

@@ -1,6 +1,6 @@
 """Built-in tenant extension implementations."""
 
-from hindsight_api.extensions.tenant import AuthenticationError, TenantContext, TenantExtension
+from hindsight_api.extensions.tenant import AuthenticationError, Tenant, TenantContext, TenantExtension
 from hindsight_api.models import RequestContext
 
 

@@ -31,3 +31,7 @@ class ApiKeyTenantExtension(TenantExtension):
         if context.api_key != self.expected_api_key:
             raise AuthenticationError("Invalid API key")
         return TenantContext(schema_name="public")
+
+    async def list_tenants(self) -> list[Tenant]:
+        """Return public schema for single-tenant setup."""
+        return [Tenant(schema="public")]
hindsight_api/extensions/context.py CHANGED

@@ -96,7 +96,7 @@ class DefaultExtensionContext(ExtensionContext):
 
     async def run_migration(self, schema: str) -> None:
         """Run migrations for a specific schema."""
-        from hindsight_api.migrations import run_migrations
+        from hindsight_api.migrations import ensure_embedding_dimension, run_migrations
 
         # Prefer getting URL from memory engine (handles pg0 case where URL is set after init)
         db_url = self._database_url

@@ -107,6 +107,15 @@ class DefaultExtensionContext(ExtensionContext):
 
         run_migrations(db_url, schema=schema)
 
+        # Ensure embedding column dimension matches the model's dimension
+        # This is needed because migrations create columns with default dimension
+        if self._memory_engine is not None:
+            embeddings = getattr(self._memory_engine, "embeddings", None)
+            if embeddings is not None:
+                dimension = getattr(embeddings, "dimension", None)
+                if dimension is not None:
+                    ensure_embedding_dimension(db_url, dimension, schema=schema)
+
     def get_memory_engine(self) -> "MemoryEngineInterface":
         """Get the memory engine interface."""
         if self._memory_engine is None:
hindsight_api/extensions/operation_validator.py CHANGED

@@ -1,4 +1,4 @@
-"""Operation Validator Extension for validating retain/recall/reflect operations."""
+"""Operation Validator Extension for validating retain/recall/reflect/consolidate operations."""
 
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field

@@ -97,6 +97,19 @@ class ReflectContext:
     context: str | None = None
 
 
+# =============================================================================
+# Consolidation Pre-operation Context
+# =============================================================================
+
+
+@dataclass
+class ConsolidateContext:
+    """Context for a consolidation operation validation (pre-operation)."""
+
+    bank_id: str
+    request_context: "RequestContext"
+
+
 # =============================================================================
 # Post-operation Contexts (includes results)
 # =============================================================================

@@ -164,9 +177,28 @@ class ReflectResultContext:
     error: str | None = None
 
 
+# =============================================================================
+# Consolidation Post-operation Context
+# =============================================================================
+
+
+@dataclass
+class ConsolidateResult:
+    """Result context for post-consolidation hook."""
+
+    bank_id: str
+    request_context: "RequestContext"
+    # Result
+    processed: int = 0
+    created: int = 0
+    updated: int = 0
+    success: bool = True
+    error: str | None = None
+
+
 class OperationValidatorExtension(Extension, ABC):
     """
-    Validates and hooks into retain/recall/reflect operations.
+    Validates and hooks into retain/recall/reflect/consolidate operations.
 
     This extension allows implementing custom logic such as:
     - Rate limiting (pre-operation)

@@ -185,9 +217,13 @@ class OperationValidatorExtension(Extension, ABC):
            -> config = {"max_requests": "100"}
 
     Hook execution order:
-    1.
+    1. validate_* (pre-operation)
     2. [operation executes]
-    3.
+    3. on_*_complete (post-operation)
+
+    Supported operations:
+    - retain, recall, reflect (core memory operations)
+    - consolidate (mental models consolidation)
     """
 
     # =========================================================================

@@ -325,3 +361,44 @@ class OperationValidatorExtension(Extension, ABC):
             - error: Error message (if failed)
         """
         pass
+
+    # =========================================================================
+    # Consolidation - Pre-operation validation hook (optional - override to implement)
+    # =========================================================================
+
+    async def validate_consolidate(self, ctx: ConsolidateContext) -> ValidationResult:
+        """
+        Validate a consolidation operation before execution.
+
+        Override to implement custom validation logic for consolidation.
+
+        Args:
+            ctx: Context containing:
+                - bank_id: Bank identifier
+                - request_context: Request context with auth info
+
+        Returns:
+            ValidationResult indicating whether the operation is allowed.
+        """
+        return ValidationResult.accept()
+
+    # =========================================================================
+    # Consolidation - Post-operation hook (optional - override to implement)
+    # =========================================================================
+
+    async def on_consolidate_complete(self, result: ConsolidateResult) -> None:
+        """
+        Called after a consolidation operation completes (success or failure).
+
+        Override to implement post-operation logic such as usage tracking or audit logging.
+
+        Args:
+            result: Result context containing:
+                - bank_id: Bank identifier
+                - processed: Number of memories processed
+                - created: Number of mental models created
+                - updated: Number of mental models updated
+                - success: Whether the operation succeeded
+                - error: Error message (if failed)
+        """
+        pass
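To show how the new consolidation hooks slot into the extension API, here is a hypothetical validator subclass. It is a sketch only: it assumes the remaining OperationValidatorExtension hooks keep the default no-op implementations shown above, and that the class is registered through the extension config mechanism referenced in the base class docstring.

import logging

from hindsight_api.extensions import (
    ConsolidateContext,
    ConsolidateResult,
    OperationValidatorExtension,
    ValidationResult,
)

logger = logging.getLogger(__name__)


class AuditingConsolidationValidator(OperationValidatorExtension):
    """Hypothetical validator that audits consolidation runs."""

    async def validate_consolidate(self, ctx: ConsolidateContext) -> ValidationResult:
        # Pre-operation hook: runs before the consolidation executes.
        logger.info("consolidation requested for bank %s", ctx.bank_id)
        return ValidationResult.accept()

    async def on_consolidate_complete(self, result: ConsolidateResult) -> None:
        # Post-operation hook: runs after success or failure.
        if result.success:
            logger.info(
                "bank %s consolidated: processed=%d created=%d updated=%d",
                result.bank_id, result.processed, result.created, result.updated,
            )
        else:
            logger.warning("consolidation failed for bank %s: %s", result.bank_id, result.error)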