hindsight-api 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- hindsight_api/__init__.py +1 -1
- hindsight_api/api/http.py +7 -19
- hindsight_api/api/mcp.py +45 -5
- hindsight_api/config.py +30 -11
- hindsight_api/engine/consolidation/consolidator.py +8 -1
- hindsight_api/engine/cross_encoder.py +2 -2
- hindsight_api/engine/embeddings.py +1 -1
- hindsight_api/engine/interface.py +0 -43
- hindsight_api/engine/llm_wrapper.py +93 -22
- hindsight_api/engine/memory_engine.py +37 -138
- hindsight_api/engine/response_models.py +1 -21
- hindsight_api/engine/retain/fact_extraction.py +3 -23
- hindsight_api/engine/retain/orchestrator.py +1 -4
- hindsight_api/engine/utils.py +0 -3
- hindsight_api/main.py +6 -3
- hindsight_api/mcp_tools.py +31 -12
- hindsight_api/metrics.py +3 -3
- hindsight_api/pg0.py +1 -1
- hindsight_api/worker/main.py +11 -11
- hindsight_api/worker/poller.py +226 -97
- {hindsight_api-0.4.2.dist-info → hindsight_api-0.4.3.dist-info}/METADATA +2 -1
- {hindsight_api-0.4.2.dist-info → hindsight_api-0.4.3.dist-info}/RECORD +24 -24
- {hindsight_api-0.4.2.dist-info → hindsight_api-0.4.3.dist-info}/WHEEL +0 -0
- {hindsight_api-0.4.2.dist-info → hindsight_api-0.4.3.dist-info}/entry_points.txt +0 -0
hindsight_api/mcp_tools.py
CHANGED
|
@@ -32,6 +32,9 @@ class MCPToolsConfig:
|
|
|
32
32
|
# How to resolve bank_id for operations
|
|
33
33
|
bank_id_resolver: Callable[[], str | None]
|
|
34
34
|
|
|
35
|
+
# How to resolve API key for tenant auth (optional)
|
|
36
|
+
api_key_resolver: Callable[[], str | None] | None = None
|
|
37
|
+
|
|
35
38
|
# Whether to include bank_id as a parameter on tools (for multi-bank support)
|
|
36
39
|
include_bank_id_param: bool = False
|
|
37
40
|
|
|
@@ -46,6 +49,16 @@ class MCPToolsConfig:
|
|
|
46
49
|
retain_fire_and_forget: bool = False # If True, use asyncio.create_task pattern
|
|
47
50
|
|
|
48
51
|
|
|
52
|
+
def _get_request_context(config: MCPToolsConfig) -> RequestContext:
|
|
53
|
+
"""Create RequestContext with API key from resolver if available.
|
|
54
|
+
|
|
55
|
+
This enables tenant auth to work with MCP tools by propagating
|
|
56
|
+
the Bearer token from the MCP middleware to the memory engine.
|
|
57
|
+
"""
|
|
58
|
+
api_key = config.api_key_resolver() if config.api_key_resolver else None
|
|
59
|
+
return RequestContext(api_key=api_key)
|
|
60
|
+
|
|
61
|
+
|
|
49
62
|
def parse_timestamp(timestamp: str) -> datetime | None:
|
|
50
63
|
"""Parse an ISO format timestamp string.
|
|
51
64
|
|
|
@@ -155,12 +168,14 @@ def _register_retain(mcp: FastMCP, memory: MemoryEngine, config: MCPToolsConfig)
|
|
|
155
168
|
if error:
|
|
156
169
|
return {"status": "error", "message": error}
|
|
157
170
|
|
|
171
|
+
request_context = _get_request_context(config)
|
|
172
|
+
|
|
158
173
|
async def _retain():
|
|
159
174
|
try:
|
|
160
175
|
await memory.retain_batch_async(
|
|
161
176
|
bank_id=target_bank,
|
|
162
177
|
contents=[content_dict],
|
|
163
|
-
request_context=
|
|
178
|
+
request_context=request_context,
|
|
164
179
|
)
|
|
165
180
|
except Exception as e:
|
|
166
181
|
logger.error(f"Error storing memory: {e}", exc_info=True)
|
|
@@ -196,16 +211,17 @@ def _register_retain(mcp: FastMCP, memory: MemoryEngine, config: MCPToolsConfig)
|
|
|
196
211
|
return f"Error: {error}"
|
|
197
212
|
|
|
198
213
|
contents = [content_dict]
|
|
214
|
+
request_context = _get_request_context(config)
|
|
199
215
|
if async_processing:
|
|
200
216
|
result = await memory.submit_async_retain(
|
|
201
|
-
bank_id=target_bank, contents=contents, request_context=
|
|
217
|
+
bank_id=target_bank, contents=contents, request_context=request_context
|
|
202
218
|
)
|
|
203
219
|
return f"Memory queued for background processing (operation_id: {result.get('operation_id', 'N/A')})"
|
|
204
220
|
else:
|
|
205
221
|
await memory.retain_batch_async(
|
|
206
222
|
bank_id=target_bank,
|
|
207
223
|
contents=contents,
|
|
208
|
-
request_context=
|
|
224
|
+
request_context=request_context,
|
|
209
225
|
)
|
|
210
226
|
return f"Memory stored successfully in bank '{target_bank}'"
|
|
211
227
|
except Exception as e:
|
|
@@ -237,12 +253,14 @@ def _register_retain(mcp: FastMCP, memory: MemoryEngine, config: MCPToolsConfig)
|
|
|
237
253
|
if error:
|
|
238
254
|
return {"status": "error", "message": error}
|
|
239
255
|
|
|
256
|
+
request_context = _get_request_context(config)
|
|
257
|
+
|
|
240
258
|
async def _retain():
|
|
241
259
|
try:
|
|
242
260
|
await memory.retain_batch_async(
|
|
243
261
|
bank_id=target_bank,
|
|
244
262
|
contents=[content_dict],
|
|
245
|
-
request_context=
|
|
263
|
+
request_context=request_context,
|
|
246
264
|
)
|
|
247
265
|
except Exception as e:
|
|
248
266
|
logger.error(f"Error storing memory: {e}", exc_info=True)
|
|
@@ -280,7 +298,7 @@ def _register_recall(mcp: FastMCP, memory: MemoryEngine, config: MCPToolsConfig)
|
|
|
280
298
|
fact_type=list(VALID_RECALL_FACT_TYPES),
|
|
281
299
|
budget=Budget.HIGH,
|
|
282
300
|
max_tokens=max_tokens,
|
|
283
|
-
request_context=
|
|
301
|
+
request_context=_get_request_context(config),
|
|
284
302
|
)
|
|
285
303
|
|
|
286
304
|
return recall_result.model_dump_json(indent=2)
|
|
@@ -311,7 +329,7 @@ def _register_recall(mcp: FastMCP, memory: MemoryEngine, config: MCPToolsConfig)
|
|
|
311
329
|
fact_type=list(VALID_RECALL_FACT_TYPES),
|
|
312
330
|
budget=Budget.HIGH,
|
|
313
331
|
max_tokens=max_tokens,
|
|
314
|
-
request_context=
|
|
332
|
+
request_context=_get_request_context(config),
|
|
315
333
|
)
|
|
316
334
|
|
|
317
335
|
return recall_result.model_dump()
|
|
@@ -370,7 +388,7 @@ def _register_reflect(mcp: FastMCP, memory: MemoryEngine, config: MCPToolsConfig
|
|
|
370
388
|
query=query,
|
|
371
389
|
budget=budget_enum,
|
|
372
390
|
context=context,
|
|
373
|
-
request_context=
|
|
391
|
+
request_context=_get_request_context(config),
|
|
374
392
|
)
|
|
375
393
|
|
|
376
394
|
return reflect_result.model_dump_json(indent=2)
|
|
@@ -423,7 +441,7 @@ def _register_reflect(mcp: FastMCP, memory: MemoryEngine, config: MCPToolsConfig
|
|
|
423
441
|
query=query,
|
|
424
442
|
budget=budget_enum,
|
|
425
443
|
context=context,
|
|
426
|
-
request_context=
|
|
444
|
+
request_context=_get_request_context(config),
|
|
427
445
|
)
|
|
428
446
|
|
|
429
447
|
return reflect_result.model_dump()
|
|
@@ -447,7 +465,7 @@ def _register_list_banks(mcp: FastMCP, memory: MemoryEngine, config: MCPToolsCon
|
|
|
447
465
|
JSON list of banks with their IDs, names, dispositions, and missions.
|
|
448
466
|
"""
|
|
449
467
|
try:
|
|
450
|
-
banks = await memory.list_banks(request_context=
|
|
468
|
+
banks = await memory.list_banks(request_context=_get_request_context(config))
|
|
451
469
|
return json.dumps({"banks": banks}, indent=2)
|
|
452
470
|
except Exception as e:
|
|
453
471
|
logger.error(f"Error listing banks: {e}", exc_info=True)
|
|
@@ -471,8 +489,9 @@ def _register_create_bank(mcp: FastMCP, memory: MemoryEngine, config: MCPToolsCo
|
|
|
471
489
|
mission: Optional mission describing who the agent is and what they're trying to accomplish
|
|
472
490
|
"""
|
|
473
491
|
try:
|
|
492
|
+
request_context = _get_request_context(config)
|
|
474
493
|
# get_bank_profile auto-creates bank if it doesn't exist
|
|
475
|
-
profile = await memory.get_bank_profile(bank_id, request_context=
|
|
494
|
+
profile = await memory.get_bank_profile(bank_id, request_context=request_context)
|
|
476
495
|
|
|
477
496
|
# Update name/mission if provided
|
|
478
497
|
if name is not None or mission is not None:
|
|
@@ -480,10 +499,10 @@ def _register_create_bank(mcp: FastMCP, memory: MemoryEngine, config: MCPToolsCo
|
|
|
480
499
|
bank_id,
|
|
481
500
|
name=name,
|
|
482
501
|
mission=mission,
|
|
483
|
-
request_context=
|
|
502
|
+
request_context=request_context,
|
|
484
503
|
)
|
|
485
504
|
# Fetch updated profile
|
|
486
|
-
profile = await memory.get_bank_profile(bank_id, request_context=
|
|
505
|
+
profile = await memory.get_bank_profile(bank_id, request_context=request_context)
|
|
487
506
|
|
|
488
507
|
# Serialize disposition if it's a Pydantic model
|
|
489
508
|
if "disposition" in profile and hasattr(profile["disposition"], "model_dump"):
|
hindsight_api/metrics.py
CHANGED
|
@@ -189,7 +189,7 @@ class MetricsCollectorBase:
|
|
|
189
189
|
Args:
|
|
190
190
|
provider: LLM provider name (openai, anthropic, gemini, groq, ollama, lmstudio)
|
|
191
191
|
model: Model name
|
|
192
|
-
scope: Scope identifier (e.g., "memory", "reflect", "
|
|
192
|
+
scope: Scope identifier (e.g., "memory", "reflect", "consolidation")
|
|
193
193
|
duration: Call duration in seconds
|
|
194
194
|
input_tokens: Number of input/prompt tokens
|
|
195
195
|
output_tokens: Number of output/completion tokens
|
|
@@ -321,7 +321,7 @@ class MetricsCollector(MetricsCollectorBase):
|
|
|
321
321
|
pass
|
|
322
322
|
|
|
323
323
|
Args:
|
|
324
|
-
operation: Operation name (retain, recall, reflect,
|
|
324
|
+
operation: Operation name (retain, recall, reflect, consolidation)
|
|
325
325
|
bank_id: Memory bank ID
|
|
326
326
|
source: Source of the operation (api, reflect, internal)
|
|
327
327
|
budget: Optional budget level (low, mid, high)
|
|
@@ -371,7 +371,7 @@ class MetricsCollector(MetricsCollectorBase):
|
|
|
371
371
|
Args:
|
|
372
372
|
provider: LLM provider name (openai, anthropic, gemini, groq, ollama, lmstudio)
|
|
373
373
|
model: Model name
|
|
374
|
-
scope: Scope identifier (e.g., "memory", "reflect", "
|
|
374
|
+
scope: Scope identifier (e.g., "memory", "reflect", "consolidation")
|
|
375
375
|
duration: Call duration in seconds
|
|
376
376
|
input_tokens: Number of input/prompt tokens
|
|
377
377
|
output_tokens: Number of output/completion tokens
|
hindsight_api/pg0.py
CHANGED
|
@@ -40,7 +40,7 @@ class EmbeddedPostgres:
|
|
|
40
40
|
# Only set port if explicitly specified
|
|
41
41
|
if self.port is not None:
|
|
42
42
|
kwargs["port"] = self.port
|
|
43
|
-
self._pg0 = Pg0(**kwargs)
|
|
43
|
+
self._pg0 = Pg0(**kwargs)
|
|
44
44
|
return self._pg0
|
|
45
45
|
|
|
46
46
|
async def start(self, max_retries: int = 5, retry_delay: float = 4.0) -> str:
|
hindsight_api/worker/main.py
CHANGED
|
@@ -124,12 +124,6 @@ def main():
|
|
|
124
124
|
default=config.worker_poll_interval_ms,
|
|
125
125
|
help=f"Poll interval in milliseconds (default: {config.worker_poll_interval_ms}, env: HINDSIGHT_API_WORKER_POLL_INTERVAL_MS)",
|
|
126
126
|
)
|
|
127
|
-
parser.add_argument(
|
|
128
|
-
"--batch-size",
|
|
129
|
-
type=int,
|
|
130
|
-
default=config.worker_batch_size,
|
|
131
|
-
help=f"Tasks to claim per poll (default: {config.worker_batch_size}, env: HINDSIGHT_API_WORKER_BATCH_SIZE)",
|
|
132
|
-
)
|
|
133
127
|
parser.add_argument(
|
|
134
128
|
"--max-retries",
|
|
135
129
|
type=int,
|
|
@@ -168,8 +162,9 @@ def main():
|
|
|
168
162
|
|
|
169
163
|
print(f"Starting Hindsight Worker: {args.worker_id}")
|
|
170
164
|
print(f" Poll interval: {args.poll_interval}ms")
|
|
171
|
-
print(f" Batch size: {args.batch_size}")
|
|
172
165
|
print(f" Max retries: {args.max_retries}")
|
|
166
|
+
print(f" Max slots: {config.worker_max_slots}")
|
|
167
|
+
print(f" Consolidation max slots: {config.worker_consolidation_max_slots}")
|
|
173
168
|
print(f" HTTP server: {args.http_host}:{args.http_port}")
|
|
174
169
|
print()
|
|
175
170
|
|
|
@@ -183,21 +178,25 @@ def main():
|
|
|
183
178
|
|
|
184
179
|
from ..extensions import TenantExtension, load_extension
|
|
185
180
|
|
|
181
|
+
# Load tenant extension BEFORE creating MemoryEngine so it can
|
|
182
|
+
# set correct schema context during task execution. Without this,
|
|
183
|
+
# _authenticate_tenant sees no extension and resets schema to "public",
|
|
184
|
+
# causing worker writes to land in the wrong schema.
|
|
185
|
+
tenant_extension = load_extension("TENANT", TenantExtension)
|
|
186
|
+
|
|
186
187
|
# Initialize MemoryEngine
|
|
187
188
|
# Workers use SyncTaskBackend because they execute tasks directly,
|
|
188
189
|
# they don't need to store tasks (they poll from DB)
|
|
189
190
|
memory = MemoryEngine(
|
|
190
191
|
run_migrations=False, # Workers don't run migrations
|
|
191
192
|
task_backend=SyncTaskBackend(),
|
|
193
|
+
tenant_extension=tenant_extension,
|
|
192
194
|
)
|
|
193
195
|
|
|
194
196
|
await memory.initialize()
|
|
195
197
|
|
|
196
198
|
print(f"Database connected: {config.database_url}")
|
|
197
199
|
|
|
198
|
-
# Load tenant extension for dynamic schema discovery
|
|
199
|
-
tenant_extension = load_extension("TENANT", TenantExtension)
|
|
200
|
-
|
|
201
200
|
if tenant_extension:
|
|
202
201
|
print("Tenant extension loaded - schemas will be discovered dynamically on each poll")
|
|
203
202
|
else:
|
|
@@ -209,9 +208,10 @@ def main():
|
|
|
209
208
|
worker_id=args.worker_id,
|
|
210
209
|
executor=memory.execute_task,
|
|
211
210
|
poll_interval_ms=args.poll_interval,
|
|
212
|
-
batch_size=args.batch_size,
|
|
213
211
|
max_retries=args.max_retries,
|
|
214
212
|
tenant_extension=tenant_extension,
|
|
213
|
+
max_slots=config.worker_max_slots,
|
|
214
|
+
consolidation_max_slots=config.worker_consolidation_max_slots,
|
|
215
215
|
)
|
|
216
216
|
|
|
217
217
|
# Create the HTTP app for metrics/health
|