hindsight-api 0.1.15__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/api/__init__.py +38 -14
- hindsight_api/api/http.py +100 -9
- hindsight_api/api/mcp.py +203 -52
- hindsight_api/config.py +27 -0
- hindsight_api/engine/interface.py +4 -0
- hindsight_api/engine/llm_wrapper.py +275 -45
- hindsight_api/engine/memory_engine.py +69 -16
- hindsight_api/engine/response_models.py +7 -1
- hindsight_api/engine/retain/entity_processing.py +37 -8
- hindsight_api/engine/retain/fact_extraction.py +49 -6
- hindsight_api/engine/retain/observation_regeneration.py +4 -2
- hindsight_api/engine/retain/orchestrator.py +12 -1
- hindsight_api/engine/retain/types.py +7 -0
- hindsight_api/extensions/context.py +8 -1
- hindsight_api/extensions/operation_validator.py +6 -4
- hindsight_api/main.py +29 -1
- hindsight_api/models.py +3 -0
- {hindsight_api-0.1.15.dist-info → hindsight_api-0.2.0.dist-info}/METADATA +3 -2
- {hindsight_api-0.1.15.dist-info → hindsight_api-0.2.0.dist-info}/RECORD +21 -21
- {hindsight_api-0.1.15.dist-info → hindsight_api-0.2.0.dist-info}/WHEEL +0 -0
- {hindsight_api-0.1.15.dist-info → hindsight_api-0.2.0.dist-info}/entry_points.txt +0 -0
hindsight_api/api/mcp.py
CHANGED
|
@@ -8,6 +8,7 @@ from contextvars import ContextVar
|
|
|
8
8
|
from fastmcp import FastMCP
|
|
9
9
|
|
|
10
10
|
from hindsight_api import MemoryEngine
|
|
11
|
+
from hindsight_api.api.http import BankListItem, BankListResponse, BankProfileResponse, DispositionTraits
|
|
11
12
|
from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES
|
|
12
13
|
from hindsight_api.models import RequestContext
|
|
13
14
|
|
|
@@ -27,12 +28,15 @@ logging.basicConfig(
|
|
|
27
28
|
)
|
|
28
29
|
logger = logging.getLogger(__name__)
|
|
29
30
|
|
|
30
|
-
#
|
|
31
|
+
# Default bank_id from environment variable
|
|
32
|
+
DEFAULT_BANK_ID = os.environ.get("HINDSIGHT_MCP_BANK_ID", "default")
|
|
33
|
+
|
|
34
|
+
# Context variable to hold the current bank_id
|
|
31
35
|
_current_bank_id: ContextVar[str | None] = ContextVar("current_bank_id", default=None)
|
|
32
36
|
|
|
33
37
|
|
|
34
38
|
def get_current_bank_id() -> str | None:
|
|
35
|
-
"""Get the current bank_id from context
|
|
39
|
+
"""Get the current bank_id from context."""
|
|
36
40
|
return _current_bank_id.get()
|
|
37
41
|
|
|
38
42
|
|
|
@@ -44,12 +48,13 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
|
|
|
44
48
|
memory: MemoryEngine instance (required)
|
|
45
49
|
|
|
46
50
|
Returns:
|
|
47
|
-
Configured FastMCP server instance
|
|
51
|
+
Configured FastMCP server instance with stateless_http enabled
|
|
48
52
|
"""
|
|
49
|
-
|
|
53
|
+
# Use stateless_http=True for Claude Code compatibility
|
|
54
|
+
mcp = FastMCP("hindsight-mcp-server", stateless_http=True)
|
|
50
55
|
|
|
51
56
|
@mcp.tool()
|
|
52
|
-
async def retain(content: str, context: str = "general") -> str:
|
|
57
|
+
async def retain(content: str, context: str = "general", bank_id: str | None = None) -> str:
|
|
53
58
|
"""
|
|
54
59
|
Store important information to long-term memory.
|
|
55
60
|
|
|
@@ -65,21 +70,24 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
|
|
|
65
70
|
Args:
|
|
66
71
|
content: The fact/memory to store (be specific and include relevant details)
|
|
67
72
|
context: Category for the memory (e.g., 'preferences', 'work', 'hobbies', 'family'). Default: 'general'
|
|
73
|
+
bank_id: Optional bank to store in (defaults to session bank). Use for cross-bank operations.
|
|
68
74
|
"""
|
|
69
75
|
try:
|
|
70
|
-
|
|
71
|
-
if
|
|
76
|
+
target_bank = bank_id or get_current_bank_id()
|
|
77
|
+
if target_bank is None:
|
|
72
78
|
return "Error: No bank_id configured"
|
|
73
79
|
await memory.retain_batch_async(
|
|
74
|
-
bank_id=
|
|
80
|
+
bank_id=target_bank,
|
|
81
|
+
contents=[{"content": content, "context": context}],
|
|
82
|
+
request_context=RequestContext(),
|
|
75
83
|
)
|
|
76
|
-
return "Memory stored successfully"
|
|
84
|
+
return f"Memory stored successfully in bank '{target_bank}'"
|
|
77
85
|
except Exception as e:
|
|
78
86
|
logger.error(f"Error storing memory: {e}", exc_info=True)
|
|
79
87
|
return f"Error: {str(e)}"
|
|
80
88
|
|
|
81
89
|
@mcp.tool()
|
|
82
|
-
async def recall(query: str,
|
|
90
|
+
async def recall(query: str, max_tokens: int = 4096, bank_id: str | None = None) -> str:
|
|
83
91
|
"""
|
|
84
92
|
Search memories to provide personalized, context-aware responses.
|
|
85
93
|
|
|
@@ -91,49 +99,184 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
|
|
|
91
99
|
|
|
92
100
|
Args:
|
|
93
101
|
query: Natural language search query (e.g., "user's food preferences", "what projects is user working on")
|
|
94
|
-
|
|
102
|
+
max_tokens: Maximum tokens in the response (default: 4096)
|
|
103
|
+
bank_id: Optional bank to search in (defaults to session bank). Use for cross-bank operations.
|
|
95
104
|
"""
|
|
96
105
|
try:
|
|
97
|
-
|
|
98
|
-
if
|
|
106
|
+
target_bank = bank_id or get_current_bank_id()
|
|
107
|
+
if target_bank is None:
|
|
99
108
|
return "Error: No bank_id configured"
|
|
100
109
|
from hindsight_api.engine.memory_engine import Budget
|
|
101
110
|
|
|
102
|
-
|
|
103
|
-
bank_id=
|
|
111
|
+
recall_result = await memory.recall_async(
|
|
112
|
+
bank_id=target_bank,
|
|
104
113
|
query=query,
|
|
105
114
|
fact_type=list(VALID_RECALL_FACT_TYPES),
|
|
106
|
-
budget=Budget.
|
|
115
|
+
budget=Budget.HIGH,
|
|
116
|
+
max_tokens=max_tokens,
|
|
117
|
+
request_context=RequestContext(),
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
# Use model's JSON serialization
|
|
121
|
+
return recall_result.model_dump_json(indent=2)
|
|
122
|
+
except Exception as e:
|
|
123
|
+
logger.error(f"Error searching: {e}", exc_info=True)
|
|
124
|
+
return f'{{"error": "{e}", "results": []}}'
|
|
125
|
+
|
|
126
|
+
@mcp.tool()
|
|
127
|
+
async def reflect(query: str, context: str | None = None, budget: str = "low", bank_id: str | None = None) -> str:
|
|
128
|
+
"""
|
|
129
|
+
Generate thoughtful analysis by synthesizing stored memories with the bank's personality.
|
|
130
|
+
|
|
131
|
+
WHEN TO USE THIS TOOL:
|
|
132
|
+
Use reflect when you need reasoned analysis, not just fact retrieval. This tool
|
|
133
|
+
thinks through the question using everything the bank knows and its personality traits.
|
|
134
|
+
|
|
135
|
+
EXAMPLES OF GOOD QUERIES:
|
|
136
|
+
- "What patterns have emerged in how I approach debugging?"
|
|
137
|
+
- "Based on my past decisions, what architectural style do I prefer?"
|
|
138
|
+
- "What might be the best approach for this problem given what you know about me?"
|
|
139
|
+
- "How should I prioritize these tasks based on my goals?"
|
|
140
|
+
|
|
141
|
+
HOW IT DIFFERS FROM RECALL:
|
|
142
|
+
- recall: Returns raw facts matching your search (fast lookup)
|
|
143
|
+
- reflect: Reasons across memories to form a synthesized answer (deeper analysis)
|
|
144
|
+
|
|
145
|
+
Use recall for "what did I say about X?" and reflect for "what should I do about X?"
|
|
146
|
+
|
|
147
|
+
Args:
|
|
148
|
+
query: The question or topic to reflect on
|
|
149
|
+
context: Optional context about why this reflection is needed
|
|
150
|
+
budget: Search budget - 'low', 'mid', or 'high' (default: 'low')
|
|
151
|
+
bank_id: Optional bank to reflect in (defaults to session bank). Use for cross-bank operations.
|
|
152
|
+
"""
|
|
153
|
+
try:
|
|
154
|
+
target_bank = bank_id or get_current_bank_id()
|
|
155
|
+
if target_bank is None:
|
|
156
|
+
return "Error: No bank_id configured"
|
|
157
|
+
from hindsight_api.engine.memory_engine import Budget
|
|
158
|
+
|
|
159
|
+
# Map string budget to enum
|
|
160
|
+
budget_map = {"low": Budget.LOW, "mid": Budget.MID, "high": Budget.HIGH}
|
|
161
|
+
budget_enum = budget_map.get(budget.lower(), Budget.LOW)
|
|
162
|
+
|
|
163
|
+
reflect_result = await memory.reflect_async(
|
|
164
|
+
bank_id=target_bank,
|
|
165
|
+
query=query,
|
|
166
|
+
budget=budget_enum,
|
|
167
|
+
context=context,
|
|
107
168
|
request_context=RequestContext(),
|
|
108
169
|
)
|
|
109
170
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
171
|
+
return reflect_result.model_dump_json(indent=2)
|
|
172
|
+
except Exception as e:
|
|
173
|
+
logger.error(f"Error reflecting: {e}", exc_info=True)
|
|
174
|
+
return f'{{"error": "{e}", "text": ""}}'
|
|
175
|
+
|
|
176
|
+
@mcp.tool()
|
|
177
|
+
async def list_banks() -> str:
|
|
178
|
+
"""
|
|
179
|
+
List all available memory banks.
|
|
180
|
+
|
|
181
|
+
Use this to discover banks for orchestration or to find
|
|
182
|
+
the correct bank_id for cross-bank operations.
|
|
183
|
+
|
|
184
|
+
Returns:
|
|
185
|
+
JSON object with banks array containing bank_id, name, disposition, background, and timestamps
|
|
186
|
+
"""
|
|
187
|
+
try:
|
|
188
|
+
banks = await memory.list_banks(request_context=RequestContext())
|
|
189
|
+
bank_items = [
|
|
190
|
+
BankListItem(
|
|
191
|
+
bank_id=b.get("bank_id") or b.get("id"),
|
|
192
|
+
name=b.get("name"),
|
|
193
|
+
disposition=DispositionTraits(
|
|
194
|
+
**b.get("disposition", {"skepticism": 3, "literalism": 3, "empathy": 3})
|
|
195
|
+
),
|
|
196
|
+
background=b.get("background"),
|
|
197
|
+
created_at=str(b.get("created_at")) if b.get("created_at") else None,
|
|
198
|
+
updated_at=str(b.get("updated_at")) if b.get("updated_at") else None,
|
|
199
|
+
)
|
|
200
|
+
for b in banks
|
|
119
201
|
]
|
|
202
|
+
return BankListResponse(banks=bank_items).model_dump_json(indent=2)
|
|
203
|
+
except Exception as e:
|
|
204
|
+
logger.error(f"Error listing banks: {e}", exc_info=True)
|
|
205
|
+
return f'{{"error": "{e}", "banks": []}}'
|
|
120
206
|
|
|
121
|
-
|
|
207
|
+
@mcp.tool()
|
|
208
|
+
async def create_bank(bank_id: str, name: str | None = None, background: str | None = None) -> str:
|
|
209
|
+
"""
|
|
210
|
+
Create or update a memory bank.
|
|
211
|
+
|
|
212
|
+
Use this to create new banks for different agents, sessions, or purposes.
|
|
213
|
+
Banks are isolated memory stores - each bank has its own memories and personality.
|
|
214
|
+
|
|
215
|
+
Args:
|
|
216
|
+
bank_id: Unique identifier for the bank (e.g., 'orchestrator-memory', 'agent-1')
|
|
217
|
+
name: Human-readable name for the bank
|
|
218
|
+
background: Context about what this bank stores or its purpose
|
|
219
|
+
"""
|
|
220
|
+
try:
|
|
221
|
+
# Get or create the bank profile (auto-creates with defaults)
|
|
222
|
+
await memory.get_bank_profile(bank_id, request_context=RequestContext())
|
|
223
|
+
|
|
224
|
+
# Update name and/or background if provided
|
|
225
|
+
if name is not None or background is not None:
|
|
226
|
+
await memory.update_bank(bank_id, name=name, background=background, request_context=RequestContext())
|
|
227
|
+
|
|
228
|
+
# Get final profile and return using BankProfileResponse model
|
|
229
|
+
profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
|
|
230
|
+
disposition = profile.get("disposition")
|
|
231
|
+
if hasattr(disposition, "model_dump"):
|
|
232
|
+
disposition_traits = DispositionTraits(**disposition.model_dump())
|
|
233
|
+
else:
|
|
234
|
+
disposition_traits = DispositionTraits(
|
|
235
|
+
**dict(disposition or {"skepticism": 3, "literalism": 3, "empathy": 3})
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
response = BankProfileResponse(
|
|
239
|
+
bank_id=bank_id,
|
|
240
|
+
name=profile.get("name") or "",
|
|
241
|
+
disposition=disposition_traits,
|
|
242
|
+
background=profile.get("background") or "",
|
|
243
|
+
)
|
|
244
|
+
return response.model_dump_json(indent=2)
|
|
122
245
|
except Exception as e:
|
|
123
|
-
logger.error(f"Error
|
|
124
|
-
return json.dumps({"error": str(e)
|
|
246
|
+
logger.error(f"Error creating bank: {e}", exc_info=True)
|
|
247
|
+
return json.dumps({"error": str(e)})
|
|
125
248
|
|
|
126
249
|
return mcp
|
|
127
250
|
|
|
128
251
|
|
|
129
252
|
class MCPMiddleware:
|
|
130
|
-
"""ASGI middleware that extracts bank_id from path and sets context.
|
|
253
|
+
"""ASGI middleware that extracts bank_id from header or path and sets context.
|
|
254
|
+
|
|
255
|
+
Bank ID can be provided via:
|
|
256
|
+
1. X-Bank-Id header (recommended for Claude Code)
|
|
257
|
+
2. URL path: /mcp/{bank_id}/
|
|
258
|
+
3. Environment variable HINDSIGHT_MCP_BANK_ID (fallback default)
|
|
259
|
+
|
|
260
|
+
For Claude Code, configure with:
|
|
261
|
+
claude mcp add --transport http hindsight http://localhost:8888/mcp \\
|
|
262
|
+
--header "X-Bank-Id: my-bank"
|
|
263
|
+
"""
|
|
131
264
|
|
|
132
265
|
def __init__(self, app, memory: MemoryEngine):
|
|
133
266
|
self.app = app
|
|
134
267
|
self.memory = memory
|
|
135
268
|
self.mcp_server = create_mcp_server(memory)
|
|
136
|
-
self.mcp_app = self.mcp_server.http_app()
|
|
269
|
+
self.mcp_app = self.mcp_server.http_app(path="/")
|
|
270
|
+
# Expose the lifespan for the parent app to chain
|
|
271
|
+
self.lifespan = self.mcp_app.lifespan_handler if hasattr(self.mcp_app, "lifespan_handler") else None
|
|
272
|
+
|
|
273
|
+
def _get_header(self, scope: dict, name: str) -> str | None:
|
|
274
|
+
"""Extract a header value from ASGI scope."""
|
|
275
|
+
name_lower = name.lower().encode()
|
|
276
|
+
for header_name, header_value in scope.get("headers", []):
|
|
277
|
+
if header_name.lower() == name_lower:
|
|
278
|
+
return header_value.decode()
|
|
279
|
+
return None
|
|
137
280
|
|
|
138
281
|
async def __call__(self, scope, receive, send):
|
|
139
282
|
if scope["type"] != "http":
|
|
@@ -150,32 +293,39 @@ class MCPMiddleware:
|
|
|
150
293
|
# Also handle case where mount path wasn't stripped (e.g., /mcp/...)
|
|
151
294
|
if path.startswith("/mcp/"):
|
|
152
295
|
path = path[4:] # Remove /mcp prefix
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
296
|
+
elif path == "/mcp":
|
|
297
|
+
path = "/"
|
|
298
|
+
|
|
299
|
+
# Try to get bank_id from header first (for Claude Code compatibility)
|
|
300
|
+
bank_id = self._get_header(scope, "X-Bank-Id")
|
|
301
|
+
|
|
302
|
+
# MCP endpoint paths that should not be treated as bank_ids
|
|
303
|
+
MCP_ENDPOINTS = {"sse", "messages"}
|
|
304
|
+
|
|
305
|
+
# If no header, try to extract from path: /{bank_id}/...
|
|
306
|
+
new_path = path
|
|
307
|
+
if not bank_id and path.startswith("/") and len(path) > 1:
|
|
308
|
+
parts = path[1:].split("/", 1)
|
|
309
|
+
# Don't treat MCP endpoints as bank_ids
|
|
310
|
+
if parts[0] and parts[0] not in MCP_ENDPOINTS:
|
|
311
|
+
# First segment looks like a bank_id
|
|
312
|
+
bank_id = parts[0]
|
|
313
|
+
new_path = "/" + parts[1] if len(parts) > 1 else "/"
|
|
314
|
+
|
|
315
|
+
# Fall back to default bank_id
|
|
316
|
+
if not bank_id:
|
|
317
|
+
bank_id = DEFAULT_BANK_ID
|
|
318
|
+
logger.debug(f"Using default bank_id: {bank_id}")
|
|
169
319
|
|
|
170
320
|
# Set bank_id context
|
|
171
321
|
token = _current_bank_id.set(bank_id)
|
|
172
322
|
try:
|
|
173
323
|
new_scope = scope.copy()
|
|
174
324
|
new_scope["path"] = new_path
|
|
325
|
+
# Clear root_path since we're passing directly to the app
|
|
326
|
+
new_scope["root_path"] = ""
|
|
175
327
|
|
|
176
|
-
# Wrap send to rewrite the SSE endpoint URL to include bank_id
|
|
177
|
-
# The SSE app sends "event: endpoint\ndata: /messages\n" but we need
|
|
178
|
-
# the client to POST to /{bank_id}/messages instead
|
|
328
|
+
# Wrap send to rewrite the SSE endpoint URL to include bank_id if using path-based routing
|
|
179
329
|
async def send_wrapper(message):
|
|
180
330
|
if message["type"] == "http.response.body":
|
|
181
331
|
body = message.get("body", b"")
|
|
@@ -211,9 +361,10 @@ def create_mcp_app(memory: MemoryEngine):
|
|
|
211
361
|
"""
|
|
212
362
|
Create an ASGI app that handles MCP requests.
|
|
213
363
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
364
|
+
Bank ID can be provided via:
|
|
365
|
+
1. X-Bank-Id header: claude mcp add --transport http hindsight http://localhost:8888/mcp --header "X-Bank-Id: my-bank"
|
|
366
|
+
2. URL path: /mcp/{bank_id}/
|
|
367
|
+
3. Environment variable HINDSIGHT_MCP_BANK_ID (fallback, default: "default")
|
|
217
368
|
|
|
218
369
|
Args:
|
|
219
370
|
memory: MemoryEngine instance
|
hindsight_api/config.py
CHANGED
|
@@ -16,6 +16,8 @@ ENV_LLM_PROVIDER = "HINDSIGHT_API_LLM_PROVIDER"
|
|
|
16
16
|
ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
|
|
17
17
|
ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
|
|
18
18
|
ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
|
|
19
|
+
ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
|
|
20
|
+
ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
|
|
19
21
|
|
|
20
22
|
ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
|
|
21
23
|
ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
|
|
@@ -33,6 +35,10 @@ ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
|
|
|
33
35
|
ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
|
|
34
36
|
ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
|
|
35
37
|
|
|
38
|
+
# Observation thresholds
|
|
39
|
+
ENV_OBSERVATION_MIN_FACTS = "HINDSIGHT_API_OBSERVATION_MIN_FACTS"
|
|
40
|
+
ENV_OBSERVATION_TOP_ENTITIES = "HINDSIGHT_API_OBSERVATION_TOP_ENTITIES"
|
|
41
|
+
|
|
36
42
|
# Optimization flags
|
|
37
43
|
ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
|
|
38
44
|
ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
|
|
@@ -41,6 +47,8 @@ ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
|
|
|
41
47
|
DEFAULT_DATABASE_URL = "pg0"
|
|
42
48
|
DEFAULT_LLM_PROVIDER = "openai"
|
|
43
49
|
DEFAULT_LLM_MODEL = "gpt-5-mini"
|
|
50
|
+
DEFAULT_LLM_MAX_CONCURRENT = 32
|
|
51
|
+
DEFAULT_LLM_TIMEOUT = 120.0 # seconds
|
|
44
52
|
|
|
45
53
|
DEFAULT_EMBEDDINGS_PROVIDER = "local"
|
|
46
54
|
DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
|
|
@@ -55,6 +63,10 @@ DEFAULT_MCP_ENABLED = True
|
|
|
55
63
|
DEFAULT_GRAPH_RETRIEVER = "bfs" # Options: "bfs", "mpfp"
|
|
56
64
|
DEFAULT_MCP_LOCAL_BANK_ID = "mcp"
|
|
57
65
|
|
|
66
|
+
# Observation thresholds
|
|
67
|
+
DEFAULT_OBSERVATION_MIN_FACTS = 5 # Min facts required to generate entity observations
|
|
68
|
+
DEFAULT_OBSERVATION_TOP_ENTITIES = 5 # Max entities to process per retain batch
|
|
69
|
+
|
|
58
70
|
# Default MCP tool descriptions (can be customized via env vars)
|
|
59
71
|
DEFAULT_MCP_RETAIN_DESCRIPTION = """Store important information to long-term memory.
|
|
60
72
|
|
|
@@ -91,6 +103,8 @@ class HindsightConfig:
|
|
|
91
103
|
llm_api_key: str | None
|
|
92
104
|
llm_model: str
|
|
93
105
|
llm_base_url: str | None
|
|
106
|
+
llm_max_concurrent: int
|
|
107
|
+
llm_timeout: float
|
|
94
108
|
|
|
95
109
|
# Embeddings
|
|
96
110
|
embeddings_provider: str
|
|
@@ -111,6 +125,10 @@ class HindsightConfig:
|
|
|
111
125
|
# Recall
|
|
112
126
|
graph_retriever: str
|
|
113
127
|
|
|
128
|
+
# Observation thresholds
|
|
129
|
+
observation_min_facts: int
|
|
130
|
+
observation_top_entities: int
|
|
131
|
+
|
|
114
132
|
# Optimization flags
|
|
115
133
|
skip_llm_verification: bool
|
|
116
134
|
lazy_reranker: bool
|
|
@@ -126,6 +144,8 @@ class HindsightConfig:
|
|
|
126
144
|
llm_api_key=os.getenv(ENV_LLM_API_KEY),
|
|
127
145
|
llm_model=os.getenv(ENV_LLM_MODEL, DEFAULT_LLM_MODEL),
|
|
128
146
|
llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
|
|
147
|
+
llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
|
|
148
|
+
llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
|
|
129
149
|
# Embeddings
|
|
130
150
|
embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
|
|
131
151
|
embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
|
|
@@ -144,6 +164,11 @@ class HindsightConfig:
|
|
|
144
164
|
# Optimization flags
|
|
145
165
|
skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
|
|
146
166
|
lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
|
|
167
|
+
# Observation thresholds
|
|
168
|
+
observation_min_facts=int(os.getenv(ENV_OBSERVATION_MIN_FACTS, str(DEFAULT_OBSERVATION_MIN_FACTS))),
|
|
169
|
+
observation_top_entities=int(
|
|
170
|
+
os.getenv(ENV_OBSERVATION_TOP_ENTITIES, str(DEFAULT_OBSERVATION_TOP_ENTITIES))
|
|
171
|
+
),
|
|
147
172
|
)
|
|
148
173
|
|
|
149
174
|
def get_llm_base_url(self) -> str:
|
|
@@ -156,6 +181,8 @@ class HindsightConfig:
|
|
|
156
181
|
return "https://api.groq.com/openai/v1"
|
|
157
182
|
elif provider == "ollama":
|
|
158
183
|
return "http://localhost:11434/v1"
|
|
184
|
+
elif provider == "lmstudio":
|
|
185
|
+
return "http://localhost:1234/v1"
|
|
159
186
|
else:
|
|
160
187
|
return ""
|
|
161
188
|
|
|
hindsight_api/engine/interface.py
CHANGED
|
@@ -110,6 +110,8 @@ class MemoryEngineInterface(ABC):
|
|
|
110
110
|
*,
|
|
111
111
|
budget: "Budget | None" = None,
|
|
112
112
|
context: str | None = None,
|
|
113
|
+
max_tokens: int = 4096,
|
|
114
|
+
response_schema: dict | None = None,
|
|
113
115
|
request_context: "RequestContext",
|
|
114
116
|
) -> "ReflectResult":
|
|
115
117
|
"""
|
|
@@ -120,6 +122,8 @@ class MemoryEngineInterface(ABC):
|
|
|
120
122
|
query: The question to reflect on.
|
|
121
123
|
budget: Search budget for retrieving context.
|
|
122
124
|
context: Additional context for the reflection.
|
|
125
|
+
max_tokens: Maximum tokens for the response.
|
|
126
|
+
response_schema: Optional JSON Schema for structured output.
|
|
123
127
|
request_context: Request context for authentication.
|
|
124
128
|
|
|
125
129
|
Returns:
|