hindsight-api 0.1.15__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hindsight_api/api/mcp.py CHANGED
@@ -8,6 +8,7 @@ from contextvars import ContextVar
8
8
  from fastmcp import FastMCP
9
9
 
10
10
  from hindsight_api import MemoryEngine
11
+ from hindsight_api.api.http import BankListItem, BankListResponse, BankProfileResponse, DispositionTraits
11
12
  from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES
12
13
  from hindsight_api.models import RequestContext
13
14
 
@@ -27,12 +28,15 @@ logging.basicConfig(
27
28
  )
28
29
  logger = logging.getLogger(__name__)
29
30
 
30
- # Context variable to hold the current bank_id from the URL path
31
+ # Default bank_id from environment variable
32
+ DEFAULT_BANK_ID = os.environ.get("HINDSIGHT_MCP_BANK_ID", "default")
33
+
34
+ # Context variable to hold the current bank_id
31
35
  _current_bank_id: ContextVar[str | None] = ContextVar("current_bank_id", default=None)
32
36
 
33
37
 
34
38
def get_current_bank_id() -> str | None:
    """Return the bank_id held in the current context.

    Returns:
        The value currently stored in the ``_current_bank_id`` context
        variable, or ``None`` when nothing has been set (the variable's
        declared default).
    """
    active_bank = _current_bank_id.get()
    return active_bank
37
41
 
38
42
 
@@ -44,12 +48,13 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
44
48
  memory: MemoryEngine instance (required)
45
49
 
46
50
  Returns:
47
- Configured FastMCP server instance
51
+ Configured FastMCP server instance with stateless_http enabled
48
52
  """
49
- mcp = FastMCP("hindsight-mcp-server")
53
+ # Use stateless_http=True for Claude Code compatibility
54
+ mcp = FastMCP("hindsight-mcp-server", stateless_http=True)
50
55
 
51
56
  @mcp.tool()
52
- async def retain(content: str, context: str = "general") -> str:
57
+ async def retain(content: str, context: str = "general", bank_id: str | None = None) -> str:
53
58
  """
54
59
  Store important information to long-term memory.
55
60
 
@@ -65,21 +70,24 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
65
70
  Args:
66
71
  content: The fact/memory to store (be specific and include relevant details)
67
72
  context: Category for the memory (e.g., 'preferences', 'work', 'hobbies', 'family'). Default: 'general'
73
+ bank_id: Optional bank to store in (defaults to session bank). Use for cross-bank operations.
68
74
  """
69
75
  try:
70
- bank_id = get_current_bank_id()
71
- if bank_id is None:
76
+ target_bank = bank_id or get_current_bank_id()
77
+ if target_bank is None:
72
78
  return "Error: No bank_id configured"
73
79
  await memory.retain_batch_async(
74
- bank_id=bank_id, contents=[{"content": content, "context": context}], request_context=RequestContext()
80
+ bank_id=target_bank,
81
+ contents=[{"content": content, "context": context}],
82
+ request_context=RequestContext(),
75
83
  )
76
- return "Memory stored successfully"
84
+ return f"Memory stored successfully in bank '{target_bank}'"
77
85
  except Exception as e:
78
86
  logger.error(f"Error storing memory: {e}", exc_info=True)
79
87
  return f"Error: {str(e)}"
80
88
 
81
89
  @mcp.tool()
82
- async def recall(query: str, max_results: int = 10) -> str:
90
+ async def recall(query: str, max_tokens: int = 4096, bank_id: str | None = None) -> str:
83
91
  """
84
92
  Search memories to provide personalized, context-aware responses.
85
93
 
@@ -91,49 +99,184 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
91
99
 
92
100
  Args:
93
101
  query: Natural language search query (e.g., "user's food preferences", "what projects is user working on")
94
- max_results: Maximum number of results to return (default: 10)
102
+ max_tokens: Maximum tokens in the response (default: 4096)
103
+ bank_id: Optional bank to search in (defaults to session bank). Use for cross-bank operations.
95
104
  """
96
105
  try:
97
- bank_id = get_current_bank_id()
98
- if bank_id is None:
106
+ target_bank = bank_id or get_current_bank_id()
107
+ if target_bank is None:
99
108
  return "Error: No bank_id configured"
100
109
  from hindsight_api.engine.memory_engine import Budget
101
110
 
102
- search_result = await memory.recall_async(
103
- bank_id=bank_id,
111
+ recall_result = await memory.recall_async(
112
+ bank_id=target_bank,
104
113
  query=query,
105
114
  fact_type=list(VALID_RECALL_FACT_TYPES),
106
- budget=Budget.LOW,
115
+ budget=Budget.HIGH,
116
+ max_tokens=max_tokens,
117
+ request_context=RequestContext(),
118
+ )
119
+
120
+ # Use model's JSON serialization
121
+ return recall_result.model_dump_json(indent=2)
122
+ except Exception as e:
123
+ logger.error(f"Error searching: {e}", exc_info=True)
124
+ return f'{{"error": "{e}", "results": []}}'
125
+
126
@mcp.tool()
async def reflect(query: str, context: str | None = None, budget: str = "low", bank_id: str | None = None) -> str:
    """
    Generate thoughtful analysis by synthesizing stored memories with the bank's personality.

    WHEN TO USE THIS TOOL:
    Use reflect when you need reasoned analysis, not just fact retrieval. This tool
    thinks through the question using everything the bank knows and its personality traits.

    EXAMPLES OF GOOD QUERIES:
    - "What patterns have emerged in how I approach debugging?"
    - "Based on my past decisions, what architectural style do I prefer?"
    - "What might be the best approach for this problem given what you know about me?"
    - "How should I prioritize these tasks based on my goals?"

    HOW IT DIFFERS FROM RECALL:
    - recall: Returns raw facts matching your search (fast lookup)
    - reflect: Reasons across memories to form a synthesized answer (deeper analysis)

    Use recall for "what did I say about X?" and reflect for "what should I do about X?"

    Args:
        query: The question or topic to reflect on
        context: Optional context about why this reflection is needed
        budget: Search budget - 'low', 'mid', or 'high' (default: 'low')
        bank_id: Optional bank to reflect in (defaults to session bank). Use for cross-bank operations.
    """
    # NOTE(review): the docstring above is kept word-for-word because it is
    # presumably served to MCP clients as the tool description - confirm
    # against FastMCP before rewording it.
    try:
        # An explicit bank_id argument wins over the bank bound to the request context.
        target_bank = bank_id or get_current_bank_id()
        if target_bank is None:
            return "Error: No bank_id configured"
        from hindsight_api.engine.memory_engine import Budget

        # Map string budget to enum; unrecognised values fall back to LOW.
        budget_map = {"low": Budget.LOW, "mid": Budget.MID, "high": Budget.HIGH}
        budget_enum = budget_map.get(budget.lower(), Budget.LOW)

        reflect_result = await memory.reflect_async(
            bank_id=target_bank,
            query=query,
            budget=budget_enum,
            context=context,
            request_context=RequestContext(),
        )

        return reflect_result.model_dump_json(indent=2)
    except Exception as e:
        logger.error(f"Error reflecting: {e}", exc_info=True)
        # Serialise with json.dumps rather than string interpolation: exception
        # messages may contain quotes, backslashes or newlines, which would
        # otherwise yield invalid JSON for the client.
        return json.dumps({"error": str(e), "text": ""})
175
+
176
@mcp.tool()
async def list_banks() -> str:
    """
    List all available memory banks.

    Use this to discover banks for orchestration or to find
    the correct bank_id for cross-bank operations.

    Returns:
        JSON object with banks array containing bank_id, name, disposition, background, and timestamps
    """
    # NOTE(review): the docstring above is kept word-for-word because it is
    # presumably served to MCP clients as the tool description - confirm
    # against FastMCP before rewording it.
    try:
        banks = await memory.list_banks(request_context=RequestContext())
        bank_items = [
            BankListItem(
                # Rows may expose the identifier under either key.
                bank_id=b.get("bank_id") or b.get("id"),
                name=b.get("name"),
                disposition=DispositionTraits(
                    # `or` (not a .get default) so a stored None/empty disposition
                    # also falls back to neutral traits, matching create_bank.
                    **(b.get("disposition") or {"skepticism": 3, "literalism": 3, "empathy": 3})
                ),
                background=b.get("background"),
                # Timestamps are stringified when present and left as None otherwise.
                created_at=str(b.get("created_at")) if b.get("created_at") else None,
                updated_at=str(b.get("updated_at")) if b.get("updated_at") else None,
            )
            for b in banks
        ]
        return BankListResponse(banks=bank_items).model_dump_json(indent=2)
    except Exception as e:
        logger.error(f"Error listing banks: {e}", exc_info=True)
        # Serialise with json.dumps rather than string interpolation so that
        # quotes or newlines in the exception message cannot produce invalid JSON.
        return json.dumps({"error": str(e), "banks": []})
120
206
 
121
- return json.dumps({"results": results}, indent=2)
207
@mcp.tool()
async def create_bank(bank_id: str, name: str | None = None, background: str | None = None) -> str:
    """
    Create or update a memory bank.

    Use this to create new banks for different agents, sessions, or purposes.
    Banks are isolated memory stores - each bank has its own memories and personality.

    Args:
        bank_id: Unique identifier for the bank (e.g., 'orchestrator-memory', 'agent-1')
        name: Human-readable name for the bank
        background: Context about what this bank stores or its purpose
    """
    # NOTE(review): the docstring above is kept word-for-word because it is
    # presumably served to MCP clients as the tool description - confirm
    # against FastMCP before rewording it.
    try:
        # Fetch the profile once up front; the original note says this lookup
        # auto-creates the bank with defaults when it does not exist yet.
        await memory.get_bank_profile(bank_id, request_context=RequestContext())

        # Only issue an update when the caller supplied something to change.
        nothing_to_update = name is None and background is None
        if not nothing_to_update:
            await memory.update_bank(bank_id, name=name, background=background, request_context=RequestContext())

        # Re-read the profile so the response reflects any update just applied.
        profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
        raw_disposition = profile.get("disposition")
        if hasattr(raw_disposition, "model_dump"):
            # Model-like object: flatten it to plain keyword arguments.
            trait_kwargs = raw_disposition.model_dump()
        else:
            # Mapping or missing value: fall back to neutral traits.
            trait_kwargs = dict(raw_disposition or {"skepticism": 3, "literalism": 3, "empathy": 3})
        disposition_traits = DispositionTraits(**trait_kwargs)

        return BankProfileResponse(
            bank_id=bank_id,
            name=profile.get("name") or "",
            disposition=disposition_traits,
            background=profile.get("background") or "",
        ).model_dump_json(indent=2)
    except Exception as e:
        logger.error(f"Error creating bank: {e}", exc_info=True)
        return json.dumps({"error": str(e)})
125
248
 
126
249
  return mcp
127
250
 
128
251
 
129
252
  class MCPMiddleware:
130
- """ASGI middleware that extracts bank_id from path and sets context."""
253
+ """ASGI middleware that extracts bank_id from header or path and sets context.
254
+
255
+ Bank ID can be provided via:
256
+ 1. X-Bank-Id header (recommended for Claude Code)
257
+ 2. URL path: /mcp/{bank_id}/
258
+ 3. Environment variable HINDSIGHT_MCP_BANK_ID (fallback default)
259
+
260
+ For Claude Code, configure with:
261
+ claude mcp add --transport http hindsight http://localhost:8888/mcp \\
262
+ --header "X-Bank-Id: my-bank"
263
+ """
131
264
 
132
265
def __init__(self, app, memory: MemoryEngine):
    """Wrap ``app`` and build the MCP server plus its HTTP sub-application.

    Args:
        app: The wrapped ASGI application, stored as ``self.app``.
        memory: MemoryEngine instance handed to ``create_mcp_server``.
    """
    self.app = app
    self.memory = memory

    server = create_mcp_server(memory)
    self.mcp_server = server
    # The MCP HTTP app is created with its endpoint at "/".
    self.mcp_app = server.http_app(path="/")

    # Expose the lifespan for the parent app to chain; None when the MCP app
    # does not provide a ``lifespan_handler`` attribute.
    self.lifespan = getattr(self.mcp_app, "lifespan_handler", None)
272
+
273
+ def _get_header(self, scope: dict, name: str) -> str | None:
274
+ """Extract a header value from ASGI scope."""
275
+ name_lower = name.lower().encode()
276
+ for header_name, header_value in scope.get("headers", []):
277
+ if header_name.lower() == name_lower:
278
+ return header_value.decode()
279
+ return None
137
280
 
138
281
  async def __call__(self, scope, receive, send):
139
282
  if scope["type"] != "http":
@@ -150,32 +293,39 @@ class MCPMiddleware:
150
293
  # Also handle case where mount path wasn't stripped (e.g., /mcp/...)
151
294
  if path.startswith("/mcp/"):
152
295
  path = path[4:] # Remove /mcp prefix
153
-
154
- # Extract bank_id from path: /{bank_id}/ or /{bank_id}
155
- # http_app expects requests at /
156
- if not path.startswith("/") or len(path) <= 1:
157
- # No bank_id in path - return error
158
- await self._send_error(send, 400, "bank_id required in path: /mcp/{bank_id}/")
159
- return
160
-
161
- # Extract bank_id from first path segment
162
- parts = path[1:].split("/", 1)
163
- if not parts[0]:
164
- await self._send_error(send, 400, "bank_id required in path: /mcp/{bank_id}/")
165
- return
166
-
167
- bank_id = parts[0]
168
- new_path = "/" + parts[1] if len(parts) > 1 else "/"
296
+ elif path == "/mcp":
297
+ path = "/"
298
+
299
+ # Try to get bank_id from header first (for Claude Code compatibility)
300
+ bank_id = self._get_header(scope, "X-Bank-Id")
301
+
302
+ # MCP endpoint paths that should not be treated as bank_ids
303
+ MCP_ENDPOINTS = {"sse", "messages"}
304
+
305
+ # If no header, try to extract from path: /{bank_id}/...
306
+ new_path = path
307
+ if not bank_id and path.startswith("/") and len(path) > 1:
308
+ parts = path[1:].split("/", 1)
309
+ # Don't treat MCP endpoints as bank_ids
310
+ if parts[0] and parts[0] not in MCP_ENDPOINTS:
311
+ # First segment looks like a bank_id
312
+ bank_id = parts[0]
313
+ new_path = "/" + parts[1] if len(parts) > 1 else "/"
314
+
315
+ # Fall back to default bank_id
316
+ if not bank_id:
317
+ bank_id = DEFAULT_BANK_ID
318
+ logger.debug(f"Using default bank_id: {bank_id}")
169
319
 
170
320
  # Set bank_id context
171
321
  token = _current_bank_id.set(bank_id)
172
322
  try:
173
323
  new_scope = scope.copy()
174
324
  new_scope["path"] = new_path
325
+ # Clear root_path since we're passing directly to the app
326
+ new_scope["root_path"] = ""
175
327
 
176
- # Wrap send to rewrite the SSE endpoint URL to include bank_id
177
- # The SSE app sends "event: endpoint\ndata: /messages\n" but we need
178
- # the client to POST to /{bank_id}/messages instead
328
+ # Wrap send to rewrite the SSE endpoint URL to include bank_id if using path-based routing
179
329
  async def send_wrapper(message):
180
330
  if message["type"] == "http.response.body":
181
331
  body = message.get("body", b"")
@@ -211,9 +361,10 @@ def create_mcp_app(memory: MemoryEngine):
211
361
  """
212
362
  Create an ASGI app that handles MCP requests.
213
363
 
214
- URL pattern: /mcp/{bank_id}/
215
-
216
- The bank_id is extracted from the URL path and made available to tools.
364
+ Bank ID can be provided via:
365
+ 1. X-Bank-Id header: claude mcp add --transport http hindsight http://localhost:8888/mcp --header "X-Bank-Id: my-bank"
366
+ 2. URL path: /mcp/{bank_id}/
367
+ 3. Environment variable HINDSIGHT_MCP_BANK_ID (fallback, default: "default")
217
368
 
218
369
  Args:
219
370
  memory: MemoryEngine instance
hindsight_api/config.py CHANGED
@@ -16,6 +16,8 @@ ENV_LLM_PROVIDER = "HINDSIGHT_API_LLM_PROVIDER"
16
16
  ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
17
17
  ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
18
18
  ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
19
+ ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
20
+ ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
19
21
 
20
22
  ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
21
23
  ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
@@ -33,6 +35,10 @@ ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
33
35
  ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
34
36
  ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
35
37
 
38
+ # Observation thresholds
39
+ ENV_OBSERVATION_MIN_FACTS = "HINDSIGHT_API_OBSERVATION_MIN_FACTS"
40
+ ENV_OBSERVATION_TOP_ENTITIES = "HINDSIGHT_API_OBSERVATION_TOP_ENTITIES"
41
+
36
42
  # Optimization flags
37
43
  ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
38
44
  ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
@@ -41,6 +47,8 @@ ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
41
47
  DEFAULT_DATABASE_URL = "pg0"
42
48
  DEFAULT_LLM_PROVIDER = "openai"
43
49
  DEFAULT_LLM_MODEL = "gpt-5-mini"
50
+ DEFAULT_LLM_MAX_CONCURRENT = 32
51
+ DEFAULT_LLM_TIMEOUT = 120.0 # seconds
44
52
 
45
53
  DEFAULT_EMBEDDINGS_PROVIDER = "local"
46
54
  DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
@@ -55,6 +63,10 @@ DEFAULT_MCP_ENABLED = True
55
63
  DEFAULT_GRAPH_RETRIEVER = "bfs" # Options: "bfs", "mpfp"
56
64
  DEFAULT_MCP_LOCAL_BANK_ID = "mcp"
57
65
 
66
+ # Observation thresholds
67
+ DEFAULT_OBSERVATION_MIN_FACTS = 5 # Min facts required to generate entity observations
68
+ DEFAULT_OBSERVATION_TOP_ENTITIES = 5 # Max entities to process per retain batch
69
+
58
70
  # Default MCP tool descriptions (can be customized via env vars)
59
71
  DEFAULT_MCP_RETAIN_DESCRIPTION = """Store important information to long-term memory.
60
72
 
@@ -91,6 +103,8 @@ class HindsightConfig:
91
103
  llm_api_key: str | None
92
104
  llm_model: str
93
105
  llm_base_url: str | None
106
+ llm_max_concurrent: int
107
+ llm_timeout: float
94
108
 
95
109
  # Embeddings
96
110
  embeddings_provider: str
@@ -111,6 +125,10 @@ class HindsightConfig:
111
125
  # Recall
112
126
  graph_retriever: str
113
127
 
128
+ # Observation thresholds
129
+ observation_min_facts: int
130
+ observation_top_entities: int
131
+
114
132
  # Optimization flags
115
133
  skip_llm_verification: bool
116
134
  lazy_reranker: bool
@@ -126,6 +144,8 @@ class HindsightConfig:
126
144
  llm_api_key=os.getenv(ENV_LLM_API_KEY),
127
145
  llm_model=os.getenv(ENV_LLM_MODEL, DEFAULT_LLM_MODEL),
128
146
  llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
147
+ llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
148
+ llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
129
149
  # Embeddings
130
150
  embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
131
151
  embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
@@ -144,6 +164,11 @@ class HindsightConfig:
144
164
  # Optimization flags
145
165
  skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
146
166
  lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
167
+ # Observation thresholds
168
+ observation_min_facts=int(os.getenv(ENV_OBSERVATION_MIN_FACTS, str(DEFAULT_OBSERVATION_MIN_FACTS))),
169
+ observation_top_entities=int(
170
+ os.getenv(ENV_OBSERVATION_TOP_ENTITIES, str(DEFAULT_OBSERVATION_TOP_ENTITIES))
171
+ ),
147
172
  )
148
173
 
149
174
  def get_llm_base_url(self) -> str:
@@ -156,6 +181,8 @@ class HindsightConfig:
156
181
  return "https://api.groq.com/openai/v1"
157
182
  elif provider == "ollama":
158
183
  return "http://localhost:11434/v1"
184
+ elif provider == "lmstudio":
185
+ return "http://localhost:1234/v1"
159
186
  else:
160
187
  return ""
161
188
 
@@ -110,6 +110,8 @@ class MemoryEngineInterface(ABC):
110
110
  *,
111
111
  budget: "Budget | None" = None,
112
112
  context: str | None = None,
113
+ max_tokens: int = 4096,
114
+ response_schema: dict | None = None,
113
115
  request_context: "RequestContext",
114
116
  ) -> "ReflectResult":
115
117
  """
@@ -120,6 +122,8 @@ class MemoryEngineInterface(ABC):
120
122
  query: The question to reflect on.
121
123
  budget: Search budget for retrieving context.
122
124
  context: Additional context for the reflection.
125
+ max_tokens: Maximum tokens for the response.
126
+ response_schema: Optional JSON Schema for structured output.
123
127
  request_context: Request context for authentication.
124
128
 
125
129
  Returns: