hindsight-api 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. hindsight_api/admin/__init__.py +1 -0
  2. hindsight_api/admin/cli.py +252 -0
  3. hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
  4. hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
  5. hindsight_api/api/http.py +282 -20
  6. hindsight_api/api/mcp.py +47 -52
  7. hindsight_api/config.py +238 -6
  8. hindsight_api/engine/cross_encoder.py +599 -86
  9. hindsight_api/engine/db_budget.py +284 -0
  10. hindsight_api/engine/db_utils.py +11 -0
  11. hindsight_api/engine/embeddings.py +453 -26
  12. hindsight_api/engine/entity_resolver.py +8 -5
  13. hindsight_api/engine/interface.py +8 -4
  14. hindsight_api/engine/llm_wrapper.py +241 -27
  15. hindsight_api/engine/memory_engine.py +609 -122
  16. hindsight_api/engine/query_analyzer.py +4 -3
  17. hindsight_api/engine/response_models.py +38 -0
  18. hindsight_api/engine/retain/fact_extraction.py +388 -192
  19. hindsight_api/engine/retain/fact_storage.py +34 -8
  20. hindsight_api/engine/retain/link_utils.py +24 -16
  21. hindsight_api/engine/retain/orchestrator.py +52 -17
  22. hindsight_api/engine/retain/types.py +9 -0
  23. hindsight_api/engine/search/graph_retrieval.py +42 -13
  24. hindsight_api/engine/search/link_expansion_retrieval.py +256 -0
  25. hindsight_api/engine/search/mpfp_retrieval.py +362 -117
  26. hindsight_api/engine/search/reranking.py +2 -2
  27. hindsight_api/engine/search/retrieval.py +847 -200
  28. hindsight_api/engine/search/tags.py +172 -0
  29. hindsight_api/engine/search/think_utils.py +1 -1
  30. hindsight_api/engine/search/trace.py +12 -0
  31. hindsight_api/engine/search/tracer.py +24 -1
  32. hindsight_api/engine/search/types.py +21 -0
  33. hindsight_api/engine/task_backend.py +109 -18
  34. hindsight_api/engine/utils.py +1 -1
  35. hindsight_api/extensions/context.py +10 -1
  36. hindsight_api/main.py +56 -4
  37. hindsight_api/metrics.py +433 -48
  38. hindsight_api/migrations.py +141 -1
  39. hindsight_api/models.py +3 -1
  40. hindsight_api/pg0.py +53 -0
  41. hindsight_api/server.py +39 -2
  42. {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/METADATA +5 -1
  43. hindsight_api-0.3.0.dist-info/RECORD +82 -0
  44. {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/entry_points.txt +1 -0
  45. hindsight_api-0.2.0.dist-info/RECORD +0 -75
  46. {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/WHEEL +0 -0
hindsight_api/api/mcp.py CHANGED
@@ -8,7 +8,6 @@ from contextvars import ContextVar
8
8
  from fastmcp import FastMCP
9
9
 
10
10
  from hindsight_api import MemoryEngine
11
- from hindsight_api.api.http import BankListItem, BankListResponse, BankProfileResponse, DispositionTraits
12
11
  from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES
13
12
  from hindsight_api.models import RequestContext
14
13
 
@@ -54,7 +53,12 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
54
53
  mcp = FastMCP("hindsight-mcp-server", stateless_http=True)
55
54
 
56
55
  @mcp.tool()
57
- async def retain(content: str, context: str = "general", bank_id: str | None = None) -> str:
56
+ async def retain(
57
+ content: str,
58
+ context: str = "general",
59
+ async_processing: bool = True,
60
+ bank_id: str | None = None,
61
+ ) -> str:
58
62
  """
59
63
  Store important information to long-term memory.
60
64
 
@@ -70,18 +74,28 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
70
74
  Args:
71
75
  content: The fact/memory to store (be specific and include relevant details)
72
76
  context: Category for the memory (e.g., 'preferences', 'work', 'hobbies', 'family'). Default: 'general'
77
+ async_processing: If True, queue for background processing and return immediately. If False, wait for completion. Default: True
73
78
  bank_id: Optional bank to store in (defaults to session bank). Use for cross-bank operations.
74
79
  """
75
80
  try:
76
81
  target_bank = bank_id or get_current_bank_id()
77
82
  if target_bank is None:
78
83
  return "Error: No bank_id configured"
79
- await memory.retain_batch_async(
80
- bank_id=target_bank,
81
- contents=[{"content": content, "context": context}],
82
- request_context=RequestContext(),
83
- )
84
- return f"Memory stored successfully in bank '{target_bank}'"
84
+ contents = [{"content": content, "context": context}]
85
+ if async_processing:
86
+ # Queue for background processing and return immediately
87
+ result = await memory.submit_async_retain(
88
+ bank_id=target_bank, contents=contents, request_context=RequestContext()
89
+ )
90
+ return f"Memory queued for background processing (operation_id: {result.get('operation_id', 'N/A')})"
91
+ else:
92
+ # Wait for completion
93
+ await memory.retain_batch_async(
94
+ bank_id=target_bank,
95
+ contents=contents,
96
+ request_context=RequestContext(),
97
+ )
98
+ return f"Memory stored successfully in bank '{target_bank}'"
85
99
  except Exception as e:
86
100
  logger.error(f"Error storing memory: {e}", exc_info=True)
87
101
  return f"Error: {str(e)}"
@@ -178,28 +192,15 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
178
192
  """
179
193
  List all available memory banks.
180
194
 
181
- Use this to discover banks for orchestration or to find
182
- the correct bank_id for cross-bank operations.
195
+ Use this tool to discover what memory banks exist in the system.
196
+ Each bank is an isolated memory store (like a separate "brain").
183
197
 
184
198
  Returns:
185
- JSON object with banks array containing bank_id, name, disposition, background, and timestamps
199
+ JSON list of banks with their IDs, names, dispositions, and backgrounds.
186
200
  """
187
201
  try:
188
202
  banks = await memory.list_banks(request_context=RequestContext())
189
- bank_items = [
190
- BankListItem(
191
- bank_id=b.get("bank_id") or b.get("id"),
192
- name=b.get("name"),
193
- disposition=DispositionTraits(
194
- **b.get("disposition", {"skepticism": 3, "literalism": 3, "empathy": 3})
195
- ),
196
- background=b.get("background"),
197
- created_at=str(b.get("created_at")) if b.get("created_at") else None,
198
- updated_at=str(b.get("updated_at")) if b.get("updated_at") else None,
199
- )
200
- for b in banks
201
- ]
202
- return BankListResponse(banks=bank_items).model_dump_json(indent=2)
203
+ return json.dumps({"banks": banks}, indent=2)
203
204
  except Exception as e:
204
205
  logger.error(f"Error listing banks: {e}", exc_info=True)
205
206
  return f'{{"error": "{e}", "banks": []}}'
@@ -207,44 +208,38 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
207
208
  @mcp.tool()
208
209
  async def create_bank(bank_id: str, name: str | None = None, background: str | None = None) -> str:
209
210
  """
210
- Create or update a memory bank.
211
+ Create a new memory bank or get an existing one.
211
212
 
212
- Use this to create new banks for different agents, sessions, or purposes.
213
- Banks are isolated memory stores - each bank has its own memories and personality.
213
+ Memory banks are isolated stores - each one is like a separate "brain" for a user/agent.
214
+ Banks are auto-created with default settings if they don't exist.
214
215
 
215
216
  Args:
216
- bank_id: Unique identifier for the bank (e.g., 'orchestrator-memory', 'agent-1')
217
- name: Human-readable name for the bank
218
- background: Context about what this bank stores or its purpose
217
+ bank_id: Unique identifier for the bank (e.g., 'user-123', 'agent-alpha')
218
+ name: Optional human-friendly name for the bank
219
+ background: Optional background context about the bank's owner/purpose
219
220
  """
220
221
  try:
221
- # Get or create the bank profile (auto-creates with defaults)
222
- await memory.get_bank_profile(bank_id, request_context=RequestContext())
222
+ # get_bank_profile auto-creates bank if it doesn't exist
223
+ profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
223
224
 
224
- # Update name and/or background if provided
225
+ # Update name/background if provided
225
226
  if name is not None or background is not None:
226
- await memory.update_bank(bank_id, name=name, background=background, request_context=RequestContext())
227
-
228
- # Get final profile and return using BankProfileResponse model
229
- profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
230
- disposition = profile.get("disposition")
231
- if hasattr(disposition, "model_dump"):
232
- disposition_traits = DispositionTraits(**disposition.model_dump())
233
- else:
234
- disposition_traits = DispositionTraits(
235
- **dict(disposition or {"skepticism": 3, "literalism": 3, "empathy": 3})
227
+ await memory.update_bank(
228
+ bank_id,
229
+ name=name,
230
+ background=background,
231
+ request_context=RequestContext(),
236
232
  )
233
+ # Fetch updated profile
234
+ profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
237
235
 
238
- response = BankProfileResponse(
239
- bank_id=bank_id,
240
- name=profile.get("name") or "",
241
- disposition=disposition_traits,
242
- background=profile.get("background") or "",
243
- )
244
- return response.model_dump_json(indent=2)
236
+ # Serialize disposition if it's a Pydantic model
237
+ if "disposition" in profile and hasattr(profile["disposition"], "model_dump"):
238
+ profile["disposition"] = profile["disposition"].model_dump()
239
+ return json.dumps(profile, indent=2)
245
240
  except Exception as e:
246
241
  logger.error(f"Error creating bank: {e}", exc_info=True)
247
- return json.dumps({"error": str(e)})
242
+ return f'{{"error": "{e}"}}'
248
243
 
249
244
  return mcp
250
245
 
hindsight_api/config.py CHANGED
@@ -8,6 +8,11 @@ import logging
8
8
  import os
9
9
  from dataclasses import dataclass
10
10
 
11
+ from dotenv import find_dotenv, load_dotenv
12
+
13
+ # Load .env file, searching current and parent directories (overrides existing env vars)
14
+ load_dotenv(find_dotenv(usecwd=True), override=True)
15
+
11
16
  logger = logging.getLogger(__name__)
12
17
 
13
18
  # Environment variable names
@@ -18,20 +23,57 @@ ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
18
23
  ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
19
24
  ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
20
25
  ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
26
+ ENV_LLM_GROQ_SERVICE_TIER = "HINDSIGHT_API_LLM_GROQ_SERVICE_TIER"
27
+
28
+ # Per-operation LLM configuration (optional, falls back to global LLM config)
29
+ ENV_RETAIN_LLM_PROVIDER = "HINDSIGHT_API_RETAIN_LLM_PROVIDER"
30
+ ENV_RETAIN_LLM_API_KEY = "HINDSIGHT_API_RETAIN_LLM_API_KEY"
31
+ ENV_RETAIN_LLM_MODEL = "HINDSIGHT_API_RETAIN_LLM_MODEL"
32
+ ENV_RETAIN_LLM_BASE_URL = "HINDSIGHT_API_RETAIN_LLM_BASE_URL"
33
+
34
+ ENV_REFLECT_LLM_PROVIDER = "HINDSIGHT_API_REFLECT_LLM_PROVIDER"
35
+ ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
36
+ ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
37
+ ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
21
38
 
22
39
  ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
23
40
  ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
24
41
  ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
42
+ ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
43
+ ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
44
+ ENV_EMBEDDINGS_OPENAI_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL"
45
+
46
+ ENV_COHERE_API_KEY = "HINDSIGHT_API_COHERE_API_KEY"
47
+ ENV_EMBEDDINGS_COHERE_MODEL = "HINDSIGHT_API_EMBEDDINGS_COHERE_MODEL"
48
+ ENV_EMBEDDINGS_COHERE_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_COHERE_BASE_URL"
49
+ ENV_RERANKER_COHERE_MODEL = "HINDSIGHT_API_RERANKER_COHERE_MODEL"
50
+ ENV_RERANKER_COHERE_BASE_URL = "HINDSIGHT_API_RERANKER_COHERE_BASE_URL"
51
+
52
+ # LiteLLM gateway configuration (for embeddings and reranker via LiteLLM proxy)
53
+ ENV_LITELLM_API_BASE = "HINDSIGHT_API_LITELLM_API_BASE"
54
+ ENV_LITELLM_API_KEY = "HINDSIGHT_API_LITELLM_API_KEY"
55
+ ENV_EMBEDDINGS_LITELLM_MODEL = "HINDSIGHT_API_EMBEDDINGS_LITELLM_MODEL"
56
+ ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"
25
57
 
26
58
  ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
27
59
  ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
60
+ ENV_RERANKER_LOCAL_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT"
28
61
  ENV_RERANKER_TEI_URL = "HINDSIGHT_API_RERANKER_TEI_URL"
62
+ ENV_RERANKER_TEI_BATCH_SIZE = "HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE"
63
+ ENV_RERANKER_TEI_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_TEI_MAX_CONCURRENT"
64
+ ENV_RERANKER_MAX_CANDIDATES = "HINDSIGHT_API_RERANKER_MAX_CANDIDATES"
65
+ ENV_RERANKER_FLASHRANK_MODEL = "HINDSIGHT_API_RERANKER_FLASHRANK_MODEL"
66
+ ENV_RERANKER_FLASHRANK_CACHE_DIR = "HINDSIGHT_API_RERANKER_FLASHRANK_CACHE_DIR"
29
67
 
30
68
  ENV_HOST = "HINDSIGHT_API_HOST"
31
69
  ENV_PORT = "HINDSIGHT_API_PORT"
32
70
  ENV_LOG_LEVEL = "HINDSIGHT_API_LOG_LEVEL"
71
+ ENV_WORKERS = "HINDSIGHT_API_WORKERS"
33
72
  ENV_MCP_ENABLED = "HINDSIGHT_API_MCP_ENABLED"
34
73
  ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
74
+ ENV_MPFP_TOP_K_NEIGHBORS = "HINDSIGHT_API_MPFP_TOP_K_NEIGHBORS"
75
+ ENV_RECALL_MAX_CONCURRENT = "HINDSIGHT_API_RECALL_MAX_CONCURRENT"
76
+ ENV_RECALL_CONNECTION_BUDGET = "HINDSIGHT_API_RECALL_CONNECTION_BUDGET"
35
77
  ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
36
78
  ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
37
79
 
@@ -39,10 +81,31 @@ ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
39
81
  ENV_OBSERVATION_MIN_FACTS = "HINDSIGHT_API_OBSERVATION_MIN_FACTS"
40
82
  ENV_OBSERVATION_TOP_ENTITIES = "HINDSIGHT_API_OBSERVATION_TOP_ENTITIES"
41
83
 
84
+ # Retain settings
85
+ ENV_RETAIN_MAX_COMPLETION_TOKENS = "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"
86
+ ENV_RETAIN_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_CHUNK_SIZE"
87
+ ENV_RETAIN_EXTRACT_CAUSAL_LINKS = "HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS"
88
+ ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE"
89
+ ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
90
+
42
91
  # Optimization flags
43
92
  ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
44
93
  ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
45
94
 
95
+ # Database migrations
96
+ ENV_RUN_MIGRATIONS_ON_STARTUP = "HINDSIGHT_API_RUN_MIGRATIONS_ON_STARTUP"
97
+
98
+ # Database connection pool
99
+ ENV_DB_POOL_MIN_SIZE = "HINDSIGHT_API_DB_POOL_MIN_SIZE"
100
+ ENV_DB_POOL_MAX_SIZE = "HINDSIGHT_API_DB_POOL_MAX_SIZE"
101
+ ENV_DB_COMMAND_TIMEOUT = "HINDSIGHT_API_DB_COMMAND_TIMEOUT"
102
+ ENV_DB_ACQUIRE_TIMEOUT = "HINDSIGHT_API_DB_ACQUIRE_TIMEOUT"
103
+
104
+ # Background task processing
105
+ ENV_TASK_BACKEND = "HINDSIGHT_API_TASK_BACKEND"
106
+ ENV_TASK_BACKEND_MEMORY_BATCH_SIZE = "HINDSIGHT_API_TASK_BACKEND_MEMORY_BATCH_SIZE"
107
+ ENV_TASK_BACKEND_MEMORY_BATCH_INTERVAL = "HINDSIGHT_API_TASK_BACKEND_MEMORY_BATCH_INTERVAL"
108
+
46
109
  # Default values
47
110
  DEFAULT_DATABASE_URL = "pg0"
48
111
  DEFAULT_LLM_PROVIDER = "openai"
@@ -52,21 +115,63 @@ DEFAULT_LLM_TIMEOUT = 120.0 # seconds
52
115
 
53
116
  DEFAULT_EMBEDDINGS_PROVIDER = "local"
54
117
  DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
118
+ DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
119
+ DEFAULT_EMBEDDING_DIMENSION = 384
55
120
 
56
121
  DEFAULT_RERANKER_PROVIDER = "local"
57
122
  DEFAULT_RERANKER_LOCAL_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
123
+ DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT = 4 # Limit concurrent CPU-bound reranking to prevent thrashing
124
+ DEFAULT_RERANKER_TEI_BATCH_SIZE = 128
125
+ DEFAULT_RERANKER_TEI_MAX_CONCURRENT = 8
126
+ DEFAULT_RERANKER_MAX_CANDIDATES = 300
127
+ DEFAULT_RERANKER_FLASHRANK_MODEL = "ms-marco-MiniLM-L-12-v2" # Best balance of speed and quality
128
+ DEFAULT_RERANKER_FLASHRANK_CACHE_DIR = None # Use default cache directory
129
+
130
+ DEFAULT_EMBEDDINGS_COHERE_MODEL = "embed-english-v3.0"
131
+ DEFAULT_RERANKER_COHERE_MODEL = "rerank-english-v3.0"
132
+
133
+ # LiteLLM defaults
134
+ DEFAULT_LITELLM_API_BASE = "http://localhost:4000"
135
+ DEFAULT_EMBEDDINGS_LITELLM_MODEL = "text-embedding-3-small"
136
+ DEFAULT_RERANKER_LITELLM_MODEL = "cohere/rerank-english-v3.0"
58
137
 
59
138
  DEFAULT_HOST = "0.0.0.0"
60
139
  DEFAULT_PORT = 8888
61
140
  DEFAULT_LOG_LEVEL = "info"
141
+ DEFAULT_WORKERS = 1
62
142
  DEFAULT_MCP_ENABLED = True
63
- DEFAULT_GRAPH_RETRIEVER = "bfs" # Options: "bfs", "mpfp"
143
+ DEFAULT_GRAPH_RETRIEVER = "link_expansion" # Options: "link_expansion", "mpfp", "bfs"
144
+ DEFAULT_MPFP_TOP_K_NEIGHBORS = 20 # Fan-out limit per node in MPFP graph traversal
145
+ DEFAULT_RECALL_MAX_CONCURRENT = 32 # Max concurrent recall operations per worker
146
+ DEFAULT_RECALL_CONNECTION_BUDGET = 4 # Max concurrent DB connections per recall operation
64
147
  DEFAULT_MCP_LOCAL_BANK_ID = "mcp"
65
148
 
66
149
  # Observation thresholds
67
150
  DEFAULT_OBSERVATION_MIN_FACTS = 5 # Min facts required to generate entity observations
68
151
  DEFAULT_OBSERVATION_TOP_ENTITIES = 5 # Max entities to process per retain batch
69
152
 
153
+ # Retain settings
154
+ DEFAULT_RETAIN_MAX_COMPLETION_TOKENS = 64000 # Max tokens for fact extraction LLM call
155
+ DEFAULT_RETAIN_CHUNK_SIZE = 3000 # Max chars per chunk for fact extraction
156
+ DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS = True # Extract causal links between facts
157
+ DEFAULT_RETAIN_EXTRACTION_MODE = "concise" # Extraction mode: "concise" or "verbose"
158
+ RETAIN_EXTRACTION_MODES = ("concise", "verbose") # Allowed extraction modes
159
+ DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False # Run observation generation async (after retain completes)
160
+
161
+ # Database migrations
162
+ DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
163
+
164
+ # Database connection pool
165
+ DEFAULT_DB_POOL_MIN_SIZE = 5
166
+ DEFAULT_DB_POOL_MAX_SIZE = 100
167
+ DEFAULT_DB_COMMAND_TIMEOUT = 60 # seconds
168
+ DEFAULT_DB_ACQUIRE_TIMEOUT = 30 # seconds
169
+
170
+ # Background task processing
171
+ DEFAULT_TASK_BACKEND = "memory" # Options: "memory", "noop"
172
+ DEFAULT_TASK_BACKEND_MEMORY_BATCH_SIZE = 10
173
+ DEFAULT_TASK_BACKEND_MEMORY_BATCH_INTERVAL = 1.0 # seconds
174
+
70
175
  # Default MCP tool descriptions (can be customized via env vars)
71
176
  DEFAULT_MCP_RETAIN_DESCRIPTION = """Store important information to long-term memory.
72
177
 
@@ -87,8 +192,20 @@ Use this tool PROACTIVELY to:
87
192
  - Remember user's goals and context
88
193
  - Personalize responses based on past interactions"""
89
194
 
90
- # Required embedding dimension for database schema
91
- EMBEDDING_DIMENSION = 384
195
+ # Default embedding dimension (used by initial migration, adjusted at runtime)
196
+ EMBEDDING_DIMENSION = DEFAULT_EMBEDDING_DIMENSION
197
+
198
+
199
+ def _validate_extraction_mode(mode: str) -> str:
200
+ """Validate and normalize extraction mode."""
201
+ mode_lower = mode.lower()
202
+ if mode_lower not in RETAIN_EXTRACTION_MODES:
203
+ logger.warning(
204
+ f"Invalid extraction mode '{mode}', must be one of {RETAIN_EXTRACTION_MODES}. "
205
+ f"Defaulting to '{DEFAULT_RETAIN_EXTRACTION_MODE}'."
206
+ )
207
+ return DEFAULT_RETAIN_EXTRACTION_MODE
208
+ return mode_lower
92
209
 
93
210
 
94
211
  @dataclass
@@ -98,7 +215,7 @@ class HindsightConfig:
98
215
  # Database
99
216
  database_url: str
100
217
 
101
- # LLM
218
+ # LLM (default, used as fallback for per-operation config)
102
219
  llm_provider: str
103
220
  llm_api_key: str | None
104
221
  llm_model: str
@@ -106,15 +223,32 @@ class HindsightConfig:
106
223
  llm_max_concurrent: int
107
224
  llm_timeout: float
108
225
 
226
+ # Per-operation LLM configuration (None = use default LLM config)
227
+ retain_llm_provider: str | None
228
+ retain_llm_api_key: str | None
229
+ retain_llm_model: str | None
230
+ retain_llm_base_url: str | None
231
+
232
+ reflect_llm_provider: str | None
233
+ reflect_llm_api_key: str | None
234
+ reflect_llm_model: str | None
235
+ reflect_llm_base_url: str | None
236
+
109
237
  # Embeddings
110
238
  embeddings_provider: str
111
239
  embeddings_local_model: str
112
240
  embeddings_tei_url: str | None
241
+ embeddings_openai_base_url: str | None
242
+ embeddings_cohere_base_url: str | None
113
243
 
114
244
  # Reranker
115
245
  reranker_provider: str
116
246
  reranker_local_model: str
117
247
  reranker_tei_url: str | None
248
+ reranker_tei_batch_size: int
249
+ reranker_tei_max_concurrent: int
250
+ reranker_max_candidates: int
251
+ reranker_cohere_base_url: str | None
118
252
 
119
253
  # Server
120
254
  host: str
@@ -124,15 +258,39 @@ class HindsightConfig:
124
258
 
125
259
  # Recall
126
260
  graph_retriever: str
261
+ mpfp_top_k_neighbors: int
262
+ recall_max_concurrent: int
263
+ recall_connection_budget: int
127
264
 
128
265
  # Observation thresholds
129
266
  observation_min_facts: int
130
267
  observation_top_entities: int
131
268
 
269
+ # Retain settings
270
+ retain_max_completion_tokens: int
271
+ retain_chunk_size: int
272
+ retain_extract_causal_links: bool
273
+ retain_extraction_mode: str
274
+ retain_observations_async: bool
275
+
132
276
  # Optimization flags
133
277
  skip_llm_verification: bool
134
278
  lazy_reranker: bool
135
279
 
280
+ # Database migrations
281
+ run_migrations_on_startup: bool
282
+
283
+ # Database connection pool
284
+ db_pool_min_size: int
285
+ db_pool_max_size: int
286
+ db_command_timeout: int
287
+ db_acquire_timeout: int
288
+
289
+ # Background task processing
290
+ task_backend: str
291
+ task_backend_memory_batch_size: int
292
+ task_backend_memory_batch_interval: float
293
+
136
294
  @classmethod
137
295
  def from_env(cls) -> "HindsightConfig":
138
296
  """Create configuration from environment variables."""
@@ -146,14 +304,31 @@ class HindsightConfig:
146
304
  llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
147
305
  llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
148
306
  llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
307
+ # Per-operation LLM config (None = use default)
308
+ retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
309
+ retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
310
+ retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
311
+ retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
312
+ reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
313
+ reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
314
+ reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
315
+ reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
149
316
  # Embeddings
150
317
  embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
151
318
  embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
152
319
  embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
320
+ embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
321
+ embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
153
322
  # Reranker
154
323
  reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
155
324
  reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
156
325
  reranker_tei_url=os.getenv(ENV_RERANKER_TEI_URL),
326
+ reranker_tei_batch_size=int(os.getenv(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE))),
327
+ reranker_tei_max_concurrent=int(
328
+ os.getenv(ENV_RERANKER_TEI_MAX_CONCURRENT, str(DEFAULT_RERANKER_TEI_MAX_CONCURRENT))
329
+ ),
330
+ reranker_max_candidates=int(os.getenv(ENV_RERANKER_MAX_CANDIDATES, str(DEFAULT_RERANKER_MAX_CANDIDATES))),
331
+ reranker_cohere_base_url=os.getenv(ENV_RERANKER_COHERE_BASE_URL) or None,
157
332
  # Server
158
333
  host=os.getenv(ENV_HOST, DEFAULT_HOST),
159
334
  port=int(os.getenv(ENV_PORT, DEFAULT_PORT)),
@@ -161,6 +336,11 @@ class HindsightConfig:
161
336
  mcp_enabled=os.getenv(ENV_MCP_ENABLED, str(DEFAULT_MCP_ENABLED)).lower() == "true",
162
337
  # Recall
163
338
  graph_retriever=os.getenv(ENV_GRAPH_RETRIEVER, DEFAULT_GRAPH_RETRIEVER),
339
+ mpfp_top_k_neighbors=int(os.getenv(ENV_MPFP_TOP_K_NEIGHBORS, str(DEFAULT_MPFP_TOP_K_NEIGHBORS))),
340
+ recall_max_concurrent=int(os.getenv(ENV_RECALL_MAX_CONCURRENT, str(DEFAULT_RECALL_MAX_CONCURRENT))),
341
+ recall_connection_budget=int(
342
+ os.getenv(ENV_RECALL_CONNECTION_BUDGET, str(DEFAULT_RECALL_CONNECTION_BUDGET))
343
+ ),
164
344
  # Optimization flags
165
345
  skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
166
346
  lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
@@ -169,6 +349,37 @@ class HindsightConfig:
169
349
  observation_top_entities=int(
170
350
  os.getenv(ENV_OBSERVATION_TOP_ENTITIES, str(DEFAULT_OBSERVATION_TOP_ENTITIES))
171
351
  ),
352
+ # Retain settings
353
+ retain_max_completion_tokens=int(
354
+ os.getenv(ENV_RETAIN_MAX_COMPLETION_TOKENS, str(DEFAULT_RETAIN_MAX_COMPLETION_TOKENS))
355
+ ),
356
+ retain_chunk_size=int(os.getenv(ENV_RETAIN_CHUNK_SIZE, str(DEFAULT_RETAIN_CHUNK_SIZE))),
357
+ retain_extract_causal_links=os.getenv(
358
+ ENV_RETAIN_EXTRACT_CAUSAL_LINKS, str(DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS)
359
+ ).lower()
360
+ == "true",
361
+ retain_extraction_mode=_validate_extraction_mode(
362
+ os.getenv(ENV_RETAIN_EXTRACTION_MODE, DEFAULT_RETAIN_EXTRACTION_MODE)
363
+ ),
364
+ retain_observations_async=os.getenv(
365
+ ENV_RETAIN_OBSERVATIONS_ASYNC, str(DEFAULT_RETAIN_OBSERVATIONS_ASYNC)
366
+ ).lower()
367
+ == "true",
368
+ # Database migrations
369
+ run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
370
+ # Database connection pool
371
+ db_pool_min_size=int(os.getenv(ENV_DB_POOL_MIN_SIZE, str(DEFAULT_DB_POOL_MIN_SIZE))),
372
+ db_pool_max_size=int(os.getenv(ENV_DB_POOL_MAX_SIZE, str(DEFAULT_DB_POOL_MAX_SIZE))),
373
+ db_command_timeout=int(os.getenv(ENV_DB_COMMAND_TIMEOUT, str(DEFAULT_DB_COMMAND_TIMEOUT))),
374
+ db_acquire_timeout=int(os.getenv(ENV_DB_ACQUIRE_TIMEOUT, str(DEFAULT_DB_ACQUIRE_TIMEOUT))),
375
+ # Background task processing
376
+ task_backend=os.getenv(ENV_TASK_BACKEND, DEFAULT_TASK_BACKEND),
377
+ task_backend_memory_batch_size=int(
378
+ os.getenv(ENV_TASK_BACKEND_MEMORY_BATCH_SIZE, str(DEFAULT_TASK_BACKEND_MEMORY_BATCH_SIZE))
379
+ ),
380
+ task_backend_memory_batch_interval=float(
381
+ os.getenv(ENV_TASK_BACKEND_MEMORY_BATCH_INTERVAL, str(DEFAULT_TASK_BACKEND_MEMORY_BATCH_INTERVAL))
382
+ ),
172
383
  )
173
384
 
174
385
  def get_llm_base_url(self) -> str:
@@ -210,11 +421,32 @@ class HindsightConfig:
210
421
  """Log the current configuration (without sensitive values)."""
211
422
  logger.info(f"Database: {self.database_url}")
212
423
  logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")
424
+ if self.retain_llm_provider or self.retain_llm_model:
425
+ retain_provider = self.retain_llm_provider or self.llm_provider
426
+ retain_model = self.retain_llm_model or self.llm_model
427
+ logger.info(f"LLM (retain): provider={retain_provider}, model={retain_model}")
428
+ if self.reflect_llm_provider or self.reflect_llm_model:
429
+ reflect_provider = self.reflect_llm_provider or self.llm_provider
430
+ reflect_model = self.reflect_llm_model or self.llm_model
431
+ logger.info(f"LLM (reflect): provider={reflect_provider}, model={reflect_model}")
213
432
  logger.info(f"Embeddings: provider={self.embeddings_provider}")
214
433
  logger.info(f"Reranker: provider={self.reranker_provider}")
215
434
  logger.info(f"Graph retriever: {self.graph_retriever}")
216
435
 
217
436
 
437
+ # Cached config instance
438
+ _config_cache: HindsightConfig | None = None
439
+
440
+
218
441
  def get_config() -> HindsightConfig:
219
- """Get the current configuration from environment variables."""
220
- return HindsightConfig.from_env()
442
+ """Get the cached configuration, loading from environment on first call."""
443
+ global _config_cache
444
+ if _config_cache is None:
445
+ _config_cache = HindsightConfig.from_env()
446
+ return _config_cache
447
+
448
+
449
+ def clear_config_cache() -> None:
450
+ """Clear the config cache. Useful for testing or reloading config."""
451
+ global _config_cache
452
+ _config_cache = None