hindsight-api 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/admin/__init__.py +1 -0
- hindsight_api/admin/cli.py +252 -0
- hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
- hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
- hindsight_api/api/http.py +282 -20
- hindsight_api/api/mcp.py +47 -52
- hindsight_api/config.py +238 -6
- hindsight_api/engine/cross_encoder.py +599 -86
- hindsight_api/engine/db_budget.py +284 -0
- hindsight_api/engine/db_utils.py +11 -0
- hindsight_api/engine/embeddings.py +453 -26
- hindsight_api/engine/entity_resolver.py +8 -5
- hindsight_api/engine/interface.py +8 -4
- hindsight_api/engine/llm_wrapper.py +241 -27
- hindsight_api/engine/memory_engine.py +609 -122
- hindsight_api/engine/query_analyzer.py +4 -3
- hindsight_api/engine/response_models.py +38 -0
- hindsight_api/engine/retain/fact_extraction.py +388 -192
- hindsight_api/engine/retain/fact_storage.py +34 -8
- hindsight_api/engine/retain/link_utils.py +24 -16
- hindsight_api/engine/retain/orchestrator.py +52 -17
- hindsight_api/engine/retain/types.py +9 -0
- hindsight_api/engine/search/graph_retrieval.py +42 -13
- hindsight_api/engine/search/link_expansion_retrieval.py +256 -0
- hindsight_api/engine/search/mpfp_retrieval.py +362 -117
- hindsight_api/engine/search/reranking.py +2 -2
- hindsight_api/engine/search/retrieval.py +847 -200
- hindsight_api/engine/search/tags.py +172 -0
- hindsight_api/engine/search/think_utils.py +1 -1
- hindsight_api/engine/search/trace.py +12 -0
- hindsight_api/engine/search/tracer.py +24 -1
- hindsight_api/engine/search/types.py +21 -0
- hindsight_api/engine/task_backend.py +109 -18
- hindsight_api/engine/utils.py +1 -1
- hindsight_api/extensions/context.py +10 -1
- hindsight_api/main.py +56 -4
- hindsight_api/metrics.py +433 -48
- hindsight_api/migrations.py +141 -1
- hindsight_api/models.py +3 -1
- hindsight_api/pg0.py +53 -0
- hindsight_api/server.py +39 -2
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.3.0.dist-info}/METADATA +5 -1
- hindsight_api-0.3.0.dist-info/RECORD +82 -0
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.3.0.dist-info}/entry_points.txt +1 -0
- hindsight_api-0.2.1.dist-info/RECORD +0 -75
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.3.0.dist-info}/WHEEL +0 -0
hindsight_api/api/mcp.py
CHANGED
|
@@ -8,7 +8,6 @@ from contextvars import ContextVar
|
|
|
8
8
|
from fastmcp import FastMCP
|
|
9
9
|
|
|
10
10
|
from hindsight_api import MemoryEngine
|
|
11
|
-
from hindsight_api.api.http import BankListItem, BankListResponse, BankProfileResponse, DispositionTraits
|
|
12
11
|
from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES
|
|
13
12
|
from hindsight_api.models import RequestContext
|
|
14
13
|
|
|
@@ -54,7 +53,12 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
|
|
|
54
53
|
mcp = FastMCP("hindsight-mcp-server", stateless_http=True)
|
|
55
54
|
|
|
56
55
|
@mcp.tool()
|
|
57
|
-
async def retain(
|
|
56
|
+
async def retain(
|
|
57
|
+
content: str,
|
|
58
|
+
context: str = "general",
|
|
59
|
+
async_processing: bool = True,
|
|
60
|
+
bank_id: str | None = None,
|
|
61
|
+
) -> str:
|
|
58
62
|
"""
|
|
59
63
|
Store important information to long-term memory.
|
|
60
64
|
|
|
@@ -70,18 +74,28 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
|
|
|
70
74
|
Args:
|
|
71
75
|
content: The fact/memory to store (be specific and include relevant details)
|
|
72
76
|
context: Category for the memory (e.g., 'preferences', 'work', 'hobbies', 'family'). Default: 'general'
|
|
77
|
+
async_processing: If True, queue for background processing and return immediately. If False, wait for completion. Default: True
|
|
73
78
|
bank_id: Optional bank to store in (defaults to session bank). Use for cross-bank operations.
|
|
74
79
|
"""
|
|
75
80
|
try:
|
|
76
81
|
target_bank = bank_id or get_current_bank_id()
|
|
77
82
|
if target_bank is None:
|
|
78
83
|
return "Error: No bank_id configured"
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
84
|
+
contents = [{"content": content, "context": context}]
|
|
85
|
+
if async_processing:
|
|
86
|
+
# Queue for background processing and return immediately
|
|
87
|
+
result = await memory.submit_async_retain(
|
|
88
|
+
bank_id=target_bank, contents=contents, request_context=RequestContext()
|
|
89
|
+
)
|
|
90
|
+
return f"Memory queued for background processing (operation_id: {result.get('operation_id', 'N/A')})"
|
|
91
|
+
else:
|
|
92
|
+
# Wait for completion
|
|
93
|
+
await memory.retain_batch_async(
|
|
94
|
+
bank_id=target_bank,
|
|
95
|
+
contents=contents,
|
|
96
|
+
request_context=RequestContext(),
|
|
97
|
+
)
|
|
98
|
+
return f"Memory stored successfully in bank '{target_bank}'"
|
|
85
99
|
except Exception as e:
|
|
86
100
|
logger.error(f"Error storing memory: {e}", exc_info=True)
|
|
87
101
|
return f"Error: {str(e)}"
|
|
@@ -178,28 +192,15 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
|
|
|
178
192
|
"""
|
|
179
193
|
List all available memory banks.
|
|
180
194
|
|
|
181
|
-
Use this to discover banks
|
|
182
|
-
|
|
195
|
+
Use this tool to discover what memory banks exist in the system.
|
|
196
|
+
Each bank is an isolated memory store (like a separate "brain").
|
|
183
197
|
|
|
184
198
|
Returns:
|
|
185
|
-
JSON
|
|
199
|
+
JSON list of banks with their IDs, names, dispositions, and backgrounds.
|
|
186
200
|
"""
|
|
187
201
|
try:
|
|
188
202
|
banks = await memory.list_banks(request_context=RequestContext())
|
|
189
|
-
|
|
190
|
-
BankListItem(
|
|
191
|
-
bank_id=b.get("bank_id") or b.get("id"),
|
|
192
|
-
name=b.get("name"),
|
|
193
|
-
disposition=DispositionTraits(
|
|
194
|
-
**b.get("disposition", {"skepticism": 3, "literalism": 3, "empathy": 3})
|
|
195
|
-
),
|
|
196
|
-
background=b.get("background"),
|
|
197
|
-
created_at=str(b.get("created_at")) if b.get("created_at") else None,
|
|
198
|
-
updated_at=str(b.get("updated_at")) if b.get("updated_at") else None,
|
|
199
|
-
)
|
|
200
|
-
for b in banks
|
|
201
|
-
]
|
|
202
|
-
return BankListResponse(banks=bank_items).model_dump_json(indent=2)
|
|
203
|
+
return json.dumps({"banks": banks}, indent=2)
|
|
203
204
|
except Exception as e:
|
|
204
205
|
logger.error(f"Error listing banks: {e}", exc_info=True)
|
|
205
206
|
return f'{{"error": "{e}", "banks": []}}'
|
|
@@ -207,44 +208,38 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
|
|
|
207
208
|
@mcp.tool()
|
|
208
209
|
async def create_bank(bank_id: str, name: str | None = None, background: str | None = None) -> str:
|
|
209
210
|
"""
|
|
210
|
-
Create or
|
|
211
|
+
Create a new memory bank or get an existing one.
|
|
211
212
|
|
|
212
|
-
|
|
213
|
-
Banks are
|
|
213
|
+
Memory banks are isolated stores - each one is like a separate "brain" for a user/agent.
|
|
214
|
+
Banks are auto-created with default settings if they don't exist.
|
|
214
215
|
|
|
215
216
|
Args:
|
|
216
|
-
bank_id: Unique identifier for the bank (e.g., '
|
|
217
|
-
name:
|
|
218
|
-
background:
|
|
217
|
+
bank_id: Unique identifier for the bank (e.g., 'user-123', 'agent-alpha')
|
|
218
|
+
name: Optional human-friendly name for the bank
|
|
219
|
+
background: Optional background context about the bank's owner/purpose
|
|
219
220
|
"""
|
|
220
221
|
try:
|
|
221
|
-
#
|
|
222
|
-
await memory.get_bank_profile(bank_id, request_context=RequestContext())
|
|
222
|
+
# get_bank_profile auto-creates bank if it doesn't exist
|
|
223
|
+
profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
|
|
223
224
|
|
|
224
|
-
# Update name
|
|
225
|
+
# Update name/background if provided
|
|
225
226
|
if name is not None or background is not None:
|
|
226
|
-
await memory.update_bank(
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
if hasattr(disposition, "model_dump"):
|
|
232
|
-
disposition_traits = DispositionTraits(**disposition.model_dump())
|
|
233
|
-
else:
|
|
234
|
-
disposition_traits = DispositionTraits(
|
|
235
|
-
**dict(disposition or {"skepticism": 3, "literalism": 3, "empathy": 3})
|
|
227
|
+
await memory.update_bank(
|
|
228
|
+
bank_id,
|
|
229
|
+
name=name,
|
|
230
|
+
background=background,
|
|
231
|
+
request_context=RequestContext(),
|
|
236
232
|
)
|
|
233
|
+
# Fetch updated profile
|
|
234
|
+
profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
|
|
237
235
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
background=profile.get("background") or "",
|
|
243
|
-
)
|
|
244
|
-
return response.model_dump_json(indent=2)
|
|
236
|
+
# Serialize disposition if it's a Pydantic model
|
|
237
|
+
if "disposition" in profile and hasattr(profile["disposition"], "model_dump"):
|
|
238
|
+
profile["disposition"] = profile["disposition"].model_dump()
|
|
239
|
+
return json.dumps(profile, indent=2)
|
|
245
240
|
except Exception as e:
|
|
246
241
|
logger.error(f"Error creating bank: {e}", exc_info=True)
|
|
247
|
-
return
|
|
242
|
+
return f'{{"error": "{e}"}}'
|
|
248
243
|
|
|
249
244
|
return mcp
|
|
250
245
|
|
hindsight_api/config.py
CHANGED
|
@@ -8,6 +8,11 @@ import logging
|
|
|
8
8
|
import os
|
|
9
9
|
from dataclasses import dataclass
|
|
10
10
|
|
|
11
|
+
from dotenv import find_dotenv, load_dotenv
|
|
12
|
+
|
|
13
|
+
# Load .env file, searching current and parent directories (overrides existing env vars)
|
|
14
|
+
load_dotenv(find_dotenv(usecwd=True), override=True)
|
|
15
|
+
|
|
11
16
|
logger = logging.getLogger(__name__)
|
|
12
17
|
|
|
13
18
|
# Environment variable names
|
|
@@ -18,20 +23,57 @@ ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
|
|
|
18
23
|
ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
|
|
19
24
|
ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
|
|
20
25
|
ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
|
|
26
|
+
ENV_LLM_GROQ_SERVICE_TIER = "HINDSIGHT_API_LLM_GROQ_SERVICE_TIER"
|
|
27
|
+
|
|
28
|
+
# Per-operation LLM configuration (optional, falls back to global LLM config)
|
|
29
|
+
ENV_RETAIN_LLM_PROVIDER = "HINDSIGHT_API_RETAIN_LLM_PROVIDER"
|
|
30
|
+
ENV_RETAIN_LLM_API_KEY = "HINDSIGHT_API_RETAIN_LLM_API_KEY"
|
|
31
|
+
ENV_RETAIN_LLM_MODEL = "HINDSIGHT_API_RETAIN_LLM_MODEL"
|
|
32
|
+
ENV_RETAIN_LLM_BASE_URL = "HINDSIGHT_API_RETAIN_LLM_BASE_URL"
|
|
33
|
+
|
|
34
|
+
ENV_REFLECT_LLM_PROVIDER = "HINDSIGHT_API_REFLECT_LLM_PROVIDER"
|
|
35
|
+
ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
|
|
36
|
+
ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
|
|
37
|
+
ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
|
|
21
38
|
|
|
22
39
|
ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
|
|
23
40
|
ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
|
|
24
41
|
ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
|
|
42
|
+
ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
|
|
43
|
+
ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
|
|
44
|
+
ENV_EMBEDDINGS_OPENAI_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL"
|
|
45
|
+
|
|
46
|
+
ENV_COHERE_API_KEY = "HINDSIGHT_API_COHERE_API_KEY"
|
|
47
|
+
ENV_EMBEDDINGS_COHERE_MODEL = "HINDSIGHT_API_EMBEDDINGS_COHERE_MODEL"
|
|
48
|
+
ENV_EMBEDDINGS_COHERE_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_COHERE_BASE_URL"
|
|
49
|
+
ENV_RERANKER_COHERE_MODEL = "HINDSIGHT_API_RERANKER_COHERE_MODEL"
|
|
50
|
+
ENV_RERANKER_COHERE_BASE_URL = "HINDSIGHT_API_RERANKER_COHERE_BASE_URL"
|
|
51
|
+
|
|
52
|
+
# LiteLLM gateway configuration (for embeddings and reranker via LiteLLM proxy)
|
|
53
|
+
ENV_LITELLM_API_BASE = "HINDSIGHT_API_LITELLM_API_BASE"
|
|
54
|
+
ENV_LITELLM_API_KEY = "HINDSIGHT_API_LITELLM_API_KEY"
|
|
55
|
+
ENV_EMBEDDINGS_LITELLM_MODEL = "HINDSIGHT_API_EMBEDDINGS_LITELLM_MODEL"
|
|
56
|
+
ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"
|
|
25
57
|
|
|
26
58
|
ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
|
|
27
59
|
ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
|
|
60
|
+
ENV_RERANKER_LOCAL_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT"
|
|
28
61
|
ENV_RERANKER_TEI_URL = "HINDSIGHT_API_RERANKER_TEI_URL"
|
|
62
|
+
ENV_RERANKER_TEI_BATCH_SIZE = "HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE"
|
|
63
|
+
ENV_RERANKER_TEI_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_TEI_MAX_CONCURRENT"
|
|
64
|
+
ENV_RERANKER_MAX_CANDIDATES = "HINDSIGHT_API_RERANKER_MAX_CANDIDATES"
|
|
65
|
+
ENV_RERANKER_FLASHRANK_MODEL = "HINDSIGHT_API_RERANKER_FLASHRANK_MODEL"
|
|
66
|
+
ENV_RERANKER_FLASHRANK_CACHE_DIR = "HINDSIGHT_API_RERANKER_FLASHRANK_CACHE_DIR"
|
|
29
67
|
|
|
30
68
|
ENV_HOST = "HINDSIGHT_API_HOST"
|
|
31
69
|
ENV_PORT = "HINDSIGHT_API_PORT"
|
|
32
70
|
ENV_LOG_LEVEL = "HINDSIGHT_API_LOG_LEVEL"
|
|
71
|
+
ENV_WORKERS = "HINDSIGHT_API_WORKERS"
|
|
33
72
|
ENV_MCP_ENABLED = "HINDSIGHT_API_MCP_ENABLED"
|
|
34
73
|
ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
|
|
74
|
+
ENV_MPFP_TOP_K_NEIGHBORS = "HINDSIGHT_API_MPFP_TOP_K_NEIGHBORS"
|
|
75
|
+
ENV_RECALL_MAX_CONCURRENT = "HINDSIGHT_API_RECALL_MAX_CONCURRENT"
|
|
76
|
+
ENV_RECALL_CONNECTION_BUDGET = "HINDSIGHT_API_RECALL_CONNECTION_BUDGET"
|
|
35
77
|
ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
|
|
36
78
|
ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
|
|
37
79
|
|
|
@@ -39,10 +81,31 @@ ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
|
|
|
39
81
|
ENV_OBSERVATION_MIN_FACTS = "HINDSIGHT_API_OBSERVATION_MIN_FACTS"
|
|
40
82
|
ENV_OBSERVATION_TOP_ENTITIES = "HINDSIGHT_API_OBSERVATION_TOP_ENTITIES"
|
|
41
83
|
|
|
84
|
+
# Retain settings
|
|
85
|
+
ENV_RETAIN_MAX_COMPLETION_TOKENS = "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"
|
|
86
|
+
ENV_RETAIN_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_CHUNK_SIZE"
|
|
87
|
+
ENV_RETAIN_EXTRACT_CAUSAL_LINKS = "HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS"
|
|
88
|
+
ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE"
|
|
89
|
+
ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
|
|
90
|
+
|
|
42
91
|
# Optimization flags
|
|
43
92
|
ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
|
|
44
93
|
ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
|
|
45
94
|
|
|
95
|
+
# Database migrations
|
|
96
|
+
ENV_RUN_MIGRATIONS_ON_STARTUP = "HINDSIGHT_API_RUN_MIGRATIONS_ON_STARTUP"
|
|
97
|
+
|
|
98
|
+
# Database connection pool
|
|
99
|
+
ENV_DB_POOL_MIN_SIZE = "HINDSIGHT_API_DB_POOL_MIN_SIZE"
|
|
100
|
+
ENV_DB_POOL_MAX_SIZE = "HINDSIGHT_API_DB_POOL_MAX_SIZE"
|
|
101
|
+
ENV_DB_COMMAND_TIMEOUT = "HINDSIGHT_API_DB_COMMAND_TIMEOUT"
|
|
102
|
+
ENV_DB_ACQUIRE_TIMEOUT = "HINDSIGHT_API_DB_ACQUIRE_TIMEOUT"
|
|
103
|
+
|
|
104
|
+
# Background task processing
|
|
105
|
+
ENV_TASK_BACKEND = "HINDSIGHT_API_TASK_BACKEND"
|
|
106
|
+
ENV_TASK_BACKEND_MEMORY_BATCH_SIZE = "HINDSIGHT_API_TASK_BACKEND_MEMORY_BATCH_SIZE"
|
|
107
|
+
ENV_TASK_BACKEND_MEMORY_BATCH_INTERVAL = "HINDSIGHT_API_TASK_BACKEND_MEMORY_BATCH_INTERVAL"
|
|
108
|
+
|
|
46
109
|
# Default values
|
|
47
110
|
DEFAULT_DATABASE_URL = "pg0"
|
|
48
111
|
DEFAULT_LLM_PROVIDER = "openai"
|
|
@@ -52,21 +115,63 @@ DEFAULT_LLM_TIMEOUT = 120.0 # seconds
|
|
|
52
115
|
|
|
53
116
|
DEFAULT_EMBEDDINGS_PROVIDER = "local"
|
|
54
117
|
DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
|
|
118
|
+
DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
|
|
119
|
+
DEFAULT_EMBEDDING_DIMENSION = 384
|
|
55
120
|
|
|
56
121
|
DEFAULT_RERANKER_PROVIDER = "local"
|
|
57
122
|
DEFAULT_RERANKER_LOCAL_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
|
123
|
+
DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT = 4 # Limit concurrent CPU-bound reranking to prevent thrashing
|
|
124
|
+
DEFAULT_RERANKER_TEI_BATCH_SIZE = 128
|
|
125
|
+
DEFAULT_RERANKER_TEI_MAX_CONCURRENT = 8
|
|
126
|
+
DEFAULT_RERANKER_MAX_CANDIDATES = 300
|
|
127
|
+
DEFAULT_RERANKER_FLASHRANK_MODEL = "ms-marco-MiniLM-L-12-v2" # Best balance of speed and quality
|
|
128
|
+
DEFAULT_RERANKER_FLASHRANK_CACHE_DIR = None # Use default cache directory
|
|
129
|
+
|
|
130
|
+
DEFAULT_EMBEDDINGS_COHERE_MODEL = "embed-english-v3.0"
|
|
131
|
+
DEFAULT_RERANKER_COHERE_MODEL = "rerank-english-v3.0"
|
|
132
|
+
|
|
133
|
+
# LiteLLM defaults
|
|
134
|
+
DEFAULT_LITELLM_API_BASE = "http://localhost:4000"
|
|
135
|
+
DEFAULT_EMBEDDINGS_LITELLM_MODEL = "text-embedding-3-small"
|
|
136
|
+
DEFAULT_RERANKER_LITELLM_MODEL = "cohere/rerank-english-v3.0"
|
|
58
137
|
|
|
59
138
|
DEFAULT_HOST = "0.0.0.0"
|
|
60
139
|
DEFAULT_PORT = 8888
|
|
61
140
|
DEFAULT_LOG_LEVEL = "info"
|
|
141
|
+
DEFAULT_WORKERS = 1
|
|
62
142
|
DEFAULT_MCP_ENABLED = True
|
|
63
|
-
DEFAULT_GRAPH_RETRIEVER = "
|
|
143
|
+
DEFAULT_GRAPH_RETRIEVER = "link_expansion" # Options: "link_expansion", "mpfp", "bfs"
|
|
144
|
+
DEFAULT_MPFP_TOP_K_NEIGHBORS = 20 # Fan-out limit per node in MPFP graph traversal
|
|
145
|
+
DEFAULT_RECALL_MAX_CONCURRENT = 32 # Max concurrent recall operations per worker
|
|
146
|
+
DEFAULT_RECALL_CONNECTION_BUDGET = 4 # Max concurrent DB connections per recall operation
|
|
64
147
|
DEFAULT_MCP_LOCAL_BANK_ID = "mcp"
|
|
65
148
|
|
|
66
149
|
# Observation thresholds
|
|
67
150
|
DEFAULT_OBSERVATION_MIN_FACTS = 5 # Min facts required to generate entity observations
|
|
68
151
|
DEFAULT_OBSERVATION_TOP_ENTITIES = 5 # Max entities to process per retain batch
|
|
69
152
|
|
|
153
|
+
# Retain settings
|
|
154
|
+
DEFAULT_RETAIN_MAX_COMPLETION_TOKENS = 64000 # Max tokens for fact extraction LLM call
|
|
155
|
+
DEFAULT_RETAIN_CHUNK_SIZE = 3000 # Max chars per chunk for fact extraction
|
|
156
|
+
DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS = True # Extract causal links between facts
|
|
157
|
+
DEFAULT_RETAIN_EXTRACTION_MODE = "concise" # Extraction mode: "concise" or "verbose"
|
|
158
|
+
RETAIN_EXTRACTION_MODES = ("concise", "verbose") # Allowed extraction modes
|
|
159
|
+
DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False # Run observation generation async (after retain completes)
|
|
160
|
+
|
|
161
|
+
# Database migrations
|
|
162
|
+
DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
|
|
163
|
+
|
|
164
|
+
# Database connection pool
|
|
165
|
+
DEFAULT_DB_POOL_MIN_SIZE = 5
|
|
166
|
+
DEFAULT_DB_POOL_MAX_SIZE = 100
|
|
167
|
+
DEFAULT_DB_COMMAND_TIMEOUT = 60 # seconds
|
|
168
|
+
DEFAULT_DB_ACQUIRE_TIMEOUT = 30 # seconds
|
|
169
|
+
|
|
170
|
+
# Background task processing
|
|
171
|
+
DEFAULT_TASK_BACKEND = "memory" # Options: "memory", "noop"
|
|
172
|
+
DEFAULT_TASK_BACKEND_MEMORY_BATCH_SIZE = 10
|
|
173
|
+
DEFAULT_TASK_BACKEND_MEMORY_BATCH_INTERVAL = 1.0 # seconds
|
|
174
|
+
|
|
70
175
|
# Default MCP tool descriptions (can be customized via env vars)
|
|
71
176
|
DEFAULT_MCP_RETAIN_DESCRIPTION = """Store important information to long-term memory.
|
|
72
177
|
|
|
@@ -87,8 +192,20 @@ Use this tool PROACTIVELY to:
|
|
|
87
192
|
- Remember user's goals and context
|
|
88
193
|
- Personalize responses based on past interactions"""
|
|
89
194
|
|
|
90
|
-
#
|
|
91
|
-
EMBEDDING_DIMENSION =
|
|
195
|
+
# Default embedding dimension (used by initial migration, adjusted at runtime)
|
|
196
|
+
EMBEDDING_DIMENSION = DEFAULT_EMBEDDING_DIMENSION
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _validate_extraction_mode(mode: str) -> str:
|
|
200
|
+
"""Validate and normalize extraction mode."""
|
|
201
|
+
mode_lower = mode.lower()
|
|
202
|
+
if mode_lower not in RETAIN_EXTRACTION_MODES:
|
|
203
|
+
logger.warning(
|
|
204
|
+
f"Invalid extraction mode '{mode}', must be one of {RETAIN_EXTRACTION_MODES}. "
|
|
205
|
+
f"Defaulting to '{DEFAULT_RETAIN_EXTRACTION_MODE}'."
|
|
206
|
+
)
|
|
207
|
+
return DEFAULT_RETAIN_EXTRACTION_MODE
|
|
208
|
+
return mode_lower
|
|
92
209
|
|
|
93
210
|
|
|
94
211
|
@dataclass
|
|
@@ -98,7 +215,7 @@ class HindsightConfig:
|
|
|
98
215
|
# Database
|
|
99
216
|
database_url: str
|
|
100
217
|
|
|
101
|
-
# LLM
|
|
218
|
+
# LLM (default, used as fallback for per-operation config)
|
|
102
219
|
llm_provider: str
|
|
103
220
|
llm_api_key: str | None
|
|
104
221
|
llm_model: str
|
|
@@ -106,15 +223,32 @@ class HindsightConfig:
|
|
|
106
223
|
llm_max_concurrent: int
|
|
107
224
|
llm_timeout: float
|
|
108
225
|
|
|
226
|
+
# Per-operation LLM configuration (None = use default LLM config)
|
|
227
|
+
retain_llm_provider: str | None
|
|
228
|
+
retain_llm_api_key: str | None
|
|
229
|
+
retain_llm_model: str | None
|
|
230
|
+
retain_llm_base_url: str | None
|
|
231
|
+
|
|
232
|
+
reflect_llm_provider: str | None
|
|
233
|
+
reflect_llm_api_key: str | None
|
|
234
|
+
reflect_llm_model: str | None
|
|
235
|
+
reflect_llm_base_url: str | None
|
|
236
|
+
|
|
109
237
|
# Embeddings
|
|
110
238
|
embeddings_provider: str
|
|
111
239
|
embeddings_local_model: str
|
|
112
240
|
embeddings_tei_url: str | None
|
|
241
|
+
embeddings_openai_base_url: str | None
|
|
242
|
+
embeddings_cohere_base_url: str | None
|
|
113
243
|
|
|
114
244
|
# Reranker
|
|
115
245
|
reranker_provider: str
|
|
116
246
|
reranker_local_model: str
|
|
117
247
|
reranker_tei_url: str | None
|
|
248
|
+
reranker_tei_batch_size: int
|
|
249
|
+
reranker_tei_max_concurrent: int
|
|
250
|
+
reranker_max_candidates: int
|
|
251
|
+
reranker_cohere_base_url: str | None
|
|
118
252
|
|
|
119
253
|
# Server
|
|
120
254
|
host: str
|
|
@@ -124,15 +258,39 @@ class HindsightConfig:
|
|
|
124
258
|
|
|
125
259
|
# Recall
|
|
126
260
|
graph_retriever: str
|
|
261
|
+
mpfp_top_k_neighbors: int
|
|
262
|
+
recall_max_concurrent: int
|
|
263
|
+
recall_connection_budget: int
|
|
127
264
|
|
|
128
265
|
# Observation thresholds
|
|
129
266
|
observation_min_facts: int
|
|
130
267
|
observation_top_entities: int
|
|
131
268
|
|
|
269
|
+
# Retain settings
|
|
270
|
+
retain_max_completion_tokens: int
|
|
271
|
+
retain_chunk_size: int
|
|
272
|
+
retain_extract_causal_links: bool
|
|
273
|
+
retain_extraction_mode: str
|
|
274
|
+
retain_observations_async: bool
|
|
275
|
+
|
|
132
276
|
# Optimization flags
|
|
133
277
|
skip_llm_verification: bool
|
|
134
278
|
lazy_reranker: bool
|
|
135
279
|
|
|
280
|
+
# Database migrations
|
|
281
|
+
run_migrations_on_startup: bool
|
|
282
|
+
|
|
283
|
+
# Database connection pool
|
|
284
|
+
db_pool_min_size: int
|
|
285
|
+
db_pool_max_size: int
|
|
286
|
+
db_command_timeout: int
|
|
287
|
+
db_acquire_timeout: int
|
|
288
|
+
|
|
289
|
+
# Background task processing
|
|
290
|
+
task_backend: str
|
|
291
|
+
task_backend_memory_batch_size: int
|
|
292
|
+
task_backend_memory_batch_interval: float
|
|
293
|
+
|
|
136
294
|
@classmethod
|
|
137
295
|
def from_env(cls) -> "HindsightConfig":
|
|
138
296
|
"""Create configuration from environment variables."""
|
|
@@ -146,14 +304,31 @@ class HindsightConfig:
|
|
|
146
304
|
llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
|
|
147
305
|
llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
|
|
148
306
|
llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
|
|
307
|
+
# Per-operation LLM config (None = use default)
|
|
308
|
+
retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
|
|
309
|
+
retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
|
|
310
|
+
retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
|
|
311
|
+
retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
|
|
312
|
+
reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
|
|
313
|
+
reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
|
|
314
|
+
reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
|
|
315
|
+
reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
|
|
149
316
|
# Embeddings
|
|
150
317
|
embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
|
|
151
318
|
embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
|
|
152
319
|
embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
|
|
320
|
+
embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
|
|
321
|
+
embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
|
|
153
322
|
# Reranker
|
|
154
323
|
reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
|
|
155
324
|
reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
|
|
156
325
|
reranker_tei_url=os.getenv(ENV_RERANKER_TEI_URL),
|
|
326
|
+
reranker_tei_batch_size=int(os.getenv(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE))),
|
|
327
|
+
reranker_tei_max_concurrent=int(
|
|
328
|
+
os.getenv(ENV_RERANKER_TEI_MAX_CONCURRENT, str(DEFAULT_RERANKER_TEI_MAX_CONCURRENT))
|
|
329
|
+
),
|
|
330
|
+
reranker_max_candidates=int(os.getenv(ENV_RERANKER_MAX_CANDIDATES, str(DEFAULT_RERANKER_MAX_CANDIDATES))),
|
|
331
|
+
reranker_cohere_base_url=os.getenv(ENV_RERANKER_COHERE_BASE_URL) or None,
|
|
157
332
|
# Server
|
|
158
333
|
host=os.getenv(ENV_HOST, DEFAULT_HOST),
|
|
159
334
|
port=int(os.getenv(ENV_PORT, DEFAULT_PORT)),
|
|
@@ -161,6 +336,11 @@ class HindsightConfig:
|
|
|
161
336
|
mcp_enabled=os.getenv(ENV_MCP_ENABLED, str(DEFAULT_MCP_ENABLED)).lower() == "true",
|
|
162
337
|
# Recall
|
|
163
338
|
graph_retriever=os.getenv(ENV_GRAPH_RETRIEVER, DEFAULT_GRAPH_RETRIEVER),
|
|
339
|
+
mpfp_top_k_neighbors=int(os.getenv(ENV_MPFP_TOP_K_NEIGHBORS, str(DEFAULT_MPFP_TOP_K_NEIGHBORS))),
|
|
340
|
+
recall_max_concurrent=int(os.getenv(ENV_RECALL_MAX_CONCURRENT, str(DEFAULT_RECALL_MAX_CONCURRENT))),
|
|
341
|
+
recall_connection_budget=int(
|
|
342
|
+
os.getenv(ENV_RECALL_CONNECTION_BUDGET, str(DEFAULT_RECALL_CONNECTION_BUDGET))
|
|
343
|
+
),
|
|
164
344
|
# Optimization flags
|
|
165
345
|
skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
|
|
166
346
|
lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
|
|
@@ -169,6 +349,37 @@ class HindsightConfig:
|
|
|
169
349
|
observation_top_entities=int(
|
|
170
350
|
os.getenv(ENV_OBSERVATION_TOP_ENTITIES, str(DEFAULT_OBSERVATION_TOP_ENTITIES))
|
|
171
351
|
),
|
|
352
|
+
# Retain settings
|
|
353
|
+
retain_max_completion_tokens=int(
|
|
354
|
+
os.getenv(ENV_RETAIN_MAX_COMPLETION_TOKENS, str(DEFAULT_RETAIN_MAX_COMPLETION_TOKENS))
|
|
355
|
+
),
|
|
356
|
+
retain_chunk_size=int(os.getenv(ENV_RETAIN_CHUNK_SIZE, str(DEFAULT_RETAIN_CHUNK_SIZE))),
|
|
357
|
+
retain_extract_causal_links=os.getenv(
|
|
358
|
+
ENV_RETAIN_EXTRACT_CAUSAL_LINKS, str(DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS)
|
|
359
|
+
).lower()
|
|
360
|
+
== "true",
|
|
361
|
+
retain_extraction_mode=_validate_extraction_mode(
|
|
362
|
+
os.getenv(ENV_RETAIN_EXTRACTION_MODE, DEFAULT_RETAIN_EXTRACTION_MODE)
|
|
363
|
+
),
|
|
364
|
+
retain_observations_async=os.getenv(
|
|
365
|
+
ENV_RETAIN_OBSERVATIONS_ASYNC, str(DEFAULT_RETAIN_OBSERVATIONS_ASYNC)
|
|
366
|
+
).lower()
|
|
367
|
+
== "true",
|
|
368
|
+
# Database migrations
|
|
369
|
+
run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
|
|
370
|
+
# Database connection pool
|
|
371
|
+
db_pool_min_size=int(os.getenv(ENV_DB_POOL_MIN_SIZE, str(DEFAULT_DB_POOL_MIN_SIZE))),
|
|
372
|
+
db_pool_max_size=int(os.getenv(ENV_DB_POOL_MAX_SIZE, str(DEFAULT_DB_POOL_MAX_SIZE))),
|
|
373
|
+
db_command_timeout=int(os.getenv(ENV_DB_COMMAND_TIMEOUT, str(DEFAULT_DB_COMMAND_TIMEOUT))),
|
|
374
|
+
db_acquire_timeout=int(os.getenv(ENV_DB_ACQUIRE_TIMEOUT, str(DEFAULT_DB_ACQUIRE_TIMEOUT))),
|
|
375
|
+
# Background task processing
|
|
376
|
+
task_backend=os.getenv(ENV_TASK_BACKEND, DEFAULT_TASK_BACKEND),
|
|
377
|
+
task_backend_memory_batch_size=int(
|
|
378
|
+
os.getenv(ENV_TASK_BACKEND_MEMORY_BATCH_SIZE, str(DEFAULT_TASK_BACKEND_MEMORY_BATCH_SIZE))
|
|
379
|
+
),
|
|
380
|
+
task_backend_memory_batch_interval=float(
|
|
381
|
+
os.getenv(ENV_TASK_BACKEND_MEMORY_BATCH_INTERVAL, str(DEFAULT_TASK_BACKEND_MEMORY_BATCH_INTERVAL))
|
|
382
|
+
),
|
|
172
383
|
)
|
|
173
384
|
|
|
174
385
|
def get_llm_base_url(self) -> str:
|
|
@@ -210,11 +421,32 @@ class HindsightConfig:
|
|
|
210
421
|
"""Log the current configuration (without sensitive values)."""
|
|
211
422
|
logger.info(f"Database: {self.database_url}")
|
|
212
423
|
logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")
|
|
424
|
+
if self.retain_llm_provider or self.retain_llm_model:
|
|
425
|
+
retain_provider = self.retain_llm_provider or self.llm_provider
|
|
426
|
+
retain_model = self.retain_llm_model or self.llm_model
|
|
427
|
+
logger.info(f"LLM (retain): provider={retain_provider}, model={retain_model}")
|
|
428
|
+
if self.reflect_llm_provider or self.reflect_llm_model:
|
|
429
|
+
reflect_provider = self.reflect_llm_provider or self.llm_provider
|
|
430
|
+
reflect_model = self.reflect_llm_model or self.llm_model
|
|
431
|
+
logger.info(f"LLM (reflect): provider={reflect_provider}, model={reflect_model}")
|
|
213
432
|
logger.info(f"Embeddings: provider={self.embeddings_provider}")
|
|
214
433
|
logger.info(f"Reranker: provider={self.reranker_provider}")
|
|
215
434
|
logger.info(f"Graph retriever: {self.graph_retriever}")
|
|
216
435
|
|
|
217
436
|
|
|
437
|
+
# Cached config instance
|
|
438
|
+
_config_cache: HindsightConfig | None = None
|
|
439
|
+
|
|
440
|
+
|
|
218
441
|
def get_config() -> HindsightConfig:
|
|
219
|
-
"""Get the
|
|
220
|
-
|
|
442
|
+
"""Get the cached configuration, loading from environment on first call."""
|
|
443
|
+
global _config_cache
|
|
444
|
+
if _config_cache is None:
|
|
445
|
+
_config_cache = HindsightConfig.from_env()
|
|
446
|
+
return _config_cache
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def clear_config_cache() -> None:
|
|
450
|
+
"""Clear the config cache. Useful for testing or reloading config."""
|
|
451
|
+
global _config_cache
|
|
452
|
+
_config_cache = None
|