hindsight-api 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/admin/cli.py +59 -0
- hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
- hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
- hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
- hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
- hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
- hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
- hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
- hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
- hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
- hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
- hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
- hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
- hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
- hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
- hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
- hindsight_api/api/http.py +1119 -93
- hindsight_api/api/mcp.py +11 -191
- hindsight_api/config.py +145 -45
- hindsight_api/engine/consolidation/__init__.py +5 -0
- hindsight_api/engine/consolidation/consolidator.py +859 -0
- hindsight_api/engine/consolidation/prompts.py +69 -0
- hindsight_api/engine/cross_encoder.py +114 -9
- hindsight_api/engine/directives/__init__.py +5 -0
- hindsight_api/engine/directives/models.py +37 -0
- hindsight_api/engine/embeddings.py +102 -5
- hindsight_api/engine/interface.py +32 -13
- hindsight_api/engine/llm_wrapper.py +505 -43
- hindsight_api/engine/memory_engine.py +2090 -1089
- hindsight_api/engine/mental_models/__init__.py +14 -0
- hindsight_api/engine/mental_models/models.py +53 -0
- hindsight_api/engine/reflect/__init__.py +18 -0
- hindsight_api/engine/reflect/agent.py +933 -0
- hindsight_api/engine/reflect/models.py +109 -0
- hindsight_api/engine/reflect/observations.py +186 -0
- hindsight_api/engine/reflect/prompts.py +483 -0
- hindsight_api/engine/reflect/tools.py +437 -0
- hindsight_api/engine/reflect/tools_schema.py +250 -0
- hindsight_api/engine/response_models.py +130 -4
- hindsight_api/engine/retain/bank_utils.py +79 -201
- hindsight_api/engine/retain/fact_extraction.py +81 -48
- hindsight_api/engine/retain/fact_storage.py +5 -8
- hindsight_api/engine/retain/link_utils.py +5 -8
- hindsight_api/engine/retain/orchestrator.py +1 -55
- hindsight_api/engine/retain/types.py +2 -2
- hindsight_api/engine/search/graph_retrieval.py +2 -2
- hindsight_api/engine/search/link_expansion_retrieval.py +164 -29
- hindsight_api/engine/search/mpfp_retrieval.py +1 -1
- hindsight_api/engine/search/retrieval.py +14 -14
- hindsight_api/engine/search/think_utils.py +41 -140
- hindsight_api/engine/search/trace.py +0 -1
- hindsight_api/engine/search/tracer.py +2 -5
- hindsight_api/engine/search/types.py +0 -3
- hindsight_api/engine/task_backend.py +112 -196
- hindsight_api/engine/utils.py +0 -151
- hindsight_api/extensions/__init__.py +10 -1
- hindsight_api/extensions/builtin/tenant.py +5 -1
- hindsight_api/extensions/operation_validator.py +81 -4
- hindsight_api/extensions/tenant.py +26 -0
- hindsight_api/main.py +16 -5
- hindsight_api/mcp_local.py +12 -53
- hindsight_api/mcp_tools.py +494 -0
- hindsight_api/models.py +0 -2
- hindsight_api/worker/__init__.py +11 -0
- hindsight_api/worker/main.py +296 -0
- hindsight_api/worker/poller.py +486 -0
- {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +12 -6
- hindsight_api-0.4.0.dist-info/RECORD +112 -0
- {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +1 -0
- hindsight_api/engine/retain/observation_regeneration.py +0 -254
- hindsight_api/engine/search/observation_utils.py +0 -125
- hindsight_api/engine/search/scoring.py +0 -159
- hindsight_api-0.3.0.dist-info/RECORD +0 -82
- {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
hindsight_api/api/mcp.py
CHANGED
@@ -1,4 +1,4 @@
-"""Hindsight MCP Server implementation using FastMCP."""
+"""Hindsight MCP Server implementation using FastMCP (HTTP transport)."""
 
 import json
 import logging
@@ -8,8 +8,7 @@ from contextvars import ContextVar
 from fastmcp import FastMCP
 
 from hindsight_api import MemoryEngine
-from hindsight_api.
-from hindsight_api.models import RequestContext
+from hindsight_api.mcp_tools import MCPToolsConfig, register_mcp_tools
 
 # Configure logging from HINDSIGHT_API_LOG_LEVEL environment variable
 _log_level_str = os.environ.get("HINDSIGHT_API_LOG_LEVEL", "info").lower()
@@ -52,194 +51,15 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
     # Use stateless_http=True for Claude Code compatibility
     mcp = FastMCP("hindsight-mcp-server", stateless_http=True)
 
-
-
-
-
-
-
-    )
-
-
-
-        Use this tool PROACTIVELY whenever the user shares:
-        - Personal facts, preferences, or interests
-        - Important events or milestones
-        - User history, experiences, or background
-        - Decisions, opinions, or stated preferences
-        - Goals, plans, or future intentions
-        - Relationships or people mentioned
-        - Work context, projects, or responsibilities
-
-        Args:
-            content: The fact/memory to store (be specific and include relevant details)
-            context: Category for the memory (e.g., 'preferences', 'work', 'hobbies', 'family'). Default: 'general'
-            async_processing: If True, queue for background processing and return immediately. If False, wait for completion. Default: True
-            bank_id: Optional bank to store in (defaults to session bank). Use for cross-bank operations.
-        """
-        try:
-            target_bank = bank_id or get_current_bank_id()
-            if target_bank is None:
-                return "Error: No bank_id configured"
-            contents = [{"content": content, "context": context}]
-            if async_processing:
-                # Queue for background processing and return immediately
-                result = await memory.submit_async_retain(
-                    bank_id=target_bank, contents=contents, request_context=RequestContext()
-                )
-                return f"Memory queued for background processing (operation_id: {result.get('operation_id', 'N/A')})"
-            else:
-                # Wait for completion
-                await memory.retain_batch_async(
-                    bank_id=target_bank,
-                    contents=contents,
-                    request_context=RequestContext(),
-                )
-                return f"Memory stored successfully in bank '{target_bank}'"
-        except Exception as e:
-            logger.error(f"Error storing memory: {e}", exc_info=True)
-            return f"Error: {str(e)}"
-
-    @mcp.tool()
-    async def recall(query: str, max_tokens: int = 4096, bank_id: str | None = None) -> str:
-        """
-        Search memories to provide personalized, context-aware responses.
-
-        Use this tool PROACTIVELY to:
-        - Check user's preferences before making suggestions
-        - Recall user's history to provide continuity
-        - Remember user's goals and context
-        - Personalize responses based on past interactions
-
-        Args:
-            query: Natural language search query (e.g., "user's food preferences", "what projects is user working on")
-            max_tokens: Maximum tokens in the response (default: 4096)
-            bank_id: Optional bank to search in (defaults to session bank). Use for cross-bank operations.
-        """
-        try:
-            target_bank = bank_id or get_current_bank_id()
-            if target_bank is None:
-                return "Error: No bank_id configured"
-            from hindsight_api.engine.memory_engine import Budget
-
-            recall_result = await memory.recall_async(
-                bank_id=target_bank,
-                query=query,
-                fact_type=list(VALID_RECALL_FACT_TYPES),
-                budget=Budget.HIGH,
-                max_tokens=max_tokens,
-                request_context=RequestContext(),
-            )
-
-            # Use model's JSON serialization
-            return recall_result.model_dump_json(indent=2)
-        except Exception as e:
-            logger.error(f"Error searching: {e}", exc_info=True)
-            return f'{{"error": "{e}", "results": []}}'
-
-    @mcp.tool()
-    async def reflect(query: str, context: str | None = None, budget: str = "low", bank_id: str | None = None) -> str:
-        """
-        Generate thoughtful analysis by synthesizing stored memories with the bank's personality.
-
-        WHEN TO USE THIS TOOL:
-        Use reflect when you need reasoned analysis, not just fact retrieval. This tool
-        thinks through the question using everything the bank knows and its personality traits.
-
-        EXAMPLES OF GOOD QUERIES:
-        - "What patterns have emerged in how I approach debugging?"
-        - "Based on my past decisions, what architectural style do I prefer?"
-        - "What might be the best approach for this problem given what you know about me?"
-        - "How should I prioritize these tasks based on my goals?"
-
-        HOW IT DIFFERS FROM RECALL:
-        - recall: Returns raw facts matching your search (fast lookup)
-        - reflect: Reasons across memories to form a synthesized answer (deeper analysis)
-
-        Use recall for "what did I say about X?" and reflect for "what should I do about X?"
-
-        Args:
-            query: The question or topic to reflect on
-            context: Optional context about why this reflection is needed
-            budget: Search budget - 'low', 'mid', or 'high' (default: 'low')
-            bank_id: Optional bank to reflect in (defaults to session bank). Use for cross-bank operations.
-        """
-        try:
-            target_bank = bank_id or get_current_bank_id()
-            if target_bank is None:
-                return "Error: No bank_id configured"
-            from hindsight_api.engine.memory_engine import Budget
-
-            # Map string budget to enum
-            budget_map = {"low": Budget.LOW, "mid": Budget.MID, "high": Budget.HIGH}
-            budget_enum = budget_map.get(budget.lower(), Budget.LOW)
-
-            reflect_result = await memory.reflect_async(
-                bank_id=target_bank,
-                query=query,
-                budget=budget_enum,
-                context=context,
-                request_context=RequestContext(),
-            )
-
-            return reflect_result.model_dump_json(indent=2)
-        except Exception as e:
-            logger.error(f"Error reflecting: {e}", exc_info=True)
-            return f'{{"error": "{e}", "text": ""}}'
-
-    @mcp.tool()
-    async def list_banks() -> str:
-        """
-        List all available memory banks.
-
-        Use this tool to discover what memory banks exist in the system.
-        Each bank is an isolated memory store (like a separate "brain").
-
-        Returns:
-            JSON list of banks with their IDs, names, dispositions, and backgrounds.
-        """
-        try:
-            banks = await memory.list_banks(request_context=RequestContext())
-            return json.dumps({"banks": banks}, indent=2)
-        except Exception as e:
-            logger.error(f"Error listing banks: {e}", exc_info=True)
-            return f'{{"error": "{e}", "banks": []}}'
-
-    @mcp.tool()
-    async def create_bank(bank_id: str, name: str | None = None, background: str | None = None) -> str:
-        """
-        Create a new memory bank or get an existing one.
-
-        Memory banks are isolated stores - each one is like a separate "brain" for a user/agent.
-        Banks are auto-created with default settings if they don't exist.
-
-        Args:
-            bank_id: Unique identifier for the bank (e.g., 'user-123', 'agent-alpha')
-            name: Optional human-friendly name for the bank
-            background: Optional background context about the bank's owner/purpose
-        """
-        try:
-            # get_bank_profile auto-creates bank if it doesn't exist
-            profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
-
-            # Update name/background if provided
-            if name is not None or background is not None:
-                await memory.update_bank(
-                    bank_id,
-                    name=name,
-                    background=background,
-                    request_context=RequestContext(),
-                )
-                # Fetch updated profile
-                profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
-
-            # Serialize disposition if it's a Pydantic model
-            if "disposition" in profile and hasattr(profile["disposition"], "model_dump"):
-                profile["disposition"] = profile["disposition"].model_dump()
-            return json.dumps(profile, indent=2)
-        except Exception as e:
-            logger.error(f"Error creating bank: {e}", exc_info=True)
-            return f'{{"error": "{e}"}}'
+    # Configure and register tools using shared module
+    config = MCPToolsConfig(
+        bank_id_resolver=get_current_bank_id,
+        include_bank_id_param=True,  # HTTP MCP supports multi-bank via parameter
+        tools=None,  # All tools
+        retain_fire_and_forget=False,  # HTTP MCP supports sync/async modes
+    )
+
+    register_mcp_tools(mcp, memory, config)
 
     return mcp
 
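For context, the tool bodies removed here now live in the new shared module hindsight_api/mcp_tools.py (+494 lines in the file list above), so the HTTP server and other entry points can register the same tools from one place. Below is a minimal sketch of how another entry point might reuse that module, based only on the call shape visible in this hunk; the single-bank resolver, the flag values, and the function name create_local_mcp_server are illustrative assumptions, not confirmed by this diff.

# Hypothetical sketch: reusing the shared MCP tool registry for a single-bank server.
# MCPToolsConfig and register_mcp_tools are taken from the hunk above; the
# specific field values and the entry-point name below are assumptions.
from fastmcp import FastMCP

from hindsight_api import MemoryEngine
from hindsight_api.mcp_tools import MCPToolsConfig, register_mcp_tools


def create_local_mcp_server(memory: MemoryEngine, bank_id: str = "mcp") -> FastMCP:
    mcp = FastMCP("hindsight-mcp-local")
    config = MCPToolsConfig(
        bank_id_resolver=lambda: bank_id,  # pin every call to one bank
        include_bank_id_param=False,       # no cross-bank parameter needed locally
        tools=None,                        # register all shared tools
        retain_fire_and_forget=True,       # queue writes and return immediately
    )
    register_mcp_tools(mcp, memory, config)
    return mcp
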
hindsight_api/config.py
CHANGED
@@ -4,9 +4,12 @@ Centralized configuration for Hindsight API.
 All environment variables and their defaults are defined here.
 """
 
+import json
 import logging
 import os
+import sys
 from dataclasses import dataclass
+from datetime import datetime, timezone
 
 from dotenv import find_dotenv, load_dotenv
 
@@ -36,6 +39,11 @@ ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
 ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
 ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
 
+ENV_CONSOLIDATION_LLM_PROVIDER = "HINDSIGHT_API_CONSOLIDATION_LLM_PROVIDER"
+ENV_CONSOLIDATION_LLM_API_KEY = "HINDSIGHT_API_CONSOLIDATION_LLM_API_KEY"
+ENV_CONSOLIDATION_LLM_MODEL = "HINDSIGHT_API_CONSOLIDATION_LLM_MODEL"
+ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
+
 ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
 ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
 ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
@@ -68,6 +76,7 @@ ENV_RERANKER_FLASHRANK_CACHE_DIR = "HINDSIGHT_API_RERANKER_FLASHRANK_CACHE_DIR"
 ENV_HOST = "HINDSIGHT_API_HOST"
 ENV_PORT = "HINDSIGHT_API_PORT"
 ENV_LOG_LEVEL = "HINDSIGHT_API_LOG_LEVEL"
+ENV_LOG_FORMAT = "HINDSIGHT_API_LOG_FORMAT"
 ENV_WORKERS = "HINDSIGHT_API_WORKERS"
 ENV_MCP_ENABLED = "HINDSIGHT_API_MCP_ENABLED"
 ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
@@ -76,18 +85,20 @@ ENV_RECALL_MAX_CONCURRENT = "HINDSIGHT_API_RECALL_MAX_CONCURRENT"
 ENV_RECALL_CONNECTION_BUDGET = "HINDSIGHT_API_RECALL_CONNECTION_BUDGET"
 ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
 ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
-
-# Observation thresholds
-ENV_OBSERVATION_MIN_FACTS = "HINDSIGHT_API_OBSERVATION_MIN_FACTS"
-ENV_OBSERVATION_TOP_ENTITIES = "HINDSIGHT_API_OBSERVATION_TOP_ENTITIES"
+ENV_MENTAL_MODEL_REFRESH_CONCURRENCY = "HINDSIGHT_API_MENTAL_MODEL_REFRESH_CONCURRENCY"
 
 # Retain settings
 ENV_RETAIN_MAX_COMPLETION_TOKENS = "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"
 ENV_RETAIN_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_CHUNK_SIZE"
 ENV_RETAIN_EXTRACT_CAUSAL_LINKS = "HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS"
 ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE"
+ENV_RETAIN_CUSTOM_INSTRUCTIONS = "HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS"
 ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
 
+# Observations settings (consolidated knowledge from facts)
+ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
+ENV_CONSOLIDATION_BATCH_SIZE = "HINDSIGHT_API_CONSOLIDATION_BATCH_SIZE"
+
 # Optimization flags
 ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
 ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
@@ -101,10 +112,16 @@ ENV_DB_POOL_MAX_SIZE = "HINDSIGHT_API_DB_POOL_MAX_SIZE"
 ENV_DB_COMMAND_TIMEOUT = "HINDSIGHT_API_DB_COMMAND_TIMEOUT"
 ENV_DB_ACQUIRE_TIMEOUT = "HINDSIGHT_API_DB_ACQUIRE_TIMEOUT"
 
-#
-
-
-
+# Worker configuration (distributed task processing)
+ENV_WORKER_ENABLED = "HINDSIGHT_API_WORKER_ENABLED"
+ENV_WORKER_ID = "HINDSIGHT_API_WORKER_ID"
+ENV_WORKER_POLL_INTERVAL_MS = "HINDSIGHT_API_WORKER_POLL_INTERVAL_MS"
+ENV_WORKER_MAX_RETRIES = "HINDSIGHT_API_WORKER_MAX_RETRIES"
+ENV_WORKER_BATCH_SIZE = "HINDSIGHT_API_WORKER_BATCH_SIZE"
+ENV_WORKER_HTTP_PORT = "HINDSIGHT_API_WORKER_HTTP_PORT"
+
+# Reflect agent settings
+ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
 
 # Default values
 DEFAULT_DATABASE_URL = "pg0"
@@ -138,6 +155,7 @@ DEFAULT_RERANKER_LITELLM_MODEL = "cohere/rerank-english-v3.0"
 DEFAULT_HOST = "0.0.0.0"
 DEFAULT_PORT = 8888
 DEFAULT_LOG_LEVEL = "info"
+DEFAULT_LOG_FORMAT = "text"  # Options: "text", "json"
 DEFAULT_WORKERS = 1
 DEFAULT_MCP_ENABLED = True
 DEFAULT_GRAPH_RETRIEVER = "link_expansion"  # Options: "link_expansion", "mpfp", "bfs"
@@ -145,19 +163,21 @@ DEFAULT_MPFP_TOP_K_NEIGHBORS = 20  # Fan-out limit per node in MPFP graph traver
 DEFAULT_RECALL_MAX_CONCURRENT = 32  # Max concurrent recall operations per worker
 DEFAULT_RECALL_CONNECTION_BUDGET = 4  # Max concurrent DB connections per recall operation
 DEFAULT_MCP_LOCAL_BANK_ID = "mcp"
-
-# Observation thresholds
-DEFAULT_OBSERVATION_MIN_FACTS = 5  # Min facts required to generate entity observations
-DEFAULT_OBSERVATION_TOP_ENTITIES = 5  # Max entities to process per retain batch
+DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY = 8  # Max concurrent mental model refreshes
 
 # Retain settings
 DEFAULT_RETAIN_MAX_COMPLETION_TOKENS = 64000  # Max tokens for fact extraction LLM call
 DEFAULT_RETAIN_CHUNK_SIZE = 3000  # Max chars per chunk for fact extraction
 DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS = True  # Extract causal links between facts
-DEFAULT_RETAIN_EXTRACTION_MODE = "concise"  # Extraction mode: "concise" or "verbose"
-RETAIN_EXTRACTION_MODES = ("concise", "verbose")  # Allowed extraction modes
+DEFAULT_RETAIN_EXTRACTION_MODE = "concise"  # Extraction mode: "concise", "verbose", or "custom"
+RETAIN_EXTRACTION_MODES = ("concise", "verbose", "custom")  # Allowed extraction modes
+DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS = None  # Custom extraction guidelines (only used when mode="custom")
 DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False  # Run observation generation async (after retain completes)
 
+# Observations defaults (consolidated knowledge from facts)
+DEFAULT_ENABLE_OBSERVATIONS = True  # Observations enabled by default
+DEFAULT_CONSOLIDATION_BATCH_SIZE = 50  # Memories to load per batch (internal memory optimization)
+
 # Database migrations
 DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
 
@@ -167,10 +187,16 @@ DEFAULT_DB_POOL_MAX_SIZE = 100
 DEFAULT_DB_COMMAND_TIMEOUT = 60  # seconds
 DEFAULT_DB_ACQUIRE_TIMEOUT = 30  # seconds
 
-#
-
-
-
+# Worker configuration (distributed task processing)
+DEFAULT_WORKER_ENABLED = True  # API runs worker by default (standalone mode)
+DEFAULT_WORKER_ID = None  # Will use hostname if not specified
+DEFAULT_WORKER_POLL_INTERVAL_MS = 500  # Poll database every 500ms
+DEFAULT_WORKER_MAX_RETRIES = 3  # Max retries before marking task failed
+DEFAULT_WORKER_BATCH_SIZE = 10  # Tasks to claim per poll cycle
+DEFAULT_WORKER_HTTP_PORT = 8889  # HTTP port for worker metrics/health
+
+# Reflect agent settings
+DEFAULT_REFLECT_MAX_ITERATIONS = 10  # Max tool call iterations before forcing response
 
 # Default MCP tool descriptions (can be customized via env vars)
 DEFAULT_MCP_RETAIN_DESCRIPTION = """Store important information to long-term memory.
@@ -196,6 +222,36 @@ Use this tool PROACTIVELY to:
 EMBEDDING_DIMENSION = DEFAULT_EMBEDDING_DIMENSION
 
 
+class JsonFormatter(logging.Formatter):
+    """JSON formatter for structured logging.
+
+    Outputs logs in JSON format with a 'severity' field that cloud logging
+    systems (GCP, AWS CloudWatch, etc.) can parse to correctly categorize log levels.
+    """
+
+    SEVERITY_MAP = {
+        logging.DEBUG: "DEBUG",
+        logging.INFO: "INFO",
+        logging.WARNING: "WARNING",
+        logging.ERROR: "ERROR",
+        logging.CRITICAL: "CRITICAL",
+    }
+
+    def format(self, record: logging.LogRecord) -> str:
+        log_entry = {
+            "severity": self.SEVERITY_MAP.get(record.levelno, "DEFAULT"),
+            "message": record.getMessage(),
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+            "logger": record.name,
+        }
+
+        # Add exception info if present
+        if record.exc_info:
+            log_entry["exception"] = self.formatException(record.exc_info)
+
+        return json.dumps(log_entry)
+
+
 def _validate_extraction_mode(mode: str) -> str:
     """Validate and normalize extraction mode."""
     mode_lower = mode.lower()
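As an aside, the JsonFormatter added in the hunk above produces one JSON object per record. A small illustrative check (the module path hindsight_api.config follows from the file header; the record values and printed output are examples, not taken from this diff):

# Illustrative only: what JsonFormatter emits for a single INFO record.
import logging

from hindsight_api.config import JsonFormatter  # class added in this hunk

record = logging.LogRecord(
    name="hindsight_api", level=logging.INFO, pathname=__file__, lineno=1,
    msg="worker started", args=(), exc_info=None,
)
print(JsonFormatter().format(record))
# e.g. {"severity": "INFO", "message": "worker started", "timestamp": "2024-...", "logger": "hindsight_api"}
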
@@ -234,6 +290,11 @@ class HindsightConfig:
     reflect_llm_model: str | None
     reflect_llm_base_url: str | None
 
+    consolidation_llm_provider: str | None
+    consolidation_llm_api_key: str | None
+    consolidation_llm_model: str | None
+    consolidation_llm_base_url: str | None
+
     # Embeddings
     embeddings_provider: str
     embeddings_local_model: str
@@ -254,6 +315,7 @@ class HindsightConfig:
     host: str
     port: int
     log_level: str
+    log_format: str
     mcp_enabled: bool
 
     # Recall
@@ -261,18 +323,20 @@ class HindsightConfig:
     mpfp_top_k_neighbors: int
     recall_max_concurrent: int
     recall_connection_budget: int
-
-    # Observation thresholds
-    observation_min_facts: int
-    observation_top_entities: int
+    mental_model_refresh_concurrency: int
 
     # Retain settings
     retain_max_completion_tokens: int
     retain_chunk_size: int
     retain_extract_causal_links: bool
     retain_extraction_mode: str
+    retain_custom_instructions: str | None
     retain_observations_async: bool
 
+    # Observations settings (consolidated knowledge from facts)
+    enable_observations: bool
+    consolidation_batch_size: int
+
     # Optimization flags
     skip_llm_verification: bool
     lazy_reranker: bool
@@ -286,10 +350,16 @@ class HindsightConfig:
     db_command_timeout: int
     db_acquire_timeout: int
 
-    #
-
-
-
+    # Worker configuration (distributed task processing)
+    worker_enabled: bool
+    worker_id: str | None
+    worker_poll_interval_ms: int
+    worker_max_retries: int
+    worker_batch_size: int
+    worker_http_port: int
+
+    # Reflect agent settings
+    reflect_max_iterations: int
 
     @classmethod
     def from_env(cls) -> "HindsightConfig":
@@ -313,6 +383,10 @@ class HindsightConfig:
             reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
             reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
             reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
+            consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
+            consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
+            consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
+            consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
             # Embeddings
             embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
             embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
@@ -333,6 +407,7 @@ class HindsightConfig:
             host=os.getenv(ENV_HOST, DEFAULT_HOST),
             port=int(os.getenv(ENV_PORT, DEFAULT_PORT)),
             log_level=os.getenv(ENV_LOG_LEVEL, DEFAULT_LOG_LEVEL),
+            log_format=os.getenv(ENV_LOG_FORMAT, DEFAULT_LOG_FORMAT).lower(),
             mcp_enabled=os.getenv(ENV_MCP_ENABLED, str(DEFAULT_MCP_ENABLED)).lower() == "true",
             # Recall
             graph_retriever=os.getenv(ENV_GRAPH_RETRIEVER, DEFAULT_GRAPH_RETRIEVER),
@@ -341,14 +416,12 @@ class HindsightConfig:
             recall_connection_budget=int(
                 os.getenv(ENV_RECALL_CONNECTION_BUDGET, str(DEFAULT_RECALL_CONNECTION_BUDGET))
             ),
+            mental_model_refresh_concurrency=int(
+                os.getenv(ENV_MENTAL_MODEL_REFRESH_CONCURRENCY, str(DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY))
+            ),
             # Optimization flags
             skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
             lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
-            # Observation thresholds
-            observation_min_facts=int(os.getenv(ENV_OBSERVATION_MIN_FACTS, str(DEFAULT_OBSERVATION_MIN_FACTS))),
-            observation_top_entities=int(
-                os.getenv(ENV_OBSERVATION_TOP_ENTITIES, str(DEFAULT_OBSERVATION_TOP_ENTITIES))
-            ),
             # Retain settings
             retain_max_completion_tokens=int(
                 os.getenv(ENV_RETAIN_MAX_COMPLETION_TOKENS, str(DEFAULT_RETAIN_MAX_COMPLETION_TOKENS))
@@ -361,10 +434,16 @@ class HindsightConfig:
             retain_extraction_mode=_validate_extraction_mode(
                 os.getenv(ENV_RETAIN_EXTRACTION_MODE, DEFAULT_RETAIN_EXTRACTION_MODE)
             ),
+            retain_custom_instructions=os.getenv(ENV_RETAIN_CUSTOM_INSTRUCTIONS) or DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS,
             retain_observations_async=os.getenv(
                 ENV_RETAIN_OBSERVATIONS_ASYNC, str(DEFAULT_RETAIN_OBSERVATIONS_ASYNC)
             ).lower()
             == "true",
+            # Observations settings (consolidated knowledge from facts)
+            enable_observations=os.getenv(ENV_ENABLE_OBSERVATIONS, str(DEFAULT_ENABLE_OBSERVATIONS)).lower() == "true",
+            consolidation_batch_size=int(
+                os.getenv(ENV_CONSOLIDATION_BATCH_SIZE, str(DEFAULT_CONSOLIDATION_BATCH_SIZE))
+            ),
             # Database migrations
             run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
             # Database connection pool
@@ -372,14 +451,15 @@ class HindsightConfig:
             db_pool_max_size=int(os.getenv(ENV_DB_POOL_MAX_SIZE, str(DEFAULT_DB_POOL_MAX_SIZE))),
             db_command_timeout=int(os.getenv(ENV_DB_COMMAND_TIMEOUT, str(DEFAULT_DB_COMMAND_TIMEOUT))),
             db_acquire_timeout=int(os.getenv(ENV_DB_ACQUIRE_TIMEOUT, str(DEFAULT_DB_ACQUIRE_TIMEOUT))),
-            #
-
-
-
-            ),
-
-
-
+            # Worker configuration
+            worker_enabled=os.getenv(ENV_WORKER_ENABLED, str(DEFAULT_WORKER_ENABLED)).lower() == "true",
+            worker_id=os.getenv(ENV_WORKER_ID) or DEFAULT_WORKER_ID,
+            worker_poll_interval_ms=int(os.getenv(ENV_WORKER_POLL_INTERVAL_MS, str(DEFAULT_WORKER_POLL_INTERVAL_MS))),
+            worker_max_retries=int(os.getenv(ENV_WORKER_MAX_RETRIES, str(DEFAULT_WORKER_MAX_RETRIES))),
+            worker_batch_size=int(os.getenv(ENV_WORKER_BATCH_SIZE, str(DEFAULT_WORKER_BATCH_SIZE))),
+            worker_http_port=int(os.getenv(ENV_WORKER_HTTP_PORT, str(DEFAULT_WORKER_HTTP_PORT))),
+            # Reflect agent settings
+            reflect_max_iterations=int(os.getenv(ENV_REFLECT_MAX_ITERATIONS, str(DEFAULT_REFLECT_MAX_ITERATIONS))),
         )
 
     def get_llm_base_url(self) -> str:
@@ -410,12 +490,28 @@ class HindsightConfig:
         return log_level_map.get(self.log_level.lower(), logging.INFO)
 
     def configure_logging(self) -> None:
-        """Configure Python logging based on the log level.
-
-
-
-
-        )
+        """Configure Python logging based on the log level and format.
+
+        When log_format is "json", outputs structured JSON logs with a severity
+        field that GCP Cloud Logging can parse for proper log level categorization.
+        """
+        root_logger = logging.getLogger()
+        root_logger.setLevel(self.get_python_log_level())
+
+        # Remove existing handlers
+        for handler in root_logger.handlers[:]:
+            root_logger.removeHandler(handler)
+
+        # Create handler writing to stdout (GCP treats stderr as ERROR)
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setLevel(self.get_python_log_level())
+
+        if self.log_format == "json":
+            handler.setFormatter(JsonFormatter())
+        else:
+            handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s"))
+
+        root_logger.addHandler(handler)
 
     def log_config(self) -> None:
         """Log the current configuration (without sensitive values)."""
@@ -429,6 +525,10 @@ class HindsightConfig:
         reflect_provider = self.reflect_llm_provider or self.llm_provider
         reflect_model = self.reflect_llm_model or self.llm_model
         logger.info(f"LLM (reflect): provider={reflect_provider}, model={reflect_model}")
+        if self.consolidation_llm_provider or self.consolidation_llm_model:
+            consolidation_provider = self.consolidation_llm_provider or self.llm_provider
+            consolidation_model = self.consolidation_llm_model or self.llm_model
+            logger.info(f"LLM (consolidation): provider={consolidation_provider}, model={consolidation_model}")
         logger.info(f"Embeddings: provider={self.embeddings_provider}")
         logger.info(f"Reranker: provider={self.reranker_provider}")
         logger.info(f"Graph retriever: {self.graph_retriever}")
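Taken together, the 0.4.0 additions to this file are all environment-driven. A minimal sketch of wiring the new settings is below; the variable names, from_env, configure_logging, and log_config come from this diff, while the concrete values (model name, poll interval, split worker deployment) are illustrative assumptions.

# Illustrative configuration of the settings introduced in 0.4.0.
import os

from hindsight_api.config import HindsightConfig

os.environ["HINDSIGHT_API_LOG_FORMAT"] = "json"           # structured logs for GCP/CloudWatch
os.environ["HINDSIGHT_API_WORKER_ENABLED"] = "false"      # e.g. run task processing in a separate worker process
os.environ["HINDSIGHT_API_WORKER_POLL_INTERVAL_MS"] = "250"
os.environ["HINDSIGHT_API_ENABLE_OBSERVATIONS"] = "true"
os.environ["HINDSIGHT_API_CONSOLIDATION_LLM_MODEL"] = "gpt-4o-mini"  # per log_config, unset values fall back to the main LLM settings

config = HindsightConfig.from_env()
config.configure_logging()
config.log_config()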