hindsight-api 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +1 -1
- hindsight_api/admin/cli.py +59 -0
- hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
- hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
- hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
- hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
- hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
- hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
- hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
- hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
- hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
- hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
- hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
- hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
- hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
- hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
- hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
- hindsight_api/api/http.py +1120 -93
- hindsight_api/api/mcp.py +11 -191
- hindsight_api/config.py +174 -46
- hindsight_api/engine/consolidation/__init__.py +5 -0
- hindsight_api/engine/consolidation/consolidator.py +926 -0
- hindsight_api/engine/consolidation/prompts.py +77 -0
- hindsight_api/engine/cross_encoder.py +153 -22
- hindsight_api/engine/directives/__init__.py +5 -0
- hindsight_api/engine/directives/models.py +37 -0
- hindsight_api/engine/embeddings.py +136 -13
- hindsight_api/engine/interface.py +32 -13
- hindsight_api/engine/llm_wrapper.py +505 -43
- hindsight_api/engine/memory_engine.py +2101 -1094
- hindsight_api/engine/mental_models/__init__.py +14 -0
- hindsight_api/engine/mental_models/models.py +53 -0
- hindsight_api/engine/reflect/__init__.py +18 -0
- hindsight_api/engine/reflect/agent.py +933 -0
- hindsight_api/engine/reflect/models.py +109 -0
- hindsight_api/engine/reflect/observations.py +186 -0
- hindsight_api/engine/reflect/prompts.py +483 -0
- hindsight_api/engine/reflect/tools.py +437 -0
- hindsight_api/engine/reflect/tools_schema.py +250 -0
- hindsight_api/engine/response_models.py +130 -4
- hindsight_api/engine/retain/bank_utils.py +79 -201
- hindsight_api/engine/retain/fact_extraction.py +81 -48
- hindsight_api/engine/retain/fact_storage.py +5 -8
- hindsight_api/engine/retain/link_utils.py +5 -8
- hindsight_api/engine/retain/orchestrator.py +1 -55
- hindsight_api/engine/retain/types.py +2 -2
- hindsight_api/engine/search/graph_retrieval.py +2 -2
- hindsight_api/engine/search/link_expansion_retrieval.py +164 -29
- hindsight_api/engine/search/mpfp_retrieval.py +1 -1
- hindsight_api/engine/search/retrieval.py +14 -14
- hindsight_api/engine/search/think_utils.py +41 -140
- hindsight_api/engine/search/trace.py +0 -1
- hindsight_api/engine/search/tracer.py +2 -5
- hindsight_api/engine/search/types.py +0 -3
- hindsight_api/engine/task_backend.py +112 -196
- hindsight_api/engine/utils.py +0 -151
- hindsight_api/extensions/__init__.py +10 -1
- hindsight_api/extensions/builtin/tenant.py +11 -4
- hindsight_api/extensions/operation_validator.py +81 -4
- hindsight_api/extensions/tenant.py +26 -0
- hindsight_api/main.py +28 -5
- hindsight_api/mcp_local.py +12 -53
- hindsight_api/mcp_tools.py +494 -0
- hindsight_api/models.py +0 -2
- hindsight_api/worker/__init__.py +11 -0
- hindsight_api/worker/main.py +296 -0
- hindsight_api/worker/poller.py +486 -0
- {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.1.dist-info}/METADATA +12 -6
- hindsight_api-0.4.1.dist-info/RECORD +112 -0
- {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.1.dist-info}/entry_points.txt +1 -0
- hindsight_api/engine/retain/observation_regeneration.py +0 -254
- hindsight_api/engine/search/observation_utils.py +0 -125
- hindsight_api/engine/search/scoring.py +0 -159
- hindsight_api-0.3.0.dist-info/RECORD +0 -82
- {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.1.dist-info}/WHEEL +0 -0
hindsight_api/api/mcp.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Hindsight MCP Server implementation using FastMCP."""
|
|
1
|
+
"""Hindsight MCP Server implementation using FastMCP (HTTP transport)."""
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import logging
|
|
@@ -8,8 +8,7 @@ from contextvars import ContextVar
|
|
|
8
8
|
from fastmcp import FastMCP
|
|
9
9
|
|
|
10
10
|
from hindsight_api import MemoryEngine
|
|
11
|
-
from hindsight_api.
|
|
12
|
-
from hindsight_api.models import RequestContext
|
|
11
|
+
from hindsight_api.mcp_tools import MCPToolsConfig, register_mcp_tools
|
|
13
12
|
|
|
14
13
|
# Configure logging from HINDSIGHT_API_LOG_LEVEL environment variable
|
|
15
14
|
_log_level_str = os.environ.get("HINDSIGHT_API_LOG_LEVEL", "info").lower()
|
|
@@ -52,194 +51,15 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
|
|
|
52
51
|
# Use stateless_http=True for Claude Code compatibility
|
|
53
52
|
mcp = FastMCP("hindsight-mcp-server", stateless_http=True)
|
|
54
53
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
)
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
Use this tool PROACTIVELY whenever the user shares:
|
|
66
|
-
- Personal facts, preferences, or interests
|
|
67
|
-
- Important events or milestones
|
|
68
|
-
- User history, experiences, or background
|
|
69
|
-
- Decisions, opinions, or stated preferences
|
|
70
|
-
- Goals, plans, or future intentions
|
|
71
|
-
- Relationships or people mentioned
|
|
72
|
-
- Work context, projects, or responsibilities
|
|
73
|
-
|
|
74
|
-
Args:
|
|
75
|
-
content: The fact/memory to store (be specific and include relevant details)
|
|
76
|
-
context: Category for the memory (e.g., 'preferences', 'work', 'hobbies', 'family'). Default: 'general'
|
|
77
|
-
async_processing: If True, queue for background processing and return immediately. If False, wait for completion. Default: True
|
|
78
|
-
bank_id: Optional bank to store in (defaults to session bank). Use for cross-bank operations.
|
|
79
|
-
"""
|
|
80
|
-
try:
|
|
81
|
-
target_bank = bank_id or get_current_bank_id()
|
|
82
|
-
if target_bank is None:
|
|
83
|
-
return "Error: No bank_id configured"
|
|
84
|
-
contents = [{"content": content, "context": context}]
|
|
85
|
-
if async_processing:
|
|
86
|
-
# Queue for background processing and return immediately
|
|
87
|
-
result = await memory.submit_async_retain(
|
|
88
|
-
bank_id=target_bank, contents=contents, request_context=RequestContext()
|
|
89
|
-
)
|
|
90
|
-
return f"Memory queued for background processing (operation_id: {result.get('operation_id', 'N/A')})"
|
|
91
|
-
else:
|
|
92
|
-
# Wait for completion
|
|
93
|
-
await memory.retain_batch_async(
|
|
94
|
-
bank_id=target_bank,
|
|
95
|
-
contents=contents,
|
|
96
|
-
request_context=RequestContext(),
|
|
97
|
-
)
|
|
98
|
-
return f"Memory stored successfully in bank '{target_bank}'"
|
|
99
|
-
except Exception as e:
|
|
100
|
-
logger.error(f"Error storing memory: {e}", exc_info=True)
|
|
101
|
-
return f"Error: {str(e)}"
|
|
102
|
-
|
|
103
|
-
@mcp.tool()
|
|
104
|
-
async def recall(query: str, max_tokens: int = 4096, bank_id: str | None = None) -> str:
|
|
105
|
-
"""
|
|
106
|
-
Search memories to provide personalized, context-aware responses.
|
|
107
|
-
|
|
108
|
-
Use this tool PROACTIVELY to:
|
|
109
|
-
- Check user's preferences before making suggestions
|
|
110
|
-
- Recall user's history to provide continuity
|
|
111
|
-
- Remember user's goals and context
|
|
112
|
-
- Personalize responses based on past interactions
|
|
113
|
-
|
|
114
|
-
Args:
|
|
115
|
-
query: Natural language search query (e.g., "user's food preferences", "what projects is user working on")
|
|
116
|
-
max_tokens: Maximum tokens in the response (default: 4096)
|
|
117
|
-
bank_id: Optional bank to search in (defaults to session bank). Use for cross-bank operations.
|
|
118
|
-
"""
|
|
119
|
-
try:
|
|
120
|
-
target_bank = bank_id or get_current_bank_id()
|
|
121
|
-
if target_bank is None:
|
|
122
|
-
return "Error: No bank_id configured"
|
|
123
|
-
from hindsight_api.engine.memory_engine import Budget
|
|
124
|
-
|
|
125
|
-
recall_result = await memory.recall_async(
|
|
126
|
-
bank_id=target_bank,
|
|
127
|
-
query=query,
|
|
128
|
-
fact_type=list(VALID_RECALL_FACT_TYPES),
|
|
129
|
-
budget=Budget.HIGH,
|
|
130
|
-
max_tokens=max_tokens,
|
|
131
|
-
request_context=RequestContext(),
|
|
132
|
-
)
|
|
133
|
-
|
|
134
|
-
# Use model's JSON serialization
|
|
135
|
-
return recall_result.model_dump_json(indent=2)
|
|
136
|
-
except Exception as e:
|
|
137
|
-
logger.error(f"Error searching: {e}", exc_info=True)
|
|
138
|
-
return f'{{"error": "{e}", "results": []}}'
|
|
139
|
-
|
|
140
|
-
@mcp.tool()
|
|
141
|
-
async def reflect(query: str, context: str | None = None, budget: str = "low", bank_id: str | None = None) -> str:
|
|
142
|
-
"""
|
|
143
|
-
Generate thoughtful analysis by synthesizing stored memories with the bank's personality.
|
|
144
|
-
|
|
145
|
-
WHEN TO USE THIS TOOL:
|
|
146
|
-
Use reflect when you need reasoned analysis, not just fact retrieval. This tool
|
|
147
|
-
thinks through the question using everything the bank knows and its personality traits.
|
|
148
|
-
|
|
149
|
-
EXAMPLES OF GOOD QUERIES:
|
|
150
|
-
- "What patterns have emerged in how I approach debugging?"
|
|
151
|
-
- "Based on my past decisions, what architectural style do I prefer?"
|
|
152
|
-
- "What might be the best approach for this problem given what you know about me?"
|
|
153
|
-
- "How should I prioritize these tasks based on my goals?"
|
|
154
|
-
|
|
155
|
-
HOW IT DIFFERS FROM RECALL:
|
|
156
|
-
- recall: Returns raw facts matching your search (fast lookup)
|
|
157
|
-
- reflect: Reasons across memories to form a synthesized answer (deeper analysis)
|
|
158
|
-
|
|
159
|
-
Use recall for "what did I say about X?" and reflect for "what should I do about X?"
|
|
160
|
-
|
|
161
|
-
Args:
|
|
162
|
-
query: The question or topic to reflect on
|
|
163
|
-
context: Optional context about why this reflection is needed
|
|
164
|
-
budget: Search budget - 'low', 'mid', or 'high' (default: 'low')
|
|
165
|
-
bank_id: Optional bank to reflect in (defaults to session bank). Use for cross-bank operations.
|
|
166
|
-
"""
|
|
167
|
-
try:
|
|
168
|
-
target_bank = bank_id or get_current_bank_id()
|
|
169
|
-
if target_bank is None:
|
|
170
|
-
return "Error: No bank_id configured"
|
|
171
|
-
from hindsight_api.engine.memory_engine import Budget
|
|
172
|
-
|
|
173
|
-
# Map string budget to enum
|
|
174
|
-
budget_map = {"low": Budget.LOW, "mid": Budget.MID, "high": Budget.HIGH}
|
|
175
|
-
budget_enum = budget_map.get(budget.lower(), Budget.LOW)
|
|
176
|
-
|
|
177
|
-
reflect_result = await memory.reflect_async(
|
|
178
|
-
bank_id=target_bank,
|
|
179
|
-
query=query,
|
|
180
|
-
budget=budget_enum,
|
|
181
|
-
context=context,
|
|
182
|
-
request_context=RequestContext(),
|
|
183
|
-
)
|
|
184
|
-
|
|
185
|
-
return reflect_result.model_dump_json(indent=2)
|
|
186
|
-
except Exception as e:
|
|
187
|
-
logger.error(f"Error reflecting: {e}", exc_info=True)
|
|
188
|
-
return f'{{"error": "{e}", "text": ""}}'
|
|
189
|
-
|
|
190
|
-
@mcp.tool()
|
|
191
|
-
async def list_banks() -> str:
|
|
192
|
-
"""
|
|
193
|
-
List all available memory banks.
|
|
194
|
-
|
|
195
|
-
Use this tool to discover what memory banks exist in the system.
|
|
196
|
-
Each bank is an isolated memory store (like a separate "brain").
|
|
197
|
-
|
|
198
|
-
Returns:
|
|
199
|
-
JSON list of banks with their IDs, names, dispositions, and backgrounds.
|
|
200
|
-
"""
|
|
201
|
-
try:
|
|
202
|
-
banks = await memory.list_banks(request_context=RequestContext())
|
|
203
|
-
return json.dumps({"banks": banks}, indent=2)
|
|
204
|
-
except Exception as e:
|
|
205
|
-
logger.error(f"Error listing banks: {e}", exc_info=True)
|
|
206
|
-
return f'{{"error": "{e}", "banks": []}}'
|
|
207
|
-
|
|
208
|
-
@mcp.tool()
|
|
209
|
-
async def create_bank(bank_id: str, name: str | None = None, background: str | None = None) -> str:
|
|
210
|
-
"""
|
|
211
|
-
Create a new memory bank or get an existing one.
|
|
212
|
-
|
|
213
|
-
Memory banks are isolated stores - each one is like a separate "brain" for a user/agent.
|
|
214
|
-
Banks are auto-created with default settings if they don't exist.
|
|
215
|
-
|
|
216
|
-
Args:
|
|
217
|
-
bank_id: Unique identifier for the bank (e.g., 'user-123', 'agent-alpha')
|
|
218
|
-
name: Optional human-friendly name for the bank
|
|
219
|
-
background: Optional background context about the bank's owner/purpose
|
|
220
|
-
"""
|
|
221
|
-
try:
|
|
222
|
-
# get_bank_profile auto-creates bank if it doesn't exist
|
|
223
|
-
profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
|
|
224
|
-
|
|
225
|
-
# Update name/background if provided
|
|
226
|
-
if name is not None or background is not None:
|
|
227
|
-
await memory.update_bank(
|
|
228
|
-
bank_id,
|
|
229
|
-
name=name,
|
|
230
|
-
background=background,
|
|
231
|
-
request_context=RequestContext(),
|
|
232
|
-
)
|
|
233
|
-
# Fetch updated profile
|
|
234
|
-
profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
|
|
235
|
-
|
|
236
|
-
# Serialize disposition if it's a Pydantic model
|
|
237
|
-
if "disposition" in profile and hasattr(profile["disposition"], "model_dump"):
|
|
238
|
-
profile["disposition"] = profile["disposition"].model_dump()
|
|
239
|
-
return json.dumps(profile, indent=2)
|
|
240
|
-
except Exception as e:
|
|
241
|
-
logger.error(f"Error creating bank: {e}", exc_info=True)
|
|
242
|
-
return f'{{"error": "{e}"}}'
|
|
54
|
+
# Configure and register tools using shared module
|
|
55
|
+
config = MCPToolsConfig(
|
|
56
|
+
bank_id_resolver=get_current_bank_id,
|
|
57
|
+
include_bank_id_param=True, # HTTP MCP supports multi-bank via parameter
|
|
58
|
+
tools=None, # All tools
|
|
59
|
+
retain_fire_and_forget=False, # HTTP MCP supports sync/async modes
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
register_mcp_tools(mcp, memory, config)
|
|
243
63
|
|
|
244
64
|
return mcp
|
|
245
65
|
|
hindsight_api/config.py
CHANGED
|
@@ -4,9 +4,12 @@ Centralized configuration for Hindsight API.
|
|
|
4
4
|
All environment variables and their defaults are defined here.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
import json
|
|
7
8
|
import logging
|
|
8
9
|
import os
|
|
10
|
+
import sys
|
|
9
11
|
from dataclasses import dataclass
|
|
12
|
+
from datetime import datetime, timezone
|
|
10
13
|
|
|
11
14
|
from dotenv import find_dotenv, load_dotenv
|
|
12
15
|
|
|
@@ -17,6 +20,7 @@ logger = logging.getLogger(__name__)
|
|
|
17
20
|
|
|
18
21
|
# Environment variable names
|
|
19
22
|
ENV_DATABASE_URL = "HINDSIGHT_API_DATABASE_URL"
|
|
23
|
+
ENV_DATABASE_SCHEMA = "HINDSIGHT_API_DATABASE_SCHEMA"
|
|
20
24
|
ENV_LLM_PROVIDER = "HINDSIGHT_API_LLM_PROVIDER"
|
|
21
25
|
ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
|
|
22
26
|
ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
|
|
@@ -36,8 +40,14 @@ ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
|
|
|
36
40
|
ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
|
|
37
41
|
ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
|
|
38
42
|
|
|
43
|
+
ENV_CONSOLIDATION_LLM_PROVIDER = "HINDSIGHT_API_CONSOLIDATION_LLM_PROVIDER"
|
|
44
|
+
ENV_CONSOLIDATION_LLM_API_KEY = "HINDSIGHT_API_CONSOLIDATION_LLM_API_KEY"
|
|
45
|
+
ENV_CONSOLIDATION_LLM_MODEL = "HINDSIGHT_API_CONSOLIDATION_LLM_MODEL"
|
|
46
|
+
ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
|
|
47
|
+
|
|
39
48
|
ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
|
|
40
49
|
ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
|
|
50
|
+
ENV_EMBEDDINGS_LOCAL_FORCE_CPU = "HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"
|
|
41
51
|
ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
|
|
42
52
|
ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
|
|
43
53
|
ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
|
|
@@ -57,6 +67,7 @@ ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"
|
|
|
57
67
|
|
|
58
68
|
ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
|
|
59
69
|
ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
|
|
70
|
+
ENV_RERANKER_LOCAL_FORCE_CPU = "HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"
|
|
60
71
|
ENV_RERANKER_LOCAL_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT"
|
|
61
72
|
ENV_RERANKER_TEI_URL = "HINDSIGHT_API_RERANKER_TEI_URL"
|
|
62
73
|
ENV_RERANKER_TEI_BATCH_SIZE = "HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE"
|
|
@@ -68,6 +79,7 @@ ENV_RERANKER_FLASHRANK_CACHE_DIR = "HINDSIGHT_API_RERANKER_FLASHRANK_CACHE_DIR"
|
|
|
68
79
|
ENV_HOST = "HINDSIGHT_API_HOST"
|
|
69
80
|
ENV_PORT = "HINDSIGHT_API_PORT"
|
|
70
81
|
ENV_LOG_LEVEL = "HINDSIGHT_API_LOG_LEVEL"
|
|
82
|
+
ENV_LOG_FORMAT = "HINDSIGHT_API_LOG_FORMAT"
|
|
71
83
|
ENV_WORKERS = "HINDSIGHT_API_WORKERS"
|
|
72
84
|
ENV_MCP_ENABLED = "HINDSIGHT_API_MCP_ENABLED"
|
|
73
85
|
ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
|
|
@@ -76,18 +88,21 @@ ENV_RECALL_MAX_CONCURRENT = "HINDSIGHT_API_RECALL_MAX_CONCURRENT"
|
|
|
76
88
|
ENV_RECALL_CONNECTION_BUDGET = "HINDSIGHT_API_RECALL_CONNECTION_BUDGET"
|
|
77
89
|
ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
|
|
78
90
|
ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
|
|
79
|
-
|
|
80
|
-
# Observation thresholds
|
|
81
|
-
ENV_OBSERVATION_MIN_FACTS = "HINDSIGHT_API_OBSERVATION_MIN_FACTS"
|
|
82
|
-
ENV_OBSERVATION_TOP_ENTITIES = "HINDSIGHT_API_OBSERVATION_TOP_ENTITIES"
|
|
91
|
+
ENV_MENTAL_MODEL_REFRESH_CONCURRENCY = "HINDSIGHT_API_MENTAL_MODEL_REFRESH_CONCURRENCY"
|
|
83
92
|
|
|
84
93
|
# Retain settings
|
|
85
94
|
ENV_RETAIN_MAX_COMPLETION_TOKENS = "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"
|
|
86
95
|
ENV_RETAIN_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_CHUNK_SIZE"
|
|
87
96
|
ENV_RETAIN_EXTRACT_CAUSAL_LINKS = "HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS"
|
|
88
97
|
ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE"
|
|
98
|
+
ENV_RETAIN_CUSTOM_INSTRUCTIONS = "HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS"
|
|
89
99
|
ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
|
|
90
100
|
|
|
101
|
+
# Observations settings (consolidated knowledge from facts)
|
|
102
|
+
ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
|
|
103
|
+
ENV_CONSOLIDATION_BATCH_SIZE = "HINDSIGHT_API_CONSOLIDATION_BATCH_SIZE"
|
|
104
|
+
ENV_CONSOLIDATION_MAX_TOKENS = "HINDSIGHT_API_CONSOLIDATION_MAX_TOKENS"
|
|
105
|
+
|
|
91
106
|
# Optimization flags
|
|
92
107
|
ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
|
|
93
108
|
ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
|
|
@@ -101,13 +116,20 @@ ENV_DB_POOL_MAX_SIZE = "HINDSIGHT_API_DB_POOL_MAX_SIZE"
|
|
|
101
116
|
ENV_DB_COMMAND_TIMEOUT = "HINDSIGHT_API_DB_COMMAND_TIMEOUT"
|
|
102
117
|
ENV_DB_ACQUIRE_TIMEOUT = "HINDSIGHT_API_DB_ACQUIRE_TIMEOUT"
|
|
103
118
|
|
|
104
|
-
#
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
119
|
+
# Worker configuration (distributed task processing)
|
|
120
|
+
ENV_WORKER_ENABLED = "HINDSIGHT_API_WORKER_ENABLED"
|
|
121
|
+
ENV_WORKER_ID = "HINDSIGHT_API_WORKER_ID"
|
|
122
|
+
ENV_WORKER_POLL_INTERVAL_MS = "HINDSIGHT_API_WORKER_POLL_INTERVAL_MS"
|
|
123
|
+
ENV_WORKER_MAX_RETRIES = "HINDSIGHT_API_WORKER_MAX_RETRIES"
|
|
124
|
+
ENV_WORKER_BATCH_SIZE = "HINDSIGHT_API_WORKER_BATCH_SIZE"
|
|
125
|
+
ENV_WORKER_HTTP_PORT = "HINDSIGHT_API_WORKER_HTTP_PORT"
|
|
126
|
+
|
|
127
|
+
# Reflect agent settings
|
|
128
|
+
ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
|
|
108
129
|
|
|
109
130
|
# Default values
|
|
110
131
|
DEFAULT_DATABASE_URL = "pg0"
|
|
132
|
+
DEFAULT_DATABASE_SCHEMA = "public"
|
|
111
133
|
DEFAULT_LLM_PROVIDER = "openai"
|
|
112
134
|
DEFAULT_LLM_MODEL = "gpt-5-mini"
|
|
113
135
|
DEFAULT_LLM_MAX_CONCURRENT = 32
|
|
@@ -115,11 +137,13 @@ DEFAULT_LLM_TIMEOUT = 120.0 # seconds
|
|
|
115
137
|
|
|
116
138
|
DEFAULT_EMBEDDINGS_PROVIDER = "local"
|
|
117
139
|
DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
|
|
140
|
+
DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU = False # Force CPU mode for local embeddings (avoids MPS/XPC issues on macOS)
|
|
118
141
|
DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
|
|
119
142
|
DEFAULT_EMBEDDING_DIMENSION = 384
|
|
120
143
|
|
|
121
144
|
DEFAULT_RERANKER_PROVIDER = "local"
|
|
122
145
|
DEFAULT_RERANKER_LOCAL_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
|
146
|
+
DEFAULT_RERANKER_LOCAL_FORCE_CPU = False # Force CPU mode for local reranker (avoids MPS/XPC issues on macOS)
|
|
123
147
|
DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT = 4 # Limit concurrent CPU-bound reranking to prevent thrashing
|
|
124
148
|
DEFAULT_RERANKER_TEI_BATCH_SIZE = 128
|
|
125
149
|
DEFAULT_RERANKER_TEI_MAX_CONCURRENT = 8
|
|
@@ -138,6 +162,7 @@ DEFAULT_RERANKER_LITELLM_MODEL = "cohere/rerank-english-v3.0"
|
|
|
138
162
|
DEFAULT_HOST = "0.0.0.0"
|
|
139
163
|
DEFAULT_PORT = 8888
|
|
140
164
|
DEFAULT_LOG_LEVEL = "info"
|
|
165
|
+
DEFAULT_LOG_FORMAT = "text" # Options: "text", "json"
|
|
141
166
|
DEFAULT_WORKERS = 1
|
|
142
167
|
DEFAULT_MCP_ENABLED = True
|
|
143
168
|
DEFAULT_GRAPH_RETRIEVER = "link_expansion" # Options: "link_expansion", "mpfp", "bfs"
|
|
@@ -145,19 +170,22 @@ DEFAULT_MPFP_TOP_K_NEIGHBORS = 20 # Fan-out limit per node in MPFP graph traver
|
|
|
145
170
|
DEFAULT_RECALL_MAX_CONCURRENT = 32 # Max concurrent recall operations per worker
|
|
146
171
|
DEFAULT_RECALL_CONNECTION_BUDGET = 4 # Max concurrent DB connections per recall operation
|
|
147
172
|
DEFAULT_MCP_LOCAL_BANK_ID = "mcp"
|
|
148
|
-
|
|
149
|
-
# Observation thresholds
|
|
150
|
-
DEFAULT_OBSERVATION_MIN_FACTS = 5 # Min facts required to generate entity observations
|
|
151
|
-
DEFAULT_OBSERVATION_TOP_ENTITIES = 5 # Max entities to process per retain batch
|
|
173
|
+
DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY = 8 # Max concurrent mental model refreshes
|
|
152
174
|
|
|
153
175
|
# Retain settings
|
|
154
176
|
DEFAULT_RETAIN_MAX_COMPLETION_TOKENS = 64000 # Max tokens for fact extraction LLM call
|
|
155
177
|
DEFAULT_RETAIN_CHUNK_SIZE = 3000 # Max chars per chunk for fact extraction
|
|
156
178
|
DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS = True # Extract causal links between facts
|
|
157
|
-
DEFAULT_RETAIN_EXTRACTION_MODE = "concise" # Extraction mode: "concise" or "verbose"
|
|
158
|
-
RETAIN_EXTRACTION_MODES = ("concise", "verbose") # Allowed extraction modes
|
|
179
|
+
DEFAULT_RETAIN_EXTRACTION_MODE = "concise" # Extraction mode: "concise", "verbose", or "custom"
|
|
180
|
+
RETAIN_EXTRACTION_MODES = ("concise", "verbose", "custom") # Allowed extraction modes
|
|
181
|
+
DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS = None # Custom extraction guidelines (only used when mode="custom")
|
|
159
182
|
DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False # Run observation generation async (after retain completes)
|
|
160
183
|
|
|
184
|
+
# Observations defaults (consolidated knowledge from facts)
|
|
185
|
+
DEFAULT_ENABLE_OBSERVATIONS = True # Observations enabled by default
|
|
186
|
+
DEFAULT_CONSOLIDATION_BATCH_SIZE = 50 # Memories to load per batch (internal memory optimization)
|
|
187
|
+
DEFAULT_CONSOLIDATION_MAX_TOKENS = 1024 # Max tokens for recall when finding related observations
|
|
188
|
+
|
|
161
189
|
# Database migrations
|
|
162
190
|
DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
|
|
163
191
|
|
|
@@ -167,10 +195,16 @@ DEFAULT_DB_POOL_MAX_SIZE = 100
|
|
|
167
195
|
DEFAULT_DB_COMMAND_TIMEOUT = 60 # seconds
|
|
168
196
|
DEFAULT_DB_ACQUIRE_TIMEOUT = 30 # seconds
|
|
169
197
|
|
|
170
|
-
#
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
198
|
+
# Worker configuration (distributed task processing)
|
|
199
|
+
DEFAULT_WORKER_ENABLED = True # API runs worker by default (standalone mode)
|
|
200
|
+
DEFAULT_WORKER_ID = None # Will use hostname if not specified
|
|
201
|
+
DEFAULT_WORKER_POLL_INTERVAL_MS = 500 # Poll database every 500ms
|
|
202
|
+
DEFAULT_WORKER_MAX_RETRIES = 3 # Max retries before marking task failed
|
|
203
|
+
DEFAULT_WORKER_BATCH_SIZE = 10 # Tasks to claim per poll cycle
|
|
204
|
+
DEFAULT_WORKER_HTTP_PORT = 8889 # HTTP port for worker metrics/health
|
|
205
|
+
|
|
206
|
+
# Reflect agent settings
|
|
207
|
+
DEFAULT_REFLECT_MAX_ITERATIONS = 10 # Max tool call iterations before forcing response
|
|
174
208
|
|
|
175
209
|
# Default MCP tool descriptions (can be customized via env vars)
|
|
176
210
|
DEFAULT_MCP_RETAIN_DESCRIPTION = """Store important information to long-term memory.
|
|
@@ -196,6 +230,36 @@ Use this tool PROACTIVELY to:
|
|
|
196
230
|
EMBEDDING_DIMENSION = DEFAULT_EMBEDDING_DIMENSION
|
|
197
231
|
|
|
198
232
|
|
|
233
|
+
class JsonFormatter(logging.Formatter):
|
|
234
|
+
"""JSON formatter for structured logging.
|
|
235
|
+
|
|
236
|
+
Outputs logs in JSON format with a 'severity' field that cloud logging
|
|
237
|
+
systems (GCP, AWS CloudWatch, etc.) can parse to correctly categorize log levels.
|
|
238
|
+
"""
|
|
239
|
+
|
|
240
|
+
SEVERITY_MAP = {
|
|
241
|
+
logging.DEBUG: "DEBUG",
|
|
242
|
+
logging.INFO: "INFO",
|
|
243
|
+
logging.WARNING: "WARNING",
|
|
244
|
+
logging.ERROR: "ERROR",
|
|
245
|
+
logging.CRITICAL: "CRITICAL",
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
def format(self, record: logging.LogRecord) -> str:
|
|
249
|
+
log_entry = {
|
|
250
|
+
"severity": self.SEVERITY_MAP.get(record.levelno, "DEFAULT"),
|
|
251
|
+
"message": record.getMessage(),
|
|
252
|
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
253
|
+
"logger": record.name,
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
# Add exception info if present
|
|
257
|
+
if record.exc_info:
|
|
258
|
+
log_entry["exception"] = self.formatException(record.exc_info)
|
|
259
|
+
|
|
260
|
+
return json.dumps(log_entry)
|
|
261
|
+
|
|
262
|
+
|
|
199
263
|
def _validate_extraction_mode(mode: str) -> str:
|
|
200
264
|
"""Validate and normalize extraction mode."""
|
|
201
265
|
mode_lower = mode.lower()
|
|
@@ -214,6 +278,7 @@ class HindsightConfig:
|
|
|
214
278
|
|
|
215
279
|
# Database
|
|
216
280
|
database_url: str
|
|
281
|
+
database_schema: str
|
|
217
282
|
|
|
218
283
|
# LLM (default, used as fallback for per-operation config)
|
|
219
284
|
llm_provider: str
|
|
@@ -234,9 +299,15 @@ class HindsightConfig:
|
|
|
234
299
|
reflect_llm_model: str | None
|
|
235
300
|
reflect_llm_base_url: str | None
|
|
236
301
|
|
|
302
|
+
consolidation_llm_provider: str | None
|
|
303
|
+
consolidation_llm_api_key: str | None
|
|
304
|
+
consolidation_llm_model: str | None
|
|
305
|
+
consolidation_llm_base_url: str | None
|
|
306
|
+
|
|
237
307
|
# Embeddings
|
|
238
308
|
embeddings_provider: str
|
|
239
309
|
embeddings_local_model: str
|
|
310
|
+
embeddings_local_force_cpu: bool
|
|
240
311
|
embeddings_tei_url: str | None
|
|
241
312
|
embeddings_openai_base_url: str | None
|
|
242
313
|
embeddings_cohere_base_url: str | None
|
|
@@ -244,6 +315,8 @@ class HindsightConfig:
|
|
|
244
315
|
# Reranker
|
|
245
316
|
reranker_provider: str
|
|
246
317
|
reranker_local_model: str
|
|
318
|
+
reranker_local_force_cpu: bool
|
|
319
|
+
reranker_local_max_concurrent: int
|
|
247
320
|
reranker_tei_url: str | None
|
|
248
321
|
reranker_tei_batch_size: int
|
|
249
322
|
reranker_tei_max_concurrent: int
|
|
@@ -254,6 +327,7 @@ class HindsightConfig:
|
|
|
254
327
|
host: str
|
|
255
328
|
port: int
|
|
256
329
|
log_level: str
|
|
330
|
+
log_format: str
|
|
257
331
|
mcp_enabled: bool
|
|
258
332
|
|
|
259
333
|
# Recall
|
|
@@ -261,18 +335,21 @@ class HindsightConfig:
|
|
|
261
335
|
mpfp_top_k_neighbors: int
|
|
262
336
|
recall_max_concurrent: int
|
|
263
337
|
recall_connection_budget: int
|
|
264
|
-
|
|
265
|
-
# Observation thresholds
|
|
266
|
-
observation_min_facts: int
|
|
267
|
-
observation_top_entities: int
|
|
338
|
+
mental_model_refresh_concurrency: int
|
|
268
339
|
|
|
269
340
|
# Retain settings
|
|
270
341
|
retain_max_completion_tokens: int
|
|
271
342
|
retain_chunk_size: int
|
|
272
343
|
retain_extract_causal_links: bool
|
|
273
344
|
retain_extraction_mode: str
|
|
345
|
+
retain_custom_instructions: str | None
|
|
274
346
|
retain_observations_async: bool
|
|
275
347
|
|
|
348
|
+
# Observations settings (consolidated knowledge from facts)
|
|
349
|
+
enable_observations: bool
|
|
350
|
+
consolidation_batch_size: int
|
|
351
|
+
consolidation_max_tokens: int
|
|
352
|
+
|
|
276
353
|
# Optimization flags
|
|
277
354
|
skip_llm_verification: bool
|
|
278
355
|
lazy_reranker: bool
|
|
@@ -286,10 +363,16 @@ class HindsightConfig:
|
|
|
286
363
|
db_command_timeout: int
|
|
287
364
|
db_acquire_timeout: int
|
|
288
365
|
|
|
289
|
-
#
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
366
|
+
# Worker configuration (distributed task processing)
|
|
367
|
+
worker_enabled: bool
|
|
368
|
+
worker_id: str | None
|
|
369
|
+
worker_poll_interval_ms: int
|
|
370
|
+
worker_max_retries: int
|
|
371
|
+
worker_batch_size: int
|
|
372
|
+
worker_http_port: int
|
|
373
|
+
|
|
374
|
+
# Reflect agent settings
|
|
375
|
+
reflect_max_iterations: int
|
|
293
376
|
|
|
294
377
|
@classmethod
|
|
295
378
|
def from_env(cls) -> "HindsightConfig":
|
|
@@ -297,6 +380,7 @@ class HindsightConfig:
|
|
|
297
380
|
return cls(
|
|
298
381
|
# Database
|
|
299
382
|
database_url=os.getenv(ENV_DATABASE_URL, DEFAULT_DATABASE_URL),
|
|
383
|
+
database_schema=os.getenv(ENV_DATABASE_SCHEMA, DEFAULT_DATABASE_SCHEMA),
|
|
300
384
|
# LLM
|
|
301
385
|
llm_provider=os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER),
|
|
302
386
|
llm_api_key=os.getenv(ENV_LLM_API_KEY),
|
|
@@ -313,15 +397,30 @@ class HindsightConfig:
|
|
|
313
397
|
reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
|
|
314
398
|
reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
|
|
315
399
|
reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
|
|
400
|
+
consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
|
|
401
|
+
consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
|
|
402
|
+
consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
|
|
403
|
+
consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
|
|
316
404
|
# Embeddings
|
|
317
405
|
embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
|
|
318
406
|
embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
|
|
407
|
+
embeddings_local_force_cpu=os.getenv(
|
|
408
|
+
ENV_EMBEDDINGS_LOCAL_FORCE_CPU, str(DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU)
|
|
409
|
+
).lower()
|
|
410
|
+
in ("true", "1"),
|
|
319
411
|
embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
|
|
320
412
|
embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
|
|
321
413
|
embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
|
|
322
414
|
# Reranker
|
|
323
415
|
reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
|
|
324
416
|
reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
|
|
417
|
+
reranker_local_force_cpu=os.getenv(
|
|
418
|
+
ENV_RERANKER_LOCAL_FORCE_CPU, str(DEFAULT_RERANKER_LOCAL_FORCE_CPU)
|
|
419
|
+
).lower()
|
|
420
|
+
in ("true", "1"),
|
|
421
|
+
reranker_local_max_concurrent=int(
|
|
422
|
+
os.getenv(ENV_RERANKER_LOCAL_MAX_CONCURRENT, str(DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT))
|
|
423
|
+
),
|
|
325
424
|
reranker_tei_url=os.getenv(ENV_RERANKER_TEI_URL),
|
|
326
425
|
reranker_tei_batch_size=int(os.getenv(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE))),
|
|
327
426
|
reranker_tei_max_concurrent=int(
|
|
@@ -333,6 +432,7 @@ class HindsightConfig:
|
|
|
333
432
|
host=os.getenv(ENV_HOST, DEFAULT_HOST),
|
|
334
433
|
port=int(os.getenv(ENV_PORT, DEFAULT_PORT)),
|
|
335
434
|
log_level=os.getenv(ENV_LOG_LEVEL, DEFAULT_LOG_LEVEL),
|
|
435
|
+
log_format=os.getenv(ENV_LOG_FORMAT, DEFAULT_LOG_FORMAT).lower(),
|
|
336
436
|
mcp_enabled=os.getenv(ENV_MCP_ENABLED, str(DEFAULT_MCP_ENABLED)).lower() == "true",
|
|
337
437
|
# Recall
|
|
338
438
|
graph_retriever=os.getenv(ENV_GRAPH_RETRIEVER, DEFAULT_GRAPH_RETRIEVER),
|
|
@@ -341,14 +441,12 @@ class HindsightConfig:
|
|
|
341
441
|
recall_connection_budget=int(
|
|
342
442
|
os.getenv(ENV_RECALL_CONNECTION_BUDGET, str(DEFAULT_RECALL_CONNECTION_BUDGET))
|
|
343
443
|
),
|
|
444
|
+
mental_model_refresh_concurrency=int(
|
|
445
|
+
os.getenv(ENV_MENTAL_MODEL_REFRESH_CONCURRENCY, str(DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY))
|
|
446
|
+
),
|
|
344
447
|
# Optimization flags
|
|
345
448
|
skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
|
|
346
449
|
lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
|
|
347
|
-
# Observation thresholds
|
|
348
|
-
observation_min_facts=int(os.getenv(ENV_OBSERVATION_MIN_FACTS, str(DEFAULT_OBSERVATION_MIN_FACTS))),
|
|
349
|
-
observation_top_entities=int(
|
|
350
|
-
os.getenv(ENV_OBSERVATION_TOP_ENTITIES, str(DEFAULT_OBSERVATION_TOP_ENTITIES))
|
|
351
|
-
),
|
|
352
450
|
# Retain settings
|
|
353
451
|
retain_max_completion_tokens=int(
|
|
354
452
|
os.getenv(ENV_RETAIN_MAX_COMPLETION_TOKENS, str(DEFAULT_RETAIN_MAX_COMPLETION_TOKENS))
|
|
@@ -361,10 +459,19 @@ class HindsightConfig:
|
|
|
361
459
|
retain_extraction_mode=_validate_extraction_mode(
|
|
362
460
|
os.getenv(ENV_RETAIN_EXTRACTION_MODE, DEFAULT_RETAIN_EXTRACTION_MODE)
|
|
363
461
|
),
|
|
462
|
+
retain_custom_instructions=os.getenv(ENV_RETAIN_CUSTOM_INSTRUCTIONS) or DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS,
|
|
364
463
|
retain_observations_async=os.getenv(
|
|
365
464
|
ENV_RETAIN_OBSERVATIONS_ASYNC, str(DEFAULT_RETAIN_OBSERVATIONS_ASYNC)
|
|
366
465
|
).lower()
|
|
367
466
|
== "true",
|
|
467
|
+
# Observations settings (consolidated knowledge from facts)
|
|
468
|
+
enable_observations=os.getenv(ENV_ENABLE_OBSERVATIONS, str(DEFAULT_ENABLE_OBSERVATIONS)).lower() == "true",
|
|
469
|
+
consolidation_batch_size=int(
|
|
470
|
+
os.getenv(ENV_CONSOLIDATION_BATCH_SIZE, str(DEFAULT_CONSOLIDATION_BATCH_SIZE))
|
|
471
|
+
),
|
|
472
|
+
consolidation_max_tokens=int(
|
|
473
|
+
os.getenv(ENV_CONSOLIDATION_MAX_TOKENS, str(DEFAULT_CONSOLIDATION_MAX_TOKENS))
|
|
474
|
+
),
|
|
368
475
|
# Database migrations
|
|
369
476
|
run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
|
|
370
477
|
# Database connection pool
|
|
@@ -372,14 +479,15 @@ class HindsightConfig:
|
|
|
372
479
|
db_pool_max_size=int(os.getenv(ENV_DB_POOL_MAX_SIZE, str(DEFAULT_DB_POOL_MAX_SIZE))),
|
|
373
480
|
db_command_timeout=int(os.getenv(ENV_DB_COMMAND_TIMEOUT, str(DEFAULT_DB_COMMAND_TIMEOUT))),
|
|
374
481
|
db_acquire_timeout=int(os.getenv(ENV_DB_ACQUIRE_TIMEOUT, str(DEFAULT_DB_ACQUIRE_TIMEOUT))),
|
|
375
|
-
#
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
),
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
482
|
+
# Worker configuration
|
|
483
|
+
worker_enabled=os.getenv(ENV_WORKER_ENABLED, str(DEFAULT_WORKER_ENABLED)).lower() == "true",
|
|
484
|
+
worker_id=os.getenv(ENV_WORKER_ID) or DEFAULT_WORKER_ID,
|
|
485
|
+
worker_poll_interval_ms=int(os.getenv(ENV_WORKER_POLL_INTERVAL_MS, str(DEFAULT_WORKER_POLL_INTERVAL_MS))),
|
|
486
|
+
worker_max_retries=int(os.getenv(ENV_WORKER_MAX_RETRIES, str(DEFAULT_WORKER_MAX_RETRIES))),
|
|
487
|
+
worker_batch_size=int(os.getenv(ENV_WORKER_BATCH_SIZE, str(DEFAULT_WORKER_BATCH_SIZE))),
|
|
488
|
+
worker_http_port=int(os.getenv(ENV_WORKER_HTTP_PORT, str(DEFAULT_WORKER_HTTP_PORT))),
|
|
489
|
+
# Reflect agent settings
|
|
490
|
+
reflect_max_iterations=int(os.getenv(ENV_REFLECT_MAX_ITERATIONS, str(DEFAULT_REFLECT_MAX_ITERATIONS))),
|
|
383
491
|
)
|
|
384
492
|
|
|
385
493
|
def get_llm_base_url(self) -> str:
|
|
@@ -410,16 +518,32 @@ class HindsightConfig:
|
|
|
410
518
|
return log_level_map.get(self.log_level.lower(), logging.INFO)
|
|
411
519
|
|
|
412
520
|
def configure_logging(self) -> None:
    """Configure Python logging based on the log level and format.

    Replaces every handler on the root logger with a single stdout
    handler whose level is the one returned by ``get_python_log_level()``.

    When ``log_format`` is "json", outputs structured JSON logs with a
    severity field that GCP Cloud Logging can parse for proper log level
    categorization. Any other value selects a plain-text line format.
    """
    # Resolve the level once so the logger and the handler always agree.
    level = self.get_python_log_level()

    root_logger = logging.getLogger()
    root_logger.setLevel(level)

    # Remove existing handlers. Iterate over a copy because removeHandler
    # mutates the list, and close each handler so file/socket handlers
    # release their resources (same as logging.basicConfig(force=True)).
    for existing in root_logger.handlers[:]:
        root_logger.removeHandler(existing)
        existing.close()

    # Create handler writing to stdout (GCP treats stderr as ERROR)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(level)

    if self.log_format == "json":
        handler.setFormatter(JsonFormatter())
    else:
        handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s"))

    root_logger.addHandler(handler)
|
|
419
543
|
|
|
420
544
|
def log_config(self) -> None:
|
|
421
545
|
"""Log the current configuration (without sensitive values)."""
|
|
422
|
-
logger.info(f"Database: {self.database_url}")
|
|
546
|
+
logger.info(f"Database: {self.database_url} (schema: {self.database_schema})")
|
|
423
547
|
logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")
|
|
424
548
|
if self.retain_llm_provider or self.retain_llm_model:
|
|
425
549
|
retain_provider = self.retain_llm_provider or self.llm_provider
|
|
@@ -429,6 +553,10 @@ class HindsightConfig:
|
|
|
429
553
|
reflect_provider = self.reflect_llm_provider or self.llm_provider
|
|
430
554
|
reflect_model = self.reflect_llm_model or self.llm_model
|
|
431
555
|
logger.info(f"LLM (reflect): provider={reflect_provider}, model={reflect_model}")
|
|
556
|
+
if self.consolidation_llm_provider or self.consolidation_llm_model:
|
|
557
|
+
consolidation_provider = self.consolidation_llm_provider or self.llm_provider
|
|
558
|
+
consolidation_model = self.consolidation_llm_model or self.llm_model
|
|
559
|
+
logger.info(f"LLM (consolidation): provider={consolidation_provider}, model={consolidation_model}")
|
|
432
560
|
logger.info(f"Embeddings: provider={self.embeddings_provider}")
|
|
433
561
|
logger.info(f"Reranker: provider={self.reranker_provider}")
|
|
434
562
|
logger.info(f"Graph retriever: {self.graph_retriever}")
|