hindsight-api 0.3.0 → 0.4.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. hindsight_api/__init__.py +1 -1
  2. hindsight_api/admin/cli.py +59 -0
  3. hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
  4. hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
  5. hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
  6. hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
  7. hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
  8. hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
  9. hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
  10. hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
  11. hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
  12. hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
  13. hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
  14. hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
  15. hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
  16. hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
  17. hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
  18. hindsight_api/api/http.py +1120 -93
  19. hindsight_api/api/mcp.py +11 -191
  20. hindsight_api/config.py +174 -46
  21. hindsight_api/engine/consolidation/__init__.py +5 -0
  22. hindsight_api/engine/consolidation/consolidator.py +926 -0
  23. hindsight_api/engine/consolidation/prompts.py +77 -0
  24. hindsight_api/engine/cross_encoder.py +153 -22
  25. hindsight_api/engine/directives/__init__.py +5 -0
  26. hindsight_api/engine/directives/models.py +37 -0
  27. hindsight_api/engine/embeddings.py +136 -13
  28. hindsight_api/engine/interface.py +32 -13
  29. hindsight_api/engine/llm_wrapper.py +505 -43
  30. hindsight_api/engine/memory_engine.py +2101 -1094
  31. hindsight_api/engine/mental_models/__init__.py +14 -0
  32. hindsight_api/engine/mental_models/models.py +53 -0
  33. hindsight_api/engine/reflect/__init__.py +18 -0
  34. hindsight_api/engine/reflect/agent.py +933 -0
  35. hindsight_api/engine/reflect/models.py +109 -0
  36. hindsight_api/engine/reflect/observations.py +186 -0
  37. hindsight_api/engine/reflect/prompts.py +483 -0
  38. hindsight_api/engine/reflect/tools.py +437 -0
  39. hindsight_api/engine/reflect/tools_schema.py +250 -0
  40. hindsight_api/engine/response_models.py +130 -4
  41. hindsight_api/engine/retain/bank_utils.py +79 -201
  42. hindsight_api/engine/retain/fact_extraction.py +81 -48
  43. hindsight_api/engine/retain/fact_storage.py +5 -8
  44. hindsight_api/engine/retain/link_utils.py +5 -8
  45. hindsight_api/engine/retain/orchestrator.py +1 -55
  46. hindsight_api/engine/retain/types.py +2 -2
  47. hindsight_api/engine/search/graph_retrieval.py +2 -2
  48. hindsight_api/engine/search/link_expansion_retrieval.py +164 -29
  49. hindsight_api/engine/search/mpfp_retrieval.py +1 -1
  50. hindsight_api/engine/search/retrieval.py +14 -14
  51. hindsight_api/engine/search/think_utils.py +41 -140
  52. hindsight_api/engine/search/trace.py +0 -1
  53. hindsight_api/engine/search/tracer.py +2 -5
  54. hindsight_api/engine/search/types.py +0 -3
  55. hindsight_api/engine/task_backend.py +112 -196
  56. hindsight_api/engine/utils.py +0 -151
  57. hindsight_api/extensions/__init__.py +10 -1
  58. hindsight_api/extensions/builtin/tenant.py +11 -4
  59. hindsight_api/extensions/operation_validator.py +81 -4
  60. hindsight_api/extensions/tenant.py +26 -0
  61. hindsight_api/main.py +28 -5
  62. hindsight_api/mcp_local.py +12 -53
  63. hindsight_api/mcp_tools.py +494 -0
  64. hindsight_api/models.py +0 -2
  65. hindsight_api/worker/__init__.py +11 -0
  66. hindsight_api/worker/main.py +296 -0
  67. hindsight_api/worker/poller.py +486 -0
  68. {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.1.dist-info}/METADATA +12 -6
  69. hindsight_api-0.4.1.dist-info/RECORD +112 -0
  70. {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.1.dist-info}/entry_points.txt +1 -0
  71. hindsight_api/engine/retain/observation_regeneration.py +0 -254
  72. hindsight_api/engine/search/observation_utils.py +0 -125
  73. hindsight_api/engine/search/scoring.py +0 -159
  74. hindsight_api-0.3.0.dist-info/RECORD +0 -82
  75. {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.1.dist-info}/WHEEL +0 -0
hindsight_api/api/mcp.py CHANGED
@@ -1,4 +1,4 @@
-"""Hindsight MCP Server implementation using FastMCP."""
+"""Hindsight MCP Server implementation using FastMCP (HTTP transport)."""
 
 import json
 import logging
@@ -8,8 +8,7 @@ from contextvars import ContextVar
 from fastmcp import FastMCP
 
 from hindsight_api import MemoryEngine
-from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES
-from hindsight_api.models import RequestContext
+from hindsight_api.mcp_tools import MCPToolsConfig, register_mcp_tools
 
 # Configure logging from HINDSIGHT_API_LOG_LEVEL environment variable
 _log_level_str = os.environ.get("HINDSIGHT_API_LOG_LEVEL", "info").lower()
@@ -52,194 +51,15 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
     # Use stateless_http=True for Claude Code compatibility
     mcp = FastMCP("hindsight-mcp-server", stateless_http=True)
 
-    @mcp.tool()
-    async def retain(
-        content: str,
-        context: str = "general",
-        async_processing: bool = True,
-        bank_id: str | None = None,
-    ) -> str:
-        """
-        Store important information to long-term memory.
-
-        Use this tool PROACTIVELY whenever the user shares:
-        - Personal facts, preferences, or interests
-        - Important events or milestones
-        - User history, experiences, or background
-        - Decisions, opinions, or stated preferences
-        - Goals, plans, or future intentions
-        - Relationships or people mentioned
-        - Work context, projects, or responsibilities
-
-        Args:
-            content: The fact/memory to store (be specific and include relevant details)
-            context: Category for the memory (e.g., 'preferences', 'work', 'hobbies', 'family'). Default: 'general'
-            async_processing: If True, queue for background processing and return immediately. If False, wait for completion. Default: True
-            bank_id: Optional bank to store in (defaults to session bank). Use for cross-bank operations.
-        """
-        try:
-            target_bank = bank_id or get_current_bank_id()
-            if target_bank is None:
-                return "Error: No bank_id configured"
-            contents = [{"content": content, "context": context}]
-            if async_processing:
-                # Queue for background processing and return immediately
-                result = await memory.submit_async_retain(
-                    bank_id=target_bank, contents=contents, request_context=RequestContext()
-                )
-                return f"Memory queued for background processing (operation_id: {result.get('operation_id', 'N/A')})"
-            else:
-                # Wait for completion
-                await memory.retain_batch_async(
-                    bank_id=target_bank,
-                    contents=contents,
-                    request_context=RequestContext(),
-                )
-                return f"Memory stored successfully in bank '{target_bank}'"
-        except Exception as e:
-            logger.error(f"Error storing memory: {e}", exc_info=True)
-            return f"Error: {str(e)}"
-
-    @mcp.tool()
-    async def recall(query: str, max_tokens: int = 4096, bank_id: str | None = None) -> str:
-        """
-        Search memories to provide personalized, context-aware responses.
-
-        Use this tool PROACTIVELY to:
-        - Check user's preferences before making suggestions
-        - Recall user's history to provide continuity
-        - Remember user's goals and context
-        - Personalize responses based on past interactions
-
-        Args:
-            query: Natural language search query (e.g., "user's food preferences", "what projects is user working on")
-            max_tokens: Maximum tokens in the response (default: 4096)
-            bank_id: Optional bank to search in (defaults to session bank). Use for cross-bank operations.
-        """
-        try:
-            target_bank = bank_id or get_current_bank_id()
-            if target_bank is None:
-                return "Error: No bank_id configured"
-            from hindsight_api.engine.memory_engine import Budget
-
-            recall_result = await memory.recall_async(
-                bank_id=target_bank,
-                query=query,
-                fact_type=list(VALID_RECALL_FACT_TYPES),
-                budget=Budget.HIGH,
-                max_tokens=max_tokens,
-                request_context=RequestContext(),
-            )
-
-            # Use model's JSON serialization
-            return recall_result.model_dump_json(indent=2)
-        except Exception as e:
-            logger.error(f"Error searching: {e}", exc_info=True)
-            return f'{{"error": "{e}", "results": []}}'
-
-    @mcp.tool()
-    async def reflect(query: str, context: str | None = None, budget: str = "low", bank_id: str | None = None) -> str:
-        """
-        Generate thoughtful analysis by synthesizing stored memories with the bank's personality.
-
-        WHEN TO USE THIS TOOL:
-        Use reflect when you need reasoned analysis, not just fact retrieval. This tool
-        thinks through the question using everything the bank knows and its personality traits.
-
-        EXAMPLES OF GOOD QUERIES:
-        - "What patterns have emerged in how I approach debugging?"
-        - "Based on my past decisions, what architectural style do I prefer?"
-        - "What might be the best approach for this problem given what you know about me?"
-        - "How should I prioritize these tasks based on my goals?"
-
-        HOW IT DIFFERS FROM RECALL:
-        - recall: Returns raw facts matching your search (fast lookup)
-        - reflect: Reasons across memories to form a synthesized answer (deeper analysis)
-
-        Use recall for "what did I say about X?" and reflect for "what should I do about X?"
-
-        Args:
-            query: The question or topic to reflect on
-            context: Optional context about why this reflection is needed
-            budget: Search budget - 'low', 'mid', or 'high' (default: 'low')
-            bank_id: Optional bank to reflect in (defaults to session bank). Use for cross-bank operations.
-        """
-        try:
-            target_bank = bank_id or get_current_bank_id()
-            if target_bank is None:
-                return "Error: No bank_id configured"
-            from hindsight_api.engine.memory_engine import Budget
-
-            # Map string budget to enum
-            budget_map = {"low": Budget.LOW, "mid": Budget.MID, "high": Budget.HIGH}
-            budget_enum = budget_map.get(budget.lower(), Budget.LOW)
-
-            reflect_result = await memory.reflect_async(
-                bank_id=target_bank,
-                query=query,
-                budget=budget_enum,
-                context=context,
-                request_context=RequestContext(),
-            )
-
-            return reflect_result.model_dump_json(indent=2)
-        except Exception as e:
-            logger.error(f"Error reflecting: {e}", exc_info=True)
-            return f'{{"error": "{e}", "text": ""}}'
-
-    @mcp.tool()
-    async def list_banks() -> str:
-        """
-        List all available memory banks.
-
-        Use this tool to discover what memory banks exist in the system.
-        Each bank is an isolated memory store (like a separate "brain").
-
-        Returns:
-            JSON list of banks with their IDs, names, dispositions, and backgrounds.
-        """
-        try:
-            banks = await memory.list_banks(request_context=RequestContext())
-            return json.dumps({"banks": banks}, indent=2)
-        except Exception as e:
-            logger.error(f"Error listing banks: {e}", exc_info=True)
-            return f'{{"error": "{e}", "banks": []}}'
-
-    @mcp.tool()
-    async def create_bank(bank_id: str, name: str | None = None, background: str | None = None) -> str:
-        """
-        Create a new memory bank or get an existing one.
-
-        Memory banks are isolated stores - each one is like a separate "brain" for a user/agent.
-        Banks are auto-created with default settings if they don't exist.
-
-        Args:
-            bank_id: Unique identifier for the bank (e.g., 'user-123', 'agent-alpha')
-            name: Optional human-friendly name for the bank
-            background: Optional background context about the bank's owner/purpose
-        """
-        try:
-            # get_bank_profile auto-creates bank if it doesn't exist
-            profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
-
-            # Update name/background if provided
-            if name is not None or background is not None:
-                await memory.update_bank(
-                    bank_id,
-                    name=name,
-                    background=background,
-                    request_context=RequestContext(),
-                )
-                # Fetch updated profile
-                profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
-
-            # Serialize disposition if it's a Pydantic model
-            if "disposition" in profile and hasattr(profile["disposition"], "model_dump"):
-                profile["disposition"] = profile["disposition"].model_dump()
-            return json.dumps(profile, indent=2)
-        except Exception as e:
-            logger.error(f"Error creating bank: {e}", exc_info=True)
-            return f'{{"error": "{e}"}}'
+    # Configure and register tools using shared module
+    config = MCPToolsConfig(
+        bank_id_resolver=get_current_bank_id,
+        include_bank_id_param=True,  # HTTP MCP supports multi-bank via parameter
+        tools=None,  # All tools
+        retain_fire_and_forget=False,  # HTTP MCP supports sync/async modes
+    )
+
+    register_mcp_tools(mcp, memory, config)
 
     return mcp
 
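The HTTP server now only picks a policy and delegates tool registration to the shared hindsight_api/mcp_tools module. As a rough sketch of what that factoring enables (hypothetical, not part of this diff): a single-bank local server could reuse the same registration call with a fixed bank and fire-and-forget retains. The `create_local_mcp_server` function and the lambda resolver below are illustrative assumptions; only `MCPToolsConfig`, `register_mcp_tools`, and the four fields shown above come from the release.

```python
# Hypothetical sketch (not from the diff): reusing the shared tools module
# for a single-bank local server. Only MCPToolsConfig, register_mcp_tools,
# and the four config fields are confirmed by the release; the rest is assumed.
from fastmcp import FastMCP

from hindsight_api import MemoryEngine
from hindsight_api.mcp_tools import MCPToolsConfig, register_mcp_tools


def create_local_mcp_server(memory: MemoryEngine, bank_id: str = "mcp") -> FastMCP:
    mcp = FastMCP("hindsight-mcp-local")
    config = MCPToolsConfig(
        bank_id_resolver=lambda: bank_id,  # every tool call targets one fixed bank
        include_bank_id_param=False,       # no cross-bank parameter on a local server
        tools=None,                        # register the full tool set
        retain_fire_and_forget=True,       # queue retains without waiting
    )
    register_mcp_tools(mcp, memory, config)
    return mcp
```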
hindsight_api/config.py CHANGED
@@ -4,9 +4,12 @@ Centralized configuration for Hindsight API.
 All environment variables and their defaults are defined here.
 """
 
+import json
 import logging
 import os
+import sys
 from dataclasses import dataclass
+from datetime import datetime, timezone
 
 from dotenv import find_dotenv, load_dotenv
 
@@ -17,6 +20,7 @@ logger = logging.getLogger(__name__)
 
 # Environment variable names
 ENV_DATABASE_URL = "HINDSIGHT_API_DATABASE_URL"
+ENV_DATABASE_SCHEMA = "HINDSIGHT_API_DATABASE_SCHEMA"
 ENV_LLM_PROVIDER = "HINDSIGHT_API_LLM_PROVIDER"
 ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
 ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
@@ -36,8 +40,14 @@ ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
 ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
 ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
 
+ENV_CONSOLIDATION_LLM_PROVIDER = "HINDSIGHT_API_CONSOLIDATION_LLM_PROVIDER"
+ENV_CONSOLIDATION_LLM_API_KEY = "HINDSIGHT_API_CONSOLIDATION_LLM_API_KEY"
+ENV_CONSOLIDATION_LLM_MODEL = "HINDSIGHT_API_CONSOLIDATION_LLM_MODEL"
+ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
+
 ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
 ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
+ENV_EMBEDDINGS_LOCAL_FORCE_CPU = "HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"
 ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
 ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
 ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
@@ -57,6 +67,7 @@ ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"
 
 ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
 ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
+ENV_RERANKER_LOCAL_FORCE_CPU = "HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"
 ENV_RERANKER_LOCAL_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT"
 ENV_RERANKER_TEI_URL = "HINDSIGHT_API_RERANKER_TEI_URL"
 ENV_RERANKER_TEI_BATCH_SIZE = "HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE"
@@ -68,6 +79,7 @@ ENV_RERANKER_FLASHRANK_CACHE_DIR = "HINDSIGHT_API_RERANKER_FLASHRANK_CACHE_DIR"
 ENV_HOST = "HINDSIGHT_API_HOST"
 ENV_PORT = "HINDSIGHT_API_PORT"
 ENV_LOG_LEVEL = "HINDSIGHT_API_LOG_LEVEL"
+ENV_LOG_FORMAT = "HINDSIGHT_API_LOG_FORMAT"
 ENV_WORKERS = "HINDSIGHT_API_WORKERS"
 ENV_MCP_ENABLED = "HINDSIGHT_API_MCP_ENABLED"
 ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
@@ -76,18 +88,21 @@ ENV_RECALL_MAX_CONCURRENT = "HINDSIGHT_API_RECALL_MAX_CONCURRENT"
 ENV_RECALL_CONNECTION_BUDGET = "HINDSIGHT_API_RECALL_CONNECTION_BUDGET"
 ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
 ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
-
-# Observation thresholds
-ENV_OBSERVATION_MIN_FACTS = "HINDSIGHT_API_OBSERVATION_MIN_FACTS"
-ENV_OBSERVATION_TOP_ENTITIES = "HINDSIGHT_API_OBSERVATION_TOP_ENTITIES"
+ENV_MENTAL_MODEL_REFRESH_CONCURRENCY = "HINDSIGHT_API_MENTAL_MODEL_REFRESH_CONCURRENCY"
 
 # Retain settings
 ENV_RETAIN_MAX_COMPLETION_TOKENS = "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"
 ENV_RETAIN_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_CHUNK_SIZE"
 ENV_RETAIN_EXTRACT_CAUSAL_LINKS = "HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS"
 ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE"
+ENV_RETAIN_CUSTOM_INSTRUCTIONS = "HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS"
 ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
 
+# Observations settings (consolidated knowledge from facts)
+ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
+ENV_CONSOLIDATION_BATCH_SIZE = "HINDSIGHT_API_CONSOLIDATION_BATCH_SIZE"
+ENV_CONSOLIDATION_MAX_TOKENS = "HINDSIGHT_API_CONSOLIDATION_MAX_TOKENS"
+
 # Optimization flags
 ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
 ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
@@ -101,13 +116,20 @@ ENV_DB_POOL_MAX_SIZE = "HINDSIGHT_API_DB_POOL_MAX_SIZE"
 ENV_DB_COMMAND_TIMEOUT = "HINDSIGHT_API_DB_COMMAND_TIMEOUT"
 ENV_DB_ACQUIRE_TIMEOUT = "HINDSIGHT_API_DB_ACQUIRE_TIMEOUT"
 
-# Background task processing
-ENV_TASK_BACKEND = "HINDSIGHT_API_TASK_BACKEND"
-ENV_TASK_BACKEND_MEMORY_BATCH_SIZE = "HINDSIGHT_API_TASK_BACKEND_MEMORY_BATCH_SIZE"
-ENV_TASK_BACKEND_MEMORY_BATCH_INTERVAL = "HINDSIGHT_API_TASK_BACKEND_MEMORY_BATCH_INTERVAL"
+# Worker configuration (distributed task processing)
+ENV_WORKER_ENABLED = "HINDSIGHT_API_WORKER_ENABLED"
+ENV_WORKER_ID = "HINDSIGHT_API_WORKER_ID"
+ENV_WORKER_POLL_INTERVAL_MS = "HINDSIGHT_API_WORKER_POLL_INTERVAL_MS"
+ENV_WORKER_MAX_RETRIES = "HINDSIGHT_API_WORKER_MAX_RETRIES"
+ENV_WORKER_BATCH_SIZE = "HINDSIGHT_API_WORKER_BATCH_SIZE"
+ENV_WORKER_HTTP_PORT = "HINDSIGHT_API_WORKER_HTTP_PORT"
+
+# Reflect agent settings
+ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
 
 # Default values
 DEFAULT_DATABASE_URL = "pg0"
+DEFAULT_DATABASE_SCHEMA = "public"
 DEFAULT_LLM_PROVIDER = "openai"
 DEFAULT_LLM_MODEL = "gpt-5-mini"
 DEFAULT_LLM_MAX_CONCURRENT = 32
@@ -115,11 +137,13 @@ DEFAULT_LLM_TIMEOUT = 120.0  # seconds
 
 DEFAULT_EMBEDDINGS_PROVIDER = "local"
 DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
+DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU = False  # Force CPU mode for local embeddings (avoids MPS/XPC issues on macOS)
 DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
 DEFAULT_EMBEDDING_DIMENSION = 384
 
 DEFAULT_RERANKER_PROVIDER = "local"
 DEFAULT_RERANKER_LOCAL_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
+DEFAULT_RERANKER_LOCAL_FORCE_CPU = False  # Force CPU mode for local reranker (avoids MPS/XPC issues on macOS)
 DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT = 4  # Limit concurrent CPU-bound reranking to prevent thrashing
 DEFAULT_RERANKER_TEI_BATCH_SIZE = 128
 DEFAULT_RERANKER_TEI_MAX_CONCURRENT = 8
@@ -138,6 +162,7 @@ DEFAULT_RERANKER_LITELLM_MODEL = "cohere/rerank-english-v3.0"
 DEFAULT_HOST = "0.0.0.0"
 DEFAULT_PORT = 8888
 DEFAULT_LOG_LEVEL = "info"
+DEFAULT_LOG_FORMAT = "text"  # Options: "text", "json"
 DEFAULT_WORKERS = 1
 DEFAULT_MCP_ENABLED = True
 DEFAULT_GRAPH_RETRIEVER = "link_expansion"  # Options: "link_expansion", "mpfp", "bfs"
@@ -145,19 +170,22 @@ DEFAULT_MPFP_TOP_K_NEIGHBORS = 20  # Fan-out limit per node in MPFP graph traver
 DEFAULT_RECALL_MAX_CONCURRENT = 32  # Max concurrent recall operations per worker
 DEFAULT_RECALL_CONNECTION_BUDGET = 4  # Max concurrent DB connections per recall operation
 DEFAULT_MCP_LOCAL_BANK_ID = "mcp"
-
-# Observation thresholds
-DEFAULT_OBSERVATION_MIN_FACTS = 5  # Min facts required to generate entity observations
-DEFAULT_OBSERVATION_TOP_ENTITIES = 5  # Max entities to process per retain batch
+DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY = 8  # Max concurrent mental model refreshes
 
 # Retain settings
 DEFAULT_RETAIN_MAX_COMPLETION_TOKENS = 64000  # Max tokens for fact extraction LLM call
 DEFAULT_RETAIN_CHUNK_SIZE = 3000  # Max chars per chunk for fact extraction
 DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS = True  # Extract causal links between facts
-DEFAULT_RETAIN_EXTRACTION_MODE = "concise"  # Extraction mode: "concise" or "verbose"
-RETAIN_EXTRACTION_MODES = ("concise", "verbose")  # Allowed extraction modes
+DEFAULT_RETAIN_EXTRACTION_MODE = "concise"  # Extraction mode: "concise", "verbose", or "custom"
+RETAIN_EXTRACTION_MODES = ("concise", "verbose", "custom")  # Allowed extraction modes
+DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS = None  # Custom extraction guidelines (only used when mode="custom")
 DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False  # Run observation generation async (after retain completes)
 
+# Observations defaults (consolidated knowledge from facts)
+DEFAULT_ENABLE_OBSERVATIONS = True  # Observations enabled by default
+DEFAULT_CONSOLIDATION_BATCH_SIZE = 50  # Memories to load per batch (internal memory optimization)
+DEFAULT_CONSOLIDATION_MAX_TOKENS = 1024  # Max tokens for recall when finding related observations
+
 # Database migrations
 DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
 
@@ -167,10 +195,16 @@ DEFAULT_DB_POOL_MAX_SIZE = 100
 DEFAULT_DB_COMMAND_TIMEOUT = 60  # seconds
 DEFAULT_DB_ACQUIRE_TIMEOUT = 30  # seconds
 
-# Background task processing
-DEFAULT_TASK_BACKEND = "memory"  # Options: "memory", "noop"
-DEFAULT_TASK_BACKEND_MEMORY_BATCH_SIZE = 10
-DEFAULT_TASK_BACKEND_MEMORY_BATCH_INTERVAL = 1.0  # seconds
+# Worker configuration (distributed task processing)
+DEFAULT_WORKER_ENABLED = True  # API runs worker by default (standalone mode)
+DEFAULT_WORKER_ID = None  # Will use hostname if not specified
+DEFAULT_WORKER_POLL_INTERVAL_MS = 500  # Poll database every 500ms
+DEFAULT_WORKER_MAX_RETRIES = 3  # Max retries before marking task failed
+DEFAULT_WORKER_BATCH_SIZE = 10  # Tasks to claim per poll cycle
+DEFAULT_WORKER_HTTP_PORT = 8889  # HTTP port for worker metrics/health
+
+# Reflect agent settings
+DEFAULT_REFLECT_MAX_ITERATIONS = 10  # Max tool call iterations before forcing response
 
 # Default MCP tool descriptions (can be customized via env vars)
 DEFAULT_MCP_RETAIN_DESCRIPTION = """Store important information to long-term memory.
@@ -196,6 +230,36 @@ Use this tool PROACTIVELY to:
 EMBEDDING_DIMENSION = DEFAULT_EMBEDDING_DIMENSION
 
 
+class JsonFormatter(logging.Formatter):
+    """JSON formatter for structured logging.
+
+    Outputs logs in JSON format with a 'severity' field that cloud logging
+    systems (GCP, AWS CloudWatch, etc.) can parse to correctly categorize log levels.
+    """
+
+    SEVERITY_MAP = {
+        logging.DEBUG: "DEBUG",
+        logging.INFO: "INFO",
+        logging.WARNING: "WARNING",
+        logging.ERROR: "ERROR",
+        logging.CRITICAL: "CRITICAL",
+    }
+
+    def format(self, record: logging.LogRecord) -> str:
+        log_entry = {
+            "severity": self.SEVERITY_MAP.get(record.levelno, "DEFAULT"),
+            "message": record.getMessage(),
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+            "logger": record.name,
+        }
+
+        # Add exception info if present
+        if record.exc_info:
+            log_entry["exception"] = self.formatException(record.exc_info)
+
+        return json.dumps(log_entry)
+
+
 def _validate_extraction_mode(mode: str) -> str:
     """Validate and normalize extraction mode."""
     mode_lower = mode.lower()
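The JsonFormatter added above is self-contained, so it can be exercised outside the service. A minimal sketch, assuming the class is importable from hindsight_api.config; attaching it to a stdout handler reproduces what the new configure_logging() does when HINDSIGHT_API_LOG_FORMAT=json:

```python
# Minimal demo of the JsonFormatter added in this release (import path assumed).
import logging
import sys

from hindsight_api.config import JsonFormatter

handler = logging.StreamHandler(sys.stdout)  # stdout, since GCP treats stderr as ERROR
handler.setFormatter(JsonFormatter())

log = logging.getLogger("demo")
log.addHandler(handler)
log.setLevel(logging.INFO)

log.warning("db pool nearly exhausted")
# Emits one JSON object per record, e.g.:
# {"severity": "WARNING", "message": "db pool nearly exhausted",
#  "timestamp": "2025-01-01T00:00:00+00:00", "logger": "demo"}
```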
@@ -214,6 +278,7 @@ class HindsightConfig:
 
     # Database
     database_url: str
+    database_schema: str
 
     # LLM (default, used as fallback for per-operation config)
     llm_provider: str
@@ -234,9 +299,15 @@ class HindsightConfig:
    reflect_llm_model: str | None
     reflect_llm_base_url: str | None
 
+    consolidation_llm_provider: str | None
+    consolidation_llm_api_key: str | None
+    consolidation_llm_model: str | None
+    consolidation_llm_base_url: str | None
+
     # Embeddings
     embeddings_provider: str
     embeddings_local_model: str
+    embeddings_local_force_cpu: bool
     embeddings_tei_url: str | None
     embeddings_openai_base_url: str | None
     embeddings_cohere_base_url: str | None
@@ -244,6 +315,8 @@ class HindsightConfig:
     # Reranker
     reranker_provider: str
     reranker_local_model: str
+    reranker_local_force_cpu: bool
+    reranker_local_max_concurrent: int
     reranker_tei_url: str | None
     reranker_tei_batch_size: int
     reranker_tei_max_concurrent: int
@@ -254,6 +327,7 @@ class HindsightConfig:
     host: str
     port: int
     log_level: str
+    log_format: str
     mcp_enabled: bool
 
     # Recall
@@ -261,18 +335,21 @@
     mpfp_top_k_neighbors: int
     recall_max_concurrent: int
     recall_connection_budget: int
-
-    # Observation thresholds
-    observation_min_facts: int
-    observation_top_entities: int
+    mental_model_refresh_concurrency: int
 
     # Retain settings
     retain_max_completion_tokens: int
     retain_chunk_size: int
     retain_extract_causal_links: bool
     retain_extraction_mode: str
+    retain_custom_instructions: str | None
    retain_observations_async: bool
 
+    # Observations settings (consolidated knowledge from facts)
+    enable_observations: bool
+    consolidation_batch_size: int
+    consolidation_max_tokens: int
+
     # Optimization flags
     skip_llm_verification: bool
     lazy_reranker: bool
@@ -286,10 +363,16 @@ class HindsightConfig:
     db_command_timeout: int
     db_acquire_timeout: int
 
-    # Background task processing
-    task_backend: str
-    task_backend_memory_batch_size: int
-    task_backend_memory_batch_interval: float
+    # Worker configuration (distributed task processing)
+    worker_enabled: bool
+    worker_id: str | None
+    worker_poll_interval_ms: int
+    worker_max_retries: int
+    worker_batch_size: int
+    worker_http_port: int
+
+    # Reflect agent settings
+    reflect_max_iterations: int
 
     @classmethod
     def from_env(cls) -> "HindsightConfig":
@@ -297,6 +380,7 @@
         return cls(
             # Database
             database_url=os.getenv(ENV_DATABASE_URL, DEFAULT_DATABASE_URL),
+            database_schema=os.getenv(ENV_DATABASE_SCHEMA, DEFAULT_DATABASE_SCHEMA),
             # LLM
             llm_provider=os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER),
             llm_api_key=os.getenv(ENV_LLM_API_KEY),
@@ -313,15 +397,30 @@
             reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
             reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
             reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
+            consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
+            consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
+            consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
+            consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
             # Embeddings
             embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
             embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
+            embeddings_local_force_cpu=os.getenv(
+                ENV_EMBEDDINGS_LOCAL_FORCE_CPU, str(DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU)
+            ).lower()
+            in ("true", "1"),
             embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
             embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
             embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
             # Reranker
             reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
             reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
+            reranker_local_force_cpu=os.getenv(
+                ENV_RERANKER_LOCAL_FORCE_CPU, str(DEFAULT_RERANKER_LOCAL_FORCE_CPU)
+            ).lower()
+            in ("true", "1"),
+            reranker_local_max_concurrent=int(
+                os.getenv(ENV_RERANKER_LOCAL_MAX_CONCURRENT, str(DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT))
+            ),
             reranker_tei_url=os.getenv(ENV_RERANKER_TEI_URL),
             reranker_tei_batch_size=int(os.getenv(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE))),
             reranker_tei_max_concurrent=int(
@@ -333,6 +432,7 @@
             host=os.getenv(ENV_HOST, DEFAULT_HOST),
             port=int(os.getenv(ENV_PORT, DEFAULT_PORT)),
             log_level=os.getenv(ENV_LOG_LEVEL, DEFAULT_LOG_LEVEL),
+            log_format=os.getenv(ENV_LOG_FORMAT, DEFAULT_LOG_FORMAT).lower(),
             mcp_enabled=os.getenv(ENV_MCP_ENABLED, str(DEFAULT_MCP_ENABLED)).lower() == "true",
             # Recall
             graph_retriever=os.getenv(ENV_GRAPH_RETRIEVER, DEFAULT_GRAPH_RETRIEVER),
@@ -341,14 +441,12 @@
             recall_connection_budget=int(
                 os.getenv(ENV_RECALL_CONNECTION_BUDGET, str(DEFAULT_RECALL_CONNECTION_BUDGET))
             ),
+            mental_model_refresh_concurrency=int(
+                os.getenv(ENV_MENTAL_MODEL_REFRESH_CONCURRENCY, str(DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY))
+            ),
             # Optimization flags
             skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
             lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
-            # Observation thresholds
-            observation_min_facts=int(os.getenv(ENV_OBSERVATION_MIN_FACTS, str(DEFAULT_OBSERVATION_MIN_FACTS))),
-            observation_top_entities=int(
-                os.getenv(ENV_OBSERVATION_TOP_ENTITIES, str(DEFAULT_OBSERVATION_TOP_ENTITIES))
-            ),
             # Retain settings
             retain_max_completion_tokens=int(
                 os.getenv(ENV_RETAIN_MAX_COMPLETION_TOKENS, str(DEFAULT_RETAIN_MAX_COMPLETION_TOKENS))
@@ -361,10 +459,19 @@
             retain_extraction_mode=_validate_extraction_mode(
                 os.getenv(ENV_RETAIN_EXTRACTION_MODE, DEFAULT_RETAIN_EXTRACTION_MODE)
             ),
+            retain_custom_instructions=os.getenv(ENV_RETAIN_CUSTOM_INSTRUCTIONS) or DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS,
             retain_observations_async=os.getenv(
                 ENV_RETAIN_OBSERVATIONS_ASYNC, str(DEFAULT_RETAIN_OBSERVATIONS_ASYNC)
             ).lower()
             == "true",
+            # Observations settings (consolidated knowledge from facts)
+            enable_observations=os.getenv(ENV_ENABLE_OBSERVATIONS, str(DEFAULT_ENABLE_OBSERVATIONS)).lower() == "true",
+            consolidation_batch_size=int(
+                os.getenv(ENV_CONSOLIDATION_BATCH_SIZE, str(DEFAULT_CONSOLIDATION_BATCH_SIZE))
+            ),
+            consolidation_max_tokens=int(
+                os.getenv(ENV_CONSOLIDATION_MAX_TOKENS, str(DEFAULT_CONSOLIDATION_MAX_TOKENS))
+            ),
             # Database migrations
             run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
             # Database connection pool
@@ -372,14 +479,15 @@
             db_pool_max_size=int(os.getenv(ENV_DB_POOL_MAX_SIZE, str(DEFAULT_DB_POOL_MAX_SIZE))),
             db_command_timeout=int(os.getenv(ENV_DB_COMMAND_TIMEOUT, str(DEFAULT_DB_COMMAND_TIMEOUT))),
             db_acquire_timeout=int(os.getenv(ENV_DB_ACQUIRE_TIMEOUT, str(DEFAULT_DB_ACQUIRE_TIMEOUT))),
-            # Background task processing
-            task_backend=os.getenv(ENV_TASK_BACKEND, DEFAULT_TASK_BACKEND),
-            task_backend_memory_batch_size=int(
-                os.getenv(ENV_TASK_BACKEND_MEMORY_BATCH_SIZE, str(DEFAULT_TASK_BACKEND_MEMORY_BATCH_SIZE))
-            ),
-            task_backend_memory_batch_interval=float(
-                os.getenv(ENV_TASK_BACKEND_MEMORY_BATCH_INTERVAL, str(DEFAULT_TASK_BACKEND_MEMORY_BATCH_INTERVAL))
-            ),
+            # Worker configuration
+            worker_enabled=os.getenv(ENV_WORKER_ENABLED, str(DEFAULT_WORKER_ENABLED)).lower() == "true",
+            worker_id=os.getenv(ENV_WORKER_ID) or DEFAULT_WORKER_ID,
+            worker_poll_interval_ms=int(os.getenv(ENV_WORKER_POLL_INTERVAL_MS, str(DEFAULT_WORKER_POLL_INTERVAL_MS))),
+            worker_max_retries=int(os.getenv(ENV_WORKER_MAX_RETRIES, str(DEFAULT_WORKER_MAX_RETRIES))),
+            worker_batch_size=int(os.getenv(ENV_WORKER_BATCH_SIZE, str(DEFAULT_WORKER_BATCH_SIZE))),
+            worker_http_port=int(os.getenv(ENV_WORKER_HTTP_PORT, str(DEFAULT_WORKER_HTTP_PORT))),
+            # Reflect agent settings
+            reflect_max_iterations=int(os.getenv(ENV_REFLECT_MAX_ITERATIONS, str(DEFAULT_REFLECT_MAX_ITERATIONS))),
         )
 
     def get_llm_base_url(self) -> str:
@@ -410,16 +518,32 @@
         return log_level_map.get(self.log_level.lower(), logging.INFO)
 
     def configure_logging(self) -> None:
-        """Configure Python logging based on the log level."""
-        logging.basicConfig(
-            level=self.get_python_log_level(),
-            format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
-            force=True,  # Override any existing configuration
-        )
+        """Configure Python logging based on the log level and format.
+
+        When log_format is "json", outputs structured JSON logs with a severity
+        field that GCP Cloud Logging can parse for proper log level categorization.
+        """
+        root_logger = logging.getLogger()
+        root_logger.setLevel(self.get_python_log_level())
+
+        # Remove existing handlers
+        for handler in root_logger.handlers[:]:
+            root_logger.removeHandler(handler)
+
+        # Create handler writing to stdout (GCP treats stderr as ERROR)
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setLevel(self.get_python_log_level())
+
+        if self.log_format == "json":
+            handler.setFormatter(JsonFormatter())
+        else:
+            handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s"))
+
+        root_logger.addHandler(handler)
 
     def log_config(self) -> None:
         """Log the current configuration (without sensitive values)."""
-        logger.info(f"Database: {self.database_url}")
+        logger.info(f"Database: {self.database_url} (schema: {self.database_schema})")
         logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")
         if self.retain_llm_provider or self.retain_llm_model:
             retain_provider = self.retain_llm_provider or self.llm_provider
@@ -429,6 +553,10 @@
             reflect_provider = self.reflect_llm_provider or self.llm_provider
             reflect_model = self.reflect_llm_model or self.llm_model
             logger.info(f"LLM (reflect): provider={reflect_provider}, model={reflect_model}")
+        if self.consolidation_llm_provider or self.consolidation_llm_model:
+            consolidation_provider = self.consolidation_llm_provider or self.llm_provider
+            consolidation_model = self.consolidation_llm_model or self.llm_model
+            logger.info(f"LLM (consolidation): provider={consolidation_provider}, model={consolidation_model}")
         logger.info(f"Embeddings: provider={self.embeddings_provider}")
         logger.info(f"Reranker: provider={self.reranker_provider}")
         logger.info(f"Graph retriever: {self.graph_retriever}")