hindsight_api-0.3.0-py3-none-any.whl → hindsight_api-0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. hindsight_api/admin/cli.py +59 -0
  2. hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
  3. hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
  4. hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
  5. hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
  6. hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
  7. hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
  8. hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
  9. hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
  10. hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
  11. hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
  12. hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
  13. hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
  14. hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
  15. hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
  16. hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
  17. hindsight_api/api/http.py +1119 -93
  18. hindsight_api/api/mcp.py +11 -191
  19. hindsight_api/config.py +145 -45
  20. hindsight_api/engine/consolidation/__init__.py +5 -0
  21. hindsight_api/engine/consolidation/consolidator.py +859 -0
  22. hindsight_api/engine/consolidation/prompts.py +69 -0
  23. hindsight_api/engine/cross_encoder.py +114 -9
  24. hindsight_api/engine/directives/__init__.py +5 -0
  25. hindsight_api/engine/directives/models.py +37 -0
  26. hindsight_api/engine/embeddings.py +102 -5
  27. hindsight_api/engine/interface.py +32 -13
  28. hindsight_api/engine/llm_wrapper.py +505 -43
  29. hindsight_api/engine/memory_engine.py +2090 -1089
  30. hindsight_api/engine/mental_models/__init__.py +14 -0
  31. hindsight_api/engine/mental_models/models.py +53 -0
  32. hindsight_api/engine/reflect/__init__.py +18 -0
  33. hindsight_api/engine/reflect/agent.py +933 -0
  34. hindsight_api/engine/reflect/models.py +109 -0
  35. hindsight_api/engine/reflect/observations.py +186 -0
  36. hindsight_api/engine/reflect/prompts.py +483 -0
  37. hindsight_api/engine/reflect/tools.py +437 -0
  38. hindsight_api/engine/reflect/tools_schema.py +250 -0
  39. hindsight_api/engine/response_models.py +130 -4
  40. hindsight_api/engine/retain/bank_utils.py +79 -201
  41. hindsight_api/engine/retain/fact_extraction.py +81 -48
  42. hindsight_api/engine/retain/fact_storage.py +5 -8
  43. hindsight_api/engine/retain/link_utils.py +5 -8
  44. hindsight_api/engine/retain/orchestrator.py +1 -55
  45. hindsight_api/engine/retain/types.py +2 -2
  46. hindsight_api/engine/search/graph_retrieval.py +2 -2
  47. hindsight_api/engine/search/link_expansion_retrieval.py +164 -29
  48. hindsight_api/engine/search/mpfp_retrieval.py +1 -1
  49. hindsight_api/engine/search/retrieval.py +14 -14
  50. hindsight_api/engine/search/think_utils.py +41 -140
  51. hindsight_api/engine/search/trace.py +0 -1
  52. hindsight_api/engine/search/tracer.py +2 -5
  53. hindsight_api/engine/search/types.py +0 -3
  54. hindsight_api/engine/task_backend.py +112 -196
  55. hindsight_api/engine/utils.py +0 -151
  56. hindsight_api/extensions/__init__.py +10 -1
  57. hindsight_api/extensions/builtin/tenant.py +5 -1
  58. hindsight_api/extensions/operation_validator.py +81 -4
  59. hindsight_api/extensions/tenant.py +26 -0
  60. hindsight_api/main.py +16 -5
  61. hindsight_api/mcp_local.py +12 -53
  62. hindsight_api/mcp_tools.py +494 -0
  63. hindsight_api/models.py +0 -2
  64. hindsight_api/worker/__init__.py +11 -0
  65. hindsight_api/worker/main.py +296 -0
  66. hindsight_api/worker/poller.py +486 -0
  67. {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +12 -6
  68. hindsight_api-0.4.0.dist-info/RECORD +112 -0
  69. {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +1 -0
  70. hindsight_api/engine/retain/observation_regeneration.py +0 -254
  71. hindsight_api/engine/search/observation_utils.py +0 -125
  72. hindsight_api/engine/search/scoring.py +0 -159
  73. hindsight_api-0.3.0.dist-info/RECORD +0 -82
  74. {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
hindsight_api/api/mcp.py CHANGED
@@ -1,4 +1,4 @@
1
- """Hindsight MCP Server implementation using FastMCP."""
1
+ """Hindsight MCP Server implementation using FastMCP (HTTP transport)."""
2
2
 
3
3
  import json
4
4
  import logging
@@ -8,8 +8,7 @@ from contextvars import ContextVar
8
8
  from fastmcp import FastMCP
9
9
 
10
10
  from hindsight_api import MemoryEngine
11
- from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES
12
- from hindsight_api.models import RequestContext
11
+ from hindsight_api.mcp_tools import MCPToolsConfig, register_mcp_tools
13
12
 
14
13
  # Configure logging from HINDSIGHT_API_LOG_LEVEL environment variable
15
14
  _log_level_str = os.environ.get("HINDSIGHT_API_LOG_LEVEL", "info").lower()
@@ -52,194 +51,15 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
52
51
  # Use stateless_http=True for Claude Code compatibility
53
52
  mcp = FastMCP("hindsight-mcp-server", stateless_http=True)
54
53
 
55
- @mcp.tool()
56
- async def retain(
57
- content: str,
58
- context: str = "general",
59
- async_processing: bool = True,
60
- bank_id: str | None = None,
61
- ) -> str:
62
- """
63
- Store important information to long-term memory.
64
-
65
- Use this tool PROACTIVELY whenever the user shares:
66
- - Personal facts, preferences, or interests
67
- - Important events or milestones
68
- - User history, experiences, or background
69
- - Decisions, opinions, or stated preferences
70
- - Goals, plans, or future intentions
71
- - Relationships or people mentioned
72
- - Work context, projects, or responsibilities
73
-
74
- Args:
75
- content: The fact/memory to store (be specific and include relevant details)
76
- context: Category for the memory (e.g., 'preferences', 'work', 'hobbies', 'family'). Default: 'general'
77
- async_processing: If True, queue for background processing and return immediately. If False, wait for completion. Default: True
78
- bank_id: Optional bank to store in (defaults to session bank). Use for cross-bank operations.
79
- """
80
- try:
81
- target_bank = bank_id or get_current_bank_id()
82
- if target_bank is None:
83
- return "Error: No bank_id configured"
84
- contents = [{"content": content, "context": context}]
85
- if async_processing:
86
- # Queue for background processing and return immediately
87
- result = await memory.submit_async_retain(
88
- bank_id=target_bank, contents=contents, request_context=RequestContext()
89
- )
90
- return f"Memory queued for background processing (operation_id: {result.get('operation_id', 'N/A')})"
91
- else:
92
- # Wait for completion
93
- await memory.retain_batch_async(
94
- bank_id=target_bank,
95
- contents=contents,
96
- request_context=RequestContext(),
97
- )
98
- return f"Memory stored successfully in bank '{target_bank}'"
99
- except Exception as e:
100
- logger.error(f"Error storing memory: {e}", exc_info=True)
101
- return f"Error: {str(e)}"
102
-
103
- @mcp.tool()
104
- async def recall(query: str, max_tokens: int = 4096, bank_id: str | None = None) -> str:
105
- """
106
- Search memories to provide personalized, context-aware responses.
107
-
108
- Use this tool PROACTIVELY to:
109
- - Check user's preferences before making suggestions
110
- - Recall user's history to provide continuity
111
- - Remember user's goals and context
112
- - Personalize responses based on past interactions
113
-
114
- Args:
115
- query: Natural language search query (e.g., "user's food preferences", "what projects is user working on")
116
- max_tokens: Maximum tokens in the response (default: 4096)
117
- bank_id: Optional bank to search in (defaults to session bank). Use for cross-bank operations.
118
- """
119
- try:
120
- target_bank = bank_id or get_current_bank_id()
121
- if target_bank is None:
122
- return "Error: No bank_id configured"
123
- from hindsight_api.engine.memory_engine import Budget
124
-
125
- recall_result = await memory.recall_async(
126
- bank_id=target_bank,
127
- query=query,
128
- fact_type=list(VALID_RECALL_FACT_TYPES),
129
- budget=Budget.HIGH,
130
- max_tokens=max_tokens,
131
- request_context=RequestContext(),
132
- )
133
-
134
- # Use model's JSON serialization
135
- return recall_result.model_dump_json(indent=2)
136
- except Exception as e:
137
- logger.error(f"Error searching: {e}", exc_info=True)
138
- return f'{{"error": "{e}", "results": []}}'
139
-
140
- @mcp.tool()
141
- async def reflect(query: str, context: str | None = None, budget: str = "low", bank_id: str | None = None) -> str:
142
- """
143
- Generate thoughtful analysis by synthesizing stored memories with the bank's personality.
144
-
145
- WHEN TO USE THIS TOOL:
146
- Use reflect when you need reasoned analysis, not just fact retrieval. This tool
147
- thinks through the question using everything the bank knows and its personality traits.
148
-
149
- EXAMPLES OF GOOD QUERIES:
150
- - "What patterns have emerged in how I approach debugging?"
151
- - "Based on my past decisions, what architectural style do I prefer?"
152
- - "What might be the best approach for this problem given what you know about me?"
153
- - "How should I prioritize these tasks based on my goals?"
154
-
155
- HOW IT DIFFERS FROM RECALL:
156
- - recall: Returns raw facts matching your search (fast lookup)
157
- - reflect: Reasons across memories to form a synthesized answer (deeper analysis)
158
-
159
- Use recall for "what did I say about X?" and reflect for "what should I do about X?"
160
-
161
- Args:
162
- query: The question or topic to reflect on
163
- context: Optional context about why this reflection is needed
164
- budget: Search budget - 'low', 'mid', or 'high' (default: 'low')
165
- bank_id: Optional bank to reflect in (defaults to session bank). Use for cross-bank operations.
166
- """
167
- try:
168
- target_bank = bank_id or get_current_bank_id()
169
- if target_bank is None:
170
- return "Error: No bank_id configured"
171
- from hindsight_api.engine.memory_engine import Budget
172
-
173
- # Map string budget to enum
174
- budget_map = {"low": Budget.LOW, "mid": Budget.MID, "high": Budget.HIGH}
175
- budget_enum = budget_map.get(budget.lower(), Budget.LOW)
176
-
177
- reflect_result = await memory.reflect_async(
178
- bank_id=target_bank,
179
- query=query,
180
- budget=budget_enum,
181
- context=context,
182
- request_context=RequestContext(),
183
- )
184
-
185
- return reflect_result.model_dump_json(indent=2)
186
- except Exception as e:
187
- logger.error(f"Error reflecting: {e}", exc_info=True)
188
- return f'{{"error": "{e}", "text": ""}}'
189
-
190
- @mcp.tool()
191
- async def list_banks() -> str:
192
- """
193
- List all available memory banks.
194
-
195
- Use this tool to discover what memory banks exist in the system.
196
- Each bank is an isolated memory store (like a separate "brain").
197
-
198
- Returns:
199
- JSON list of banks with their IDs, names, dispositions, and backgrounds.
200
- """
201
- try:
202
- banks = await memory.list_banks(request_context=RequestContext())
203
- return json.dumps({"banks": banks}, indent=2)
204
- except Exception as e:
205
- logger.error(f"Error listing banks: {e}", exc_info=True)
206
- return f'{{"error": "{e}", "banks": []}}'
207
-
208
- @mcp.tool()
209
- async def create_bank(bank_id: str, name: str | None = None, background: str | None = None) -> str:
210
- """
211
- Create a new memory bank or get an existing one.
212
-
213
- Memory banks are isolated stores - each one is like a separate "brain" for a user/agent.
214
- Banks are auto-created with default settings if they don't exist.
215
-
216
- Args:
217
- bank_id: Unique identifier for the bank (e.g., 'user-123', 'agent-alpha')
218
- name: Optional human-friendly name for the bank
219
- background: Optional background context about the bank's owner/purpose
220
- """
221
- try:
222
- # get_bank_profile auto-creates bank if it doesn't exist
223
- profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
224
-
225
- # Update name/background if provided
226
- if name is not None or background is not None:
227
- await memory.update_bank(
228
- bank_id,
229
- name=name,
230
- background=background,
231
- request_context=RequestContext(),
232
- )
233
- # Fetch updated profile
234
- profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
235
-
236
- # Serialize disposition if it's a Pydantic model
237
- if "disposition" in profile and hasattr(profile["disposition"], "model_dump"):
238
- profile["disposition"] = profile["disposition"].model_dump()
239
- return json.dumps(profile, indent=2)
240
- except Exception as e:
241
- logger.error(f"Error creating bank: {e}", exc_info=True)
242
- return f'{{"error": "{e}"}}'
54
+ # Configure and register tools using shared module
55
+ config = MCPToolsConfig(
56
+ bank_id_resolver=get_current_bank_id,
57
+ include_bank_id_param=True, # HTTP MCP supports multi-bank via parameter
58
+ tools=None, # All tools
59
+ retain_fire_and_forget=False, # HTTP MCP supports sync/async modes
60
+ )
61
+
62
+ register_mcp_tools(mcp, memory, config)
243
63
 
244
64
  return mcp
245
65
 
hindsight_api/config.py CHANGED
@@ -4,9 +4,12 @@ Centralized configuration for Hindsight API.
4
4
  All environment variables and their defaults are defined here.
5
5
  """
6
6
 
7
+ import json
7
8
  import logging
8
9
  import os
10
+ import sys
9
11
  from dataclasses import dataclass
12
+ from datetime import datetime, timezone
10
13
 
11
14
  from dotenv import find_dotenv, load_dotenv
12
15
 
@@ -36,6 +39,11 @@ ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
36
39
  ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
37
40
  ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
38
41
 
42
+ ENV_CONSOLIDATION_LLM_PROVIDER = "HINDSIGHT_API_CONSOLIDATION_LLM_PROVIDER"
43
+ ENV_CONSOLIDATION_LLM_API_KEY = "HINDSIGHT_API_CONSOLIDATION_LLM_API_KEY"
44
+ ENV_CONSOLIDATION_LLM_MODEL = "HINDSIGHT_API_CONSOLIDATION_LLM_MODEL"
45
+ ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
46
+
39
47
  ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
40
48
  ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
41
49
  ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
@@ -68,6 +76,7 @@ ENV_RERANKER_FLASHRANK_CACHE_DIR = "HINDSIGHT_API_RERANKER_FLASHRANK_CACHE_DIR"
68
76
  ENV_HOST = "HINDSIGHT_API_HOST"
69
77
  ENV_PORT = "HINDSIGHT_API_PORT"
70
78
  ENV_LOG_LEVEL = "HINDSIGHT_API_LOG_LEVEL"
79
+ ENV_LOG_FORMAT = "HINDSIGHT_API_LOG_FORMAT"
71
80
  ENV_WORKERS = "HINDSIGHT_API_WORKERS"
72
81
  ENV_MCP_ENABLED = "HINDSIGHT_API_MCP_ENABLED"
73
82
  ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
@@ -76,18 +85,20 @@ ENV_RECALL_MAX_CONCURRENT = "HINDSIGHT_API_RECALL_MAX_CONCURRENT"
76
85
  ENV_RECALL_CONNECTION_BUDGET = "HINDSIGHT_API_RECALL_CONNECTION_BUDGET"
77
86
  ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
78
87
  ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
79
-
80
- # Observation thresholds
81
- ENV_OBSERVATION_MIN_FACTS = "HINDSIGHT_API_OBSERVATION_MIN_FACTS"
82
- ENV_OBSERVATION_TOP_ENTITIES = "HINDSIGHT_API_OBSERVATION_TOP_ENTITIES"
88
+ ENV_MENTAL_MODEL_REFRESH_CONCURRENCY = "HINDSIGHT_API_MENTAL_MODEL_REFRESH_CONCURRENCY"
83
89
 
84
90
  # Retain settings
85
91
  ENV_RETAIN_MAX_COMPLETION_TOKENS = "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"
86
92
  ENV_RETAIN_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_CHUNK_SIZE"
87
93
  ENV_RETAIN_EXTRACT_CAUSAL_LINKS = "HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS"
88
94
  ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE"
95
+ ENV_RETAIN_CUSTOM_INSTRUCTIONS = "HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS"
89
96
  ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
90
97
 
98
+ # Observations settings (consolidated knowledge from facts)
99
+ ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
100
+ ENV_CONSOLIDATION_BATCH_SIZE = "HINDSIGHT_API_CONSOLIDATION_BATCH_SIZE"
101
+
91
102
  # Optimization flags
92
103
  ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
93
104
  ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
@@ -101,10 +112,16 @@ ENV_DB_POOL_MAX_SIZE = "HINDSIGHT_API_DB_POOL_MAX_SIZE"
101
112
  ENV_DB_COMMAND_TIMEOUT = "HINDSIGHT_API_DB_COMMAND_TIMEOUT"
102
113
  ENV_DB_ACQUIRE_TIMEOUT = "HINDSIGHT_API_DB_ACQUIRE_TIMEOUT"
103
114
 
104
- # Background task processing
105
- ENV_TASK_BACKEND = "HINDSIGHT_API_TASK_BACKEND"
106
- ENV_TASK_BACKEND_MEMORY_BATCH_SIZE = "HINDSIGHT_API_TASK_BACKEND_MEMORY_BATCH_SIZE"
107
- ENV_TASK_BACKEND_MEMORY_BATCH_INTERVAL = "HINDSIGHT_API_TASK_BACKEND_MEMORY_BATCH_INTERVAL"
115
+ # Worker configuration (distributed task processing)
116
+ ENV_WORKER_ENABLED = "HINDSIGHT_API_WORKER_ENABLED"
117
+ ENV_WORKER_ID = "HINDSIGHT_API_WORKER_ID"
118
+ ENV_WORKER_POLL_INTERVAL_MS = "HINDSIGHT_API_WORKER_POLL_INTERVAL_MS"
119
+ ENV_WORKER_MAX_RETRIES = "HINDSIGHT_API_WORKER_MAX_RETRIES"
120
+ ENV_WORKER_BATCH_SIZE = "HINDSIGHT_API_WORKER_BATCH_SIZE"
121
+ ENV_WORKER_HTTP_PORT = "HINDSIGHT_API_WORKER_HTTP_PORT"
122
+
123
+ # Reflect agent settings
124
+ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
108
125
 
109
126
  # Default values
110
127
  DEFAULT_DATABASE_URL = "pg0"
@@ -138,6 +155,7 @@ DEFAULT_RERANKER_LITELLM_MODEL = "cohere/rerank-english-v3.0"
138
155
  DEFAULT_HOST = "0.0.0.0"
139
156
  DEFAULT_PORT = 8888
140
157
  DEFAULT_LOG_LEVEL = "info"
158
+ DEFAULT_LOG_FORMAT = "text" # Options: "text", "json"
141
159
  DEFAULT_WORKERS = 1
142
160
  DEFAULT_MCP_ENABLED = True
143
161
  DEFAULT_GRAPH_RETRIEVER = "link_expansion" # Options: "link_expansion", "mpfp", "bfs"
@@ -145,19 +163,21 @@ DEFAULT_MPFP_TOP_K_NEIGHBORS = 20 # Fan-out limit per node in MPFP graph traver
145
163
  DEFAULT_RECALL_MAX_CONCURRENT = 32 # Max concurrent recall operations per worker
146
164
  DEFAULT_RECALL_CONNECTION_BUDGET = 4 # Max concurrent DB connections per recall operation
147
165
  DEFAULT_MCP_LOCAL_BANK_ID = "mcp"
148
-
149
- # Observation thresholds
150
- DEFAULT_OBSERVATION_MIN_FACTS = 5 # Min facts required to generate entity observations
151
- DEFAULT_OBSERVATION_TOP_ENTITIES = 5 # Max entities to process per retain batch
166
+ DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY = 8 # Max concurrent mental model refreshes
152
167
 
153
168
  # Retain settings
154
169
  DEFAULT_RETAIN_MAX_COMPLETION_TOKENS = 64000 # Max tokens for fact extraction LLM call
155
170
  DEFAULT_RETAIN_CHUNK_SIZE = 3000 # Max chars per chunk for fact extraction
156
171
  DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS = True # Extract causal links between facts
157
- DEFAULT_RETAIN_EXTRACTION_MODE = "concise" # Extraction mode: "concise" or "verbose"
158
- RETAIN_EXTRACTION_MODES = ("concise", "verbose") # Allowed extraction modes
172
+ DEFAULT_RETAIN_EXTRACTION_MODE = "concise" # Extraction mode: "concise", "verbose", or "custom"
173
+ RETAIN_EXTRACTION_MODES = ("concise", "verbose", "custom") # Allowed extraction modes
174
+ DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS = None # Custom extraction guidelines (only used when mode="custom")
159
175
  DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False # Run observation generation async (after retain completes)
160
176
 
177
+ # Observations defaults (consolidated knowledge from facts)
178
+ DEFAULT_ENABLE_OBSERVATIONS = True # Observations enabled by default
179
+ DEFAULT_CONSOLIDATION_BATCH_SIZE = 50 # Memories to load per batch (internal memory optimization)
180
+
161
181
  # Database migrations
162
182
  DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
163
183
 
@@ -167,10 +187,16 @@ DEFAULT_DB_POOL_MAX_SIZE = 100
167
187
  DEFAULT_DB_COMMAND_TIMEOUT = 60 # seconds
168
188
  DEFAULT_DB_ACQUIRE_TIMEOUT = 30 # seconds
169
189
 
170
- # Background task processing
171
- DEFAULT_TASK_BACKEND = "memory" # Options: "memory", "noop"
172
- DEFAULT_TASK_BACKEND_MEMORY_BATCH_SIZE = 10
173
- DEFAULT_TASK_BACKEND_MEMORY_BATCH_INTERVAL = 1.0 # seconds
190
+ # Worker configuration (distributed task processing)
191
+ DEFAULT_WORKER_ENABLED = True # API runs worker by default (standalone mode)
192
+ DEFAULT_WORKER_ID = None # Will use hostname if not specified
193
+ DEFAULT_WORKER_POLL_INTERVAL_MS = 500 # Poll database every 500ms
194
+ DEFAULT_WORKER_MAX_RETRIES = 3 # Max retries before marking task failed
195
+ DEFAULT_WORKER_BATCH_SIZE = 10 # Tasks to claim per poll cycle
196
+ DEFAULT_WORKER_HTTP_PORT = 8889 # HTTP port for worker metrics/health
197
+
198
+ # Reflect agent settings
199
+ DEFAULT_REFLECT_MAX_ITERATIONS = 10 # Max tool call iterations before forcing response
174
200
 
175
201
  # Default MCP tool descriptions (can be customized via env vars)
176
202
  DEFAULT_MCP_RETAIN_DESCRIPTION = """Store important information to long-term memory.
@@ -196,6 +222,36 @@ Use this tool PROACTIVELY to:
196
222
  EMBEDDING_DIMENSION = DEFAULT_EMBEDDING_DIMENSION
197
223
 
198
224
 
225
+ class JsonFormatter(logging.Formatter):
226
+ """JSON formatter for structured logging.
227
+
228
+ Outputs logs in JSON format with a 'severity' field that cloud logging
229
+ systems (GCP, AWS CloudWatch, etc.) can parse to correctly categorize log levels.
230
+ """
231
+
232
+ SEVERITY_MAP = {
233
+ logging.DEBUG: "DEBUG",
234
+ logging.INFO: "INFO",
235
+ logging.WARNING: "WARNING",
236
+ logging.ERROR: "ERROR",
237
+ logging.CRITICAL: "CRITICAL",
238
+ }
239
+
240
+ def format(self, record: logging.LogRecord) -> str:
241
+ log_entry = {
242
+ "severity": self.SEVERITY_MAP.get(record.levelno, "DEFAULT"),
243
+ "message": record.getMessage(),
244
+ "timestamp": datetime.now(timezone.utc).isoformat(),
245
+ "logger": record.name,
246
+ }
247
+
248
+ # Add exception info if present
249
+ if record.exc_info:
250
+ log_entry["exception"] = self.formatException(record.exc_info)
251
+
252
+ return json.dumps(log_entry)
253
+
254
+
199
255
  def _validate_extraction_mode(mode: str) -> str:
200
256
  """Validate and normalize extraction mode."""
201
257
  mode_lower = mode.lower()
@@ -234,6 +290,11 @@ class HindsightConfig:
234
290
  reflect_llm_model: str | None
235
291
  reflect_llm_base_url: str | None
236
292
 
293
+ consolidation_llm_provider: str | None
294
+ consolidation_llm_api_key: str | None
295
+ consolidation_llm_model: str | None
296
+ consolidation_llm_base_url: str | None
297
+
237
298
  # Embeddings
238
299
  embeddings_provider: str
239
300
  embeddings_local_model: str
@@ -254,6 +315,7 @@ class HindsightConfig:
254
315
  host: str
255
316
  port: int
256
317
  log_level: str
318
+ log_format: str
257
319
  mcp_enabled: bool
258
320
 
259
321
  # Recall
@@ -261,18 +323,20 @@ class HindsightConfig:
261
323
  mpfp_top_k_neighbors: int
262
324
  recall_max_concurrent: int
263
325
  recall_connection_budget: int
264
-
265
- # Observation thresholds
266
- observation_min_facts: int
267
- observation_top_entities: int
326
+ mental_model_refresh_concurrency: int
268
327
 
269
328
  # Retain settings
270
329
  retain_max_completion_tokens: int
271
330
  retain_chunk_size: int
272
331
  retain_extract_causal_links: bool
273
332
  retain_extraction_mode: str
333
+ retain_custom_instructions: str | None
274
334
  retain_observations_async: bool
275
335
 
336
+ # Observations settings (consolidated knowledge from facts)
337
+ enable_observations: bool
338
+ consolidation_batch_size: int
339
+
276
340
  # Optimization flags
277
341
  skip_llm_verification: bool
278
342
  lazy_reranker: bool
@@ -286,10 +350,16 @@ class HindsightConfig:
286
350
  db_command_timeout: int
287
351
  db_acquire_timeout: int
288
352
 
289
- # Background task processing
290
- task_backend: str
291
- task_backend_memory_batch_size: int
292
- task_backend_memory_batch_interval: float
353
+ # Worker configuration (distributed task processing)
354
+ worker_enabled: bool
355
+ worker_id: str | None
356
+ worker_poll_interval_ms: int
357
+ worker_max_retries: int
358
+ worker_batch_size: int
359
+ worker_http_port: int
360
+
361
+ # Reflect agent settings
362
+ reflect_max_iterations: int
293
363
 
294
364
  @classmethod
295
365
  def from_env(cls) -> "HindsightConfig":
@@ -313,6 +383,10 @@ class HindsightConfig:
313
383
  reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
314
384
  reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
315
385
  reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
386
+ consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
387
+ consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
388
+ consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
389
+ consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
316
390
  # Embeddings
317
391
  embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
318
392
  embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
@@ -333,6 +407,7 @@ class HindsightConfig:
333
407
  host=os.getenv(ENV_HOST, DEFAULT_HOST),
334
408
  port=int(os.getenv(ENV_PORT, DEFAULT_PORT)),
335
409
  log_level=os.getenv(ENV_LOG_LEVEL, DEFAULT_LOG_LEVEL),
410
+ log_format=os.getenv(ENV_LOG_FORMAT, DEFAULT_LOG_FORMAT).lower(),
336
411
  mcp_enabled=os.getenv(ENV_MCP_ENABLED, str(DEFAULT_MCP_ENABLED)).lower() == "true",
337
412
  # Recall
338
413
  graph_retriever=os.getenv(ENV_GRAPH_RETRIEVER, DEFAULT_GRAPH_RETRIEVER),
@@ -341,14 +416,12 @@ class HindsightConfig:
341
416
  recall_connection_budget=int(
342
417
  os.getenv(ENV_RECALL_CONNECTION_BUDGET, str(DEFAULT_RECALL_CONNECTION_BUDGET))
343
418
  ),
419
+ mental_model_refresh_concurrency=int(
420
+ os.getenv(ENV_MENTAL_MODEL_REFRESH_CONCURRENCY, str(DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY))
421
+ ),
344
422
  # Optimization flags
345
423
  skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
346
424
  lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
347
- # Observation thresholds
348
- observation_min_facts=int(os.getenv(ENV_OBSERVATION_MIN_FACTS, str(DEFAULT_OBSERVATION_MIN_FACTS))),
349
- observation_top_entities=int(
350
- os.getenv(ENV_OBSERVATION_TOP_ENTITIES, str(DEFAULT_OBSERVATION_TOP_ENTITIES))
351
- ),
352
425
  # Retain settings
353
426
  retain_max_completion_tokens=int(
354
427
  os.getenv(ENV_RETAIN_MAX_COMPLETION_TOKENS, str(DEFAULT_RETAIN_MAX_COMPLETION_TOKENS))
@@ -361,10 +434,16 @@ class HindsightConfig:
361
434
  retain_extraction_mode=_validate_extraction_mode(
362
435
  os.getenv(ENV_RETAIN_EXTRACTION_MODE, DEFAULT_RETAIN_EXTRACTION_MODE)
363
436
  ),
437
+ retain_custom_instructions=os.getenv(ENV_RETAIN_CUSTOM_INSTRUCTIONS) or DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS,
364
438
  retain_observations_async=os.getenv(
365
439
  ENV_RETAIN_OBSERVATIONS_ASYNC, str(DEFAULT_RETAIN_OBSERVATIONS_ASYNC)
366
440
  ).lower()
367
441
  == "true",
442
+ # Observations settings (consolidated knowledge from facts)
443
+ enable_observations=os.getenv(ENV_ENABLE_OBSERVATIONS, str(DEFAULT_ENABLE_OBSERVATIONS)).lower() == "true",
444
+ consolidation_batch_size=int(
445
+ os.getenv(ENV_CONSOLIDATION_BATCH_SIZE, str(DEFAULT_CONSOLIDATION_BATCH_SIZE))
446
+ ),
368
447
  # Database migrations
369
448
  run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
370
449
  # Database connection pool
@@ -372,14 +451,15 @@ class HindsightConfig:
372
451
  db_pool_max_size=int(os.getenv(ENV_DB_POOL_MAX_SIZE, str(DEFAULT_DB_POOL_MAX_SIZE))),
373
452
  db_command_timeout=int(os.getenv(ENV_DB_COMMAND_TIMEOUT, str(DEFAULT_DB_COMMAND_TIMEOUT))),
374
453
  db_acquire_timeout=int(os.getenv(ENV_DB_ACQUIRE_TIMEOUT, str(DEFAULT_DB_ACQUIRE_TIMEOUT))),
375
- # Background task processing
376
- task_backend=os.getenv(ENV_TASK_BACKEND, DEFAULT_TASK_BACKEND),
377
- task_backend_memory_batch_size=int(
378
- os.getenv(ENV_TASK_BACKEND_MEMORY_BATCH_SIZE, str(DEFAULT_TASK_BACKEND_MEMORY_BATCH_SIZE))
379
- ),
380
- task_backend_memory_batch_interval=float(
381
- os.getenv(ENV_TASK_BACKEND_MEMORY_BATCH_INTERVAL, str(DEFAULT_TASK_BACKEND_MEMORY_BATCH_INTERVAL))
382
- ),
454
+ # Worker configuration
455
+ worker_enabled=os.getenv(ENV_WORKER_ENABLED, str(DEFAULT_WORKER_ENABLED)).lower() == "true",
456
+ worker_id=os.getenv(ENV_WORKER_ID) or DEFAULT_WORKER_ID,
457
+ worker_poll_interval_ms=int(os.getenv(ENV_WORKER_POLL_INTERVAL_MS, str(DEFAULT_WORKER_POLL_INTERVAL_MS))),
458
+ worker_max_retries=int(os.getenv(ENV_WORKER_MAX_RETRIES, str(DEFAULT_WORKER_MAX_RETRIES))),
459
+ worker_batch_size=int(os.getenv(ENV_WORKER_BATCH_SIZE, str(DEFAULT_WORKER_BATCH_SIZE))),
460
+ worker_http_port=int(os.getenv(ENV_WORKER_HTTP_PORT, str(DEFAULT_WORKER_HTTP_PORT))),
461
+ # Reflect agent settings
462
+ reflect_max_iterations=int(os.getenv(ENV_REFLECT_MAX_ITERATIONS, str(DEFAULT_REFLECT_MAX_ITERATIONS))),
383
463
  )
384
464
 
385
465
  def get_llm_base_url(self) -> str:
@@ -410,12 +490,28 @@ class HindsightConfig:
410
490
  return log_level_map.get(self.log_level.lower(), logging.INFO)
411
491
 
412
492
  def configure_logging(self) -> None:
413
- """Configure Python logging based on the log level."""
414
- logging.basicConfig(
415
- level=self.get_python_log_level(),
416
- format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
417
- force=True, # Override any existing configuration
418
- )
493
+ """Configure Python logging based on the log level and format.
494
+
495
+ When log_format is "json", outputs structured JSON logs with a severity
496
+ field that GCP Cloud Logging can parse for proper log level categorization.
497
+ """
498
+ root_logger = logging.getLogger()
499
+ root_logger.setLevel(self.get_python_log_level())
500
+
501
+ # Remove existing handlers
502
+ for handler in root_logger.handlers[:]:
503
+ root_logger.removeHandler(handler)
504
+
505
+ # Create handler writing to stdout (GCP treats stderr as ERROR)
506
+ handler = logging.StreamHandler(sys.stdout)
507
+ handler.setLevel(self.get_python_log_level())
508
+
509
+ if self.log_format == "json":
510
+ handler.setFormatter(JsonFormatter())
511
+ else:
512
+ handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s"))
513
+
514
+ root_logger.addHandler(handler)
419
515
 
420
516
  def log_config(self) -> None:
421
517
  """Log the current configuration (without sensitive values)."""
@@ -429,6 +525,10 @@ class HindsightConfig:
429
525
  reflect_provider = self.reflect_llm_provider or self.llm_provider
430
526
  reflect_model = self.reflect_llm_model or self.llm_model
431
527
  logger.info(f"LLM (reflect): provider={reflect_provider}, model={reflect_model}")
528
+ if self.consolidation_llm_provider or self.consolidation_llm_model:
529
+ consolidation_provider = self.consolidation_llm_provider or self.llm_provider
530
+ consolidation_model = self.consolidation_llm_model or self.llm_model
531
+ logger.info(f"LLM (consolidation): provider={consolidation_provider}, model={consolidation_model}")
432
532
  logger.info(f"Embeddings: provider={self.embeddings_provider}")
433
533
  logger.info(f"Reranker: provider={self.reranker_provider}")
434
534
  logger.info(f"Graph retriever: {self.graph_retriever}")
@@ -0,0 +1,5 @@
1
+ """Consolidation engine for automatic learning creation from memories."""
2
+
3
+ from .consolidator import run_consolidation_job
4
+
5
+ __all__ = ["run_consolidation_job"]