hindsight-api 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. hindsight_api/admin/__init__.py +1 -0
  2. hindsight_api/admin/cli.py +311 -0
  3. hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
  4. hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
  5. hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
  6. hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
  7. hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
  8. hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
  9. hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
  10. hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
  11. hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
  12. hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
  13. hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
  14. hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
  15. hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
  16. hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
  17. hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
  18. hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
  19. hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
  20. hindsight_api/api/http.py +1406 -118
  21. hindsight_api/api/mcp.py +11 -196
  22. hindsight_api/config.py +359 -27
  23. hindsight_api/engine/consolidation/__init__.py +5 -0
  24. hindsight_api/engine/consolidation/consolidator.py +859 -0
  25. hindsight_api/engine/consolidation/prompts.py +69 -0
  26. hindsight_api/engine/cross_encoder.py +706 -88
  27. hindsight_api/engine/db_budget.py +284 -0
  28. hindsight_api/engine/db_utils.py +11 -0
  29. hindsight_api/engine/directives/__init__.py +5 -0
  30. hindsight_api/engine/directives/models.py +37 -0
  31. hindsight_api/engine/embeddings.py +553 -29
  32. hindsight_api/engine/entity_resolver.py +8 -5
  33. hindsight_api/engine/interface.py +40 -17
  34. hindsight_api/engine/llm_wrapper.py +744 -68
  35. hindsight_api/engine/memory_engine.py +2505 -1017
  36. hindsight_api/engine/mental_models/__init__.py +14 -0
  37. hindsight_api/engine/mental_models/models.py +53 -0
  38. hindsight_api/engine/query_analyzer.py +4 -3
  39. hindsight_api/engine/reflect/__init__.py +18 -0
  40. hindsight_api/engine/reflect/agent.py +933 -0
  41. hindsight_api/engine/reflect/models.py +109 -0
  42. hindsight_api/engine/reflect/observations.py +186 -0
  43. hindsight_api/engine/reflect/prompts.py +483 -0
  44. hindsight_api/engine/reflect/tools.py +437 -0
  45. hindsight_api/engine/reflect/tools_schema.py +250 -0
  46. hindsight_api/engine/response_models.py +168 -4
  47. hindsight_api/engine/retain/bank_utils.py +79 -201
  48. hindsight_api/engine/retain/fact_extraction.py +424 -195
  49. hindsight_api/engine/retain/fact_storage.py +35 -12
  50. hindsight_api/engine/retain/link_utils.py +29 -24
  51. hindsight_api/engine/retain/orchestrator.py +24 -43
  52. hindsight_api/engine/retain/types.py +11 -2
  53. hindsight_api/engine/search/graph_retrieval.py +43 -14
  54. hindsight_api/engine/search/link_expansion_retrieval.py +391 -0
  55. hindsight_api/engine/search/mpfp_retrieval.py +362 -117
  56. hindsight_api/engine/search/reranking.py +2 -2
  57. hindsight_api/engine/search/retrieval.py +848 -201
  58. hindsight_api/engine/search/tags.py +172 -0
  59. hindsight_api/engine/search/think_utils.py +42 -141
  60. hindsight_api/engine/search/trace.py +12 -1
  61. hindsight_api/engine/search/tracer.py +26 -6
  62. hindsight_api/engine/search/types.py +21 -3
  63. hindsight_api/engine/task_backend.py +113 -106
  64. hindsight_api/engine/utils.py +1 -152
  65. hindsight_api/extensions/__init__.py +10 -1
  66. hindsight_api/extensions/builtin/tenant.py +5 -1
  67. hindsight_api/extensions/context.py +10 -1
  68. hindsight_api/extensions/operation_validator.py +81 -4
  69. hindsight_api/extensions/tenant.py +26 -0
  70. hindsight_api/main.py +69 -6
  71. hindsight_api/mcp_local.py +12 -53
  72. hindsight_api/mcp_tools.py +494 -0
  73. hindsight_api/metrics.py +433 -48
  74. hindsight_api/migrations.py +141 -1
  75. hindsight_api/models.py +3 -3
  76. hindsight_api/pg0.py +53 -0
  77. hindsight_api/server.py +39 -2
  78. hindsight_api/worker/__init__.py +11 -0
  79. hindsight_api/worker/main.py +296 -0
  80. hindsight_api/worker/poller.py +486 -0
  81. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +16 -6
  82. hindsight_api-0.4.0.dist-info/RECORD +112 -0
  83. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +2 -0
  84. hindsight_api/engine/retain/observation_regeneration.py +0 -254
  85. hindsight_api/engine/search/observation_utils.py +0 -125
  86. hindsight_api/engine/search/scoring.py +0 -159
  87. hindsight_api-0.2.1.dist-info/RECORD +0 -75
  88. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
hindsight_api/config.py CHANGED
@@ -4,9 +4,17 @@ Centralized configuration for Hindsight API.
4
4
  All environment variables and their defaults are defined here.
5
5
  """
6
6
 
7
+ import json
7
8
  import logging
8
9
  import os
10
+ import sys
9
11
  from dataclasses import dataclass
12
+ from datetime import datetime, timezone
13
+
14
+ from dotenv import find_dotenv, load_dotenv
15
+
16
+ # Load .env file, searching current and parent directories (overrides existing env vars)
17
+ load_dotenv(find_dotenv(usecwd=True), override=True)
10
18
 
11
19
  logger = logging.getLogger(__name__)
12
20
 
@@ -18,31 +26,103 @@ ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
18
26
  ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
19
27
  ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
20
28
  ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
29
+ ENV_LLM_GROQ_SERVICE_TIER = "HINDSIGHT_API_LLM_GROQ_SERVICE_TIER"
30
+
31
+ # Per-operation LLM configuration (optional, falls back to global LLM config)
32
+ ENV_RETAIN_LLM_PROVIDER = "HINDSIGHT_API_RETAIN_LLM_PROVIDER"
33
+ ENV_RETAIN_LLM_API_KEY = "HINDSIGHT_API_RETAIN_LLM_API_KEY"
34
+ ENV_RETAIN_LLM_MODEL = "HINDSIGHT_API_RETAIN_LLM_MODEL"
35
+ ENV_RETAIN_LLM_BASE_URL = "HINDSIGHT_API_RETAIN_LLM_BASE_URL"
36
+
37
+ ENV_REFLECT_LLM_PROVIDER = "HINDSIGHT_API_REFLECT_LLM_PROVIDER"
38
+ ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
39
+ ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
40
+ ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
41
+
42
+ ENV_CONSOLIDATION_LLM_PROVIDER = "HINDSIGHT_API_CONSOLIDATION_LLM_PROVIDER"
43
+ ENV_CONSOLIDATION_LLM_API_KEY = "HINDSIGHT_API_CONSOLIDATION_LLM_API_KEY"
44
+ ENV_CONSOLIDATION_LLM_MODEL = "HINDSIGHT_API_CONSOLIDATION_LLM_MODEL"
45
+ ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
21
46
 
22
47
  ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
23
48
  ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
24
49
  ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
50
+ ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
51
+ ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
52
+ ENV_EMBEDDINGS_OPENAI_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL"
53
+
54
+ ENV_COHERE_API_KEY = "HINDSIGHT_API_COHERE_API_KEY"
55
+ ENV_EMBEDDINGS_COHERE_MODEL = "HINDSIGHT_API_EMBEDDINGS_COHERE_MODEL"
56
+ ENV_EMBEDDINGS_COHERE_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_COHERE_BASE_URL"
57
+ ENV_RERANKER_COHERE_MODEL = "HINDSIGHT_API_RERANKER_COHERE_MODEL"
58
+ ENV_RERANKER_COHERE_BASE_URL = "HINDSIGHT_API_RERANKER_COHERE_BASE_URL"
59
+
60
+ # LiteLLM gateway configuration (for embeddings and reranker via LiteLLM proxy)
61
+ ENV_LITELLM_API_BASE = "HINDSIGHT_API_LITELLM_API_BASE"
62
+ ENV_LITELLM_API_KEY = "HINDSIGHT_API_LITELLM_API_KEY"
63
+ ENV_EMBEDDINGS_LITELLM_MODEL = "HINDSIGHT_API_EMBEDDINGS_LITELLM_MODEL"
64
+ ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"
25
65
 
26
66
  ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
27
67
  ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
68
+ ENV_RERANKER_LOCAL_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT"
28
69
  ENV_RERANKER_TEI_URL = "HINDSIGHT_API_RERANKER_TEI_URL"
70
+ ENV_RERANKER_TEI_BATCH_SIZE = "HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE"
71
+ ENV_RERANKER_TEI_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_TEI_MAX_CONCURRENT"
72
+ ENV_RERANKER_MAX_CANDIDATES = "HINDSIGHT_API_RERANKER_MAX_CANDIDATES"
73
+ ENV_RERANKER_FLASHRANK_MODEL = "HINDSIGHT_API_RERANKER_FLASHRANK_MODEL"
74
+ ENV_RERANKER_FLASHRANK_CACHE_DIR = "HINDSIGHT_API_RERANKER_FLASHRANK_CACHE_DIR"
29
75
 
30
76
  ENV_HOST = "HINDSIGHT_API_HOST"
31
77
  ENV_PORT = "HINDSIGHT_API_PORT"
32
78
  ENV_LOG_LEVEL = "HINDSIGHT_API_LOG_LEVEL"
79
+ ENV_LOG_FORMAT = "HINDSIGHT_API_LOG_FORMAT"
80
+ ENV_WORKERS = "HINDSIGHT_API_WORKERS"
33
81
  ENV_MCP_ENABLED = "HINDSIGHT_API_MCP_ENABLED"
34
82
  ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
83
+ ENV_MPFP_TOP_K_NEIGHBORS = "HINDSIGHT_API_MPFP_TOP_K_NEIGHBORS"
84
+ ENV_RECALL_MAX_CONCURRENT = "HINDSIGHT_API_RECALL_MAX_CONCURRENT"
85
+ ENV_RECALL_CONNECTION_BUDGET = "HINDSIGHT_API_RECALL_CONNECTION_BUDGET"
35
86
  ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
36
87
  ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
88
+ ENV_MENTAL_MODEL_REFRESH_CONCURRENCY = "HINDSIGHT_API_MENTAL_MODEL_REFRESH_CONCURRENCY"
89
+
90
+ # Retain settings
91
+ ENV_RETAIN_MAX_COMPLETION_TOKENS = "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"
92
+ ENV_RETAIN_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_CHUNK_SIZE"
93
+ ENV_RETAIN_EXTRACT_CAUSAL_LINKS = "HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS"
94
+ ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE"
95
+ ENV_RETAIN_CUSTOM_INSTRUCTIONS = "HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS"
96
+ ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
37
97
 
38
- # Observation thresholds
39
- ENV_OBSERVATION_MIN_FACTS = "HINDSIGHT_API_OBSERVATION_MIN_FACTS"
40
- ENV_OBSERVATION_TOP_ENTITIES = "HINDSIGHT_API_OBSERVATION_TOP_ENTITIES"
98
+ # Observations settings (consolidated knowledge from facts)
99
+ ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
100
+ ENV_CONSOLIDATION_BATCH_SIZE = "HINDSIGHT_API_CONSOLIDATION_BATCH_SIZE"
41
101
 
42
102
  # Optimization flags
43
103
  ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
44
104
  ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
45
105
 
106
+ # Database migrations
107
+ ENV_RUN_MIGRATIONS_ON_STARTUP = "HINDSIGHT_API_RUN_MIGRATIONS_ON_STARTUP"
108
+
109
+ # Database connection pool
110
+ ENV_DB_POOL_MIN_SIZE = "HINDSIGHT_API_DB_POOL_MIN_SIZE"
111
+ ENV_DB_POOL_MAX_SIZE = "HINDSIGHT_API_DB_POOL_MAX_SIZE"
112
+ ENV_DB_COMMAND_TIMEOUT = "HINDSIGHT_API_DB_COMMAND_TIMEOUT"
113
+ ENV_DB_ACQUIRE_TIMEOUT = "HINDSIGHT_API_DB_ACQUIRE_TIMEOUT"
114
+
115
+ # Worker configuration (distributed task processing)
116
+ ENV_WORKER_ENABLED = "HINDSIGHT_API_WORKER_ENABLED"
117
+ ENV_WORKER_ID = "HINDSIGHT_API_WORKER_ID"
118
+ ENV_WORKER_POLL_INTERVAL_MS = "HINDSIGHT_API_WORKER_POLL_INTERVAL_MS"
119
+ ENV_WORKER_MAX_RETRIES = "HINDSIGHT_API_WORKER_MAX_RETRIES"
120
+ ENV_WORKER_BATCH_SIZE = "HINDSIGHT_API_WORKER_BATCH_SIZE"
121
+ ENV_WORKER_HTTP_PORT = "HINDSIGHT_API_WORKER_HTTP_PORT"
122
+
123
+ # Reflect agent settings
124
+ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
125
+
46
126
  # Default values
47
127
  DEFAULT_DATABASE_URL = "pg0"
48
128
  DEFAULT_LLM_PROVIDER = "openai"
@@ -52,20 +132,71 @@ DEFAULT_LLM_TIMEOUT = 120.0 # seconds
52
132
 
53
133
  DEFAULT_EMBEDDINGS_PROVIDER = "local"
54
134
  DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
135
+ DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
136
+ DEFAULT_EMBEDDING_DIMENSION = 384
55
137
 
56
138
  DEFAULT_RERANKER_PROVIDER = "local"
57
139
  DEFAULT_RERANKER_LOCAL_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
140
+ DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT = 4 # Limit concurrent CPU-bound reranking to prevent thrashing
141
+ DEFAULT_RERANKER_TEI_BATCH_SIZE = 128
142
+ DEFAULT_RERANKER_TEI_MAX_CONCURRENT = 8
143
+ DEFAULT_RERANKER_MAX_CANDIDATES = 300
144
+ DEFAULT_RERANKER_FLASHRANK_MODEL = "ms-marco-MiniLM-L-12-v2" # Best balance of speed and quality
145
+ DEFAULT_RERANKER_FLASHRANK_CACHE_DIR = None # Use default cache directory
146
+
147
+ DEFAULT_EMBEDDINGS_COHERE_MODEL = "embed-english-v3.0"
148
+ DEFAULT_RERANKER_COHERE_MODEL = "rerank-english-v3.0"
149
+
150
+ # LiteLLM defaults
151
+ DEFAULT_LITELLM_API_BASE = "http://localhost:4000"
152
+ DEFAULT_EMBEDDINGS_LITELLM_MODEL = "text-embedding-3-small"
153
+ DEFAULT_RERANKER_LITELLM_MODEL = "cohere/rerank-english-v3.0"
58
154
 
59
155
  DEFAULT_HOST = "0.0.0.0"
60
156
  DEFAULT_PORT = 8888
61
157
  DEFAULT_LOG_LEVEL = "info"
158
+ DEFAULT_LOG_FORMAT = "text" # Options: "text", "json"
159
+ DEFAULT_WORKERS = 1
62
160
  DEFAULT_MCP_ENABLED = True
63
- DEFAULT_GRAPH_RETRIEVER = "bfs" # Options: "bfs", "mpfp"
161
+ DEFAULT_GRAPH_RETRIEVER = "link_expansion" # Options: "link_expansion", "mpfp", "bfs"
162
+ DEFAULT_MPFP_TOP_K_NEIGHBORS = 20 # Fan-out limit per node in MPFP graph traversal
163
+ DEFAULT_RECALL_MAX_CONCURRENT = 32 # Max concurrent recall operations per worker
164
+ DEFAULT_RECALL_CONNECTION_BUDGET = 4 # Max concurrent DB connections per recall operation
64
165
  DEFAULT_MCP_LOCAL_BANK_ID = "mcp"
65
-
66
- # Observation thresholds
67
- DEFAULT_OBSERVATION_MIN_FACTS = 5 # Min facts required to generate entity observations
68
- DEFAULT_OBSERVATION_TOP_ENTITIES = 5 # Max entities to process per retain batch
166
+ DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY = 8 # Max concurrent mental model refreshes
167
+
168
+ # Retain settings
169
+ DEFAULT_RETAIN_MAX_COMPLETION_TOKENS = 64000 # Max tokens for fact extraction LLM call
170
+ DEFAULT_RETAIN_CHUNK_SIZE = 3000 # Max chars per chunk for fact extraction
171
+ DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS = True # Extract causal links between facts
172
+ DEFAULT_RETAIN_EXTRACTION_MODE = "concise" # Extraction mode: "concise", "verbose", or "custom"
173
+ RETAIN_EXTRACTION_MODES = ("concise", "verbose", "custom") # Allowed extraction modes
174
+ DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS = None # Custom extraction guidelines (only used when mode="custom")
175
+ DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False # Run observation generation async (after retain completes)
176
+
177
+ # Observations defaults (consolidated knowledge from facts)
178
+ DEFAULT_ENABLE_OBSERVATIONS = True # Observations enabled by default
179
+ DEFAULT_CONSOLIDATION_BATCH_SIZE = 50 # Memories to load per batch (internal memory optimization)
180
+
181
+ # Database migrations
182
+ DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
183
+
184
+ # Database connection pool
185
+ DEFAULT_DB_POOL_MIN_SIZE = 5
186
+ DEFAULT_DB_POOL_MAX_SIZE = 100
187
+ DEFAULT_DB_COMMAND_TIMEOUT = 60 # seconds
188
+ DEFAULT_DB_ACQUIRE_TIMEOUT = 30 # seconds
189
+
190
+ # Worker configuration (distributed task processing)
191
+ DEFAULT_WORKER_ENABLED = True # API runs worker by default (standalone mode)
192
+ DEFAULT_WORKER_ID = None # Will use hostname if not specified
193
+ DEFAULT_WORKER_POLL_INTERVAL_MS = 500 # Poll database every 500ms
194
+ DEFAULT_WORKER_MAX_RETRIES = 3 # Max retries before marking task failed
195
+ DEFAULT_WORKER_BATCH_SIZE = 10 # Tasks to claim per poll cycle
196
+ DEFAULT_WORKER_HTTP_PORT = 8889 # HTTP port for worker metrics/health
197
+
198
+ # Reflect agent settings
199
+ DEFAULT_REFLECT_MAX_ITERATIONS = 10 # Max tool call iterations before forcing response
69
200
 
70
201
  # Default MCP tool descriptions (can be customized via env vars)
71
202
  DEFAULT_MCP_RETAIN_DESCRIPTION = """Store important information to long-term memory.
@@ -87,8 +218,50 @@ Use this tool PROACTIVELY to:
87
218
  - Remember user's goals and context
88
219
  - Personalize responses based on past interactions"""
89
220
 
90
- # Required embedding dimension for database schema
91
- EMBEDDING_DIMENSION = 384
221
+ # Default embedding dimension (used by initial migration, adjusted at runtime)
222
+ EMBEDDING_DIMENSION = DEFAULT_EMBEDDING_DIMENSION
223
+
224
+
225
+ class JsonFormatter(logging.Formatter):
226
+ """JSON formatter for structured logging.
227
+
228
+ Outputs logs in JSON format with a 'severity' field that cloud logging
229
+ systems (GCP, AWS CloudWatch, etc.) can parse to correctly categorize log levels.
230
+ """
231
+
232
+ SEVERITY_MAP = {
233
+ logging.DEBUG: "DEBUG",
234
+ logging.INFO: "INFO",
235
+ logging.WARNING: "WARNING",
236
+ logging.ERROR: "ERROR",
237
+ logging.CRITICAL: "CRITICAL",
238
+ }
239
+
240
+ def format(self, record: logging.LogRecord) -> str:
241
+ log_entry = {
242
+ "severity": self.SEVERITY_MAP.get(record.levelno, "DEFAULT"),
243
+ "message": record.getMessage(),
244
+ "timestamp": datetime.now(timezone.utc).isoformat(),
245
+ "logger": record.name,
246
+ }
247
+
248
+ # Add exception info if present
249
+ if record.exc_info:
250
+ log_entry["exception"] = self.formatException(record.exc_info)
251
+
252
+ return json.dumps(log_entry)
253
+
254
+
255
+ def _validate_extraction_mode(mode: str) -> str:
256
+ """Validate and normalize extraction mode."""
257
+ mode_lower = mode.lower()
258
+ if mode_lower not in RETAIN_EXTRACTION_MODES:
259
+ logger.warning(
260
+ f"Invalid extraction mode '{mode}', must be one of {RETAIN_EXTRACTION_MODES}. "
261
+ f"Defaulting to '{DEFAULT_RETAIN_EXTRACTION_MODE}'."
262
+ )
263
+ return DEFAULT_RETAIN_EXTRACTION_MODE
264
+ return mode_lower
92
265
 
93
266
 
94
267
  @dataclass
@@ -98,7 +271,7 @@ class HindsightConfig:
98
271
  # Database
99
272
  database_url: str
100
273
 
101
- # LLM
274
+ # LLM (default, used as fallback for per-operation config)
102
275
  llm_provider: str
103
276
  llm_api_key: str | None
104
277
  llm_model: str
@@ -106,33 +279,88 @@ class HindsightConfig:
106
279
  llm_max_concurrent: int
107
280
  llm_timeout: float
108
281
 
282
+ # Per-operation LLM configuration (None = use default LLM config)
283
+ retain_llm_provider: str | None
284
+ retain_llm_api_key: str | None
285
+ retain_llm_model: str | None
286
+ retain_llm_base_url: str | None
287
+
288
+ reflect_llm_provider: str | None
289
+ reflect_llm_api_key: str | None
290
+ reflect_llm_model: str | None
291
+ reflect_llm_base_url: str | None
292
+
293
+ consolidation_llm_provider: str | None
294
+ consolidation_llm_api_key: str | None
295
+ consolidation_llm_model: str | None
296
+ consolidation_llm_base_url: str | None
297
+
109
298
  # Embeddings
110
299
  embeddings_provider: str
111
300
  embeddings_local_model: str
112
301
  embeddings_tei_url: str | None
302
+ embeddings_openai_base_url: str | None
303
+ embeddings_cohere_base_url: str | None
113
304
 
114
305
  # Reranker
115
306
  reranker_provider: str
116
307
  reranker_local_model: str
117
308
  reranker_tei_url: str | None
309
+ reranker_tei_batch_size: int
310
+ reranker_tei_max_concurrent: int
311
+ reranker_max_candidates: int
312
+ reranker_cohere_base_url: str | None
118
313
 
119
314
  # Server
120
315
  host: str
121
316
  port: int
122
317
  log_level: str
318
+ log_format: str
123
319
  mcp_enabled: bool
124
320
 
125
321
  # Recall
126
322
  graph_retriever: str
127
-
128
- # Observation thresholds
129
- observation_min_facts: int
130
- observation_top_entities: int
323
+ mpfp_top_k_neighbors: int
324
+ recall_max_concurrent: int
325
+ recall_connection_budget: int
326
+ mental_model_refresh_concurrency: int
327
+
328
+ # Retain settings
329
+ retain_max_completion_tokens: int
330
+ retain_chunk_size: int
331
+ retain_extract_causal_links: bool
332
+ retain_extraction_mode: str
333
+ retain_custom_instructions: str | None
334
+ retain_observations_async: bool
335
+
336
+ # Observations settings (consolidated knowledge from facts)
337
+ enable_observations: bool
338
+ consolidation_batch_size: int
131
339
 
132
340
  # Optimization flags
133
341
  skip_llm_verification: bool
134
342
  lazy_reranker: bool
135
343
 
344
+ # Database migrations
345
+ run_migrations_on_startup: bool
346
+
347
+ # Database connection pool
348
+ db_pool_min_size: int
349
+ db_pool_max_size: int
350
+ db_command_timeout: int
351
+ db_acquire_timeout: int
352
+
353
+ # Worker configuration (distributed task processing)
354
+ worker_enabled: bool
355
+ worker_id: str | None
356
+ worker_poll_interval_ms: int
357
+ worker_max_retries: int
358
+ worker_batch_size: int
359
+ worker_http_port: int
360
+
361
+ # Reflect agent settings
362
+ reflect_max_iterations: int
363
+
136
364
  @classmethod
137
365
  def from_env(cls) -> "HindsightConfig":
138
366
  """Create configuration from environment variables."""
@@ -146,29 +374,92 @@ class HindsightConfig:
146
374
  llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
147
375
  llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
148
376
  llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
377
+ # Per-operation LLM config (None = use default)
378
+ retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
379
+ retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
380
+ retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
381
+ retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
382
+ reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
383
+ reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
384
+ reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
385
+ reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
386
+ consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
387
+ consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
388
+ consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
389
+ consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
149
390
  # Embeddings
150
391
  embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
151
392
  embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
152
393
  embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
394
+ embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
395
+ embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
153
396
  # Reranker
154
397
  reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
155
398
  reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
156
399
  reranker_tei_url=os.getenv(ENV_RERANKER_TEI_URL),
400
+ reranker_tei_batch_size=int(os.getenv(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE))),
401
+ reranker_tei_max_concurrent=int(
402
+ os.getenv(ENV_RERANKER_TEI_MAX_CONCURRENT, str(DEFAULT_RERANKER_TEI_MAX_CONCURRENT))
403
+ ),
404
+ reranker_max_candidates=int(os.getenv(ENV_RERANKER_MAX_CANDIDATES, str(DEFAULT_RERANKER_MAX_CANDIDATES))),
405
+ reranker_cohere_base_url=os.getenv(ENV_RERANKER_COHERE_BASE_URL) or None,
157
406
  # Server
158
407
  host=os.getenv(ENV_HOST, DEFAULT_HOST),
159
408
  port=int(os.getenv(ENV_PORT, DEFAULT_PORT)),
160
409
  log_level=os.getenv(ENV_LOG_LEVEL, DEFAULT_LOG_LEVEL),
410
+ log_format=os.getenv(ENV_LOG_FORMAT, DEFAULT_LOG_FORMAT).lower(),
161
411
  mcp_enabled=os.getenv(ENV_MCP_ENABLED, str(DEFAULT_MCP_ENABLED)).lower() == "true",
162
412
  # Recall
163
413
  graph_retriever=os.getenv(ENV_GRAPH_RETRIEVER, DEFAULT_GRAPH_RETRIEVER),
414
+ mpfp_top_k_neighbors=int(os.getenv(ENV_MPFP_TOP_K_NEIGHBORS, str(DEFAULT_MPFP_TOP_K_NEIGHBORS))),
415
+ recall_max_concurrent=int(os.getenv(ENV_RECALL_MAX_CONCURRENT, str(DEFAULT_RECALL_MAX_CONCURRENT))),
416
+ recall_connection_budget=int(
417
+ os.getenv(ENV_RECALL_CONNECTION_BUDGET, str(DEFAULT_RECALL_CONNECTION_BUDGET))
418
+ ),
419
+ mental_model_refresh_concurrency=int(
420
+ os.getenv(ENV_MENTAL_MODEL_REFRESH_CONCURRENCY, str(DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY))
421
+ ),
164
422
  # Optimization flags
165
423
  skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
166
424
  lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
167
- # Observation thresholds
168
- observation_min_facts=int(os.getenv(ENV_OBSERVATION_MIN_FACTS, str(DEFAULT_OBSERVATION_MIN_FACTS))),
169
- observation_top_entities=int(
170
- os.getenv(ENV_OBSERVATION_TOP_ENTITIES, str(DEFAULT_OBSERVATION_TOP_ENTITIES))
425
+ # Retain settings
426
+ retain_max_completion_tokens=int(
427
+ os.getenv(ENV_RETAIN_MAX_COMPLETION_TOKENS, str(DEFAULT_RETAIN_MAX_COMPLETION_TOKENS))
428
+ ),
429
+ retain_chunk_size=int(os.getenv(ENV_RETAIN_CHUNK_SIZE, str(DEFAULT_RETAIN_CHUNK_SIZE))),
430
+ retain_extract_causal_links=os.getenv(
431
+ ENV_RETAIN_EXTRACT_CAUSAL_LINKS, str(DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS)
432
+ ).lower()
433
+ == "true",
434
+ retain_extraction_mode=_validate_extraction_mode(
435
+ os.getenv(ENV_RETAIN_EXTRACTION_MODE, DEFAULT_RETAIN_EXTRACTION_MODE)
171
436
  ),
437
+ retain_custom_instructions=os.getenv(ENV_RETAIN_CUSTOM_INSTRUCTIONS) or DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS,
438
+ retain_observations_async=os.getenv(
439
+ ENV_RETAIN_OBSERVATIONS_ASYNC, str(DEFAULT_RETAIN_OBSERVATIONS_ASYNC)
440
+ ).lower()
441
+ == "true",
442
+ # Observations settings (consolidated knowledge from facts)
443
+ enable_observations=os.getenv(ENV_ENABLE_OBSERVATIONS, str(DEFAULT_ENABLE_OBSERVATIONS)).lower() == "true",
444
+ consolidation_batch_size=int(
445
+ os.getenv(ENV_CONSOLIDATION_BATCH_SIZE, str(DEFAULT_CONSOLIDATION_BATCH_SIZE))
446
+ ),
447
+ # Database migrations
448
+ run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
449
+ # Database connection pool
450
+ db_pool_min_size=int(os.getenv(ENV_DB_POOL_MIN_SIZE, str(DEFAULT_DB_POOL_MIN_SIZE))),
451
+ db_pool_max_size=int(os.getenv(ENV_DB_POOL_MAX_SIZE, str(DEFAULT_DB_POOL_MAX_SIZE))),
452
+ db_command_timeout=int(os.getenv(ENV_DB_COMMAND_TIMEOUT, str(DEFAULT_DB_COMMAND_TIMEOUT))),
453
+ db_acquire_timeout=int(os.getenv(ENV_DB_ACQUIRE_TIMEOUT, str(DEFAULT_DB_ACQUIRE_TIMEOUT))),
454
+ # Worker configuration
455
+ worker_enabled=os.getenv(ENV_WORKER_ENABLED, str(DEFAULT_WORKER_ENABLED)).lower() == "true",
456
+ worker_id=os.getenv(ENV_WORKER_ID) or DEFAULT_WORKER_ID,
457
+ worker_poll_interval_ms=int(os.getenv(ENV_WORKER_POLL_INTERVAL_MS, str(DEFAULT_WORKER_POLL_INTERVAL_MS))),
458
+ worker_max_retries=int(os.getenv(ENV_WORKER_MAX_RETRIES, str(DEFAULT_WORKER_MAX_RETRIES))),
459
+ worker_batch_size=int(os.getenv(ENV_WORKER_BATCH_SIZE, str(DEFAULT_WORKER_BATCH_SIZE))),
460
+ worker_http_port=int(os.getenv(ENV_WORKER_HTTP_PORT, str(DEFAULT_WORKER_HTTP_PORT))),
461
+ # Reflect agent settings
462
+ reflect_max_iterations=int(os.getenv(ENV_REFLECT_MAX_ITERATIONS, str(DEFAULT_REFLECT_MAX_ITERATIONS))),
172
463
  )
173
464
 
174
465
  def get_llm_base_url(self) -> str:
@@ -199,22 +490,63 @@ class HindsightConfig:
199
490
  return log_level_map.get(self.log_level.lower(), logging.INFO)
200
491
 
201
492
  def configure_logging(self) -> None:
202
- """Configure Python logging based on the log level."""
203
- logging.basicConfig(
204
- level=self.get_python_log_level(),
205
- format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
206
- force=True, # Override any existing configuration
207
- )
493
+ """Configure Python logging based on the log level and format.
494
+
495
+ When log_format is "json", outputs structured JSON logs with a severity
496
+ field that GCP Cloud Logging can parse for proper log level categorization.
497
+ """
498
+ root_logger = logging.getLogger()
499
+ root_logger.setLevel(self.get_python_log_level())
500
+
501
+ # Remove existing handlers
502
+ for handler in root_logger.handlers[:]:
503
+ root_logger.removeHandler(handler)
504
+
505
+ # Create handler writing to stdout (GCP treats stderr as ERROR)
506
+ handler = logging.StreamHandler(sys.stdout)
507
+ handler.setLevel(self.get_python_log_level())
508
+
509
+ if self.log_format == "json":
510
+ handler.setFormatter(JsonFormatter())
511
+ else:
512
+ handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s"))
513
+
514
+ root_logger.addHandler(handler)
208
515
 
209
516
  def log_config(self) -> None:
210
517
  """Log the current configuration (without sensitive values)."""
211
518
  logger.info(f"Database: {self.database_url}")
212
519
  logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")
520
+ if self.retain_llm_provider or self.retain_llm_model:
521
+ retain_provider = self.retain_llm_provider or self.llm_provider
522
+ retain_model = self.retain_llm_model or self.llm_model
523
+ logger.info(f"LLM (retain): provider={retain_provider}, model={retain_model}")
524
+ if self.reflect_llm_provider or self.reflect_llm_model:
525
+ reflect_provider = self.reflect_llm_provider or self.llm_provider
526
+ reflect_model = self.reflect_llm_model or self.llm_model
527
+ logger.info(f"LLM (reflect): provider={reflect_provider}, model={reflect_model}")
528
+ if self.consolidation_llm_provider or self.consolidation_llm_model:
529
+ consolidation_provider = self.consolidation_llm_provider or self.llm_provider
530
+ consolidation_model = self.consolidation_llm_model or self.llm_model
531
+ logger.info(f"LLM (consolidation): provider={consolidation_provider}, model={consolidation_model}")
213
532
  logger.info(f"Embeddings: provider={self.embeddings_provider}")
214
533
  logger.info(f"Reranker: provider={self.reranker_provider}")
215
534
  logger.info(f"Graph retriever: {self.graph_retriever}")
216
535
 
217
536
 
537
+ # Cached config instance
538
+ _config_cache: HindsightConfig | None = None
539
+
540
+
218
541
  def get_config() -> HindsightConfig:
219
- """Get the current configuration from environment variables."""
220
- return HindsightConfig.from_env()
542
+ """Get the cached configuration, loading from environment on first call."""
543
+ global _config_cache
544
+ if _config_cache is None:
545
+ _config_cache = HindsightConfig.from_env()
546
+ return _config_cache
547
+
548
+
549
+ def clear_config_cache() -> None:
550
+ """Clear the config cache. Useful for testing or reloading config."""
551
+ global _config_cache
552
+ _config_cache = None
@@ -0,0 +1,5 @@
1
+ """Consolidation engine for automatic learning creation from memories."""
2
+
3
+ from .consolidator import run_consolidation_job
4
+
5
+ __all__ = ["run_consolidation_job"]