hindsight-api 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/admin/__init__.py +1 -0
- hindsight_api/admin/cli.py +311 -0
- hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
- hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
- hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
- hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
- hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
- hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
- hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
- hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
- hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
- hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
- hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
- hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
- hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
- hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
- hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
- hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
- hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
- hindsight_api/api/http.py +1406 -118
- hindsight_api/api/mcp.py +11 -196
- hindsight_api/config.py +359 -27
- hindsight_api/engine/consolidation/__init__.py +5 -0
- hindsight_api/engine/consolidation/consolidator.py +859 -0
- hindsight_api/engine/consolidation/prompts.py +69 -0
- hindsight_api/engine/cross_encoder.py +706 -88
- hindsight_api/engine/db_budget.py +284 -0
- hindsight_api/engine/db_utils.py +11 -0
- hindsight_api/engine/directives/__init__.py +5 -0
- hindsight_api/engine/directives/models.py +37 -0
- hindsight_api/engine/embeddings.py +553 -29
- hindsight_api/engine/entity_resolver.py +8 -5
- hindsight_api/engine/interface.py +40 -17
- hindsight_api/engine/llm_wrapper.py +744 -68
- hindsight_api/engine/memory_engine.py +2505 -1017
- hindsight_api/engine/mental_models/__init__.py +14 -0
- hindsight_api/engine/mental_models/models.py +53 -0
- hindsight_api/engine/query_analyzer.py +4 -3
- hindsight_api/engine/reflect/__init__.py +18 -0
- hindsight_api/engine/reflect/agent.py +933 -0
- hindsight_api/engine/reflect/models.py +109 -0
- hindsight_api/engine/reflect/observations.py +186 -0
- hindsight_api/engine/reflect/prompts.py +483 -0
- hindsight_api/engine/reflect/tools.py +437 -0
- hindsight_api/engine/reflect/tools_schema.py +250 -0
- hindsight_api/engine/response_models.py +168 -4
- hindsight_api/engine/retain/bank_utils.py +79 -201
- hindsight_api/engine/retain/fact_extraction.py +424 -195
- hindsight_api/engine/retain/fact_storage.py +35 -12
- hindsight_api/engine/retain/link_utils.py +29 -24
- hindsight_api/engine/retain/orchestrator.py +24 -43
- hindsight_api/engine/retain/types.py +11 -2
- hindsight_api/engine/search/graph_retrieval.py +43 -14
- hindsight_api/engine/search/link_expansion_retrieval.py +391 -0
- hindsight_api/engine/search/mpfp_retrieval.py +362 -117
- hindsight_api/engine/search/reranking.py +2 -2
- hindsight_api/engine/search/retrieval.py +848 -201
- hindsight_api/engine/search/tags.py +172 -0
- hindsight_api/engine/search/think_utils.py +42 -141
- hindsight_api/engine/search/trace.py +12 -1
- hindsight_api/engine/search/tracer.py +26 -6
- hindsight_api/engine/search/types.py +21 -3
- hindsight_api/engine/task_backend.py +113 -106
- hindsight_api/engine/utils.py +1 -152
- hindsight_api/extensions/__init__.py +10 -1
- hindsight_api/extensions/builtin/tenant.py +5 -1
- hindsight_api/extensions/context.py +10 -1
- hindsight_api/extensions/operation_validator.py +81 -4
- hindsight_api/extensions/tenant.py +26 -0
- hindsight_api/main.py +69 -6
- hindsight_api/mcp_local.py +12 -53
- hindsight_api/mcp_tools.py +494 -0
- hindsight_api/metrics.py +433 -48
- hindsight_api/migrations.py +141 -1
- hindsight_api/models.py +3 -3
- hindsight_api/pg0.py +53 -0
- hindsight_api/server.py +39 -2
- hindsight_api/worker/__init__.py +11 -0
- hindsight_api/worker/main.py +296 -0
- hindsight_api/worker/poller.py +486 -0
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +16 -6
- hindsight_api-0.4.0.dist-info/RECORD +112 -0
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +2 -0
- hindsight_api/engine/retain/observation_regeneration.py +0 -254
- hindsight_api/engine/search/observation_utils.py +0 -125
- hindsight_api/engine/search/scoring.py +0 -159
- hindsight_api-0.2.1.dist-info/RECORD +0 -75
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
hindsight_api/config.py
CHANGED
|
@@ -4,9 +4,17 @@ Centralized configuration for Hindsight API.
|
|
|
4
4
|
All environment variables and their defaults are defined here.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
import json
|
|
7
8
|
import logging
|
|
8
9
|
import os
|
|
10
|
+
import sys
|
|
9
11
|
from dataclasses import dataclass
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
|
|
14
|
+
from dotenv import find_dotenv, load_dotenv
|
|
15
|
+
|
|
16
|
+
# Load .env file, searching current and parent directories (overrides existing env vars)
|
|
17
|
+
load_dotenv(find_dotenv(usecwd=True), override=True)
|
|
10
18
|
|
|
11
19
|
logger = logging.getLogger(__name__)
|
|
12
20
|
|
|
@@ -18,31 +26,103 @@ ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
|
|
|
18
26
|
ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
|
|
19
27
|
ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
|
|
20
28
|
ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
|
|
29
|
+
ENV_LLM_GROQ_SERVICE_TIER = "HINDSIGHT_API_LLM_GROQ_SERVICE_TIER"
|
|
30
|
+
|
|
31
|
+
# Per-operation LLM configuration (optional, falls back to global LLM config)
|
|
32
|
+
ENV_RETAIN_LLM_PROVIDER = "HINDSIGHT_API_RETAIN_LLM_PROVIDER"
|
|
33
|
+
ENV_RETAIN_LLM_API_KEY = "HINDSIGHT_API_RETAIN_LLM_API_KEY"
|
|
34
|
+
ENV_RETAIN_LLM_MODEL = "HINDSIGHT_API_RETAIN_LLM_MODEL"
|
|
35
|
+
ENV_RETAIN_LLM_BASE_URL = "HINDSIGHT_API_RETAIN_LLM_BASE_URL"
|
|
36
|
+
|
|
37
|
+
ENV_REFLECT_LLM_PROVIDER = "HINDSIGHT_API_REFLECT_LLM_PROVIDER"
|
|
38
|
+
ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
|
|
39
|
+
ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
|
|
40
|
+
ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
|
|
41
|
+
|
|
42
|
+
ENV_CONSOLIDATION_LLM_PROVIDER = "HINDSIGHT_API_CONSOLIDATION_LLM_PROVIDER"
|
|
43
|
+
ENV_CONSOLIDATION_LLM_API_KEY = "HINDSIGHT_API_CONSOLIDATION_LLM_API_KEY"
|
|
44
|
+
ENV_CONSOLIDATION_LLM_MODEL = "HINDSIGHT_API_CONSOLIDATION_LLM_MODEL"
|
|
45
|
+
ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
|
|
21
46
|
|
|
22
47
|
ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
|
|
23
48
|
ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
|
|
24
49
|
ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
|
|
50
|
+
ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
|
|
51
|
+
ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
|
|
52
|
+
ENV_EMBEDDINGS_OPENAI_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL"
|
|
53
|
+
|
|
54
|
+
ENV_COHERE_API_KEY = "HINDSIGHT_API_COHERE_API_KEY"
|
|
55
|
+
ENV_EMBEDDINGS_COHERE_MODEL = "HINDSIGHT_API_EMBEDDINGS_COHERE_MODEL"
|
|
56
|
+
ENV_EMBEDDINGS_COHERE_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_COHERE_BASE_URL"
|
|
57
|
+
ENV_RERANKER_COHERE_MODEL = "HINDSIGHT_API_RERANKER_COHERE_MODEL"
|
|
58
|
+
ENV_RERANKER_COHERE_BASE_URL = "HINDSIGHT_API_RERANKER_COHERE_BASE_URL"
|
|
59
|
+
|
|
60
|
+
# LiteLLM gateway configuration (for embeddings and reranker via LiteLLM proxy)
|
|
61
|
+
ENV_LITELLM_API_BASE = "HINDSIGHT_API_LITELLM_API_BASE"
|
|
62
|
+
ENV_LITELLM_API_KEY = "HINDSIGHT_API_LITELLM_API_KEY"
|
|
63
|
+
ENV_EMBEDDINGS_LITELLM_MODEL = "HINDSIGHT_API_EMBEDDINGS_LITELLM_MODEL"
|
|
64
|
+
ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"
|
|
25
65
|
|
|
26
66
|
ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
|
|
27
67
|
ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
|
|
68
|
+
ENV_RERANKER_LOCAL_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT"
|
|
28
69
|
ENV_RERANKER_TEI_URL = "HINDSIGHT_API_RERANKER_TEI_URL"
|
|
70
|
+
ENV_RERANKER_TEI_BATCH_SIZE = "HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE"
|
|
71
|
+
ENV_RERANKER_TEI_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_TEI_MAX_CONCURRENT"
|
|
72
|
+
ENV_RERANKER_MAX_CANDIDATES = "HINDSIGHT_API_RERANKER_MAX_CANDIDATES"
|
|
73
|
+
ENV_RERANKER_FLASHRANK_MODEL = "HINDSIGHT_API_RERANKER_FLASHRANK_MODEL"
|
|
74
|
+
ENV_RERANKER_FLASHRANK_CACHE_DIR = "HINDSIGHT_API_RERANKER_FLASHRANK_CACHE_DIR"
|
|
29
75
|
|
|
30
76
|
ENV_HOST = "HINDSIGHT_API_HOST"
|
|
31
77
|
ENV_PORT = "HINDSIGHT_API_PORT"
|
|
32
78
|
ENV_LOG_LEVEL = "HINDSIGHT_API_LOG_LEVEL"
|
|
79
|
+
ENV_LOG_FORMAT = "HINDSIGHT_API_LOG_FORMAT"
|
|
80
|
+
ENV_WORKERS = "HINDSIGHT_API_WORKERS"
|
|
33
81
|
ENV_MCP_ENABLED = "HINDSIGHT_API_MCP_ENABLED"
|
|
34
82
|
ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
|
|
83
|
+
ENV_MPFP_TOP_K_NEIGHBORS = "HINDSIGHT_API_MPFP_TOP_K_NEIGHBORS"
|
|
84
|
+
ENV_RECALL_MAX_CONCURRENT = "HINDSIGHT_API_RECALL_MAX_CONCURRENT"
|
|
85
|
+
ENV_RECALL_CONNECTION_BUDGET = "HINDSIGHT_API_RECALL_CONNECTION_BUDGET"
|
|
35
86
|
ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
|
|
36
87
|
ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
|
|
88
|
+
ENV_MENTAL_MODEL_REFRESH_CONCURRENCY = "HINDSIGHT_API_MENTAL_MODEL_REFRESH_CONCURRENCY"
|
|
89
|
+
|
|
90
|
+
# Retain settings
|
|
91
|
+
ENV_RETAIN_MAX_COMPLETION_TOKENS = "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"
|
|
92
|
+
ENV_RETAIN_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_CHUNK_SIZE"
|
|
93
|
+
ENV_RETAIN_EXTRACT_CAUSAL_LINKS = "HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS"
|
|
94
|
+
ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE"
|
|
95
|
+
ENV_RETAIN_CUSTOM_INSTRUCTIONS = "HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS"
|
|
96
|
+
ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
|
|
37
97
|
|
|
38
|
-
#
|
|
39
|
-
|
|
40
|
-
|
|
98
|
+
# Observations settings (consolidated knowledge from facts)
|
|
99
|
+
ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
|
|
100
|
+
ENV_CONSOLIDATION_BATCH_SIZE = "HINDSIGHT_API_CONSOLIDATION_BATCH_SIZE"
|
|
41
101
|
|
|
42
102
|
# Optimization flags
|
|
43
103
|
ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
|
|
44
104
|
ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
|
|
45
105
|
|
|
106
|
+
# Database migrations
|
|
107
|
+
ENV_RUN_MIGRATIONS_ON_STARTUP = "HINDSIGHT_API_RUN_MIGRATIONS_ON_STARTUP"
|
|
108
|
+
|
|
109
|
+
# Database connection pool
|
|
110
|
+
ENV_DB_POOL_MIN_SIZE = "HINDSIGHT_API_DB_POOL_MIN_SIZE"
|
|
111
|
+
ENV_DB_POOL_MAX_SIZE = "HINDSIGHT_API_DB_POOL_MAX_SIZE"
|
|
112
|
+
ENV_DB_COMMAND_TIMEOUT = "HINDSIGHT_API_DB_COMMAND_TIMEOUT"
|
|
113
|
+
ENV_DB_ACQUIRE_TIMEOUT = "HINDSIGHT_API_DB_ACQUIRE_TIMEOUT"
|
|
114
|
+
|
|
115
|
+
# Worker configuration (distributed task processing)
|
|
116
|
+
ENV_WORKER_ENABLED = "HINDSIGHT_API_WORKER_ENABLED"
|
|
117
|
+
ENV_WORKER_ID = "HINDSIGHT_API_WORKER_ID"
|
|
118
|
+
ENV_WORKER_POLL_INTERVAL_MS = "HINDSIGHT_API_WORKER_POLL_INTERVAL_MS"
|
|
119
|
+
ENV_WORKER_MAX_RETRIES = "HINDSIGHT_API_WORKER_MAX_RETRIES"
|
|
120
|
+
ENV_WORKER_BATCH_SIZE = "HINDSIGHT_API_WORKER_BATCH_SIZE"
|
|
121
|
+
ENV_WORKER_HTTP_PORT = "HINDSIGHT_API_WORKER_HTTP_PORT"
|
|
122
|
+
|
|
123
|
+
# Reflect agent settings
|
|
124
|
+
ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
|
|
125
|
+
|
|
46
126
|
# Default values
|
|
47
127
|
DEFAULT_DATABASE_URL = "pg0"
|
|
48
128
|
DEFAULT_LLM_PROVIDER = "openai"
|
|
@@ -52,20 +132,71 @@ DEFAULT_LLM_TIMEOUT = 120.0 # seconds
|
|
|
52
132
|
|
|
53
133
|
DEFAULT_EMBEDDINGS_PROVIDER = "local"
|
|
54
134
|
DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
|
|
135
|
+
DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
|
|
136
|
+
DEFAULT_EMBEDDING_DIMENSION = 384
|
|
55
137
|
|
|
56
138
|
DEFAULT_RERANKER_PROVIDER = "local"
|
|
57
139
|
DEFAULT_RERANKER_LOCAL_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
|
140
|
+
DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT = 4 # Limit concurrent CPU-bound reranking to prevent thrashing
|
|
141
|
+
DEFAULT_RERANKER_TEI_BATCH_SIZE = 128
|
|
142
|
+
DEFAULT_RERANKER_TEI_MAX_CONCURRENT = 8
|
|
143
|
+
DEFAULT_RERANKER_MAX_CANDIDATES = 300
|
|
144
|
+
DEFAULT_RERANKER_FLASHRANK_MODEL = "ms-marco-MiniLM-L-12-v2" # Best balance of speed and quality
|
|
145
|
+
DEFAULT_RERANKER_FLASHRANK_CACHE_DIR = None # Use default cache directory
|
|
146
|
+
|
|
147
|
+
DEFAULT_EMBEDDINGS_COHERE_MODEL = "embed-english-v3.0"
|
|
148
|
+
DEFAULT_RERANKER_COHERE_MODEL = "rerank-english-v3.0"
|
|
149
|
+
|
|
150
|
+
# LiteLLM defaults
|
|
151
|
+
DEFAULT_LITELLM_API_BASE = "http://localhost:4000"
|
|
152
|
+
DEFAULT_EMBEDDINGS_LITELLM_MODEL = "text-embedding-3-small"
|
|
153
|
+
DEFAULT_RERANKER_LITELLM_MODEL = "cohere/rerank-english-v3.0"
|
|
58
154
|
|
|
59
155
|
DEFAULT_HOST = "0.0.0.0"
|
|
60
156
|
DEFAULT_PORT = 8888
|
|
61
157
|
DEFAULT_LOG_LEVEL = "info"
|
|
158
|
+
DEFAULT_LOG_FORMAT = "text" # Options: "text", "json"
|
|
159
|
+
DEFAULT_WORKERS = 1
|
|
62
160
|
DEFAULT_MCP_ENABLED = True
|
|
63
|
-
DEFAULT_GRAPH_RETRIEVER = "
|
|
161
|
+
DEFAULT_GRAPH_RETRIEVER = "link_expansion" # Options: "link_expansion", "mpfp", "bfs"
|
|
162
|
+
DEFAULT_MPFP_TOP_K_NEIGHBORS = 20 # Fan-out limit per node in MPFP graph traversal
|
|
163
|
+
DEFAULT_RECALL_MAX_CONCURRENT = 32 # Max concurrent recall operations per worker
|
|
164
|
+
DEFAULT_RECALL_CONNECTION_BUDGET = 4 # Max concurrent DB connections per recall operation
|
|
64
165
|
DEFAULT_MCP_LOCAL_BANK_ID = "mcp"
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
166
|
+
DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY = 8 # Max concurrent mental model refreshes
|
|
167
|
+
|
|
168
|
+
# Retain settings
|
|
169
|
+
DEFAULT_RETAIN_MAX_COMPLETION_TOKENS = 64000 # Max tokens for fact extraction LLM call
|
|
170
|
+
DEFAULT_RETAIN_CHUNK_SIZE = 3000 # Max chars per chunk for fact extraction
|
|
171
|
+
DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS = True # Extract causal links between facts
|
|
172
|
+
DEFAULT_RETAIN_EXTRACTION_MODE = "concise" # Extraction mode: "concise", "verbose", or "custom"
|
|
173
|
+
RETAIN_EXTRACTION_MODES = ("concise", "verbose", "custom") # Allowed extraction modes
|
|
174
|
+
DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS = None # Custom extraction guidelines (only used when mode="custom")
|
|
175
|
+
DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False # Run observation generation async (after retain completes)
|
|
176
|
+
|
|
177
|
+
# Observations defaults (consolidated knowledge from facts)
|
|
178
|
+
DEFAULT_ENABLE_OBSERVATIONS = True # Observations enabled by default
|
|
179
|
+
DEFAULT_CONSOLIDATION_BATCH_SIZE = 50 # Memories to load per batch (internal memory optimization)
|
|
180
|
+
|
|
181
|
+
# Database migrations
|
|
182
|
+
DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
|
|
183
|
+
|
|
184
|
+
# Database connection pool
|
|
185
|
+
DEFAULT_DB_POOL_MIN_SIZE = 5
|
|
186
|
+
DEFAULT_DB_POOL_MAX_SIZE = 100
|
|
187
|
+
DEFAULT_DB_COMMAND_TIMEOUT = 60 # seconds
|
|
188
|
+
DEFAULT_DB_ACQUIRE_TIMEOUT = 30 # seconds
|
|
189
|
+
|
|
190
|
+
# Worker configuration (distributed task processing)
|
|
191
|
+
DEFAULT_WORKER_ENABLED = True # API runs worker by default (standalone mode)
|
|
192
|
+
DEFAULT_WORKER_ID = None # Will use hostname if not specified
|
|
193
|
+
DEFAULT_WORKER_POLL_INTERVAL_MS = 500 # Poll database every 500ms
|
|
194
|
+
DEFAULT_WORKER_MAX_RETRIES = 3 # Max retries before marking task failed
|
|
195
|
+
DEFAULT_WORKER_BATCH_SIZE = 10 # Tasks to claim per poll cycle
|
|
196
|
+
DEFAULT_WORKER_HTTP_PORT = 8889 # HTTP port for worker metrics/health
|
|
197
|
+
|
|
198
|
+
# Reflect agent settings
|
|
199
|
+
DEFAULT_REFLECT_MAX_ITERATIONS = 10 # Max tool call iterations before forcing response
|
|
69
200
|
|
|
70
201
|
# Default MCP tool descriptions (can be customized via env vars)
|
|
71
202
|
DEFAULT_MCP_RETAIN_DESCRIPTION = """Store important information to long-term memory.
|
|
@@ -87,8 +218,50 @@ Use this tool PROACTIVELY to:
|
|
|
87
218
|
- Remember user's goals and context
|
|
88
219
|
- Personalize responses based on past interactions"""
|
|
89
220
|
|
|
90
|
-
#
|
|
91
|
-
EMBEDDING_DIMENSION =
|
|
221
|
+
# Default embedding dimension (used by initial migration, adjusted at runtime)
|
|
222
|
+
EMBEDDING_DIMENSION = DEFAULT_EMBEDDING_DIMENSION
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
class JsonFormatter(logging.Formatter):
    """Render log records as single-line JSON objects for structured logging.

    Each entry carries a 'severity' field that cloud logging systems
    (GCP, AWS CloudWatch, etc.) can parse to categorize the log level.
    """

    # Standard Python levels -> severity label. Levels that are not listed
    # here (custom numeric levels) are reported as "DEFAULT" by format().
    SEVERITY_MAP = {
        logging.DEBUG: "DEBUG",
        logging.INFO: "INFO",
        logging.WARNING: "WARNING",
        logging.ERROR: "ERROR",
        logging.CRITICAL: "CRITICAL",
    }

    def format(self, record: logging.LogRecord) -> str:
        """Serialize ``record`` to a JSON string.

        Args:
            record: The log record to serialize.

        Returns:
            A JSON object with ``severity``, ``message``, ``timestamp`` and
            ``logger`` keys, plus ``exception`` and/or ``stack_info`` when the
            record carries them.
        """
        # Use the time the record was created, not the time it happens to be
        # formatted: with queued or buffered handlers the two can differ.
        event_time = datetime.fromtimestamp(record.created, tz=timezone.utc)

        log_entry = {
            "severity": self.SEVERITY_MAP.get(record.levelno, "DEFAULT"),
            "message": record.getMessage(),
            "timestamp": event_time.isoformat(),
            "logger": record.name,
        }

        # Add exception info if present
        if record.exc_info:
            log_entry["exception"] = self.formatException(record.exc_info)

        # Keep stack traces requested via stack_info=True instead of dropping them
        if record.stack_info:
            log_entry["stack_info"] = self.formatStack(record.stack_info)

        return json.dumps(log_entry)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _validate_extraction_mode(mode: str) -> str:
    """Validate and normalize an extraction mode string.

    Args:
        mode: Raw mode value, e.g. as read from the environment.

    Returns:
        The mode lower-cased with surrounding whitespace removed when it is
        one of RETAIN_EXTRACTION_MODES; otherwise DEFAULT_RETAIN_EXTRACTION_MODE
        (a warning is logged in that case).
    """
    # Strip as well as lower-case so that a value such as "verbose " is
    # accepted instead of silently falling back to the default.
    normalized = mode.strip().lower()
    if normalized in RETAIN_EXTRACTION_MODES:
        return normalized

    logger.warning(
        f"Invalid extraction mode '{mode}', must be one of {RETAIN_EXTRACTION_MODES}. "
        f"Defaulting to '{DEFAULT_RETAIN_EXTRACTION_MODE}'."
    )
    return DEFAULT_RETAIN_EXTRACTION_MODE
|
|
92
265
|
|
|
93
266
|
|
|
94
267
|
@dataclass
|
|
@@ -98,7 +271,7 @@ class HindsightConfig:
|
|
|
98
271
|
# Database
|
|
99
272
|
database_url: str
|
|
100
273
|
|
|
101
|
-
# LLM
|
|
274
|
+
# LLM (default, used as fallback for per-operation config)
|
|
102
275
|
llm_provider: str
|
|
103
276
|
llm_api_key: str | None
|
|
104
277
|
llm_model: str
|
|
@@ -106,33 +279,88 @@ class HindsightConfig:
|
|
|
106
279
|
llm_max_concurrent: int
|
|
107
280
|
llm_timeout: float
|
|
108
281
|
|
|
282
|
+
# Per-operation LLM configuration (None = use default LLM config)
|
|
283
|
+
retain_llm_provider: str | None
|
|
284
|
+
retain_llm_api_key: str | None
|
|
285
|
+
retain_llm_model: str | None
|
|
286
|
+
retain_llm_base_url: str | None
|
|
287
|
+
|
|
288
|
+
reflect_llm_provider: str | None
|
|
289
|
+
reflect_llm_api_key: str | None
|
|
290
|
+
reflect_llm_model: str | None
|
|
291
|
+
reflect_llm_base_url: str | None
|
|
292
|
+
|
|
293
|
+
consolidation_llm_provider: str | None
|
|
294
|
+
consolidation_llm_api_key: str | None
|
|
295
|
+
consolidation_llm_model: str | None
|
|
296
|
+
consolidation_llm_base_url: str | None
|
|
297
|
+
|
|
109
298
|
# Embeddings
|
|
110
299
|
embeddings_provider: str
|
|
111
300
|
embeddings_local_model: str
|
|
112
301
|
embeddings_tei_url: str | None
|
|
302
|
+
embeddings_openai_base_url: str | None
|
|
303
|
+
embeddings_cohere_base_url: str | None
|
|
113
304
|
|
|
114
305
|
# Reranker
|
|
115
306
|
reranker_provider: str
|
|
116
307
|
reranker_local_model: str
|
|
117
308
|
reranker_tei_url: str | None
|
|
309
|
+
reranker_tei_batch_size: int
|
|
310
|
+
reranker_tei_max_concurrent: int
|
|
311
|
+
reranker_max_candidates: int
|
|
312
|
+
reranker_cohere_base_url: str | None
|
|
118
313
|
|
|
119
314
|
# Server
|
|
120
315
|
host: str
|
|
121
316
|
port: int
|
|
122
317
|
log_level: str
|
|
318
|
+
log_format: str
|
|
123
319
|
mcp_enabled: bool
|
|
124
320
|
|
|
125
321
|
# Recall
|
|
126
322
|
graph_retriever: str
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
323
|
+
mpfp_top_k_neighbors: int
|
|
324
|
+
recall_max_concurrent: int
|
|
325
|
+
recall_connection_budget: int
|
|
326
|
+
mental_model_refresh_concurrency: int
|
|
327
|
+
|
|
328
|
+
# Retain settings
|
|
329
|
+
retain_max_completion_tokens: int
|
|
330
|
+
retain_chunk_size: int
|
|
331
|
+
retain_extract_causal_links: bool
|
|
332
|
+
retain_extraction_mode: str
|
|
333
|
+
retain_custom_instructions: str | None
|
|
334
|
+
retain_observations_async: bool
|
|
335
|
+
|
|
336
|
+
# Observations settings (consolidated knowledge from facts)
|
|
337
|
+
enable_observations: bool
|
|
338
|
+
consolidation_batch_size: int
|
|
131
339
|
|
|
132
340
|
# Optimization flags
|
|
133
341
|
skip_llm_verification: bool
|
|
134
342
|
lazy_reranker: bool
|
|
135
343
|
|
|
344
|
+
# Database migrations
|
|
345
|
+
run_migrations_on_startup: bool
|
|
346
|
+
|
|
347
|
+
# Database connection pool
|
|
348
|
+
db_pool_min_size: int
|
|
349
|
+
db_pool_max_size: int
|
|
350
|
+
db_command_timeout: int
|
|
351
|
+
db_acquire_timeout: int
|
|
352
|
+
|
|
353
|
+
# Worker configuration (distributed task processing)
|
|
354
|
+
worker_enabled: bool
|
|
355
|
+
worker_id: str | None
|
|
356
|
+
worker_poll_interval_ms: int
|
|
357
|
+
worker_max_retries: int
|
|
358
|
+
worker_batch_size: int
|
|
359
|
+
worker_http_port: int
|
|
360
|
+
|
|
361
|
+
# Reflect agent settings
|
|
362
|
+
reflect_max_iterations: int
|
|
363
|
+
|
|
136
364
|
@classmethod
|
|
137
365
|
def from_env(cls) -> "HindsightConfig":
|
|
138
366
|
"""Create configuration from environment variables."""
|
|
@@ -146,29 +374,92 @@ class HindsightConfig:
|
|
|
146
374
|
llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
|
|
147
375
|
llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
|
|
148
376
|
llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
|
|
377
|
+
# Per-operation LLM config (None = use default)
|
|
378
|
+
retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
|
|
379
|
+
retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
|
|
380
|
+
retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
|
|
381
|
+
retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
|
|
382
|
+
reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
|
|
383
|
+
reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
|
|
384
|
+
reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
|
|
385
|
+
reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
|
|
386
|
+
consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
|
|
387
|
+
consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
|
|
388
|
+
consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
|
|
389
|
+
consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
|
|
149
390
|
# Embeddings
|
|
150
391
|
embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
|
|
151
392
|
embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
|
|
152
393
|
embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
|
|
394
|
+
embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
|
|
395
|
+
embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
|
|
153
396
|
# Reranker
|
|
154
397
|
reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
|
|
155
398
|
reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
|
|
156
399
|
reranker_tei_url=os.getenv(ENV_RERANKER_TEI_URL),
|
|
400
|
+
reranker_tei_batch_size=int(os.getenv(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE))),
|
|
401
|
+
reranker_tei_max_concurrent=int(
|
|
402
|
+
os.getenv(ENV_RERANKER_TEI_MAX_CONCURRENT, str(DEFAULT_RERANKER_TEI_MAX_CONCURRENT))
|
|
403
|
+
),
|
|
404
|
+
reranker_max_candidates=int(os.getenv(ENV_RERANKER_MAX_CANDIDATES, str(DEFAULT_RERANKER_MAX_CANDIDATES))),
|
|
405
|
+
reranker_cohere_base_url=os.getenv(ENV_RERANKER_COHERE_BASE_URL) or None,
|
|
157
406
|
# Server
|
|
158
407
|
host=os.getenv(ENV_HOST, DEFAULT_HOST),
|
|
159
408
|
port=int(os.getenv(ENV_PORT, DEFAULT_PORT)),
|
|
160
409
|
log_level=os.getenv(ENV_LOG_LEVEL, DEFAULT_LOG_LEVEL),
|
|
410
|
+
log_format=os.getenv(ENV_LOG_FORMAT, DEFAULT_LOG_FORMAT).lower(),
|
|
161
411
|
mcp_enabled=os.getenv(ENV_MCP_ENABLED, str(DEFAULT_MCP_ENABLED)).lower() == "true",
|
|
162
412
|
# Recall
|
|
163
413
|
graph_retriever=os.getenv(ENV_GRAPH_RETRIEVER, DEFAULT_GRAPH_RETRIEVER),
|
|
414
|
+
mpfp_top_k_neighbors=int(os.getenv(ENV_MPFP_TOP_K_NEIGHBORS, str(DEFAULT_MPFP_TOP_K_NEIGHBORS))),
|
|
415
|
+
recall_max_concurrent=int(os.getenv(ENV_RECALL_MAX_CONCURRENT, str(DEFAULT_RECALL_MAX_CONCURRENT))),
|
|
416
|
+
recall_connection_budget=int(
|
|
417
|
+
os.getenv(ENV_RECALL_CONNECTION_BUDGET, str(DEFAULT_RECALL_CONNECTION_BUDGET))
|
|
418
|
+
),
|
|
419
|
+
mental_model_refresh_concurrency=int(
|
|
420
|
+
os.getenv(ENV_MENTAL_MODEL_REFRESH_CONCURRENCY, str(DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY))
|
|
421
|
+
),
|
|
164
422
|
# Optimization flags
|
|
165
423
|
skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
|
|
166
424
|
lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
|
|
167
|
-
#
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
425
|
+
# Retain settings
|
|
426
|
+
retain_max_completion_tokens=int(
|
|
427
|
+
os.getenv(ENV_RETAIN_MAX_COMPLETION_TOKENS, str(DEFAULT_RETAIN_MAX_COMPLETION_TOKENS))
|
|
428
|
+
),
|
|
429
|
+
retain_chunk_size=int(os.getenv(ENV_RETAIN_CHUNK_SIZE, str(DEFAULT_RETAIN_CHUNK_SIZE))),
|
|
430
|
+
retain_extract_causal_links=os.getenv(
|
|
431
|
+
ENV_RETAIN_EXTRACT_CAUSAL_LINKS, str(DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS)
|
|
432
|
+
).lower()
|
|
433
|
+
== "true",
|
|
434
|
+
retain_extraction_mode=_validate_extraction_mode(
|
|
435
|
+
os.getenv(ENV_RETAIN_EXTRACTION_MODE, DEFAULT_RETAIN_EXTRACTION_MODE)
|
|
171
436
|
),
|
|
437
|
+
retain_custom_instructions=os.getenv(ENV_RETAIN_CUSTOM_INSTRUCTIONS) or DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS,
|
|
438
|
+
retain_observations_async=os.getenv(
|
|
439
|
+
ENV_RETAIN_OBSERVATIONS_ASYNC, str(DEFAULT_RETAIN_OBSERVATIONS_ASYNC)
|
|
440
|
+
).lower()
|
|
441
|
+
== "true",
|
|
442
|
+
# Observations settings (consolidated knowledge from facts)
|
|
443
|
+
enable_observations=os.getenv(ENV_ENABLE_OBSERVATIONS, str(DEFAULT_ENABLE_OBSERVATIONS)).lower() == "true",
|
|
444
|
+
consolidation_batch_size=int(
|
|
445
|
+
os.getenv(ENV_CONSOLIDATION_BATCH_SIZE, str(DEFAULT_CONSOLIDATION_BATCH_SIZE))
|
|
446
|
+
),
|
|
447
|
+
# Database migrations
|
|
448
|
+
run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
|
|
449
|
+
# Database connection pool
|
|
450
|
+
db_pool_min_size=int(os.getenv(ENV_DB_POOL_MIN_SIZE, str(DEFAULT_DB_POOL_MIN_SIZE))),
|
|
451
|
+
db_pool_max_size=int(os.getenv(ENV_DB_POOL_MAX_SIZE, str(DEFAULT_DB_POOL_MAX_SIZE))),
|
|
452
|
+
db_command_timeout=int(os.getenv(ENV_DB_COMMAND_TIMEOUT, str(DEFAULT_DB_COMMAND_TIMEOUT))),
|
|
453
|
+
db_acquire_timeout=int(os.getenv(ENV_DB_ACQUIRE_TIMEOUT, str(DEFAULT_DB_ACQUIRE_TIMEOUT))),
|
|
454
|
+
# Worker configuration
|
|
455
|
+
worker_enabled=os.getenv(ENV_WORKER_ENABLED, str(DEFAULT_WORKER_ENABLED)).lower() == "true",
|
|
456
|
+
worker_id=os.getenv(ENV_WORKER_ID) or DEFAULT_WORKER_ID,
|
|
457
|
+
worker_poll_interval_ms=int(os.getenv(ENV_WORKER_POLL_INTERVAL_MS, str(DEFAULT_WORKER_POLL_INTERVAL_MS))),
|
|
458
|
+
worker_max_retries=int(os.getenv(ENV_WORKER_MAX_RETRIES, str(DEFAULT_WORKER_MAX_RETRIES))),
|
|
459
|
+
worker_batch_size=int(os.getenv(ENV_WORKER_BATCH_SIZE, str(DEFAULT_WORKER_BATCH_SIZE))),
|
|
460
|
+
worker_http_port=int(os.getenv(ENV_WORKER_HTTP_PORT, str(DEFAULT_WORKER_HTTP_PORT))),
|
|
461
|
+
# Reflect agent settings
|
|
462
|
+
reflect_max_iterations=int(os.getenv(ENV_REFLECT_MAX_ITERATIONS, str(DEFAULT_REFLECT_MAX_ITERATIONS))),
|
|
172
463
|
)
|
|
173
464
|
|
|
174
465
|
def get_llm_base_url(self) -> str:
|
|
@@ -199,22 +490,63 @@ class HindsightConfig:
|
|
|
199
490
|
return log_level_map.get(self.log_level.lower(), logging.INFO)
|
|
200
491
|
|
|
201
492
|
def configure_logging(self) -> None:
    """Configure Python logging based on the log level and format.

    Replaces every handler on the root logger with a single stdout handler.
    When log_format is "json", outputs structured JSON logs with a severity
    field that GCP Cloud Logging can parse for proper log level categorization;
    any other value selects the plain-text format.
    """
    level = self.get_python_log_level()

    root_logger = logging.getLogger()
    root_logger.setLevel(level)

    # Remove existing handlers and close them so that file/socket handlers
    # installed earlier release their resources (same behavior as
    # logging.basicConfig(force=True)).
    for stale_handler in root_logger.handlers[:]:
        root_logger.removeHandler(stale_handler)
        stale_handler.close()

    # Create handler writing to stdout (GCP treats stderr as ERROR)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(level)

    if self.log_format == "json":
        handler.setFormatter(JsonFormatter())
    else:
        handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s"))

    root_logger.addHandler(handler)
|
|
208
515
|
|
|
209
516
|
def log_config(self) -> None:
    """Log the current configuration (without sensitive values).

    The database URL is logged with any password in its user-info part
    replaced by ``***``. Per-operation LLM settings are logged only when a
    provider or model override is set; missing parts fall back to the
    default LLM provider/model.
    """
    from urllib.parse import urlsplit, urlunsplit

    # Mask the password of DSNs such as postgresql://user:secret@host/db.
    # Values without a netloc (e.g. the "pg0" default) pass through untouched.
    # NOTE(review): credentials passed as query parameters (?password=...)
    # are not masked here — confirm whether such DSNs are supported.
    database_url = self.database_url
    try:
        parts = urlsplit(database_url)
    except ValueError:
        # Never fall back to logging a URL we could not inspect.
        database_url = "<unparseable database URL>"
    else:
        userinfo, at_sign, host_port = parts.netloc.rpartition("@")
        if at_sign and ":" in userinfo:
            user = userinfo.partition(":")[0]
            database_url = urlunsplit(parts._replace(netloc=f"{user}:***@{host_port}"))

    logger.info(f"Database: {database_url}")
    logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")

    per_operation_overrides = (
        ("retain", self.retain_llm_provider, self.retain_llm_model),
        ("reflect", self.reflect_llm_provider, self.reflect_llm_model),
        ("consolidation", self.consolidation_llm_provider, self.consolidation_llm_model),
    )
    for operation, provider_override, model_override in per_operation_overrides:
        if provider_override or model_override:
            provider = provider_override or self.llm_provider
            model = model_override or self.llm_model
            logger.info(f"LLM ({operation}): provider={provider}, model={model}")

    logger.info(f"Embeddings: provider={self.embeddings_provider}")
    logger.info(f"Reranker: provider={self.reranker_provider}")
    logger.info(f"Graph retriever: {self.graph_retriever}")
|
|
216
535
|
|
|
217
536
|
|
|
537
|
+
# Cached config instance
|
|
538
|
+
_config_cache: HindsightConfig | None = None
|
|
539
|
+
|
|
540
|
+
|
|
218
541
|
def get_config() -> HindsightConfig:
    """Return the process-wide configuration.

    The configuration is built from the environment on the first call and
    the same instance is handed back on every later call.
    """
    global _config_cache
    cached = _config_cache
    if cached is not None:
        return cached
    # First call (or cache was cleared): build once and remember it.
    cached = HindsightConfig.from_env()
    _config_cache = cached
    return cached
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def clear_config_cache() -> None:
    """Drop the cached configuration instance.

    Resets the module-level cache to None. Useful for testing or for
    reloading config.
    """
    global _config_cache
    _config_cache = None
|