hindsight-api 0.0.21__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +10 -2
- hindsight_api/alembic/README +1 -0
- hindsight_api/alembic/env.py +146 -0
- hindsight_api/alembic/script.py.mako +28 -0
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +274 -0
- hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
- hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +39 -0
- hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +48 -0
- hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +62 -0
- hindsight_api/alembic/versions/rename_personality_to_disposition.py +65 -0
- hindsight_api/api/__init__.py +2 -4
- hindsight_api/api/http.py +112 -164
- hindsight_api/api/mcp.py +2 -1
- hindsight_api/config.py +154 -0
- hindsight_api/engine/__init__.py +7 -2
- hindsight_api/engine/cross_encoder.py +225 -16
- hindsight_api/engine/embeddings.py +198 -19
- hindsight_api/engine/entity_resolver.py +56 -29
- hindsight_api/engine/llm_wrapper.py +147 -106
- hindsight_api/engine/memory_engine.py +337 -192
- hindsight_api/engine/response_models.py +15 -17
- hindsight_api/engine/retain/bank_utils.py +25 -35
- hindsight_api/engine/retain/entity_processing.py +5 -5
- hindsight_api/engine/retain/fact_extraction.py +86 -24
- hindsight_api/engine/retain/fact_storage.py +1 -1
- hindsight_api/engine/retain/link_creation.py +12 -6
- hindsight_api/engine/retain/link_utils.py +50 -56
- hindsight_api/engine/retain/observation_regeneration.py +264 -0
- hindsight_api/engine/retain/orchestrator.py +31 -44
- hindsight_api/engine/retain/types.py +14 -0
- hindsight_api/engine/search/reranking.py +6 -10
- hindsight_api/engine/search/retrieval.py +2 -2
- hindsight_api/engine/search/think_utils.py +59 -30
- hindsight_api/engine/search/tracer.py +1 -1
- hindsight_api/main.py +201 -0
- hindsight_api/migrations.py +61 -39
- hindsight_api/models.py +1 -2
- hindsight_api/pg0.py +17 -36
- hindsight_api/server.py +43 -0
- {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/METADATA +2 -3
- hindsight_api-0.1.1.dist-info/RECORD +60 -0
- hindsight_api-0.1.1.dist-info/entry_points.txt +2 -0
- hindsight_api/cli.py +0 -128
- hindsight_api/web/__init__.py +0 -12
- hindsight_api/web/server.py +0 -109
- hindsight_api-0.0.21.dist-info/RECORD +0 -50
- hindsight_api-0.0.21.dist-info/entry_points.txt +0 -2
- {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/WHEEL +0 -0
|
@@ -170,9 +170,9 @@ async def retrieve_graph(
|
|
|
170
170
|
batch_activations[unit_id] = activation
|
|
171
171
|
|
|
172
172
|
# Batch fetch neighbors for all nodes in this batch
|
|
173
|
-
# Fetch top weighted neighbors (batch_size *
|
|
173
|
+
# Fetch top weighted neighbors (batch_size * 20 = ~400 for good distribution)
|
|
174
174
|
if batch_nodes and budget_remaining > 0:
|
|
175
|
-
max_neighbors = len(batch_nodes) *
|
|
175
|
+
max_neighbors = len(batch_nodes) * 20
|
|
176
176
|
neighbors = await conn.fetch(
|
|
177
177
|
"""
|
|
178
178
|
SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.occurred_end, mu.mentioned_at,
|
|
@@ -28,30 +28,48 @@ class OpinionExtractionResponse(BaseModel):
|
|
|
28
28
|
)
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
def
|
|
32
|
-
"""Convert trait value to descriptive text."""
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
else:
|
|
42
|
-
return f"very low {name}"
|
|
31
|
+
def describe_trait_level(value: int) -> str:
|
|
32
|
+
"""Convert trait value (1-5) to descriptive text."""
|
|
33
|
+
levels = {
|
|
34
|
+
1: "very low",
|
|
35
|
+
2: "low",
|
|
36
|
+
3: "moderate",
|
|
37
|
+
4: "high",
|
|
38
|
+
5: "very high"
|
|
39
|
+
}
|
|
40
|
+
return levels.get(value, "moderate")
|
|
43
41
|
|
|
44
42
|
|
|
45
43
|
def build_disposition_description(disposition: DispositionTraits) -> str:
|
|
46
44
|
"""Build a disposition description string from disposition traits."""
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
45
|
+
skepticism_desc = {
|
|
46
|
+
1: "You are very trusting and tend to take information at face value.",
|
|
47
|
+
2: "You tend to trust information but may question obvious inconsistencies.",
|
|
48
|
+
3: "You have a balanced approach to information, neither too trusting nor too skeptical.",
|
|
49
|
+
4: "You are somewhat skeptical and often question the reliability of information.",
|
|
50
|
+
5: "You are highly skeptical and critically examine all information for accuracy and hidden motives."
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
literalism_desc = {
|
|
54
|
+
1: "You interpret information very flexibly, reading between the lines and inferring intent.",
|
|
55
|
+
2: "You tend to consider context and implied meaning alongside literal statements.",
|
|
56
|
+
3: "You balance literal interpretation with contextual understanding.",
|
|
57
|
+
4: "You prefer to interpret information more literally and precisely.",
|
|
58
|
+
5: "You interpret information very literally and focus on exact wording and commitments."
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
empathy_desc = {
|
|
62
|
+
1: "You focus primarily on facts and data, setting aside emotional context.",
|
|
63
|
+
2: "You consider facts first but acknowledge emotional factors exist.",
|
|
64
|
+
3: "You balance factual analysis with emotional understanding.",
|
|
65
|
+
4: "You give significant weight to emotional context and human factors.",
|
|
66
|
+
5: "You strongly consider the emotional state and circumstances of others when forming memories."
|
|
67
|
+
}
|
|
53
68
|
|
|
54
|
-
|
|
69
|
+
return f"""Your disposition traits:
|
|
70
|
+
- Skepticism ({describe_trait_level(disposition.skepticism)}): {skepticism_desc.get(disposition.skepticism, skepticism_desc[3])}
|
|
71
|
+
- Literalism ({describe_trait_level(disposition.literalism)}): {literalism_desc.get(disposition.literalism, literalism_desc[3])}
|
|
72
|
+
- Empathy ({describe_trait_level(disposition.empathy)}): {empathy_desc.get(disposition.empathy, empathy_desc[3])}"""
|
|
55
73
|
|
|
56
74
|
|
|
57
75
|
def format_facts_for_prompt(facts: List[MemoryFact]) -> str:
|
|
@@ -78,10 +96,6 @@ def format_facts_for_prompt(facts: List[MemoryFact]) -> str:
|
|
|
78
96
|
elif isinstance(occurred_start, datetime):
|
|
79
97
|
fact_obj["occurred_start"] = occurred_start.strftime('%Y-%m-%d %H:%M:%S')
|
|
80
98
|
|
|
81
|
-
# Add activation if available
|
|
82
|
-
if fact.activation is not None:
|
|
83
|
-
fact_obj["score"] = fact.activation
|
|
84
|
-
|
|
85
99
|
formatted.append(fact_obj)
|
|
86
100
|
|
|
87
101
|
return json.dumps(formatted, indent=2)
|
|
@@ -141,13 +155,28 @@ Based on everything I know, believe, and who I am (including my name, dispositio
|
|
|
141
155
|
|
|
142
156
|
def get_system_message(disposition: DispositionTraits) -> str:
|
|
143
157
|
"""Get the system message for the think LLM call."""
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
158
|
+
# Build disposition-specific instructions based on trait values
|
|
159
|
+
instructions = []
|
|
160
|
+
|
|
161
|
+
# Skepticism influences how much to question/doubt information
|
|
162
|
+
if disposition.skepticism >= 4:
|
|
163
|
+
instructions.append("Be skeptical of claims and look for potential issues or inconsistencies.")
|
|
164
|
+
elif disposition.skepticism <= 2:
|
|
165
|
+
instructions.append("Trust the information provided and take statements at face value.")
|
|
166
|
+
|
|
167
|
+
# Literalism influences interpretation style
|
|
168
|
+
if disposition.literalism >= 4:
|
|
169
|
+
instructions.append("Interpret information literally and focus on exact commitments and wording.")
|
|
170
|
+
elif disposition.literalism <= 2:
|
|
171
|
+
instructions.append("Read between the lines and consider implied meaning and context.")
|
|
172
|
+
|
|
173
|
+
# Empathy influences consideration of emotional factors
|
|
174
|
+
if disposition.empathy >= 4:
|
|
175
|
+
instructions.append("Consider the emotional state and circumstances behind the information.")
|
|
176
|
+
elif disposition.empathy <= 2:
|
|
177
|
+
instructions.append("Focus on facts and outcomes rather than emotional context.")
|
|
178
|
+
|
|
179
|
+
disposition_instruction = " ".join(instructions) if instructions else "Balance your disposition traits when interpreting information."
|
|
151
180
|
|
|
152
181
|
return f"You are a person with your own thoughts, experiences, opinions, and disposition. When asked a question, respond naturally like a human would - share what you genuinely think based on what you know and have experienced. {disposition_instruction} Be direct, express your views confidently, and use 'I think', 'I believe', 'in my view', etc. Respond in plain text without markdown formatting."
|
|
153
182
|
|
|
@@ -368,7 +368,7 @@ class SearchTracer:
|
|
|
368
368
|
|
|
369
369
|
# Extract score components (only include non-None values)
|
|
370
370
|
score_components = {}
|
|
371
|
-
for key in ["semantic_similarity", "bm25_score", "rrf_score", "recency_normalized", "frequency_normalized"]:
|
|
371
|
+
for key in ["semantic_similarity", "bm25_score", "rrf_score", "recency_normalized", "frequency_normalized", "cross_encoder_score", "cross_encoder_score_normalized"]:
|
|
372
372
|
if key in result and result[key] is not None:
|
|
373
373
|
score_components[key] = result[key]
|
|
374
374
|
|
hindsight_api/main.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command-line interface for Hindsight API.
|
|
3
|
+
|
|
4
|
+
Run the server with:
|
|
5
|
+
hindsight-api
|
|
6
|
+
|
|
7
|
+
Stop with Ctrl+C.
|
|
8
|
+
"""
|
|
9
|
+
import argparse
|
|
10
|
+
import asyncio
|
|
11
|
+
import atexit
|
|
12
|
+
import os
|
|
13
|
+
import signal
|
|
14
|
+
import sys
|
|
15
|
+
import warnings
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
import uvicorn
|
|
19
|
+
|
|
20
|
+
from . import MemoryEngine
|
|
21
|
+
from .api import create_app
|
|
22
|
+
from .config import get_config, HindsightConfig
|
|
23
|
+
|
|
24
|
+
# Filter deprecation warnings from third-party libraries
|
|
25
|
+
warnings.filterwarnings("ignore", message="websockets.legacy is deprecated")
|
|
26
|
+
warnings.filterwarnings("ignore", message="websockets.server.WebSocketServerProtocol is deprecated")
|
|
27
|
+
|
|
28
|
+
# Disable tokenizers parallelism to avoid warnings
|
|
29
|
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
30
|
+
|
|
31
|
+
# Global reference for cleanup
|
|
32
|
+
_memory: Optional[MemoryEngine] = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _cleanup():
|
|
36
|
+
"""Synchronous cleanup function to stop resources on exit."""
|
|
37
|
+
global _memory
|
|
38
|
+
if _memory is not None and _memory._pg0 is not None:
|
|
39
|
+
try:
|
|
40
|
+
loop = asyncio.new_event_loop()
|
|
41
|
+
loop.run_until_complete(_memory._pg0.stop())
|
|
42
|
+
loop.close()
|
|
43
|
+
print("\npg0 stopped.")
|
|
44
|
+
except Exception as e:
|
|
45
|
+
print(f"\nError stopping pg0: {e}")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _signal_handler(signum, frame):
|
|
49
|
+
"""Handle SIGINT/SIGTERM to ensure cleanup."""
|
|
50
|
+
print(f"\nReceived signal {signum}, shutting down...")
|
|
51
|
+
_cleanup()
|
|
52
|
+
sys.exit(0)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def main():
|
|
56
|
+
"""Main entry point for the CLI."""
|
|
57
|
+
global _memory
|
|
58
|
+
|
|
59
|
+
# Load configuration from environment (for CLI args defaults)
|
|
60
|
+
config = get_config()
|
|
61
|
+
|
|
62
|
+
parser = argparse.ArgumentParser(
|
|
63
|
+
prog="hindsight-api",
|
|
64
|
+
description="Hindsight API Server",
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# Server options
|
|
68
|
+
parser.add_argument(
|
|
69
|
+
"--host", default=config.host,
|
|
70
|
+
help=f"Host to bind to (default: {config.host}, env: HINDSIGHT_API_HOST)"
|
|
71
|
+
)
|
|
72
|
+
parser.add_argument(
|
|
73
|
+
"--port", type=int, default=config.port,
|
|
74
|
+
help=f"Port to bind to (default: {config.port}, env: HINDSIGHT_API_PORT)"
|
|
75
|
+
)
|
|
76
|
+
parser.add_argument(
|
|
77
|
+
"--log-level", default=config.log_level,
|
|
78
|
+
choices=["critical", "error", "warning", "info", "debug", "trace"],
|
|
79
|
+
help=f"Log level (default: {config.log_level}, env: HINDSIGHT_API_LOG_LEVEL)"
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# Development options
|
|
83
|
+
parser.add_argument(
|
|
84
|
+
"--reload", action="store_true",
|
|
85
|
+
help="Enable auto-reload on code changes (development only)"
|
|
86
|
+
)
|
|
87
|
+
parser.add_argument(
|
|
88
|
+
"--workers", type=int, default=1,
|
|
89
|
+
help="Number of worker processes (default: 1)"
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# Access log options
|
|
93
|
+
parser.add_argument(
|
|
94
|
+
"--access-log", action="store_true",
|
|
95
|
+
help="Enable access log"
|
|
96
|
+
)
|
|
97
|
+
parser.add_argument(
|
|
98
|
+
"--no-access-log", dest="access_log", action="store_false",
|
|
99
|
+
help="Disable access log (default)"
|
|
100
|
+
)
|
|
101
|
+
parser.set_defaults(access_log=False)
|
|
102
|
+
|
|
103
|
+
# Proxy options
|
|
104
|
+
parser.add_argument(
|
|
105
|
+
"--proxy-headers", action="store_true",
|
|
106
|
+
help="Enable X-Forwarded-Proto, X-Forwarded-For headers"
|
|
107
|
+
)
|
|
108
|
+
parser.add_argument(
|
|
109
|
+
"--forwarded-allow-ips", default=None,
|
|
110
|
+
help="Comma separated list of IPs to trust with proxy headers"
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
# SSL options
|
|
114
|
+
parser.add_argument(
|
|
115
|
+
"--ssl-keyfile", default=None,
|
|
116
|
+
help="SSL key file"
|
|
117
|
+
)
|
|
118
|
+
parser.add_argument(
|
|
119
|
+
"--ssl-certfile", default=None,
|
|
120
|
+
help="SSL certificate file"
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
args = parser.parse_args()
|
|
124
|
+
|
|
125
|
+
# Configure Python logging based on log level
|
|
126
|
+
# Update config with CLI override if provided
|
|
127
|
+
if args.log_level != config.log_level:
|
|
128
|
+
config = HindsightConfig(
|
|
129
|
+
database_url=config.database_url,
|
|
130
|
+
llm_provider=config.llm_provider,
|
|
131
|
+
llm_api_key=config.llm_api_key,
|
|
132
|
+
llm_model=config.llm_model,
|
|
133
|
+
llm_base_url=config.llm_base_url,
|
|
134
|
+
embeddings_provider=config.embeddings_provider,
|
|
135
|
+
embeddings_local_model=config.embeddings_local_model,
|
|
136
|
+
embeddings_tei_url=config.embeddings_tei_url,
|
|
137
|
+
reranker_provider=config.reranker_provider,
|
|
138
|
+
reranker_local_model=config.reranker_local_model,
|
|
139
|
+
reranker_tei_url=config.reranker_tei_url,
|
|
140
|
+
host=args.host,
|
|
141
|
+
port=args.port,
|
|
142
|
+
log_level=args.log_level,
|
|
143
|
+
mcp_enabled=config.mcp_enabled,
|
|
144
|
+
)
|
|
145
|
+
config.configure_logging()
|
|
146
|
+
|
|
147
|
+
# Register cleanup handlers
|
|
148
|
+
atexit.register(_cleanup)
|
|
149
|
+
signal.signal(signal.SIGINT, _signal_handler)
|
|
150
|
+
signal.signal(signal.SIGTERM, _signal_handler)
|
|
151
|
+
|
|
152
|
+
# Create MemoryEngine (reads configuration from environment)
|
|
153
|
+
_memory = MemoryEngine()
|
|
154
|
+
|
|
155
|
+
# Create FastAPI app
|
|
156
|
+
app = create_app(
|
|
157
|
+
memory=_memory,
|
|
158
|
+
http_api_enabled=True,
|
|
159
|
+
mcp_api_enabled=config.mcp_enabled,
|
|
160
|
+
mcp_mount_path="/mcp",
|
|
161
|
+
initialize_memory=True,
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
# Prepare uvicorn config
|
|
165
|
+
uvicorn_config = {
|
|
166
|
+
"app": app,
|
|
167
|
+
"host": args.host,
|
|
168
|
+
"port": args.port,
|
|
169
|
+
"log_level": args.log_level,
|
|
170
|
+
"access_log": args.access_log,
|
|
171
|
+
"proxy_headers": args.proxy_headers,
|
|
172
|
+
"ws": "wsproto", # Use wsproto instead of websockets to avoid deprecation warnings
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
# Add optional parameters if provided
|
|
176
|
+
if args.reload:
|
|
177
|
+
uvicorn_config["reload"] = True
|
|
178
|
+
if args.workers > 1:
|
|
179
|
+
uvicorn_config["workers"] = args.workers
|
|
180
|
+
if args.forwarded_allow_ips:
|
|
181
|
+
uvicorn_config["forwarded_allow_ips"] = args.forwarded_allow_ips
|
|
182
|
+
if args.ssl_keyfile:
|
|
183
|
+
uvicorn_config["ssl_keyfile"] = args.ssl_keyfile
|
|
184
|
+
if args.ssl_certfile:
|
|
185
|
+
uvicorn_config["ssl_certfile"] = args.ssl_certfile
|
|
186
|
+
|
|
187
|
+
print(f"\nStarting Hindsight API...")
|
|
188
|
+
print(f" URL: http://{args.host}:{args.port}")
|
|
189
|
+
print(f" Database: {config.database_url}")
|
|
190
|
+
print(f" LLM: {config.llm_provider} / {config.llm_model}")
|
|
191
|
+
print(f" Embeddings: {config.embeddings_provider}")
|
|
192
|
+
print(f" Reranker: {config.reranker_provider}")
|
|
193
|
+
if config.mcp_enabled:
|
|
194
|
+
print(f" MCP: enabled at /mcp")
|
|
195
|
+
print()
|
|
196
|
+
|
|
197
|
+
uvicorn.run(**uvicorn_config)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
if __name__ == "__main__":
|
|
201
|
+
main()
|
hindsight_api/migrations.py
CHANGED
|
@@ -3,8 +3,8 @@ Database migration management using Alembic.
|
|
|
3
3
|
|
|
4
4
|
This module provides programmatic access to run database migrations
|
|
5
5
|
on application startup. It is designed to be safe for concurrent
|
|
6
|
-
execution
|
|
7
|
-
|
|
6
|
+
execution using PostgreSQL advisory locks to coordinate between
|
|
7
|
+
distributed workers.
|
|
8
8
|
|
|
9
9
|
Important: All migrations must be backward-compatible to allow
|
|
10
10
|
safe rolling deployments.
|
|
@@ -19,19 +19,51 @@ from typing import Optional
|
|
|
19
19
|
|
|
20
20
|
from alembic import command
|
|
21
21
|
from alembic.config import Config
|
|
22
|
+
from sqlalchemy import create_engine, text
|
|
22
23
|
|
|
23
24
|
logger = logging.getLogger(__name__)
|
|
24
25
|
|
|
26
|
+
# Advisory lock ID for migrations (arbitrary unique number)
|
|
27
|
+
MIGRATION_LOCK_ID = 123456789
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _run_migrations_internal(database_url: str, script_location: str) -> None:
|
|
31
|
+
"""
|
|
32
|
+
Internal function to run migrations without locking.
|
|
33
|
+
"""
|
|
34
|
+
logger.info(f"Running database migrations to head...")
|
|
35
|
+
logger.info(f"Database URL: {database_url}")
|
|
36
|
+
logger.info(f"Script location: {script_location}")
|
|
37
|
+
|
|
38
|
+
# Create Alembic configuration programmatically (no alembic.ini needed)
|
|
39
|
+
alembic_cfg = Config()
|
|
40
|
+
|
|
41
|
+
# Set the script location (where alembic versions are stored)
|
|
42
|
+
alembic_cfg.set_main_option("script_location", script_location)
|
|
43
|
+
|
|
44
|
+
# Set the database URL
|
|
45
|
+
alembic_cfg.set_main_option("sqlalchemy.url", database_url)
|
|
46
|
+
|
|
47
|
+
# Configure logging (optional, but helps with debugging)
|
|
48
|
+
# Uses Python's logging system instead of alembic.ini
|
|
49
|
+
alembic_cfg.set_main_option("prepend_sys_path", ".")
|
|
50
|
+
|
|
51
|
+
# Set path_separator to avoid deprecation warning
|
|
52
|
+
alembic_cfg.set_main_option("path_separator", "os")
|
|
53
|
+
|
|
54
|
+
# Run migrations to head (latest version)
|
|
55
|
+
command.upgrade(alembic_cfg, "head")
|
|
56
|
+
|
|
57
|
+
logger.info("Database migrations completed successfully")
|
|
25
58
|
|
|
26
59
|
|
|
27
60
|
def run_migrations(database_url: str, script_location: Optional[str] = None) -> None:
|
|
28
61
|
"""
|
|
29
62
|
Run database migrations to the latest version using programmatic Alembic configuration.
|
|
30
63
|
|
|
31
|
-
This function is safe to call
|
|
32
|
-
-
|
|
33
|
-
-
|
|
34
|
-
- PostgreSQL transactions prevent concurrent migration conflicts
|
|
64
|
+
This function is safe to call from multiple distributed workers simultaneously:
|
|
65
|
+
- Uses PostgreSQL advisory lock to ensure only one worker runs migrations at a time
|
|
66
|
+
- Other workers wait for the lock, then verify migrations are complete
|
|
35
67
|
- If schema is already up-to-date, this is a fast no-op
|
|
36
68
|
|
|
37
69
|
Args:
|
|
@@ -56,11 +88,11 @@ def run_migrations(database_url: str, script_location: Optional[str] = None) ->
|
|
|
56
88
|
try:
|
|
57
89
|
# Determine script location
|
|
58
90
|
if script_location is None:
|
|
59
|
-
# Default: use the alembic directory
|
|
60
|
-
# This file is in:
|
|
61
|
-
#
|
|
62
|
-
|
|
63
|
-
script_location = str(
|
|
91
|
+
# Default: use the alembic directory inside the hindsight_api package
|
|
92
|
+
# This file is in: hindsight_api/migrations.py
|
|
93
|
+
# Alembic is in: hindsight_api/alembic/
|
|
94
|
+
package_dir = Path(__file__).parent
|
|
95
|
+
script_location = str(package_dir / "alembic")
|
|
64
96
|
|
|
65
97
|
script_path = Path(script_location)
|
|
66
98
|
if not script_path.exists():
|
|
@@ -69,32 +101,22 @@ def run_migrations(database_url: str, script_location: Optional[str] = None) ->
|
|
|
69
101
|
"Database migrations cannot be run."
|
|
70
102
|
)
|
|
71
103
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
# Set path_separator to avoid deprecation warning
|
|
90
|
-
alembic_cfg.set_main_option("path_separator", "os")
|
|
91
|
-
|
|
92
|
-
# Run migrations to head (latest version)
|
|
93
|
-
# Note: Alembic may call sys.exit() on errors instead of raising exceptions
|
|
94
|
-
# We rely on the outer try/except and logging to catch issues
|
|
95
|
-
command.upgrade(alembic_cfg, "head")
|
|
96
|
-
|
|
97
|
-
logger.info("Database migrations completed successfully")
|
|
104
|
+
# Use PostgreSQL advisory lock to coordinate between distributed workers
|
|
105
|
+
engine = create_engine(database_url)
|
|
106
|
+
with engine.connect() as conn:
|
|
107
|
+
# pg_advisory_lock blocks until the lock is acquired
|
|
108
|
+
# The lock is automatically released when the connection closes
|
|
109
|
+
logger.debug(f"Acquiring migration advisory lock (id={MIGRATION_LOCK_ID})...")
|
|
110
|
+
conn.execute(text(f"SELECT pg_advisory_lock({MIGRATION_LOCK_ID})"))
|
|
111
|
+
logger.debug("Migration advisory lock acquired")
|
|
112
|
+
|
|
113
|
+
try:
|
|
114
|
+
# Run migrations while holding the lock
|
|
115
|
+
_run_migrations_internal(database_url, script_location)
|
|
116
|
+
finally:
|
|
117
|
+
# Explicitly release the lock (also released on connection close)
|
|
118
|
+
conn.execute(text(f"SELECT pg_advisory_unlock({MIGRATION_LOCK_ID})"))
|
|
119
|
+
logger.debug("Migration advisory lock released")
|
|
98
120
|
|
|
99
121
|
except FileNotFoundError:
|
|
100
122
|
logger.error(f"Alembic script location not found at {script_location}")
|
|
@@ -140,8 +162,8 @@ def check_migration_status(database_url: Optional[str] = None, script_location:
|
|
|
140
162
|
|
|
141
163
|
# Get head revision from migration scripts
|
|
142
164
|
if script_location is None:
|
|
143
|
-
|
|
144
|
-
script_location = str(
|
|
165
|
+
package_dir = Path(__file__).parent
|
|
166
|
+
script_location = str(package_dir / "alembic")
|
|
145
167
|
|
|
146
168
|
script_path = Path(script_location)
|
|
147
169
|
if not script_path.exists():
|
hindsight_api/models.py
CHANGED
|
@@ -292,8 +292,7 @@ class Bank(Base):
|
|
|
292
292
|
JSONB,
|
|
293
293
|
nullable=False,
|
|
294
294
|
server_default=sql_text(
|
|
295
|
-
'\'{"
|
|
296
|
-
'"agreeableness": 0.5, "neuroticism": 0.5, "bias_strength": 0.5}\'::jsonb'
|
|
295
|
+
'\'{"skepticism": 3, "literalism": 3, "empathy": 3}\'::jsonb'
|
|
297
296
|
)
|
|
298
297
|
)
|
|
299
298
|
background: Mapped[str] = mapped_column(Text, nullable=False, server_default="")
|
hindsight_api/pg0.py
CHANGED
|
@@ -153,46 +153,18 @@ class EmbeddedPostgres:
|
|
|
153
153
|
"""
|
|
154
154
|
Ensure pg0 is available.
|
|
155
155
|
|
|
156
|
-
|
|
156
|
+
Checks PATH and default location. If not found, raises an error
|
|
157
|
+
instructing the user to install pg0 manually.
|
|
157
158
|
"""
|
|
158
159
|
if self.is_installed():
|
|
159
160
|
logger.debug(f"pg0 found at {self._binary_path}")
|
|
160
161
|
return
|
|
161
162
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
# Install to default location
|
|
169
|
-
install_dir = Path.home() / ".hindsight" / "bin"
|
|
170
|
-
install_dir.mkdir(parents=True, exist_ok=True)
|
|
171
|
-
install_path = install_dir / "pg0"
|
|
172
|
-
|
|
173
|
-
# Download the binary
|
|
174
|
-
download_url = get_download_url(self.version)
|
|
175
|
-
logger.info(f"Downloading from {download_url}")
|
|
176
|
-
|
|
177
|
-
try:
|
|
178
|
-
async with httpx.AsyncClient(follow_redirects=True, timeout=300.0) as client:
|
|
179
|
-
response = await client.get(download_url)
|
|
180
|
-
response.raise_for_status()
|
|
181
|
-
|
|
182
|
-
# Write binary to disk
|
|
183
|
-
with open(install_path, "wb") as f:
|
|
184
|
-
f.write(response.content)
|
|
185
|
-
|
|
186
|
-
# Make executable on Unix
|
|
187
|
-
if platform.system() != "Windows":
|
|
188
|
-
st = os.stat(install_path)
|
|
189
|
-
os.chmod(install_path, st.st_mode | stat.S_IEXEC)
|
|
190
|
-
|
|
191
|
-
self._binary_path = install_path
|
|
192
|
-
logger.info(f"Installed pg0 to {install_path}")
|
|
193
|
-
|
|
194
|
-
except httpx.HTTPError as e:
|
|
195
|
-
raise RuntimeError(f"Failed to download pg0: {e}") from e
|
|
163
|
+
raise RuntimeError(
|
|
164
|
+
"pg0 is not installed. Please install it manually:\n"
|
|
165
|
+
" curl -fsSL https://github.com/vectorize-io/pg0/releases/latest/download/pg0-linux-amd64 -o ~/.local/bin/pg0 && chmod +x ~/.local/bin/pg0\n"
|
|
166
|
+
"Or visit: https://github.com/vectorize-io/pg0/releases"
|
|
167
|
+
)
|
|
196
168
|
|
|
197
169
|
def _run_command(self, *args: str, capture_output: bool = True) -> subprocess.CompletedProcess:
|
|
198
170
|
"""Run a pg0 command synchronously."""
|
|
@@ -227,6 +199,13 @@ class EmbeddedPostgres:
|
|
|
227
199
|
return match.group(1)
|
|
228
200
|
return None
|
|
229
201
|
|
|
202
|
+
async def _get_version(self) -> str:
|
|
203
|
+
"""Get the pg0 version."""
|
|
204
|
+
returncode, stdout, stderr = await self._run_command_async("--version", timeout=10)
|
|
205
|
+
if returncode == 0 and stdout:
|
|
206
|
+
return stdout.strip()
|
|
207
|
+
return "unknown"
|
|
208
|
+
|
|
230
209
|
async def start(self, max_retries: int = 3, retry_delay: float = 2.0) -> str:
|
|
231
210
|
"""
|
|
232
211
|
Start the PostgreSQL server with retry logic.
|
|
@@ -244,7 +223,9 @@ class EmbeddedPostgres:
|
|
|
244
223
|
if not self.is_installed():
|
|
245
224
|
raise RuntimeError("pg0 is not installed. Call ensure_installed() first.")
|
|
246
225
|
|
|
247
|
-
|
|
226
|
+
# Log pg0 version
|
|
227
|
+
version = await self._get_version()
|
|
228
|
+
logger.info(f"Starting embedded PostgreSQL with pg0 {version} (name: {self.name}, port: {self.port})...")
|
|
248
229
|
|
|
249
230
|
last_error = None
|
|
250
231
|
for attempt in range(1, max_retries + 1):
|
hindsight_api/server.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""
|
|
2
|
+
FastAPI server for Hindsight API.
|
|
3
|
+
|
|
4
|
+
This module provides the ASGI app for uvicorn import string usage:
|
|
5
|
+
uvicorn hindsight_api.server:app
|
|
6
|
+
|
|
7
|
+
For CLI usage, use the hindsight-api command instead.
|
|
8
|
+
"""
|
|
9
|
+
import os
|
|
10
|
+
import warnings
|
|
11
|
+
|
|
12
|
+
# Filter deprecation warnings from third-party libraries
|
|
13
|
+
warnings.filterwarnings("ignore", message="websockets.legacy is deprecated")
|
|
14
|
+
warnings.filterwarnings("ignore", message="websockets.server.WebSocketServerProtocol is deprecated")
|
|
15
|
+
|
|
16
|
+
from hindsight_api import MemoryEngine
|
|
17
|
+
from hindsight_api.api import create_app
|
|
18
|
+
from hindsight_api.config import get_config
|
|
19
|
+
|
|
20
|
+
# Disable tokenizers parallelism to avoid warnings
|
|
21
|
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
22
|
+
|
|
23
|
+
# Load configuration and configure logging
|
|
24
|
+
config = get_config()
|
|
25
|
+
config.configure_logging()
|
|
26
|
+
|
|
27
|
+
# Create app at module level (required for uvicorn import string)
|
|
28
|
+
# MemoryEngine reads configuration from environment variables automatically
|
|
29
|
+
_memory = MemoryEngine()
|
|
30
|
+
|
|
31
|
+
# Create unified app with both HTTP and optionally MCP
|
|
32
|
+
app = create_app(
|
|
33
|
+
memory=_memory,
|
|
34
|
+
http_api_enabled=True,
|
|
35
|
+
mcp_api_enabled=config.mcp_enabled,
|
|
36
|
+
mcp_mount_path="/mcp"
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
if __name__ == "__main__":
|
|
41
|
+
# When run directly, delegate to the CLI
|
|
42
|
+
from hindsight_api.main import main
|
|
43
|
+
main()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hindsight-api
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: Temporal + Semantic + Entity Memory System for AI agents using PostgreSQL
|
|
5
5
|
Requires-Python: >=3.11
|
|
6
6
|
Requires-Dist: alembic>=1.17.1
|
|
@@ -23,7 +23,7 @@ Requires-Dist: pydantic>=2.0.0
|
|
|
23
23
|
Requires-Dist: python-dateutil>=2.8.0
|
|
24
24
|
Requires-Dist: python-dotenv>=1.0.0
|
|
25
25
|
Requires-Dist: rich>=13.0.0
|
|
26
|
-
Requires-Dist: sentence-transformers>=
|
|
26
|
+
Requires-Dist: sentence-transformers>=3.0.0
|
|
27
27
|
Requires-Dist: sqlalchemy>=2.0.44
|
|
28
28
|
Requires-Dist: tiktoken>=0.12.0
|
|
29
29
|
Requires-Dist: torch>=2.0.0
|
|
@@ -36,7 +36,6 @@ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'test'
|
|
|
36
36
|
Requires-Dist: pytest-timeout>=2.4.0; extra == 'test'
|
|
37
37
|
Requires-Dist: pytest-xdist>=3.0.0; extra == 'test'
|
|
38
38
|
Requires-Dist: pytest>=7.0.0; extra == 'test'
|
|
39
|
-
Requires-Dist: testcontainers[postgres]>=4.0.0; extra == 'test'
|
|
40
39
|
Description-Content-Type: text/markdown
|
|
41
40
|
|
|
42
41
|
# Memory
|