hindsight-api 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/admin/__init__.py +1 -0
- hindsight_api/admin/cli.py +252 -0
- hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
- hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
- hindsight_api/api/http.py +282 -20
- hindsight_api/api/mcp.py +47 -52
- hindsight_api/config.py +238 -6
- hindsight_api/engine/cross_encoder.py +599 -86
- hindsight_api/engine/db_budget.py +284 -0
- hindsight_api/engine/db_utils.py +11 -0
- hindsight_api/engine/embeddings.py +453 -26
- hindsight_api/engine/entity_resolver.py +8 -5
- hindsight_api/engine/interface.py +8 -4
- hindsight_api/engine/llm_wrapper.py +241 -27
- hindsight_api/engine/memory_engine.py +609 -122
- hindsight_api/engine/query_analyzer.py +4 -3
- hindsight_api/engine/response_models.py +38 -0
- hindsight_api/engine/retain/fact_extraction.py +388 -192
- hindsight_api/engine/retain/fact_storage.py +34 -8
- hindsight_api/engine/retain/link_utils.py +24 -16
- hindsight_api/engine/retain/orchestrator.py +52 -17
- hindsight_api/engine/retain/types.py +9 -0
- hindsight_api/engine/search/graph_retrieval.py +42 -13
- hindsight_api/engine/search/link_expansion_retrieval.py +256 -0
- hindsight_api/engine/search/mpfp_retrieval.py +362 -117
- hindsight_api/engine/search/reranking.py +2 -2
- hindsight_api/engine/search/retrieval.py +847 -200
- hindsight_api/engine/search/tags.py +172 -0
- hindsight_api/engine/search/think_utils.py +1 -1
- hindsight_api/engine/search/trace.py +12 -0
- hindsight_api/engine/search/tracer.py +24 -1
- hindsight_api/engine/search/types.py +21 -0
- hindsight_api/engine/task_backend.py +109 -18
- hindsight_api/engine/utils.py +1 -1
- hindsight_api/extensions/context.py +10 -1
- hindsight_api/main.py +56 -4
- hindsight_api/metrics.py +433 -48
- hindsight_api/migrations.py +141 -1
- hindsight_api/models.py +3 -1
- hindsight_api/pg0.py +53 -0
- hindsight_api/server.py +39 -2
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.3.0.dist-info}/METADATA +5 -1
- hindsight_api-0.3.0.dist-info/RECORD +82 -0
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.3.0.dist-info}/entry_points.txt +1 -0
- hindsight_api-0.2.1.dist-info/RECORD +0 -75
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.3.0.dist-info}/WHEEL +0 -0
hindsight_api/admin/__init__.py
@@ -0,0 +1 @@
+# Admin CLI for Hindsight
hindsight_api/admin/cli.py
@@ -0,0 +1,252 @@
+"""
+Hindsight Admin CLI - backup and restore operations.
+"""
+
+import asyncio
+import io
+import json
+import logging
+import zipfile
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+import asyncpg
+import typer
+
+from ..config import HindsightConfig
+from ..pg0 import parse_pg0_url, resolve_database_url
+
+
+def _fq_table(table: str, schema: str) -> str:
+    """Get fully-qualified table name with schema prefix."""
+    return f"{schema}.{table}"
+
+
+# Setup logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(message)s",
+)
+logger = logging.getLogger(__name__)
+
+app = typer.Typer(name="hindsight-admin", help="Hindsight administrative commands")
+
+# Tables to backup/restore in dependency order
+# Import must happen in this order due to foreign key constraints
+BACKUP_TABLES = [
+    "banks",
+    "documents",
+    "entities",
+    "chunks",
+    "memory_units",
+    "unit_entities",
+    "entity_cooccurrences",
+    "memory_links",
+]
+
+MANIFEST_VERSION = "1"
+
+
+async def _backup(database_url: str, output_path: Path, schema: str = "public") -> dict[str, Any]:
+    """Backup all tables to a zip file using binary COPY protocol."""
+    conn = await asyncpg.connect(database_url)
+    try:
+        tables: dict[str, Any] = {}
+        manifest: dict[str, Any] = {
+            "version": MANIFEST_VERSION,
+            "created_at": datetime.now(timezone.utc).isoformat(),
+            "schema": schema,
+            "tables": tables,
+        }
+
+        # Use a transaction with REPEATABLE READ isolation to get a consistent
+        # snapshot across all tables. This prevents race conditions where
+        # entity_cooccurrences could reference entities created after the
+        # entities table was backed up.
+        async with conn.transaction(isolation="repeatable_read"):
+            with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf:
+                for i, table in enumerate(BACKUP_TABLES, 1):
+                    typer.echo(f" [{i}/{len(BACKUP_TABLES)}] Backing up {table}...", nl=False)
+
+                    buffer = io.BytesIO()
+
+                    # Use binary COPY for exact type preservation
+                    # asyncpg requires schema_name as separate parameter
+                    await conn.copy_from_table(table, schema_name=schema, output=buffer, format="binary")
+
+                    data = buffer.getvalue()
+                    zf.writestr(f"{table}.bin", data)
+
+                    # Get row count for manifest
+                    qualified_table = _fq_table(table, schema)
+                    row_count = await conn.fetchval(f"SELECT COUNT(*) FROM {qualified_table}")
+                    tables[table] = {
+                        "rows": row_count,
+                        "size_bytes": len(data),
+                    }
+
+                    typer.echo(f" {row_count} rows")
+
+                zf.writestr("manifest.json", json.dumps(manifest, indent=2))
+
+        return manifest
+    finally:
+        await conn.close()
+
+
+async def _restore(database_url: str, input_path: Path, schema: str = "public") -> dict[str, Any]:
+    """Restore all tables from a zip file using binary COPY protocol."""
+    conn = await asyncpg.connect(database_url)
+    try:
+        with zipfile.ZipFile(input_path, "r") as zf:
+            # Read and validate manifest
+            manifest: dict[str, Any] = json.loads(zf.read("manifest.json"))
+            if manifest.get("version") != MANIFEST_VERSION:
+                raise ValueError(f"Unsupported backup version: {manifest.get('version')}")
+
+            # Use a transaction for atomic restore - either all tables are
+            # restored or none are, preventing partial/inconsistent state.
+            async with conn.transaction():
+                typer.echo(" Clearing existing data...")
+                # Truncate tables in reverse order (respects FK constraints)
+                for table in reversed(BACKUP_TABLES):
+                    qualified_table = _fq_table(table, schema)
+                    await conn.execute(f"TRUNCATE TABLE {qualified_table} CASCADE")
+
+                # Restore tables in forward order
+                for i, table in enumerate(BACKUP_TABLES, 1):
+                    filename = f"{table}.bin"
+                    if filename not in zf.namelist():
+                        typer.echo(f" [{i}/{len(BACKUP_TABLES)}] {table}: skipped (not in backup)")
+                        continue
+
+                    expected_rows = manifest["tables"].get(table, {}).get("rows", "?")
+                    typer.echo(f" [{i}/{len(BACKUP_TABLES)}] Restoring {table}... {expected_rows} rows")
+
+                    data = zf.read(filename)
+                    buffer = io.BytesIO(data)
+                    # asyncpg requires schema_name as separate parameter
+                    await conn.copy_to_table(table, schema_name=schema, source=buffer, format="binary")
+
+                # Refresh materialized view
+                typer.echo(" Refreshing materialized views...")
+                await conn.execute(f"REFRESH MATERIALIZED VIEW {_fq_table('memory_units_bm25', schema)}")
+
+        return manifest
+    finally:
+        await conn.close()
+
+
+async def _run_backup(db_url: str, output: Path, schema: str = "public") -> dict[str, Any]:
+    """Resolve database URL and run backup."""
+    is_pg0, instance_name, _ = parse_pg0_url(db_url)
+    if is_pg0:
+        typer.echo(f"Starting embedded PostgreSQL (instance: {instance_name})...")
+    resolved_url = await resolve_database_url(db_url)
+    return await _backup(resolved_url, output, schema)
+
+
+async def _run_restore(db_url: str, input_file: Path, schema: str = "public") -> dict[str, Any]:
+    """Resolve database URL and run restore."""
+    is_pg0, instance_name, _ = parse_pg0_url(db_url)
+    if is_pg0:
+        typer.echo(f"Starting embedded PostgreSQL (instance: {instance_name})...")
+    resolved_url = await resolve_database_url(db_url)
+    return await _restore(resolved_url, input_file, schema)
+
+
+@app.command()
+def backup(
+    output: Path = typer.Argument(..., help="Output file path (.zip)"),
+    schema: str = typer.Option("public", "--schema", "-s", help="Database schema to backup"),
+):
+    """Backup the Hindsight database to a zip file."""
+    config = HindsightConfig.from_env()
+
+    if not config.database_url:
+        typer.echo("Error: Database URL not configured.", err=True)
+        typer.echo("Set HINDSIGHT_API_DATABASE_URL environment variable.", err=True)
+        raise typer.Exit(1)
+
+    if output.suffix != ".zip":
+        output = output.with_suffix(".zip")
+
+    typer.echo(f"Backing up database (schema: {schema}) to {output}...")
+
+    manifest = asyncio.run(_run_backup(config.database_url, output, schema))
+
+    total_rows = sum(t["rows"] for t in manifest["tables"].values())
+    typer.echo(f"Backed up {total_rows} rows across {len(BACKUP_TABLES)} tables")
+    typer.echo(f"Backup saved to {output}")
+
+
+@app.command()
+def restore(
+    input_file: Path = typer.Argument(..., help="Input backup file (.zip)"),
+    schema: str = typer.Option("public", "--schema", "-s", help="Database schema to restore to"),
+    yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"),
+):
+    """Restore the database from a backup file. WARNING: This deletes all existing data."""
+    config = HindsightConfig.from_env()
+
+    if not config.database_url:
+        typer.echo("Error: Database URL not configured.", err=True)
+        typer.echo("Set HINDSIGHT_API_DATABASE_URL environment variable.", err=True)
+        raise typer.Exit(1)
+
+    if not input_file.exists():
+        typer.echo(f"Error: File not found: {input_file}", err=True)
+        raise typer.Exit(1)
+
+    if not yes:
+        typer.confirm(
+            "This will DELETE all existing data and replace it with the backup. Continue?",
+            abort=True,
+        )
+
+    typer.echo(f"Restoring database (schema: {schema}) from {input_file}...")
+
+    manifest = asyncio.run(_run_restore(config.database_url, input_file, schema))
+
+    total_rows = sum(t["rows"] for t in manifest["tables"].values())
+    typer.echo(f"Restored {total_rows} rows across {len(BACKUP_TABLES)} tables")
+    typer.echo("Restore complete")
+
+
+async def _run_migration(db_url: str, schema: str = "public") -> None:
+    """Resolve database URL and run migrations."""
+    from ..migrations import run_migrations
+
+    is_pg0, instance_name, _ = parse_pg0_url(db_url)
+    if is_pg0:
+        typer.echo(f"Starting embedded PostgreSQL (instance: {instance_name})...")
+    resolved_url = await resolve_database_url(db_url)
+    run_migrations(resolved_url, schema=schema)
+
+
+@app.command(name="run-db-migration")
+def run_db_migration(
+    schema: str = typer.Option("public", "--schema", "-s", help="Database schema to run migrations on"),
+):
+    """Run database migrations to the latest version."""
+    config = HindsightConfig.from_env()
+
+    if not config.database_url:
+        typer.echo("Error: Database URL not configured.", err=True)
+        typer.echo("Set HINDSIGHT_API_DATABASE_URL environment variable.", err=True)
+        raise typer.Exit(1)
+
+    typer.echo(f"Running database migrations (schema: {schema})...")
+
+    asyncio.run(_run_migration(config.database_url, schema))
+
+    typer.echo("Database migrations completed successfully")
+
+
+def main():
+    app()
+
+
+if __name__ == "__main__":
+    main()
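The manifest.json that _backup writes records enough to sanity-check an archive before the destructive restore path truncates anything. A minimal stdlib-only sketch of such a check; the inspect_backup helper and the sample filename are hypothetical, not part of the package, though the field names match what _backup writes above:

import json
import zipfile
from pathlib import Path


def inspect_backup(path: Path) -> None:
    # Hypothetical helper: read the manifest stored in the backup archive
    # and print per-table row/byte counts without touching the database.
    with zipfile.ZipFile(path, "r") as zf:
        manifest = json.loads(zf.read("manifest.json"))
    print(f"version={manifest['version']} schema={manifest['schema']} created={manifest['created_at']}")
    for table, stats in manifest["tables"].items():
        print(f"  {table}: {stats['rows']} rows, {stats['size_bytes']} bytes")


inspect_backup(Path("hindsight-backup.zip"))  # hypothetical filename

Given the one-line addition to entry_points.txt, the wheel presumably exposes this module as a hindsight-admin console script (the Typer app's name), e.g. hindsight-admin backup hindsight-backup.zip --schema public.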
hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py
@@ -0,0 +1,44 @@
+"""add_memory_links_from_type_weight_index
+
+Revision ID: f1a2b3c4d5e6
+Revises: e0a1b2c3d4e5
+Create Date: 2025-01-12
+
+Add composite index on memory_links (from_unit_id, link_type, weight DESC)
+to optimize MPFP graph traversal queries that need top-k edges per type.
+"""
+
+from collections.abc import Sequence
+
+from alembic import context, op
+
+# revision identifiers, used by Alembic.
+revision: str = "f1a2b3c4d5e6"
+down_revision: str | Sequence[str] | None = "e0a1b2c3d4e5"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def _get_schema_prefix() -> str:
+    """Get schema prefix for table names (e.g., 'tenant_x.' or '' for public)."""
+    schema = context.config.get_main_option("target_schema")
+    return f'"{schema}".' if schema else ""
+
+
+def upgrade() -> None:
+    """Add composite index for efficient MPFP edge loading."""
+    schema = _get_schema_prefix()
+    # Create composite index for efficient top-k per (from_node, link_type) queries
+    # This enables LATERAL joins to use index-only scans with early termination
+    # Note: Not using CONCURRENTLY here as it requires running outside a transaction
+    # For production with large tables, consider running this manually with CONCURRENTLY
+    op.execute(
+        f"CREATE INDEX IF NOT EXISTS idx_memory_links_from_type_weight "
+        f"ON {schema}memory_links(from_unit_id, link_type, weight DESC)"
+    )
+
+
+def downgrade() -> None:
+    """Remove the composite index."""
+    schema = _get_schema_prefix()
+    op.execute(f"DROP INDEX IF EXISTS {schema}idx_memory_links_from_type_weight")
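The index added here matches queries that pin from_unit_id and link_type and then read edges in descending weight, so Postgres can walk the index in order and stop after k rows instead of sorting. A sketch of that access pattern with asyncpg (already a dependency of the CLI above); the query shape is inferred from the migration's comments, and the to_unit_id column plus the top_k_links helper are assumptions, not the package's actual MPFP code:

import asyncpg


async def top_k_links(conn: asyncpg.Connection, unit_id, link_type: str, k: int = 10):
    # Illustrative only. The WHERE clause pins the first two index columns,
    # and ORDER BY weight DESC follows the index order, so the plan can be
    # an index scan that terminates early after k rows.
    return await conn.fetch(
        """
        SELECT to_unit_id, weight
        FROM memory_links
        WHERE from_unit_id = $1 AND link_type = $2
        ORDER BY weight DESC
        LIMIT $3
        """,
        unit_id,
        link_type,
        k,
    )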
hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py
@@ -0,0 +1,48 @@
+"""add_tags_column
+
+Revision ID: g2a3b4c5d6e7
+Revises: f1a2b3c4d5e6
+Create Date: 2025-01-13
+
+Add tags column to memory_units and documents tables for visibility scoping.
+Tags enable filtering memories by scope (e.g., user IDs, session IDs) during recall/reflect.
+"""
+
+from collections.abc import Sequence
+
+from alembic import context, op
+
+# revision identifiers, used by Alembic.
+revision: str = "g2a3b4c5d6e7"
+down_revision: str | Sequence[str] | None = "f1a2b3c4d5e6"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def _get_schema_prefix() -> str:
+    """Get schema prefix for table names (e.g., 'tenant_x.' or '' for public)."""
+    schema = context.config.get_main_option("target_schema")
+    return f'"{schema}".' if schema else ""
+
+
+def upgrade() -> None:
+    """Add tags column to memory_units and documents tables."""
+    schema = _get_schema_prefix()
+
+    # Add tags column to memory_units table
+    op.execute(f"ALTER TABLE {schema}memory_units ADD COLUMN IF NOT EXISTS tags VARCHAR[] NOT NULL DEFAULT '{{}}'")
+
+    # Create GIN index for efficient array containment queries (tags && ARRAY['x'])
+    op.execute(f"CREATE INDEX IF NOT EXISTS idx_memory_units_tags ON {schema}memory_units USING GIN (tags)")
+
+    # Add tags column to documents table for document-level tags
+    op.execute(f"ALTER TABLE {schema}documents ADD COLUMN IF NOT EXISTS tags VARCHAR[] NOT NULL DEFAULT '{{}}'")
+
+
+def downgrade() -> None:
+    """Remove tags columns and index."""
+    schema = _get_schema_prefix()
+
+    op.execute(f"DROP INDEX IF EXISTS {schema}idx_memory_units_tags")
+    op.execute(f"ALTER TABLE {schema}memory_units DROP COLUMN IF EXISTS tags")
+    op.execute(f"ALTER TABLE {schema}documents DROP COLUMN IF EXISTS tags")