hindsight-api 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +10 -9
- hindsight_api/alembic/env.py +5 -8
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
- hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
- hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
- hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
- hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
- hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
- hindsight_api/api/__init__.py +10 -10
- hindsight_api/api/http.py +575 -593
- hindsight_api/api/mcp.py +31 -33
- hindsight_api/banner.py +13 -6
- hindsight_api/config.py +17 -12
- hindsight_api/engine/__init__.py +9 -9
- hindsight_api/engine/cross_encoder.py +23 -27
- hindsight_api/engine/db_utils.py +5 -4
- hindsight_api/engine/embeddings.py +22 -21
- hindsight_api/engine/entity_resolver.py +81 -75
- hindsight_api/engine/llm_wrapper.py +74 -88
- hindsight_api/engine/memory_engine.py +663 -673
- hindsight_api/engine/query_analyzer.py +100 -97
- hindsight_api/engine/response_models.py +105 -106
- hindsight_api/engine/retain/__init__.py +9 -16
- hindsight_api/engine/retain/bank_utils.py +34 -58
- hindsight_api/engine/retain/chunk_storage.py +4 -12
- hindsight_api/engine/retain/deduplication.py +9 -28
- hindsight_api/engine/retain/embedding_processing.py +4 -11
- hindsight_api/engine/retain/embedding_utils.py +3 -4
- hindsight_api/engine/retain/entity_processing.py +7 -17
- hindsight_api/engine/retain/fact_extraction.py +155 -165
- hindsight_api/engine/retain/fact_storage.py +11 -23
- hindsight_api/engine/retain/link_creation.py +11 -39
- hindsight_api/engine/retain/link_utils.py +166 -95
- hindsight_api/engine/retain/observation_regeneration.py +39 -52
- hindsight_api/engine/retain/orchestrator.py +72 -62
- hindsight_api/engine/retain/types.py +49 -43
- hindsight_api/engine/search/__init__.py +15 -1
- hindsight_api/engine/search/fusion.py +6 -15
- hindsight_api/engine/search/graph_retrieval.py +234 -0
- hindsight_api/engine/search/mpfp_retrieval.py +438 -0
- hindsight_api/engine/search/observation_utils.py +9 -16
- hindsight_api/engine/search/reranking.py +4 -7
- hindsight_api/engine/search/retrieval.py +388 -193
- hindsight_api/engine/search/scoring.py +5 -7
- hindsight_api/engine/search/temporal_extraction.py +8 -11
- hindsight_api/engine/search/think_utils.py +115 -39
- hindsight_api/engine/search/trace.py +68 -38
- hindsight_api/engine/search/tracer.py +49 -35
- hindsight_api/engine/search/types.py +22 -16
- hindsight_api/engine/task_backend.py +21 -26
- hindsight_api/engine/utils.py +25 -10
- hindsight_api/main.py +21 -40
- hindsight_api/mcp_local.py +190 -0
- hindsight_api/metrics.py +44 -30
- hindsight_api/migrations.py +10 -8
- hindsight_api/models.py +60 -72
- hindsight_api/pg0.py +64 -337
- hindsight_api/server.py +3 -6
- {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/METADATA +6 -5
- hindsight_api-0.1.6.dist-info/RECORD +64 -0
- {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/entry_points.txt +1 -0
- hindsight_api-0.1.4.dist-info/RECORD +0 -61
- {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/WHEEL +0 -0
hindsight_api/__init__.py
CHANGED
|
@@ -3,23 +3,24 @@ Memory System for AI Agents.
|
|
|
3
3
|
|
|
4
4
|
Temporal + Semantic Memory Architecture using PostgreSQL with pgvector.
|
|
5
5
|
"""
|
|
6
|
+
|
|
7
|
+
from .config import HindsightConfig, get_config
|
|
8
|
+
from .engine.cross_encoder import CrossEncoderModel, LocalSTCrossEncoder, RemoteTEICrossEncoder
|
|
9
|
+
from .engine.embeddings import Embeddings, LocalSTEmbeddings, RemoteTEIEmbeddings
|
|
10
|
+
from .engine.llm_wrapper import LLMConfig
|
|
6
11
|
from .engine.memory_engine import MemoryEngine
|
|
7
12
|
from .engine.search.trace import (
|
|
8
|
-
SearchTrace,
|
|
9
|
-
QueryInfo,
|
|
10
13
|
EntryPoint,
|
|
11
|
-
NodeVisit,
|
|
12
|
-
WeightComponents,
|
|
13
14
|
LinkInfo,
|
|
15
|
+
NodeVisit,
|
|
14
16
|
PruningDecision,
|
|
15
|
-
|
|
17
|
+
QueryInfo,
|
|
16
18
|
SearchPhaseMetrics,
|
|
19
|
+
SearchSummary,
|
|
20
|
+
SearchTrace,
|
|
21
|
+
WeightComponents,
|
|
17
22
|
)
|
|
18
23
|
from .engine.search.tracer import SearchTracer
|
|
19
|
-
from .engine.embeddings import Embeddings, LocalSTEmbeddings, RemoteTEIEmbeddings
|
|
20
|
-
from .engine.cross_encoder import CrossEncoderModel, LocalSTCrossEncoder, RemoteTEICrossEncoder
|
|
21
|
-
from .engine.llm_wrapper import LLMConfig
|
|
22
|
-
from .config import HindsightConfig, get_config
|
|
23
24
|
|
|
24
25
|
__all__ = [
|
|
25
26
|
"MemoryEngine",
|
hindsight_api/alembic/env.py
CHANGED
|
@@ -2,20 +2,19 @@
|
|
|
2
2
|
Alembic environment configuration for SQLAlchemy with pgvector.
|
|
3
3
|
Uses synchronous psycopg2 driver for migrations to avoid pgbouncer issues.
|
|
4
4
|
"""
|
|
5
|
+
|
|
5
6
|
import logging
|
|
6
7
|
import os
|
|
7
|
-
import sys
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
|
|
10
|
-
from sqlalchemy import pool, engine_from_config
|
|
11
|
-
from sqlalchemy.engine import Connection
|
|
12
|
-
|
|
13
10
|
from alembic import context
|
|
14
11
|
from dotenv import load_dotenv
|
|
12
|
+
from sqlalchemy import engine_from_config, pool
|
|
15
13
|
|
|
16
14
|
# Import your models here
|
|
17
15
|
from hindsight_api.models import Base
|
|
18
16
|
|
|
17
|
+
|
|
19
18
|
# Load environment variables based on HINDSIGHT_API_DATABASE_URL env var or default to local
|
|
20
19
|
def load_env():
|
|
21
20
|
"""Load environment variables from .env"""
|
|
@@ -30,6 +29,7 @@ def load_env():
|
|
|
30
29
|
if env_file.exists():
|
|
31
30
|
load_dotenv(env_file)
|
|
32
31
|
|
|
32
|
+
|
|
33
33
|
load_env()
|
|
34
34
|
|
|
35
35
|
# this is the Alembic Config object, which provides
|
|
@@ -128,10 +128,7 @@ def run_migrations_online() -> None:
|
|
|
128
128
|
connection.execute(text("SET SESSION CHARACTERISTICS AS TRANSACTION READ WRITE"))
|
|
129
129
|
connection.commit() # Commit the SET command
|
|
130
130
|
|
|
131
|
-
context.configure(
|
|
132
|
-
connection=connection,
|
|
133
|
-
target_metadata=target_metadata
|
|
134
|
-
)
|
|
131
|
+
context.configure(connection=connection, target_metadata=target_metadata)
|
|
135
132
|
|
|
136
133
|
with context.begin_transaction():
|
|
137
134
|
context.run_migrations()
|
|
hindsight_api/alembic/versions/5a366d414dce_initial_schema.py
CHANGED
|
@@ -5,120 +5,150 @@ Revises:
|
|
|
5
5
|
Create Date: 2025-11-27 11:54:19.228030
|
|
6
6
|
|
|
7
7
|
"""
|
|
8
|
-
from typing import Sequence, Union
|
|
9
8
|
|
|
10
|
-
from
|
|
9
|
+
from collections.abc import Sequence
|
|
10
|
+
|
|
11
11
|
import sqlalchemy as sa
|
|
12
|
-
from
|
|
12
|
+
from alembic import op
|
|
13
13
|
from pgvector.sqlalchemy import Vector
|
|
14
|
-
|
|
14
|
+
from sqlalchemy.dialects import postgresql
|
|
15
15
|
|
|
16
16
|
# revision identifiers, used by Alembic.
|
|
17
|
-
revision: str =
|
|
18
|
-
down_revision:
|
|
19
|
-
branch_labels:
|
|
20
|
-
depends_on:
|
|
17
|
+
revision: str = "5a366d414dce"
|
|
18
|
+
down_revision: str | Sequence[str] | None = None
|
|
19
|
+
branch_labels: str | Sequence[str] | None = None
|
|
20
|
+
depends_on: str | Sequence[str] | None = None
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def upgrade() -> None:
|
|
24
24
|
"""Upgrade schema - create all tables from scratch."""
|
|
25
25
|
|
|
26
26
|
# Enable required extensions
|
|
27
|
-
op.execute(
|
|
27
|
+
op.execute("CREATE EXTENSION IF NOT EXISTS vector")
|
|
28
28
|
|
|
29
29
|
# Create banks table
|
|
30
30
|
op.create_table(
|
|
31
|
-
|
|
32
|
-
sa.Column(
|
|
33
|
-
sa.Column(
|
|
34
|
-
sa.Column(
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
31
|
+
"banks",
|
|
32
|
+
sa.Column("bank_id", sa.Text(), nullable=False),
|
|
33
|
+
sa.Column("name", sa.Text(), nullable=True),
|
|
34
|
+
sa.Column(
|
|
35
|
+
"personality",
|
|
36
|
+
postgresql.JSONB(astext_type=sa.Text()),
|
|
37
|
+
server_default=sa.text("'{}'::jsonb"),
|
|
38
|
+
nullable=False,
|
|
39
|
+
),
|
|
40
|
+
sa.Column("background", sa.Text(), nullable=True),
|
|
41
|
+
sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
|
|
42
|
+
sa.Column("updated_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
|
|
43
|
+
sa.PrimaryKeyConstraint("bank_id", name=op.f("pk_banks")),
|
|
39
44
|
)
|
|
40
45
|
|
|
41
46
|
# Create documents table
|
|
42
47
|
op.create_table(
|
|
43
|
-
|
|
44
|
-
sa.Column(
|
|
45
|
-
sa.Column(
|
|
46
|
-
sa.Column(
|
|
47
|
-
sa.Column(
|
|
48
|
-
sa.Column(
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
sa.
|
|
48
|
+
"documents",
|
|
49
|
+
sa.Column("id", sa.Text(), nullable=False),
|
|
50
|
+
sa.Column("bank_id", sa.Text(), nullable=False),
|
|
51
|
+
sa.Column("original_text", sa.Text(), nullable=True),
|
|
52
|
+
sa.Column("content_hash", sa.Text(), nullable=True),
|
|
53
|
+
sa.Column(
|
|
54
|
+
"metadata", postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False
|
|
55
|
+
),
|
|
56
|
+
sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
|
|
57
|
+
sa.Column("updated_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
|
|
58
|
+
sa.PrimaryKeyConstraint("id", "bank_id", name=op.f("pk_documents")),
|
|
52
59
|
)
|
|
53
|
-
op.create_index(
|
|
54
|
-
op.create_index(
|
|
60
|
+
op.create_index("idx_documents_bank_id", "documents", ["bank_id"])
|
|
61
|
+
op.create_index("idx_documents_content_hash", "documents", ["content_hash"])
|
|
55
62
|
|
|
56
63
|
# Create async_operations table
|
|
57
64
|
op.create_table(
|
|
58
|
-
|
|
59
|
-
sa.Column(
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
sa.Column(
|
|
63
|
-
sa.Column(
|
|
64
|
-
sa.Column(
|
|
65
|
-
sa.Column(
|
|
66
|
-
sa.Column(
|
|
67
|
-
sa.Column(
|
|
68
|
-
sa.
|
|
69
|
-
sa.
|
|
65
|
+
"async_operations",
|
|
66
|
+
sa.Column(
|
|
67
|
+
"operation_id", postgresql.UUID(as_uuid=True), server_default=sa.text("gen_random_uuid()"), nullable=False
|
|
68
|
+
),
|
|
69
|
+
sa.Column("bank_id", sa.Text(), nullable=False),
|
|
70
|
+
sa.Column("operation_type", sa.Text(), nullable=False),
|
|
71
|
+
sa.Column("status", sa.Text(), server_default="pending", nullable=False),
|
|
72
|
+
sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
|
|
73
|
+
sa.Column("updated_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
|
|
74
|
+
sa.Column("completed_at", postgresql.TIMESTAMP(timezone=True), nullable=True),
|
|
75
|
+
sa.Column("error_message", sa.Text(), nullable=True),
|
|
76
|
+
sa.Column(
|
|
77
|
+
"result_metadata",
|
|
78
|
+
postgresql.JSONB(astext_type=sa.Text()),
|
|
79
|
+
server_default=sa.text("'{}'::jsonb"),
|
|
80
|
+
nullable=False,
|
|
81
|
+
),
|
|
82
|
+
sa.PrimaryKeyConstraint("operation_id", name=op.f("pk_async_operations")),
|
|
83
|
+
sa.CheckConstraint(
|
|
84
|
+
"status IN ('pending', 'processing', 'completed', 'failed')", name="async_operations_status_check"
|
|
85
|
+
),
|
|
70
86
|
)
|
|
71
|
-
op.create_index(
|
|
72
|
-
op.create_index(
|
|
73
|
-
op.create_index(
|
|
87
|
+
op.create_index("idx_async_operations_bank_id", "async_operations", ["bank_id"])
|
|
88
|
+
op.create_index("idx_async_operations_status", "async_operations", ["status"])
|
|
89
|
+
op.create_index("idx_async_operations_bank_status", "async_operations", ["bank_id", "status"])
|
|
74
90
|
|
|
75
91
|
# Create entities table
|
|
76
92
|
op.create_table(
|
|
77
|
-
|
|
78
|
-
sa.Column(
|
|
79
|
-
sa.Column(
|
|
80
|
-
sa.Column(
|
|
81
|
-
sa.Column(
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
sa.Column(
|
|
85
|
-
sa.
|
|
93
|
+
"entities",
|
|
94
|
+
sa.Column("id", postgresql.UUID(as_uuid=True), server_default=sa.text("gen_random_uuid()"), nullable=False),
|
|
95
|
+
sa.Column("canonical_name", sa.Text(), nullable=False),
|
|
96
|
+
sa.Column("bank_id", sa.Text(), nullable=False),
|
|
97
|
+
sa.Column(
|
|
98
|
+
"metadata", postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False
|
|
99
|
+
),
|
|
100
|
+
sa.Column("first_seen", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
|
|
101
|
+
sa.Column("last_seen", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
|
|
102
|
+
sa.Column("mention_count", sa.Integer(), server_default="1", nullable=False),
|
|
103
|
+
sa.PrimaryKeyConstraint("id", name=op.f("pk_entities")),
|
|
86
104
|
)
|
|
87
|
-
op.create_index(
|
|
88
|
-
op.create_index(
|
|
89
|
-
op.create_index(
|
|
105
|
+
op.create_index("idx_entities_bank_id", "entities", ["bank_id"])
|
|
106
|
+
op.create_index("idx_entities_canonical_name", "entities", ["canonical_name"])
|
|
107
|
+
op.create_index("idx_entities_bank_name", "entities", ["bank_id", "canonical_name"])
|
|
90
108
|
# Create unique index on (bank_id, LOWER(canonical_name)) for entity resolution
|
|
91
|
-
op.execute(
|
|
109
|
+
op.execute("CREATE UNIQUE INDEX idx_entities_bank_lower_name ON entities (bank_id, LOWER(canonical_name))")
|
|
92
110
|
|
|
93
111
|
# Create memory_units table
|
|
94
112
|
op.create_table(
|
|
95
|
-
|
|
96
|
-
sa.Column(
|
|
97
|
-
sa.Column(
|
|
98
|
-
sa.Column(
|
|
99
|
-
sa.Column(
|
|
100
|
-
sa.Column(
|
|
101
|
-
sa.Column(
|
|
102
|
-
sa.Column(
|
|
103
|
-
sa.Column(
|
|
104
|
-
sa.Column(
|
|
105
|
-
sa.Column(
|
|
106
|
-
sa.Column(
|
|
107
|
-
sa.Column(
|
|
108
|
-
sa.Column(
|
|
109
|
-
sa.Column(
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
sa.
|
|
113
|
-
sa.
|
|
114
|
-
sa.
|
|
115
|
-
|
|
113
|
+
"memory_units",
|
|
114
|
+
sa.Column("id", postgresql.UUID(as_uuid=True), server_default=sa.text("gen_random_uuid()"), nullable=False),
|
|
115
|
+
sa.Column("bank_id", sa.Text(), nullable=False),
|
|
116
|
+
sa.Column("document_id", sa.Text(), nullable=True),
|
|
117
|
+
sa.Column("text", sa.Text(), nullable=False),
|
|
118
|
+
sa.Column("embedding", Vector(384), nullable=True),
|
|
119
|
+
sa.Column("context", sa.Text(), nullable=True),
|
|
120
|
+
sa.Column("event_date", postgresql.TIMESTAMP(timezone=True), nullable=False),
|
|
121
|
+
sa.Column("occurred_start", postgresql.TIMESTAMP(timezone=True), nullable=True),
|
|
122
|
+
sa.Column("occurred_end", postgresql.TIMESTAMP(timezone=True), nullable=True),
|
|
123
|
+
sa.Column("mentioned_at", postgresql.TIMESTAMP(timezone=True), nullable=True),
|
|
124
|
+
sa.Column("fact_type", sa.Text(), server_default="world", nullable=False),
|
|
125
|
+
sa.Column("confidence_score", sa.Float(), nullable=True),
|
|
126
|
+
sa.Column("access_count", sa.Integer(), server_default="0", nullable=False),
|
|
127
|
+
sa.Column(
|
|
128
|
+
"metadata", postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False
|
|
129
|
+
),
|
|
130
|
+
sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
|
|
131
|
+
sa.Column("updated_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
|
|
132
|
+
sa.ForeignKeyConstraint(
|
|
133
|
+
["document_id", "bank_id"],
|
|
134
|
+
["documents.id", "documents.bank_id"],
|
|
135
|
+
name="memory_units_document_fkey",
|
|
136
|
+
ondelete="CASCADE",
|
|
137
|
+
),
|
|
138
|
+
sa.PrimaryKeyConstraint("id", name=op.f("pk_memory_units")),
|
|
139
|
+
sa.CheckConstraint(
|
|
140
|
+
"fact_type IN ('world', 'bank', 'opinion', 'observation')", name="memory_units_fact_type_check"
|
|
141
|
+
),
|
|
142
|
+
sa.CheckConstraint(
|
|
143
|
+
"confidence_score IS NULL OR (confidence_score >= 0.0 AND confidence_score <= 1.0)",
|
|
144
|
+
name="memory_units_confidence_range_check",
|
|
145
|
+
),
|
|
116
146
|
sa.CheckConstraint(
|
|
117
147
|
"(fact_type = 'opinion' AND confidence_score IS NOT NULL) OR "
|
|
118
148
|
"(fact_type = 'observation') OR "
|
|
119
149
|
"(fact_type NOT IN ('opinion', 'observation') AND confidence_score IS NULL)",
|
|
120
|
-
name=
|
|
121
|
-
)
|
|
150
|
+
name="confidence_score_fact_type_check",
|
|
151
|
+
),
|
|
122
152
|
)
|
|
123
153
|
|
|
124
154
|
# Add search_vector column for full-text search
|
|
@@ -128,18 +158,41 @@ def upgrade() -> None:
|
|
|
128
158
|
GENERATED ALWAYS AS (to_tsvector('english', COALESCE(text, '') || ' ' || COALESCE(context, ''))) STORED
|
|
129
159
|
""")
|
|
130
160
|
|
|
131
|
-
op.create_index(
|
|
132
|
-
op.create_index(
|
|
133
|
-
op.create_index(
|
|
134
|
-
op.create_index(
|
|
135
|
-
op.create_index(
|
|
136
|
-
op.create_index(
|
|
137
|
-
op.create_index(
|
|
138
|
-
op.create_index(
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
op.create_index(
|
|
142
|
-
|
|
161
|
+
op.create_index("idx_memory_units_bank_id", "memory_units", ["bank_id"])
|
|
162
|
+
op.create_index("idx_memory_units_document_id", "memory_units", ["document_id"])
|
|
163
|
+
op.create_index("idx_memory_units_event_date", "memory_units", [sa.text("event_date DESC")])
|
|
164
|
+
op.create_index("idx_memory_units_bank_date", "memory_units", ["bank_id", sa.text("event_date DESC")])
|
|
165
|
+
op.create_index("idx_memory_units_access_count", "memory_units", [sa.text("access_count DESC")])
|
|
166
|
+
op.create_index("idx_memory_units_fact_type", "memory_units", ["fact_type"])
|
|
167
|
+
op.create_index("idx_memory_units_bank_fact_type", "memory_units", ["bank_id", "fact_type"])
|
|
168
|
+
op.create_index(
|
|
169
|
+
"idx_memory_units_bank_type_date", "memory_units", ["bank_id", "fact_type", sa.text("event_date DESC")]
|
|
170
|
+
)
|
|
171
|
+
op.create_index(
|
|
172
|
+
"idx_memory_units_opinion_confidence",
|
|
173
|
+
"memory_units",
|
|
174
|
+
["bank_id", sa.text("confidence_score DESC")],
|
|
175
|
+
postgresql_where=sa.text("fact_type = 'opinion'"),
|
|
176
|
+
)
|
|
177
|
+
op.create_index(
|
|
178
|
+
"idx_memory_units_opinion_date",
|
|
179
|
+
"memory_units",
|
|
180
|
+
["bank_id", sa.text("event_date DESC")],
|
|
181
|
+
postgresql_where=sa.text("fact_type = 'opinion'"),
|
|
182
|
+
)
|
|
183
|
+
op.create_index(
|
|
184
|
+
"idx_memory_units_observation_date",
|
|
185
|
+
"memory_units",
|
|
186
|
+
["bank_id", sa.text("event_date DESC")],
|
|
187
|
+
postgresql_where=sa.text("fact_type = 'observation'"),
|
|
188
|
+
)
|
|
189
|
+
op.create_index(
|
|
190
|
+
"idx_memory_units_embedding",
|
|
191
|
+
"memory_units",
|
|
192
|
+
["embedding"],
|
|
193
|
+
postgresql_using="hnsw",
|
|
194
|
+
postgresql_ops={"embedding": "vector_cosine_ops"},
|
|
195
|
+
)
|
|
143
196
|
|
|
144
197
|
# Create BM25 full-text search index on search_vector
|
|
145
198
|
op.execute("""
|
|
@@ -158,116 +211,149 @@ def upgrade() -> None:
|
|
|
158
211
|
FROM memory_units
|
|
159
212
|
""")
|
|
160
213
|
|
|
161
|
-
op.create_index(
|
|
162
|
-
op.create_index(
|
|
214
|
+
op.create_index("idx_memory_units_bm25_bank", "memory_units_bm25", ["bank_id"])
|
|
215
|
+
op.create_index("idx_memory_units_bm25_text_vector", "memory_units_bm25", ["text_vector"], postgresql_using="gin")
|
|
163
216
|
|
|
164
217
|
# Create entity_cooccurrences table
|
|
165
218
|
op.create_table(
|
|
166
|
-
|
|
167
|
-
sa.Column(
|
|
168
|
-
sa.Column(
|
|
169
|
-
sa.Column(
|
|
170
|
-
sa.Column(
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
sa.
|
|
174
|
-
|
|
219
|
+
"entity_cooccurrences",
|
|
220
|
+
sa.Column("entity_id_1", postgresql.UUID(as_uuid=True), nullable=False),
|
|
221
|
+
sa.Column("entity_id_2", postgresql.UUID(as_uuid=True), nullable=False),
|
|
222
|
+
sa.Column("cooccurrence_count", sa.Integer(), server_default="1", nullable=False),
|
|
223
|
+
sa.Column(
|
|
224
|
+
"last_cooccurred", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False
|
|
225
|
+
),
|
|
226
|
+
sa.ForeignKeyConstraint(
|
|
227
|
+
["entity_id_1"],
|
|
228
|
+
["entities.id"],
|
|
229
|
+
name=op.f("fk_entity_cooccurrences_entity_id_1_entities"),
|
|
230
|
+
ondelete="CASCADE",
|
|
231
|
+
),
|
|
232
|
+
sa.ForeignKeyConstraint(
|
|
233
|
+
["entity_id_2"],
|
|
234
|
+
["entities.id"],
|
|
235
|
+
name=op.f("fk_entity_cooccurrences_entity_id_2_entities"),
|
|
236
|
+
ondelete="CASCADE",
|
|
237
|
+
),
|
|
238
|
+
sa.PrimaryKeyConstraint("entity_id_1", "entity_id_2", name=op.f("pk_entity_cooccurrences")),
|
|
239
|
+
sa.CheckConstraint("entity_id_1 < entity_id_2", name="entity_cooccurrence_order_check"),
|
|
175
240
|
)
|
|
176
|
-
op.create_index(
|
|
177
|
-
op.create_index(
|
|
178
|
-
op.create_index(
|
|
241
|
+
op.create_index("idx_entity_cooccurrences_entity1", "entity_cooccurrences", ["entity_id_1"])
|
|
242
|
+
op.create_index("idx_entity_cooccurrences_entity2", "entity_cooccurrences", ["entity_id_2"])
|
|
243
|
+
op.create_index("idx_entity_cooccurrences_count", "entity_cooccurrences", [sa.text("cooccurrence_count DESC")])
|
|
179
244
|
|
|
180
245
|
# Create memory_links table
|
|
181
246
|
op.create_table(
|
|
182
|
-
|
|
183
|
-
sa.Column(
|
|
184
|
-
sa.Column(
|
|
185
|
-
sa.Column(
|
|
186
|
-
sa.Column(
|
|
187
|
-
sa.Column(
|
|
188
|
-
sa.Column(
|
|
189
|
-
sa.ForeignKeyConstraint(
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
sa.
|
|
193
|
-
|
|
247
|
+
"memory_links",
|
|
248
|
+
sa.Column("from_unit_id", postgresql.UUID(as_uuid=True), nullable=False),
|
|
249
|
+
sa.Column("to_unit_id", postgresql.UUID(as_uuid=True), nullable=False),
|
|
250
|
+
sa.Column("link_type", sa.Text(), nullable=False),
|
|
251
|
+
sa.Column("entity_id", postgresql.UUID(as_uuid=True), nullable=True),
|
|
252
|
+
sa.Column("weight", sa.Float(), server_default="1.0", nullable=False),
|
|
253
|
+
sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
|
|
254
|
+
sa.ForeignKeyConstraint(
|
|
255
|
+
["entity_id"], ["entities.id"], name=op.f("fk_memory_links_entity_id_entities"), ondelete="CASCADE"
|
|
256
|
+
),
|
|
257
|
+
sa.ForeignKeyConstraint(
|
|
258
|
+
["from_unit_id"],
|
|
259
|
+
["memory_units.id"],
|
|
260
|
+
name=op.f("fk_memory_links_from_unit_id_memory_units"),
|
|
261
|
+
ondelete="CASCADE",
|
|
262
|
+
),
|
|
263
|
+
sa.ForeignKeyConstraint(
|
|
264
|
+
["to_unit_id"],
|
|
265
|
+
["memory_units.id"],
|
|
266
|
+
name=op.f("fk_memory_links_to_unit_id_memory_units"),
|
|
267
|
+
ondelete="CASCADE",
|
|
268
|
+
),
|
|
269
|
+
sa.CheckConstraint(
|
|
270
|
+
"link_type IN ('temporal', 'semantic', 'entity', 'causes', 'caused_by', 'enables', 'prevents')",
|
|
271
|
+
name="memory_links_link_type_check",
|
|
272
|
+
),
|
|
273
|
+
sa.CheckConstraint("weight >= 0.0 AND weight <= 1.0", name="memory_links_weight_check"),
|
|
194
274
|
)
|
|
195
275
|
# Create unique constraint using COALESCE for nullable entity_id
|
|
196
|
-
op.execute(
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
op.create_index(
|
|
200
|
-
op.create_index(
|
|
276
|
+
op.execute(
|
|
277
|
+
"CREATE UNIQUE INDEX idx_memory_links_unique ON memory_links (from_unit_id, to_unit_id, link_type, COALESCE(entity_id, '00000000-0000-0000-0000-000000000000'::uuid))"
|
|
278
|
+
)
|
|
279
|
+
op.create_index("idx_memory_links_from_unit", "memory_links", ["from_unit_id"])
|
|
280
|
+
op.create_index("idx_memory_links_to_unit", "memory_links", ["to_unit_id"])
|
|
281
|
+
op.create_index("idx_memory_links_entity", "memory_links", ["entity_id"])
|
|
282
|
+
op.create_index("idx_memory_links_link_type", "memory_links", ["link_type"])
|
|
201
283
|
|
|
202
284
|
# Create unit_entities table
|
|
203
285
|
op.create_table(
|
|
204
|
-
|
|
205
|
-
sa.Column(
|
|
206
|
-
sa.Column(
|
|
207
|
-
sa.ForeignKeyConstraint(
|
|
208
|
-
|
|
209
|
-
|
|
286
|
+
"unit_entities",
|
|
287
|
+
sa.Column("unit_id", postgresql.UUID(as_uuid=True), nullable=False),
|
|
288
|
+
sa.Column("entity_id", postgresql.UUID(as_uuid=True), nullable=False),
|
|
289
|
+
sa.ForeignKeyConstraint(
|
|
290
|
+
["entity_id"], ["entities.id"], name=op.f("fk_unit_entities_entity_id_entities"), ondelete="CASCADE"
|
|
291
|
+
),
|
|
292
|
+
sa.ForeignKeyConstraint(
|
|
293
|
+
["unit_id"], ["memory_units.id"], name=op.f("fk_unit_entities_unit_id_memory_units"), ondelete="CASCADE"
|
|
294
|
+
),
|
|
295
|
+
sa.PrimaryKeyConstraint("unit_id", "entity_id", name=op.f("pk_unit_entities")),
|
|
210
296
|
)
|
|
211
|
-
op.create_index(
|
|
212
|
-
op.create_index(
|
|
297
|
+
op.create_index("idx_unit_entities_unit", "unit_entities", ["unit_id"])
|
|
298
|
+
op.create_index("idx_unit_entities_entity", "unit_entities", ["entity_id"])
|
|
213
299
|
|
|
214
300
|
|
|
215
301
|
def downgrade() -> None:
|
|
216
302
|
"""Downgrade schema - drop all tables."""
|
|
217
303
|
|
|
218
304
|
# Drop tables in reverse dependency order
|
|
219
|
-
op.drop_index(
|
|
220
|
-
op.drop_index(
|
|
221
|
-
op.drop_table(
|
|
222
|
-
|
|
223
|
-
op.drop_index(
|
|
224
|
-
op.drop_index(
|
|
225
|
-
op.drop_index(
|
|
226
|
-
op.drop_index(
|
|
227
|
-
op.execute(
|
|
228
|
-
op.drop_table(
|
|
229
|
-
|
|
230
|
-
op.drop_index(
|
|
231
|
-
op.drop_index(
|
|
232
|
-
op.drop_index(
|
|
233
|
-
op.drop_table(
|
|
305
|
+
op.drop_index("idx_unit_entities_entity", table_name="unit_entities")
|
|
306
|
+
op.drop_index("idx_unit_entities_unit", table_name="unit_entities")
|
|
307
|
+
op.drop_table("unit_entities")
|
|
308
|
+
|
|
309
|
+
op.drop_index("idx_memory_links_link_type", table_name="memory_links")
|
|
310
|
+
op.drop_index("idx_memory_links_entity", table_name="memory_links")
|
|
311
|
+
op.drop_index("idx_memory_links_to_unit", table_name="memory_links")
|
|
312
|
+
op.drop_index("idx_memory_links_from_unit", table_name="memory_links")
|
|
313
|
+
op.execute("DROP INDEX IF EXISTS idx_memory_links_unique")
|
|
314
|
+
op.drop_table("memory_links")
|
|
315
|
+
|
|
316
|
+
op.drop_index("idx_entity_cooccurrences_count", table_name="entity_cooccurrences")
|
|
317
|
+
op.drop_index("idx_entity_cooccurrences_entity2", table_name="entity_cooccurrences")
|
|
318
|
+
op.drop_index("idx_entity_cooccurrences_entity1", table_name="entity_cooccurrences")
|
|
319
|
+
op.drop_table("entity_cooccurrences")
|
|
234
320
|
|
|
235
321
|
# Drop BM25 materialized view and index
|
|
236
|
-
op.drop_index(
|
|
237
|
-
op.drop_index(
|
|
238
|
-
op.execute(
|
|
239
|
-
|
|
240
|
-
op.drop_index(
|
|
241
|
-
op.drop_index(
|
|
242
|
-
op.drop_index(
|
|
243
|
-
op.drop_index(
|
|
244
|
-
op.drop_index(
|
|
245
|
-
op.drop_index(
|
|
246
|
-
op.drop_index(
|
|
247
|
-
op.drop_index(
|
|
248
|
-
op.drop_index(
|
|
249
|
-
op.drop_index(
|
|
250
|
-
op.drop_index(
|
|
251
|
-
op.drop_index(
|
|
252
|
-
op.execute(
|
|
253
|
-
op.drop_table(
|
|
254
|
-
|
|
255
|
-
op.execute(
|
|
256
|
-
op.drop_index(
|
|
257
|
-
op.drop_index(
|
|
258
|
-
op.drop_index(
|
|
259
|
-
op.drop_table(
|
|
260
|
-
|
|
261
|
-
op.drop_index(
|
|
262
|
-
op.drop_index(
|
|
263
|
-
op.drop_index(
|
|
264
|
-
op.drop_table(
|
|
265
|
-
|
|
266
|
-
op.drop_index(
|
|
267
|
-
op.drop_index(
|
|
268
|
-
op.drop_table(
|
|
269
|
-
|
|
270
|
-
op.drop_table(
|
|
322
|
+
op.drop_index("idx_memory_units_bm25_text_vector", table_name="memory_units_bm25")
|
|
323
|
+
op.drop_index("idx_memory_units_bm25_bank", table_name="memory_units_bm25")
|
|
324
|
+
op.execute("DROP MATERIALIZED VIEW IF EXISTS memory_units_bm25")
|
|
325
|
+
|
|
326
|
+
op.drop_index("idx_memory_units_embedding", table_name="memory_units")
|
|
327
|
+
op.drop_index("idx_memory_units_observation_date", table_name="memory_units")
|
|
328
|
+
op.drop_index("idx_memory_units_opinion_date", table_name="memory_units")
|
|
329
|
+
op.drop_index("idx_memory_units_opinion_confidence", table_name="memory_units")
|
|
330
|
+
op.drop_index("idx_memory_units_bank_type_date", table_name="memory_units")
|
|
331
|
+
op.drop_index("idx_memory_units_bank_fact_type", table_name="memory_units")
|
|
332
|
+
op.drop_index("idx_memory_units_fact_type", table_name="memory_units")
|
|
333
|
+
op.drop_index("idx_memory_units_access_count", table_name="memory_units")
|
|
334
|
+
op.drop_index("idx_memory_units_bank_date", table_name="memory_units")
|
|
335
|
+
op.drop_index("idx_memory_units_event_date", table_name="memory_units")
|
|
336
|
+
op.drop_index("idx_memory_units_document_id", table_name="memory_units")
|
|
337
|
+
op.drop_index("idx_memory_units_bank_id", table_name="memory_units")
|
|
338
|
+
op.execute("DROP INDEX IF EXISTS idx_memory_units_text_search")
|
|
339
|
+
op.drop_table("memory_units")
|
|
340
|
+
|
|
341
|
+
op.execute("DROP INDEX IF EXISTS idx_entities_bank_lower_name")
|
|
342
|
+
op.drop_index("idx_entities_bank_name", table_name="entities")
|
|
343
|
+
op.drop_index("idx_entities_canonical_name", table_name="entities")
|
|
344
|
+
op.drop_index("idx_entities_bank_id", table_name="entities")
|
|
345
|
+
op.drop_table("entities")
|
|
346
|
+
|
|
347
|
+
op.drop_index("idx_async_operations_bank_status", table_name="async_operations")
|
|
348
|
+
op.drop_index("idx_async_operations_status", table_name="async_operations")
|
|
349
|
+
op.drop_index("idx_async_operations_bank_id", table_name="async_operations")
|
|
350
|
+
op.drop_table("async_operations")
|
|
351
|
+
|
|
352
|
+
op.drop_index("idx_documents_content_hash", table_name="documents")
|
|
353
|
+
op.drop_index("idx_documents_bank_id", table_name="documents")
|
|
354
|
+
op.drop_table("documents")
|
|
355
|
+
|
|
356
|
+
op.drop_table("banks")
|
|
271
357
|
|
|
272
358
|
# Drop extensions (optional - comment out if you want to keep them)
|
|
273
359
|
# op.execute('DROP EXTENSION IF EXISTS vector')
|