hindsight-api 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package as they appear in their public registry. It is provided for informational purposes only.
Files changed (32)
  1. hindsight_api/__init__.py +10 -2
  2. hindsight_api/alembic/README +1 -0
  3. hindsight_api/alembic/env.py +146 -0
  4. hindsight_api/alembic/script.py.mako +28 -0
  5. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +274 -0
  6. hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
  7. hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +39 -0
  8. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +48 -0
  9. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +62 -0
  10. hindsight_api/alembic/versions/rename_personality_to_disposition.py +65 -0
  11. hindsight_api/api/http.py +84 -86
  12. hindsight_api/config.py +154 -0
  13. hindsight_api/engine/__init__.py +7 -2
  14. hindsight_api/engine/cross_encoder.py +219 -15
  15. hindsight_api/engine/embeddings.py +192 -18
  16. hindsight_api/engine/llm_wrapper.py +88 -139
  17. hindsight_api/engine/memory_engine.py +71 -51
  18. hindsight_api/engine/retain/bank_utils.py +2 -2
  19. hindsight_api/engine/retain/fact_extraction.py +1 -1
  20. hindsight_api/engine/search/reranking.py +6 -10
  21. hindsight_api/engine/search/tracer.py +1 -1
  22. hindsight_api/main.py +201 -0
  23. hindsight_api/migrations.py +7 -7
  24. hindsight_api/server.py +43 -0
  25. {hindsight_api-0.1.0.dist-info → hindsight_api-0.1.1.dist-info}/METADATA +1 -1
  26. {hindsight_api-0.1.0.dist-info → hindsight_api-0.1.1.dist-info}/RECORD +28 -19
  27. hindsight_api-0.1.1.dist-info/entry_points.txt +2 -0
  28. hindsight_api/cli.py +0 -127
  29. hindsight_api/web/__init__.py +0 -12
  30. hindsight_api/web/server.py +0 -109
  31. hindsight_api-0.1.0.dist-info/entry_points.txt +0 -2
  32. {hindsight_api-0.1.0.dist-info → hindsight_api-0.1.1.dist-info}/WHEEL +0 -0
hindsight_api/__init__.py CHANGED
@@ -16,11 +16,15 @@ from .engine.search.trace import (
      SearchPhaseMetrics,
  )
  from .engine.search.tracer import SearchTracer
- from .engine.embeddings import Embeddings, SentenceTransformersEmbeddings
+ from .engine.embeddings import Embeddings, LocalSTEmbeddings, RemoteTEIEmbeddings
+ from .engine.cross_encoder import CrossEncoderModel, LocalSTCrossEncoder, RemoteTEICrossEncoder
  from .engine.llm_wrapper import LLMConfig
+ from .config import HindsightConfig, get_config

  __all__ = [
      "MemoryEngine",
+     "HindsightConfig",
+     "get_config",
      "SearchTrace",
      "SearchTracer",
      "QueryInfo",
@@ -32,7 +36,11 @@ __all__ = [
      "SearchSummary",
      "SearchPhaseMetrics",
      "Embeddings",
-     "SentenceTransformersEmbeddings",
+     "LocalSTEmbeddings",
+     "RemoteTEIEmbeddings",
+     "CrossEncoderModel",
+     "LocalSTCrossEncoder",
+     "RemoteTEICrossEncoder",
      "LLMConfig",
  ]
  __version__ = "0.1.0"
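
The __init__.py hunks above amount to a public-API rename: the single SentenceTransformersEmbeddings export is replaced by backend-specific embedding and cross-encoder classes, and the new config module is re-exported. Code that imported the old name from 0.1.0 will fail with ImportError against 0.1.1. A minimal sketch of imports against 0.1.1 follows; the names are taken from the __all__ list above, and nothing beyond the import itself (no constructor signatures) is assumed from this diff:

# hindsight-api 0.1.0:
# from hindsight_api import SentenceTransformersEmbeddings

# hindsight-api 0.1.1 (names from the updated __all__; illustrative only):
from hindsight_api import (
    Embeddings,             # still exported
    LocalSTEmbeddings,      # local sentence-transformers backend, per the name
    RemoteTEIEmbeddings,    # remote text-embeddings-inference backend, per the name
    CrossEncoderModel,
    LocalSTCrossEncoder,
    RemoteTEICrossEncoder,
    HindsightConfig,
    get_config,
)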
hindsight_api/alembic/README ADDED
@@ -0,0 +1 @@
+ Generic single-database configuration.
hindsight_api/alembic/env.py ADDED
@@ -0,0 +1,146 @@
+ """
+ Alembic environment configuration for SQLAlchemy with pgvector.
+ Uses synchronous psycopg2 driver for migrations to avoid pgbouncer issues.
+ """
+ import logging
+ import os
+ import sys
+ from pathlib import Path
+
+ from sqlalchemy import pool, engine_from_config
+ from sqlalchemy.engine import Connection
+
+ from alembic import context
+ from dotenv import load_dotenv
+
+ # Import your models here
+ from hindsight_api.models import Base
+
+ # Load environment variables based on HINDSIGHT_API_DATABASE_URL env var or default to local
+ def load_env():
+     """Load environment variables from .env"""
+     # Check if HINDSIGHT_API_DATABASE_URL is already set (e.g., by CI/CD)
+     if os.getenv("HINDSIGHT_API_DATABASE_URL"):
+         return
+
+     # Look for .env file in the parent directory (root of the workspace)
+     root_dir = Path(__file__).parent.parent.parent
+     env_file = root_dir / ".env"
+
+     if env_file.exists():
+         load_dotenv(env_file)
+
+ load_env()
+
+ # this is the Alembic Config object, which provides
+ # access to the values within the .ini file in use.
+ config = context.config
+
+ # Note: We don't call fileConfig() here to avoid overriding the application's logging configuration.
+ # Alembic will use the existing logging configuration from the application.
+
+ # add your model's MetaData object here
+ # for 'autogenerate' support
+ target_metadata = Base.metadata
+
+ # other values from the config, defined by the needs of env.py,
+ # can be acquired:
+ # my_important_option = config.get_main_option("my_important_option")
+ # ... etc.
+
+
+ def get_database_url() -> str:
+     """
+     Get and process the database URL from config or environment.
+
+     Returns the URL with the correct driver (psycopg2) for migrations.
+     """
+     # Get database URL from config (set programmatically) or environment
+     database_url = config.get_main_option("sqlalchemy.url")
+     if not database_url:
+         database_url = os.getenv("HINDSIGHT_API_DATABASE_URL")
+     if not database_url:
+         raise ValueError(
+             "Database URL not found. "
+             "Set HINDSIGHT_API_DATABASE_URL environment variable or pass database_url to run_migrations()."
+         )
+
+     # For migrations, use psycopg2 (sync driver) to avoid pgbouncer prepared statement issues
+     if database_url.startswith("postgresql+asyncpg://"):
+         database_url = database_url.replace("postgresql+asyncpg://", "postgresql://", 1)
+     elif database_url.startswith("postgres+asyncpg://"):
+         database_url = database_url.replace("postgres+asyncpg://", "postgresql://", 1)
+
+     # Update config with processed URL for engine_from_config to use
+     config.set_main_option("sqlalchemy.url", database_url)
+
+     return database_url
+
+
+ def run_migrations_offline() -> None:
+     """Run migrations in 'offline' mode.
+
+     This configures the context with just a URL
+     and not an Engine, though an Engine is acceptable
+     here as well. By skipping the Engine creation
+     we don't even need a DBAPI to be available.
+
+     Calls to context.execute() here emit the given string to the
+     script output.
+
+     """
+     logging.info("running offline")
+     database_url = get_database_url()
+
+     context.configure(
+         url=database_url,
+         target_metadata=target_metadata,
+         literal_binds=True,
+         dialect_opts={"paramstyle": "named"},
+     )
+
+     with context.begin_transaction():
+         context.run_migrations()
+
+
+ def run_migrations_online() -> None:
+     """Run migrations in 'online' mode with synchronous engine."""
+     from sqlalchemy import event, text
+
+     get_database_url()  # Process and set the database URL in config
+
+     connectable = engine_from_config(
+         config.get_section(config.config_ini_section, {}),
+         prefix="sqlalchemy.",
+         poolclass=pool.NullPool,
+     )
+
+     # Add event listener to ensure connection is in read-write mode
+     # This is needed for Supabase which may start connections in read-only mode
+     @event.listens_for(connectable, "connect")
+     def set_read_write_mode(dbapi_connection, connection_record):
+         cursor = dbapi_connection.cursor()
+         cursor.execute("SET SESSION CHARACTERISTICS AS TRANSACTION READ WRITE")
+         cursor.close()
+
+     with connectable.connect() as connection:
+         # Also explicitly set read-write mode on this connection
+         connection.execute(text("SET SESSION CHARACTERISTICS AS TRANSACTION READ WRITE"))
+         connection.commit()  # Commit the SET command
+
+         context.configure(
+             connection=connection,
+             target_metadata=target_metadata
+         )
+
+         with context.begin_transaction():
+             context.run_migrations()
+
+         # Explicit commit to ensure changes are persisted (especially for Supabase)
+         connection.commit()
+
+
+ if context.is_offline_mode():
+     run_migrations_offline()
+ else:
+     run_migrations_online()
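
get_database_url() normalizes the URL scheme before Alembic builds its engine, so the application can keep an asyncpg URL while migrations run on the synchronous psycopg2 driver. The snippet below is not part of the package; it is a standalone sketch of the same prefix rewrite, with a hypothetical URL used only to illustrate the scheme change:

def to_sync_url(database_url: str) -> str:
    """Rewrite an async SQLAlchemy URL to the sync psycopg2 form, mirroring env.py."""
    if database_url.startswith("postgresql+asyncpg://"):
        return database_url.replace("postgresql+asyncpg://", "postgresql://", 1)
    if database_url.startswith("postgres+asyncpg://"):
        return database_url.replace("postgres+asyncpg://", "postgresql://", 1)
    return database_url


# Hypothetical URL, shown only to illustrate the transformation:
print(to_sync_url("postgresql+asyncpg://user:secret@localhost:5432/hindsight"))
# -> postgresql://user:secret@localhost:5432/hindsight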
hindsight_api/alembic/script.py.mako ADDED
@@ -0,0 +1,28 @@
+ """${message}
+
+ Revision ID: ${up_revision}
+ Revises: ${down_revision | comma,n}
+ Create Date: ${create_date}
+
+ """
+ from typing import Sequence, Union
+
+ from alembic import op
+ import sqlalchemy as sa
+ ${imports if imports else ""}
+
+ # revision identifiers, used by Alembic.
+ revision: str = ${repr(up_revision)}
+ down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
+ branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+ depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+ def upgrade() -> None:
+     """Upgrade schema."""
+     ${upgrades if upgrades else "pass"}
+
+
+ def downgrade() -> None:
+     """Downgrade schema."""
+     ${downgrades if downgrades else "pass"}
hindsight_api/alembic/versions/5a366d414dce_initial_schema.py ADDED
@@ -0,0 +1,274 @@
+ """initial_schema
+
+ Revision ID: 5a366d414dce
+ Revises:
+ Create Date: 2025-11-27 11:54:19.228030
+
+ """
+ from typing import Sequence, Union
+
+ from alembic import op
+ import sqlalchemy as sa
+ from sqlalchemy.dialects import postgresql
+ from pgvector.sqlalchemy import Vector
+
+
+ # revision identifiers, used by Alembic.
+ revision: str = '5a366d414dce'
+ down_revision: Union[str, Sequence[str], None] = None
+ branch_labels: Union[str, Sequence[str], None] = None
+ depends_on: Union[str, Sequence[str], None] = None
+
+
+ def upgrade() -> None:
+     """Upgrade schema - create all tables from scratch."""
+
+     # Enable required extensions
+     op.execute('CREATE EXTENSION IF NOT EXISTS vector')
+
+     # Create banks table
+     op.create_table(
+         'banks',
+         sa.Column('bank_id', sa.Text(), nullable=False),
+         sa.Column('name', sa.Text(), nullable=True),
+         sa.Column('personality', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
+         sa.Column('background', sa.Text(), nullable=True),
+         sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.PrimaryKeyConstraint('bank_id', name=op.f('pk_banks'))
+     )
+
+     # Create documents table
+     op.create_table(
+         'documents',
+         sa.Column('id', sa.Text(), nullable=False),
+         sa.Column('bank_id', sa.Text(), nullable=False),
+         sa.Column('original_text', sa.Text(), nullable=True),
+         sa.Column('content_hash', sa.Text(), nullable=True),
+         sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
+         sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.PrimaryKeyConstraint('id', 'bank_id', name=op.f('pk_documents'))
+     )
+     op.create_index('idx_documents_bank_id', 'documents', ['bank_id'])
+     op.create_index('idx_documents_content_hash', 'documents', ['content_hash'])
+
+     # Create async_operations table
+     op.create_table(
+         'async_operations',
+         sa.Column('operation_id', postgresql.UUID(as_uuid=True), server_default=sa.text('gen_random_uuid()'), nullable=False),
+         sa.Column('bank_id', sa.Text(), nullable=False),
+         sa.Column('operation_type', sa.Text(), nullable=False),
+         sa.Column('status', sa.Text(), server_default='pending', nullable=False),
+         sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('completed_at', postgresql.TIMESTAMP(timezone=True), nullable=True),
+         sa.Column('error_message', sa.Text(), nullable=True),
+         sa.Column('result_metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
+         sa.PrimaryKeyConstraint('operation_id', name=op.f('pk_async_operations')),
+         sa.CheckConstraint("status IN ('pending', 'processing', 'completed', 'failed')", name='async_operations_status_check')
+     )
+     op.create_index('idx_async_operations_bank_id', 'async_operations', ['bank_id'])
+     op.create_index('idx_async_operations_status', 'async_operations', ['status'])
+     op.create_index('idx_async_operations_bank_status', 'async_operations', ['bank_id', 'status'])
+
+     # Create entities table
+     op.create_table(
+         'entities',
+         sa.Column('id', postgresql.UUID(as_uuid=True), server_default=sa.text('gen_random_uuid()'), nullable=False),
+         sa.Column('canonical_name', sa.Text(), nullable=False),
+         sa.Column('bank_id', sa.Text(), nullable=False),
+         sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
+         sa.Column('first_seen', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('last_seen', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('mention_count', sa.Integer(), server_default='1', nullable=False),
+         sa.PrimaryKeyConstraint('id', name=op.f('pk_entities'))
+     )
+     op.create_index('idx_entities_bank_id', 'entities', ['bank_id'])
+     op.create_index('idx_entities_canonical_name', 'entities', ['canonical_name'])
+     op.create_index('idx_entities_bank_name', 'entities', ['bank_id', 'canonical_name'])
+     # Create unique index on (bank_id, LOWER(canonical_name)) for entity resolution
+     op.execute('CREATE UNIQUE INDEX idx_entities_bank_lower_name ON entities (bank_id, LOWER(canonical_name))')
+
+     # Create memory_units table
+     op.create_table(
+         'memory_units',
+         sa.Column('id', postgresql.UUID(as_uuid=True), server_default=sa.text('gen_random_uuid()'), nullable=False),
+         sa.Column('bank_id', sa.Text(), nullable=False),
+         sa.Column('document_id', sa.Text(), nullable=True),
+         sa.Column('text', sa.Text(), nullable=False),
+         sa.Column('embedding', Vector(384), nullable=True),
+         sa.Column('context', sa.Text(), nullable=True),
+         sa.Column('event_date', postgresql.TIMESTAMP(timezone=True), nullable=False),
+         sa.Column('occurred_start', postgresql.TIMESTAMP(timezone=True), nullable=True),
+         sa.Column('occurred_end', postgresql.TIMESTAMP(timezone=True), nullable=True),
+         sa.Column('mentioned_at', postgresql.TIMESTAMP(timezone=True), nullable=True),
+         sa.Column('fact_type', sa.Text(), server_default='world', nullable=False),
+         sa.Column('confidence_score', sa.Float(), nullable=True),
+         sa.Column('access_count', sa.Integer(), server_default='0', nullable=False),
+         sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
+         sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.ForeignKeyConstraint(['document_id', 'bank_id'], ['documents.id', 'documents.bank_id'], name='memory_units_document_fkey', ondelete='CASCADE'),
+         sa.PrimaryKeyConstraint('id', name=op.f('pk_memory_units')),
+         sa.CheckConstraint("fact_type IN ('world', 'bank', 'opinion', 'observation')", name='memory_units_fact_type_check'),
+         sa.CheckConstraint("confidence_score IS NULL OR (confidence_score >= 0.0 AND confidence_score <= 1.0)", name='memory_units_confidence_range_check'),
+         sa.CheckConstraint(
+             "(fact_type = 'opinion' AND confidence_score IS NOT NULL) OR "
+             "(fact_type = 'observation') OR "
+             "(fact_type NOT IN ('opinion', 'observation') AND confidence_score IS NULL)",
+             name='confidence_score_fact_type_check'
+         )
+     )
+
+     # Add search_vector column for full-text search
+     op.execute("""
+         ALTER TABLE memory_units
+         ADD COLUMN search_vector tsvector
+         GENERATED ALWAYS AS (to_tsvector('english', COALESCE(text, '') || ' ' || COALESCE(context, ''))) STORED
+     """)
+
+     op.create_index('idx_memory_units_bank_id', 'memory_units', ['bank_id'])
+     op.create_index('idx_memory_units_document_id', 'memory_units', ['document_id'])
+     op.create_index('idx_memory_units_event_date', 'memory_units', [sa.text('event_date DESC')])
+     op.create_index('idx_memory_units_bank_date', 'memory_units', ['bank_id', sa.text('event_date DESC')])
+     op.create_index('idx_memory_units_access_count', 'memory_units', [sa.text('access_count DESC')])
+     op.create_index('idx_memory_units_fact_type', 'memory_units', ['fact_type'])
+     op.create_index('idx_memory_units_bank_fact_type', 'memory_units', ['bank_id', 'fact_type'])
+     op.create_index('idx_memory_units_bank_type_date', 'memory_units', ['bank_id', 'fact_type', sa.text('event_date DESC')])
+     op.create_index('idx_memory_units_opinion_confidence', 'memory_units', ['bank_id', sa.text('confidence_score DESC')], postgresql_where=sa.text("fact_type = 'opinion'"))
+     op.create_index('idx_memory_units_opinion_date', 'memory_units', ['bank_id', sa.text('event_date DESC')], postgresql_where=sa.text("fact_type = 'opinion'"))
+     op.create_index('idx_memory_units_observation_date', 'memory_units', ['bank_id', sa.text('event_date DESC')], postgresql_where=sa.text("fact_type = 'observation'"))
+     op.create_index('idx_memory_units_embedding', 'memory_units', ['embedding'], postgresql_using='hnsw', postgresql_ops={'embedding': 'vector_cosine_ops'})
+
+     # Create BM25 full-text search index on search_vector
+     op.execute("""
+         CREATE INDEX idx_memory_units_text_search ON memory_units
+         USING gin(search_vector)
+     """)
+
+     op.execute("""
+         CREATE MATERIALIZED VIEW memory_units_bm25 AS
+         SELECT
+             id,
+             bank_id,
+             text,
+             to_tsvector('english', text) AS text_vector,
+             log(1.0 + length(text)::float / (SELECT avg(length(text)) FROM memory_units)) AS doc_length_factor
+         FROM memory_units
+     """)
+
+     op.create_index('idx_memory_units_bm25_bank', 'memory_units_bm25', ['bank_id'])
+     op.create_index('idx_memory_units_bm25_text_vector', 'memory_units_bm25', ['text_vector'], postgresql_using='gin')
+
+     # Create entity_cooccurrences table
+     op.create_table(
+         'entity_cooccurrences',
+         sa.Column('entity_id_1', postgresql.UUID(as_uuid=True), nullable=False),
+         sa.Column('entity_id_2', postgresql.UUID(as_uuid=True), nullable=False),
+         sa.Column('cooccurrence_count', sa.Integer(), server_default='1', nullable=False),
+         sa.Column('last_cooccurred', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.ForeignKeyConstraint(['entity_id_1'], ['entities.id'], name=op.f('fk_entity_cooccurrences_entity_id_1_entities'), ondelete='CASCADE'),
+         sa.ForeignKeyConstraint(['entity_id_2'], ['entities.id'], name=op.f('fk_entity_cooccurrences_entity_id_2_entities'), ondelete='CASCADE'),
+         sa.PrimaryKeyConstraint('entity_id_1', 'entity_id_2', name=op.f('pk_entity_cooccurrences')),
+         sa.CheckConstraint('entity_id_1 < entity_id_2', name='entity_cooccurrence_order_check')
+     )
+     op.create_index('idx_entity_cooccurrences_entity1', 'entity_cooccurrences', ['entity_id_1'])
+     op.create_index('idx_entity_cooccurrences_entity2', 'entity_cooccurrences', ['entity_id_2'])
+     op.create_index('idx_entity_cooccurrences_count', 'entity_cooccurrences', [sa.text('cooccurrence_count DESC')])
+
+     # Create memory_links table
+     op.create_table(
+         'memory_links',
+         sa.Column('from_unit_id', postgresql.UUID(as_uuid=True), nullable=False),
+         sa.Column('to_unit_id', postgresql.UUID(as_uuid=True), nullable=False),
+         sa.Column('link_type', sa.Text(), nullable=False),
+         sa.Column('entity_id', postgresql.UUID(as_uuid=True), nullable=True),
+         sa.Column('weight', sa.Float(), server_default='1.0', nullable=False),
+         sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.ForeignKeyConstraint(['entity_id'], ['entities.id'], name=op.f('fk_memory_links_entity_id_entities'), ondelete='CASCADE'),
+         sa.ForeignKeyConstraint(['from_unit_id'], ['memory_units.id'], name=op.f('fk_memory_links_from_unit_id_memory_units'), ondelete='CASCADE'),
+         sa.ForeignKeyConstraint(['to_unit_id'], ['memory_units.id'], name=op.f('fk_memory_links_to_unit_id_memory_units'), ondelete='CASCADE'),
+         sa.CheckConstraint("link_type IN ('temporal', 'semantic', 'entity', 'causes', 'caused_by', 'enables', 'prevents')", name='memory_links_link_type_check'),
+         sa.CheckConstraint('weight >= 0.0 AND weight <= 1.0', name='memory_links_weight_check')
+     )
+     # Create unique constraint using COALESCE for nullable entity_id
+     op.execute("CREATE UNIQUE INDEX idx_memory_links_unique ON memory_links (from_unit_id, to_unit_id, link_type, COALESCE(entity_id, '00000000-0000-0000-0000-000000000000'::uuid))")
+     op.create_index('idx_memory_links_from_unit', 'memory_links', ['from_unit_id'])
+     op.create_index('idx_memory_links_to_unit', 'memory_links', ['to_unit_id'])
+     op.create_index('idx_memory_links_entity', 'memory_links', ['entity_id'])
+     op.create_index('idx_memory_links_link_type', 'memory_links', ['link_type'])
+
+     # Create unit_entities table
+     op.create_table(
+         'unit_entities',
+         sa.Column('unit_id', postgresql.UUID(as_uuid=True), nullable=False),
+         sa.Column('entity_id', postgresql.UUID(as_uuid=True), nullable=False),
+         sa.ForeignKeyConstraint(['entity_id'], ['entities.id'], name=op.f('fk_unit_entities_entity_id_entities'), ondelete='CASCADE'),
+         sa.ForeignKeyConstraint(['unit_id'], ['memory_units.id'], name=op.f('fk_unit_entities_unit_id_memory_units'), ondelete='CASCADE'),
+         sa.PrimaryKeyConstraint('unit_id', 'entity_id', name=op.f('pk_unit_entities'))
+     )
+     op.create_index('idx_unit_entities_unit', 'unit_entities', ['unit_id'])
+     op.create_index('idx_unit_entities_entity', 'unit_entities', ['entity_id'])
+
+
+ def downgrade() -> None:
+     """Downgrade schema - drop all tables."""
+
+     # Drop tables in reverse dependency order
+     op.drop_index('idx_unit_entities_entity', table_name='unit_entities')
+     op.drop_index('idx_unit_entities_unit', table_name='unit_entities')
+     op.drop_table('unit_entities')
+
+     op.drop_index('idx_memory_links_link_type', table_name='memory_links')
+     op.drop_index('idx_memory_links_entity', table_name='memory_links')
+     op.drop_index('idx_memory_links_to_unit', table_name='memory_links')
+     op.drop_index('idx_memory_links_from_unit', table_name='memory_links')
+     op.execute('DROP INDEX IF EXISTS idx_memory_links_unique')
+     op.drop_table('memory_links')
+
+     op.drop_index('idx_entity_cooccurrences_count', table_name='entity_cooccurrences')
+     op.drop_index('idx_entity_cooccurrences_entity2', table_name='entity_cooccurrences')
+     op.drop_index('idx_entity_cooccurrences_entity1', table_name='entity_cooccurrences')
+     op.drop_table('entity_cooccurrences')
+
+     # Drop BM25 materialized view and index
+     op.drop_index('idx_memory_units_bm25_text_vector', table_name='memory_units_bm25')
+     op.drop_index('idx_memory_units_bm25_bank', table_name='memory_units_bm25')
+     op.execute('DROP MATERIALIZED VIEW IF EXISTS memory_units_bm25')
+
+     op.drop_index('idx_memory_units_embedding', table_name='memory_units')
+     op.drop_index('idx_memory_units_observation_date', table_name='memory_units')
+     op.drop_index('idx_memory_units_opinion_date', table_name='memory_units')
+     op.drop_index('idx_memory_units_opinion_confidence', table_name='memory_units')
+     op.drop_index('idx_memory_units_bank_type_date', table_name='memory_units')
+     op.drop_index('idx_memory_units_bank_fact_type', table_name='memory_units')
+     op.drop_index('idx_memory_units_fact_type', table_name='memory_units')
+     op.drop_index('idx_memory_units_access_count', table_name='memory_units')
+     op.drop_index('idx_memory_units_bank_date', table_name='memory_units')
+     op.drop_index('idx_memory_units_event_date', table_name='memory_units')
+     op.drop_index('idx_memory_units_document_id', table_name='memory_units')
+     op.drop_index('idx_memory_units_bank_id', table_name='memory_units')
+     op.execute('DROP INDEX IF EXISTS idx_memory_units_text_search')
+     op.drop_table('memory_units')
+
+     op.execute('DROP INDEX IF EXISTS idx_entities_bank_lower_name')
+     op.drop_index('idx_entities_bank_name', table_name='entities')
+     op.drop_index('idx_entities_canonical_name', table_name='entities')
+     op.drop_index('idx_entities_bank_id', table_name='entities')
+     op.drop_table('entities')
+
+     op.drop_index('idx_async_operations_bank_status', table_name='async_operations')
+     op.drop_index('idx_async_operations_status', table_name='async_operations')
+     op.drop_index('idx_async_operations_bank_id', table_name='async_operations')
+     op.drop_table('async_operations')
+
+     op.drop_index('idx_documents_content_hash', table_name='documents')
+     op.drop_index('idx_documents_bank_id', table_name='documents')
+     op.drop_table('documents')
+
+     op.drop_table('banks')
+
+     # Drop extensions (optional - comment out if you want to keep them)
+     # op.execute('DROP EXTENSION IF EXISTS vector')
+     # op.execute('DROP EXTENSION IF EXISTS "uuid-ossp"')
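
The memory_units table created above carries two retrieval paths: an HNSW index over the 384-dimensional embedding column with vector_cosine_ops, and a GIN index over the generated search_vector column. The snippet below is not code from the package; it is a hedged sketch of queries that would exercise those indexes, with a placeholder query vector and bank_id:

from sqlalchemy import create_engine, text

# Hypothetical connection URL; any psycopg2-compatible URL would do.
engine = create_engine("postgresql://user:secret@localhost:5432/hindsight")

# Placeholder 384-dimensional vector in pgvector's text form; a real caller would
# pass the query embedding produced by the embeddings backend.
query_embedding = "[" + ",".join(["0.1"] * 384) + "]"

with engine.connect() as conn:
    # Vector search: '<=>' is pgvector's cosine-distance operator,
    # served by idx_memory_units_embedding.
    nearest = conn.execute(
        text(
            "SELECT id, text, embedding <=> CAST(:q AS vector) AS distance "
            "FROM memory_units WHERE bank_id = :bank "
            "ORDER BY embedding <=> CAST(:q AS vector) LIMIT 10"
        ),
        {"q": query_embedding, "bank": "demo-bank"},
    ).fetchall()

    # Full-text search: match the generated search_vector column against a tsquery
    # and rank with ts_rank, served by idx_memory_units_text_search.
    keyword_hits = conn.execute(
        text(
            "SELECT id, text, ts_rank(search_vector, plainto_tsquery('english', :q)) AS rank "
            "FROM memory_units "
            "WHERE bank_id = :bank AND search_vector @@ plainto_tsquery('english', :q) "
            "ORDER BY rank DESC LIMIT 10"
        ),
        {"q": "project deadline", "bank": "demo-bank"},
    ).fetchall()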
hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py ADDED
@@ -0,0 +1,70 @@
+ """add_chunks_table
+
+ Revision ID: b7c4d8e9f1a2
+ Revises: 5a366d414dce
+ Create Date: 2025-11-28 00:00:00.000000
+
+ """
+ from typing import Sequence, Union
+
+ from alembic import op
+ import sqlalchemy as sa
+ from sqlalchemy.dialects import postgresql
+
+
+ # revision identifiers, used by Alembic.
+ revision: str = 'b7c4d8e9f1a2'
+ down_revision: Union[str, Sequence[str], None] = '5a366d414dce'
+ branch_labels: Union[str, Sequence[str], None] = None
+ depends_on: Union[str, Sequence[str], None] = None
+
+
+ def upgrade() -> None:
+     """Add chunks table and link memory_units to chunks."""
+
+     # Create chunks table with single text PK (bank_id_document_id_chunk_index)
+     op.create_table(
+         'chunks',
+         sa.Column('chunk_id', sa.Text(), nullable=False),
+         sa.Column('document_id', sa.Text(), nullable=False),
+         sa.Column('bank_id', sa.Text(), nullable=False),
+         sa.Column('chunk_index', sa.Integer(), nullable=False),
+         sa.Column('chunk_text', sa.Text(), nullable=False),
+         sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.ForeignKeyConstraint(['document_id', 'bank_id'], ['documents.id', 'documents.bank_id'], name='chunks_document_fkey', ondelete='CASCADE'),
+         sa.PrimaryKeyConstraint('chunk_id', name=op.f('pk_chunks'))
+     )
+
+     # Add indexes for efficient queries
+     op.create_index('idx_chunks_document_id', 'chunks', ['document_id'])
+     op.create_index('idx_chunks_bank_id', 'chunks', ['bank_id'])
+
+     # Add chunk_id column to memory_units (nullable, as existing records won't have chunks)
+     op.add_column('memory_units', sa.Column('chunk_id', sa.Text(), nullable=True))
+
+     # Add foreign key constraint to chunks table
+     op.create_foreign_key(
+         'memory_units_chunk_fkey',
+         'memory_units',
+         'chunks',
+         ['chunk_id'],
+         ['chunk_id'],
+         ondelete='SET NULL'
+     )
+
+     # Add index on chunk_id for efficient lookups
+     op.create_index('idx_memory_units_chunk_id', 'memory_units', ['chunk_id'])
+
+
+ def downgrade() -> None:
+     """Remove chunks table and chunk_id from memory_units."""
+
+     # Drop index and foreign key from memory_units
+     op.drop_index('idx_memory_units_chunk_id', table_name='memory_units')
+     op.drop_constraint('memory_units_chunk_fkey', 'memory_units', type_='foreignkey')
+     op.drop_column('memory_units', 'chunk_id')
+
+     # Drop chunks table indexes and table
+     op.drop_index('idx_chunks_bank_id', table_name='chunks')
+     op.drop_index('idx_chunks_document_id', table_name='chunks')
+     op.drop_table('chunks')
hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py ADDED
@@ -0,0 +1,39 @@
+ """add_retain_params_to_documents
+
+ Revision ID: c8e5f2a3b4d1
+ Revises: b7c4d8e9f1a2
+ Create Date: 2025-12-02 00:00:00.000000
+
+ """
+ from typing import Sequence, Union
+
+ from alembic import op
+ import sqlalchemy as sa
+ from sqlalchemy.dialects import postgresql
+
+
+ # revision identifiers, used by Alembic.
+ revision: str = 'c8e5f2a3b4d1'
+ down_revision: Union[str, Sequence[str], None] = 'b7c4d8e9f1a2'
+ branch_labels: Union[str, Sequence[str], None] = None
+ depends_on: Union[str, Sequence[str], None] = None
+
+
+ def upgrade() -> None:
+     """Add retain_params JSONB column to documents table."""
+
+     # Add retain_params column to store parameters passed during retain
+     op.add_column('documents', sa.Column('retain_params', postgresql.JSONB(), nullable=True))
+
+     # Add index for efficient queries on retain_params
+     op.create_index('idx_documents_retain_params', 'documents', ['retain_params'], postgresql_using='gin')
+
+
+ def downgrade() -> None:
+     """Remove retain_params column from documents table."""
+
+     # Drop index
+     op.drop_index('idx_documents_retain_params', table_name='documents')
+
+     # Drop column
+     op.drop_column('documents', 'retain_params')
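
Because retain_params is a JSONB column behind a GIN index, containment filters are the natural way to query it. The snippet below is only illustrative; the key stored inside retain_params is hypothetical, since the diff does not show what the application writes there:

from sqlalchemy import create_engine, text

# Hypothetical connection URL.
engine = create_engine("postgresql://user:secret@localhost:5432/hindsight")

with engine.connect() as conn:
    # The '@>' containment operator can be served by the GIN index idx_documents_retain_params.
    rows = conn.execute(
        text("SELECT id, bank_id FROM documents WHERE retain_params @> CAST(:params AS jsonb)"),
        {"params": '{"source": "import"}'},  # hypothetical retain parameter
    ).fetchall()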
hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py ADDED
@@ -0,0 +1,48 @@
+ """Rename fact_type 'bank' to 'experience'
+
+ Revision ID: d9f6a3b4c5e2
+ Revises: c8e5f2a3b4d1
+ Create Date: 2024-12-04 15:00:00.000000
+
+ """
+ from alembic import op
+ import sqlalchemy as sa
+
+
+ # revision identifiers, used by Alembic.
+ revision = 'd9f6a3b4c5e2'
+ down_revision = 'c8e5f2a3b4d1'
+ branch_labels = None
+ depends_on = None
+
+
+ def upgrade():
+     # Drop old check constraint FIRST (before updating data)
+     op.drop_constraint('memory_units_fact_type_check', 'memory_units', type_='check')
+
+     # Update existing 'bank' values to 'experience'
+     op.execute("UPDATE memory_units SET fact_type = 'experience' WHERE fact_type = 'bank'")
+     # Also update any 'interactions' values (in case of partial migration)
+     op.execute("UPDATE memory_units SET fact_type = 'experience' WHERE fact_type = 'interactions'")
+
+     # Create new check constraint with 'experience' instead of 'bank'
+     op.create_check_constraint(
+         'memory_units_fact_type_check',
+         'memory_units',
+         "fact_type IN ('world', 'experience', 'opinion', 'observation')"
+     )
+
+
+ def downgrade():
+     # Drop new check constraint FIRST
+     op.drop_constraint('memory_units_fact_type_check', 'memory_units', type_='check')
+
+     # Update 'experience' back to 'bank'
+     op.execute("UPDATE memory_units SET fact_type = 'bank' WHERE fact_type = 'experience'")
+
+     # Recreate old check constraint
+     op.create_check_constraint(
+         'memory_units_fact_type_check',
+         'memory_units',
+         "fact_type IN ('world', 'bank', 'opinion', 'observation')"
+     )