hindsight-api 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
Files changed (63) hide show
  1. hindsight_api/__init__.py +10 -9
  2. hindsight_api/alembic/env.py +5 -8
  3. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
  4. hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
  5. hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
  6. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
  7. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
  8. hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
  9. hindsight_api/api/__init__.py +10 -10
  10. hindsight_api/api/http.py +575 -593
  11. hindsight_api/api/mcp.py +31 -33
  12. hindsight_api/banner.py +13 -6
  13. hindsight_api/config.py +17 -12
  14. hindsight_api/engine/__init__.py +9 -9
  15. hindsight_api/engine/cross_encoder.py +23 -27
  16. hindsight_api/engine/db_utils.py +5 -4
  17. hindsight_api/engine/embeddings.py +22 -21
  18. hindsight_api/engine/entity_resolver.py +81 -75
  19. hindsight_api/engine/llm_wrapper.py +74 -88
  20. hindsight_api/engine/memory_engine.py +663 -673
  21. hindsight_api/engine/query_analyzer.py +100 -97
  22. hindsight_api/engine/response_models.py +105 -106
  23. hindsight_api/engine/retain/__init__.py +9 -16
  24. hindsight_api/engine/retain/bank_utils.py +34 -58
  25. hindsight_api/engine/retain/chunk_storage.py +4 -12
  26. hindsight_api/engine/retain/deduplication.py +9 -28
  27. hindsight_api/engine/retain/embedding_processing.py +4 -11
  28. hindsight_api/engine/retain/embedding_utils.py +3 -4
  29. hindsight_api/engine/retain/entity_processing.py +7 -17
  30. hindsight_api/engine/retain/fact_extraction.py +155 -165
  31. hindsight_api/engine/retain/fact_storage.py +11 -23
  32. hindsight_api/engine/retain/link_creation.py +11 -39
  33. hindsight_api/engine/retain/link_utils.py +166 -95
  34. hindsight_api/engine/retain/observation_regeneration.py +39 -52
  35. hindsight_api/engine/retain/orchestrator.py +72 -62
  36. hindsight_api/engine/retain/types.py +49 -43
  37. hindsight_api/engine/search/__init__.py +15 -1
  38. hindsight_api/engine/search/fusion.py +6 -15
  39. hindsight_api/engine/search/graph_retrieval.py +234 -0
  40. hindsight_api/engine/search/mpfp_retrieval.py +438 -0
  41. hindsight_api/engine/search/observation_utils.py +9 -16
  42. hindsight_api/engine/search/reranking.py +4 -7
  43. hindsight_api/engine/search/retrieval.py +388 -193
  44. hindsight_api/engine/search/scoring.py +5 -7
  45. hindsight_api/engine/search/temporal_extraction.py +8 -11
  46. hindsight_api/engine/search/think_utils.py +115 -39
  47. hindsight_api/engine/search/trace.py +68 -38
  48. hindsight_api/engine/search/tracer.py +49 -35
  49. hindsight_api/engine/search/types.py +22 -16
  50. hindsight_api/engine/task_backend.py +21 -26
  51. hindsight_api/engine/utils.py +25 -10
  52. hindsight_api/main.py +21 -40
  53. hindsight_api/mcp_local.py +190 -0
  54. hindsight_api/metrics.py +44 -30
  55. hindsight_api/migrations.py +10 -8
  56. hindsight_api/models.py +60 -72
  57. hindsight_api/pg0.py +64 -337
  58. hindsight_api/server.py +3 -6
  59. {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/METADATA +6 -5
  60. hindsight_api-0.1.6.dist-info/RECORD +64 -0
  61. {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/entry_points.txt +1 -0
  62. hindsight_api-0.1.4.dist-info/RECORD +0 -61
  63. {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/WHEEL +0 -0
hindsight_api/__init__.py CHANGED
@@ -3,23 +3,24 @@ Memory System for AI Agents.
3
3
 
4
4
  Temporal + Semantic Memory Architecture using PostgreSQL with pgvector.
5
5
  """
6
+
7
+ from .config import HindsightConfig, get_config
8
+ from .engine.cross_encoder import CrossEncoderModel, LocalSTCrossEncoder, RemoteTEICrossEncoder
9
+ from .engine.embeddings import Embeddings, LocalSTEmbeddings, RemoteTEIEmbeddings
10
+ from .engine.llm_wrapper import LLMConfig
6
11
  from .engine.memory_engine import MemoryEngine
7
12
  from .engine.search.trace import (
8
- SearchTrace,
9
- QueryInfo,
10
13
  EntryPoint,
11
- NodeVisit,
12
- WeightComponents,
13
14
  LinkInfo,
15
+ NodeVisit,
14
16
  PruningDecision,
15
- SearchSummary,
17
+ QueryInfo,
16
18
  SearchPhaseMetrics,
19
+ SearchSummary,
20
+ SearchTrace,
21
+ WeightComponents,
17
22
  )
18
23
  from .engine.search.tracer import SearchTracer
19
- from .engine.embeddings import Embeddings, LocalSTEmbeddings, RemoteTEIEmbeddings
20
- from .engine.cross_encoder import CrossEncoderModel, LocalSTCrossEncoder, RemoteTEICrossEncoder
21
- from .engine.llm_wrapper import LLMConfig
22
- from .config import HindsightConfig, get_config
23
24
 
24
25
  __all__ = [
25
26
  "MemoryEngine",
hindsight_api/alembic/env.py CHANGED
@@ -2,20 +2,19 @@
2
2
  Alembic environment configuration for SQLAlchemy with pgvector.
3
3
  Uses synchronous psycopg2 driver for migrations to avoid pgbouncer issues.
4
4
  """
5
+
5
6
  import logging
6
7
  import os
7
- import sys
8
8
  from pathlib import Path
9
9
 
10
- from sqlalchemy import pool, engine_from_config
11
- from sqlalchemy.engine import Connection
12
-
13
10
  from alembic import context
14
11
  from dotenv import load_dotenv
12
+ from sqlalchemy import engine_from_config, pool
15
13
 
16
14
  # Import your models here
17
15
  from hindsight_api.models import Base
18
16
 
17
+
19
18
  # Load environment variables based on HINDSIGHT_API_DATABASE_URL env var or default to local
20
19
  def load_env():
21
20
  """Load environment variables from .env"""
@@ -30,6 +29,7 @@ def load_env():
30
29
  if env_file.exists():
31
30
  load_dotenv(env_file)
32
31
 
32
+
33
33
  load_env()
34
34
 
35
35
  # this is the Alembic Config object, which provides
@@ -128,10 +128,7 @@ def run_migrations_online() -> None:
128
128
  connection.execute(text("SET SESSION CHARACTERISTICS AS TRANSACTION READ WRITE"))
129
129
  connection.commit() # Commit the SET command
130
130
 
131
- context.configure(
132
- connection=connection,
133
- target_metadata=target_metadata
134
- )
131
+ context.configure(connection=connection, target_metadata=target_metadata)
135
132
 
136
133
  with context.begin_transaction():
137
134
  context.run_migrations()
hindsight_api/alembic/versions/5a366d414dce_initial_schema.py CHANGED
@@ -5,120 +5,150 @@ Revises:
5
5
  Create Date: 2025-11-27 11:54:19.228030
6
6
 
7
7
  """
8
- from typing import Sequence, Union
9
8
 
10
- from alembic import op
9
+ from collections.abc import Sequence
10
+
11
11
  import sqlalchemy as sa
12
- from sqlalchemy.dialects import postgresql
12
+ from alembic import op
13
13
  from pgvector.sqlalchemy import Vector
14
-
14
+ from sqlalchemy.dialects import postgresql
15
15
 
16
16
  # revision identifiers, used by Alembic.
17
- revision: str = '5a366d414dce'
18
- down_revision: Union[str, Sequence[str], None] = None
19
- branch_labels: Union[str, Sequence[str], None] = None
20
- depends_on: Union[str, Sequence[str], None] = None
17
+ revision: str = "5a366d414dce"
18
+ down_revision: str | Sequence[str] | None = None
19
+ branch_labels: str | Sequence[str] | None = None
20
+ depends_on: str | Sequence[str] | None = None
21
21
 
22
22
 
23
23
  def upgrade() -> None:
24
24
  """Upgrade schema - create all tables from scratch."""
25
25
 
26
26
  # Enable required extensions
27
- op.execute('CREATE EXTENSION IF NOT EXISTS vector')
27
+ op.execute("CREATE EXTENSION IF NOT EXISTS vector")
28
28
 
29
29
  # Create banks table
30
30
  op.create_table(
31
- 'banks',
32
- sa.Column('bank_id', sa.Text(), nullable=False),
33
- sa.Column('name', sa.Text(), nullable=True),
34
- sa.Column('personality', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
35
- sa.Column('background', sa.Text(), nullable=True),
36
- sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
37
- sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
38
- sa.PrimaryKeyConstraint('bank_id', name=op.f('pk_banks'))
31
+ "banks",
32
+ sa.Column("bank_id", sa.Text(), nullable=False),
33
+ sa.Column("name", sa.Text(), nullable=True),
34
+ sa.Column(
35
+ "personality",
36
+ postgresql.JSONB(astext_type=sa.Text()),
37
+ server_default=sa.text("'{}'::jsonb"),
38
+ nullable=False,
39
+ ),
40
+ sa.Column("background", sa.Text(), nullable=True),
41
+ sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
42
+ sa.Column("updated_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
43
+ sa.PrimaryKeyConstraint("bank_id", name=op.f("pk_banks")),
39
44
  )
40
45
 
41
46
  # Create documents table
42
47
  op.create_table(
43
- 'documents',
44
- sa.Column('id', sa.Text(), nullable=False),
45
- sa.Column('bank_id', sa.Text(), nullable=False),
46
- sa.Column('original_text', sa.Text(), nullable=True),
47
- sa.Column('content_hash', sa.Text(), nullable=True),
48
- sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
49
- sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
50
- sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
51
- sa.PrimaryKeyConstraint('id', 'bank_id', name=op.f('pk_documents'))
48
+ "documents",
49
+ sa.Column("id", sa.Text(), nullable=False),
50
+ sa.Column("bank_id", sa.Text(), nullable=False),
51
+ sa.Column("original_text", sa.Text(), nullable=True),
52
+ sa.Column("content_hash", sa.Text(), nullable=True),
53
+ sa.Column(
54
+ "metadata", postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False
55
+ ),
56
+ sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
57
+ sa.Column("updated_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
58
+ sa.PrimaryKeyConstraint("id", "bank_id", name=op.f("pk_documents")),
52
59
  )
53
- op.create_index('idx_documents_bank_id', 'documents', ['bank_id'])
54
- op.create_index('idx_documents_content_hash', 'documents', ['content_hash'])
60
+ op.create_index("idx_documents_bank_id", "documents", ["bank_id"])
61
+ op.create_index("idx_documents_content_hash", "documents", ["content_hash"])
55
62
 
56
63
  # Create async_operations table
57
64
  op.create_table(
58
- 'async_operations',
59
- sa.Column('operation_id', postgresql.UUID(as_uuid=True), server_default=sa.text('gen_random_uuid()'), nullable=False),
60
- sa.Column('bank_id', sa.Text(), nullable=False),
61
- sa.Column('operation_type', sa.Text(), nullable=False),
62
- sa.Column('status', sa.Text(), server_default='pending', nullable=False),
63
- sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
64
- sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
65
- sa.Column('completed_at', postgresql.TIMESTAMP(timezone=True), nullable=True),
66
- sa.Column('error_message', sa.Text(), nullable=True),
67
- sa.Column('result_metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
68
- sa.PrimaryKeyConstraint('operation_id', name=op.f('pk_async_operations')),
69
- sa.CheckConstraint("status IN ('pending', 'processing', 'completed', 'failed')", name='async_operations_status_check')
65
+ "async_operations",
66
+ sa.Column(
67
+ "operation_id", postgresql.UUID(as_uuid=True), server_default=sa.text("gen_random_uuid()"), nullable=False
68
+ ),
69
+ sa.Column("bank_id", sa.Text(), nullable=False),
70
+ sa.Column("operation_type", sa.Text(), nullable=False),
71
+ sa.Column("status", sa.Text(), server_default="pending", nullable=False),
72
+ sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
73
+ sa.Column("updated_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
74
+ sa.Column("completed_at", postgresql.TIMESTAMP(timezone=True), nullable=True),
75
+ sa.Column("error_message", sa.Text(), nullable=True),
76
+ sa.Column(
77
+ "result_metadata",
78
+ postgresql.JSONB(astext_type=sa.Text()),
79
+ server_default=sa.text("'{}'::jsonb"),
80
+ nullable=False,
81
+ ),
82
+ sa.PrimaryKeyConstraint("operation_id", name=op.f("pk_async_operations")),
83
+ sa.CheckConstraint(
84
+ "status IN ('pending', 'processing', 'completed', 'failed')", name="async_operations_status_check"
85
+ ),
70
86
  )
71
- op.create_index('idx_async_operations_bank_id', 'async_operations', ['bank_id'])
72
- op.create_index('idx_async_operations_status', 'async_operations', ['status'])
73
- op.create_index('idx_async_operations_bank_status', 'async_operations', ['bank_id', 'status'])
87
+ op.create_index("idx_async_operations_bank_id", "async_operations", ["bank_id"])
88
+ op.create_index("idx_async_operations_status", "async_operations", ["status"])
89
+ op.create_index("idx_async_operations_bank_status", "async_operations", ["bank_id", "status"])
74
90
 
75
91
  # Create entities table
76
92
  op.create_table(
77
- 'entities',
78
- sa.Column('id', postgresql.UUID(as_uuid=True), server_default=sa.text('gen_random_uuid()'), nullable=False),
79
- sa.Column('canonical_name', sa.Text(), nullable=False),
80
- sa.Column('bank_id', sa.Text(), nullable=False),
81
- sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
82
- sa.Column('first_seen', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
83
- sa.Column('last_seen', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
84
- sa.Column('mention_count', sa.Integer(), server_default='1', nullable=False),
85
- sa.PrimaryKeyConstraint('id', name=op.f('pk_entities'))
93
+ "entities",
94
+ sa.Column("id", postgresql.UUID(as_uuid=True), server_default=sa.text("gen_random_uuid()"), nullable=False),
95
+ sa.Column("canonical_name", sa.Text(), nullable=False),
96
+ sa.Column("bank_id", sa.Text(), nullable=False),
97
+ sa.Column(
98
+ "metadata", postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False
99
+ ),
100
+ sa.Column("first_seen", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
101
+ sa.Column("last_seen", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
102
+ sa.Column("mention_count", sa.Integer(), server_default="1", nullable=False),
103
+ sa.PrimaryKeyConstraint("id", name=op.f("pk_entities")),
86
104
  )
87
- op.create_index('idx_entities_bank_id', 'entities', ['bank_id'])
88
- op.create_index('idx_entities_canonical_name', 'entities', ['canonical_name'])
89
- op.create_index('idx_entities_bank_name', 'entities', ['bank_id', 'canonical_name'])
105
+ op.create_index("idx_entities_bank_id", "entities", ["bank_id"])
106
+ op.create_index("idx_entities_canonical_name", "entities", ["canonical_name"])
107
+ op.create_index("idx_entities_bank_name", "entities", ["bank_id", "canonical_name"])
90
108
  # Create unique index on (bank_id, LOWER(canonical_name)) for entity resolution
91
- op.execute('CREATE UNIQUE INDEX idx_entities_bank_lower_name ON entities (bank_id, LOWER(canonical_name))')
109
+ op.execute("CREATE UNIQUE INDEX idx_entities_bank_lower_name ON entities (bank_id, LOWER(canonical_name))")
92
110
 
93
111
  # Create memory_units table
94
112
  op.create_table(
95
- 'memory_units',
96
- sa.Column('id', postgresql.UUID(as_uuid=True), server_default=sa.text('gen_random_uuid()'), nullable=False),
97
- sa.Column('bank_id', sa.Text(), nullable=False),
98
- sa.Column('document_id', sa.Text(), nullable=True),
99
- sa.Column('text', sa.Text(), nullable=False),
100
- sa.Column('embedding', Vector(384), nullable=True),
101
- sa.Column('context', sa.Text(), nullable=True),
102
- sa.Column('event_date', postgresql.TIMESTAMP(timezone=True), nullable=False),
103
- sa.Column('occurred_start', postgresql.TIMESTAMP(timezone=True), nullable=True),
104
- sa.Column('occurred_end', postgresql.TIMESTAMP(timezone=True), nullable=True),
105
- sa.Column('mentioned_at', postgresql.TIMESTAMP(timezone=True), nullable=True),
106
- sa.Column('fact_type', sa.Text(), server_default='world', nullable=False),
107
- sa.Column('confidence_score', sa.Float(), nullable=True),
108
- sa.Column('access_count', sa.Integer(), server_default='0', nullable=False),
109
- sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
110
- sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
111
- sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
112
- sa.ForeignKeyConstraint(['document_id', 'bank_id'], ['documents.id', 'documents.bank_id'], name='memory_units_document_fkey', ondelete='CASCADE'),
113
- sa.PrimaryKeyConstraint('id', name=op.f('pk_memory_units')),
114
- sa.CheckConstraint("fact_type IN ('world', 'bank', 'opinion', 'observation')", name='memory_units_fact_type_check'),
115
- sa.CheckConstraint("confidence_score IS NULL OR (confidence_score >= 0.0 AND confidence_score <= 1.0)", name='memory_units_confidence_range_check'),
113
+ "memory_units",
114
+ sa.Column("id", postgresql.UUID(as_uuid=True), server_default=sa.text("gen_random_uuid()"), nullable=False),
115
+ sa.Column("bank_id", sa.Text(), nullable=False),
116
+ sa.Column("document_id", sa.Text(), nullable=True),
117
+ sa.Column("text", sa.Text(), nullable=False),
118
+ sa.Column("embedding", Vector(384), nullable=True),
119
+ sa.Column("context", sa.Text(), nullable=True),
120
+ sa.Column("event_date", postgresql.TIMESTAMP(timezone=True), nullable=False),
121
+ sa.Column("occurred_start", postgresql.TIMESTAMP(timezone=True), nullable=True),
122
+ sa.Column("occurred_end", postgresql.TIMESTAMP(timezone=True), nullable=True),
123
+ sa.Column("mentioned_at", postgresql.TIMESTAMP(timezone=True), nullable=True),
124
+ sa.Column("fact_type", sa.Text(), server_default="world", nullable=False),
125
+ sa.Column("confidence_score", sa.Float(), nullable=True),
126
+ sa.Column("access_count", sa.Integer(), server_default="0", nullable=False),
127
+ sa.Column(
128
+ "metadata", postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False
129
+ ),
130
+ sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
131
+ sa.Column("updated_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
132
+ sa.ForeignKeyConstraint(
133
+ ["document_id", "bank_id"],
134
+ ["documents.id", "documents.bank_id"],
135
+ name="memory_units_document_fkey",
136
+ ondelete="CASCADE",
137
+ ),
138
+ sa.PrimaryKeyConstraint("id", name=op.f("pk_memory_units")),
139
+ sa.CheckConstraint(
140
+ "fact_type IN ('world', 'bank', 'opinion', 'observation')", name="memory_units_fact_type_check"
141
+ ),
142
+ sa.CheckConstraint(
143
+ "confidence_score IS NULL OR (confidence_score >= 0.0 AND confidence_score <= 1.0)",
144
+ name="memory_units_confidence_range_check",
145
+ ),
116
146
  sa.CheckConstraint(
117
147
  "(fact_type = 'opinion' AND confidence_score IS NOT NULL) OR "
118
148
  "(fact_type = 'observation') OR "
119
149
  "(fact_type NOT IN ('opinion', 'observation') AND confidence_score IS NULL)",
120
- name='confidence_score_fact_type_check'
121
- )
150
+ name="confidence_score_fact_type_check",
151
+ ),
122
152
  )
123
153
 
124
154
  # Add search_vector column for full-text search
@@ -128,18 +158,41 @@ def upgrade() -> None:
128
158
  GENERATED ALWAYS AS (to_tsvector('english', COALESCE(text, '') || ' ' || COALESCE(context, ''))) STORED
129
159
  """)
130
160
 
131
- op.create_index('idx_memory_units_bank_id', 'memory_units', ['bank_id'])
132
- op.create_index('idx_memory_units_document_id', 'memory_units', ['document_id'])
133
- op.create_index('idx_memory_units_event_date', 'memory_units', [sa.text('event_date DESC')])
134
- op.create_index('idx_memory_units_bank_date', 'memory_units', ['bank_id', sa.text('event_date DESC')])
135
- op.create_index('idx_memory_units_access_count', 'memory_units', [sa.text('access_count DESC')])
136
- op.create_index('idx_memory_units_fact_type', 'memory_units', ['fact_type'])
137
- op.create_index('idx_memory_units_bank_fact_type', 'memory_units', ['bank_id', 'fact_type'])
138
- op.create_index('idx_memory_units_bank_type_date', 'memory_units', ['bank_id', 'fact_type', sa.text('event_date DESC')])
139
- op.create_index('idx_memory_units_opinion_confidence', 'memory_units', ['bank_id', sa.text('confidence_score DESC')], postgresql_where=sa.text("fact_type = 'opinion'"))
140
- op.create_index('idx_memory_units_opinion_date', 'memory_units', ['bank_id', sa.text('event_date DESC')], postgresql_where=sa.text("fact_type = 'opinion'"))
141
- op.create_index('idx_memory_units_observation_date', 'memory_units', ['bank_id', sa.text('event_date DESC')], postgresql_where=sa.text("fact_type = 'observation'"))
142
- op.create_index('idx_memory_units_embedding', 'memory_units', ['embedding'], postgresql_using='hnsw', postgresql_ops={'embedding': 'vector_cosine_ops'})
161
+ op.create_index("idx_memory_units_bank_id", "memory_units", ["bank_id"])
162
+ op.create_index("idx_memory_units_document_id", "memory_units", ["document_id"])
163
+ op.create_index("idx_memory_units_event_date", "memory_units", [sa.text("event_date DESC")])
164
+ op.create_index("idx_memory_units_bank_date", "memory_units", ["bank_id", sa.text("event_date DESC")])
165
+ op.create_index("idx_memory_units_access_count", "memory_units", [sa.text("access_count DESC")])
166
+ op.create_index("idx_memory_units_fact_type", "memory_units", ["fact_type"])
167
+ op.create_index("idx_memory_units_bank_fact_type", "memory_units", ["bank_id", "fact_type"])
168
+ op.create_index(
169
+ "idx_memory_units_bank_type_date", "memory_units", ["bank_id", "fact_type", sa.text("event_date DESC")]
170
+ )
171
+ op.create_index(
172
+ "idx_memory_units_opinion_confidence",
173
+ "memory_units",
174
+ ["bank_id", sa.text("confidence_score DESC")],
175
+ postgresql_where=sa.text("fact_type = 'opinion'"),
176
+ )
177
+ op.create_index(
178
+ "idx_memory_units_opinion_date",
179
+ "memory_units",
180
+ ["bank_id", sa.text("event_date DESC")],
181
+ postgresql_where=sa.text("fact_type = 'opinion'"),
182
+ )
183
+ op.create_index(
184
+ "idx_memory_units_observation_date",
185
+ "memory_units",
186
+ ["bank_id", sa.text("event_date DESC")],
187
+ postgresql_where=sa.text("fact_type = 'observation'"),
188
+ )
189
+ op.create_index(
190
+ "idx_memory_units_embedding",
191
+ "memory_units",
192
+ ["embedding"],
193
+ postgresql_using="hnsw",
194
+ postgresql_ops={"embedding": "vector_cosine_ops"},
195
+ )
143
196
 
144
197
  # Create BM25 full-text search index on search_vector
145
198
  op.execute("""
@@ -158,116 +211,149 @@ def upgrade() -> None:
158
211
  FROM memory_units
159
212
  """)
160
213
 
161
- op.create_index('idx_memory_units_bm25_bank', 'memory_units_bm25', ['bank_id'])
162
- op.create_index('idx_memory_units_bm25_text_vector', 'memory_units_bm25', ['text_vector'], postgresql_using='gin')
214
+ op.create_index("idx_memory_units_bm25_bank", "memory_units_bm25", ["bank_id"])
215
+ op.create_index("idx_memory_units_bm25_text_vector", "memory_units_bm25", ["text_vector"], postgresql_using="gin")
163
216
 
164
217
  # Create entity_cooccurrences table
165
218
  op.create_table(
166
- 'entity_cooccurrences',
167
- sa.Column('entity_id_1', postgresql.UUID(as_uuid=True), nullable=False),
168
- sa.Column('entity_id_2', postgresql.UUID(as_uuid=True), nullable=False),
169
- sa.Column('cooccurrence_count', sa.Integer(), server_default='1', nullable=False),
170
- sa.Column('last_cooccurred', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
171
- sa.ForeignKeyConstraint(['entity_id_1'], ['entities.id'], name=op.f('fk_entity_cooccurrences_entity_id_1_entities'), ondelete='CASCADE'),
172
- sa.ForeignKeyConstraint(['entity_id_2'], ['entities.id'], name=op.f('fk_entity_cooccurrences_entity_id_2_entities'), ondelete='CASCADE'),
173
- sa.PrimaryKeyConstraint('entity_id_1', 'entity_id_2', name=op.f('pk_entity_cooccurrences')),
174
- sa.CheckConstraint('entity_id_1 < entity_id_2', name='entity_cooccurrence_order_check')
219
+ "entity_cooccurrences",
220
+ sa.Column("entity_id_1", postgresql.UUID(as_uuid=True), nullable=False),
221
+ sa.Column("entity_id_2", postgresql.UUID(as_uuid=True), nullable=False),
222
+ sa.Column("cooccurrence_count", sa.Integer(), server_default="1", nullable=False),
223
+ sa.Column(
224
+ "last_cooccurred", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False
225
+ ),
226
+ sa.ForeignKeyConstraint(
227
+ ["entity_id_1"],
228
+ ["entities.id"],
229
+ name=op.f("fk_entity_cooccurrences_entity_id_1_entities"),
230
+ ondelete="CASCADE",
231
+ ),
232
+ sa.ForeignKeyConstraint(
233
+ ["entity_id_2"],
234
+ ["entities.id"],
235
+ name=op.f("fk_entity_cooccurrences_entity_id_2_entities"),
236
+ ondelete="CASCADE",
237
+ ),
238
+ sa.PrimaryKeyConstraint("entity_id_1", "entity_id_2", name=op.f("pk_entity_cooccurrences")),
239
+ sa.CheckConstraint("entity_id_1 < entity_id_2", name="entity_cooccurrence_order_check"),
175
240
  )
176
- op.create_index('idx_entity_cooccurrences_entity1', 'entity_cooccurrences', ['entity_id_1'])
177
- op.create_index('idx_entity_cooccurrences_entity2', 'entity_cooccurrences', ['entity_id_2'])
178
- op.create_index('idx_entity_cooccurrences_count', 'entity_cooccurrences', [sa.text('cooccurrence_count DESC')])
241
+ op.create_index("idx_entity_cooccurrences_entity1", "entity_cooccurrences", ["entity_id_1"])
242
+ op.create_index("idx_entity_cooccurrences_entity2", "entity_cooccurrences", ["entity_id_2"])
243
+ op.create_index("idx_entity_cooccurrences_count", "entity_cooccurrences", [sa.text("cooccurrence_count DESC")])
179
244
 
180
245
  # Create memory_links table
181
246
  op.create_table(
182
- 'memory_links',
183
- sa.Column('from_unit_id', postgresql.UUID(as_uuid=True), nullable=False),
184
- sa.Column('to_unit_id', postgresql.UUID(as_uuid=True), nullable=False),
185
- sa.Column('link_type', sa.Text(), nullable=False),
186
- sa.Column('entity_id', postgresql.UUID(as_uuid=True), nullable=True),
187
- sa.Column('weight', sa.Float(), server_default='1.0', nullable=False),
188
- sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
189
- sa.ForeignKeyConstraint(['entity_id'], ['entities.id'], name=op.f('fk_memory_links_entity_id_entities'), ondelete='CASCADE'),
190
- sa.ForeignKeyConstraint(['from_unit_id'], ['memory_units.id'], name=op.f('fk_memory_links_from_unit_id_memory_units'), ondelete='CASCADE'),
191
- sa.ForeignKeyConstraint(['to_unit_id'], ['memory_units.id'], name=op.f('fk_memory_links_to_unit_id_memory_units'), ondelete='CASCADE'),
192
- sa.CheckConstraint("link_type IN ('temporal', 'semantic', 'entity', 'causes', 'caused_by', 'enables', 'prevents')", name='memory_links_link_type_check'),
193
- sa.CheckConstraint('weight >= 0.0 AND weight <= 1.0', name='memory_links_weight_check')
247
+ "memory_links",
248
+ sa.Column("from_unit_id", postgresql.UUID(as_uuid=True), nullable=False),
249
+ sa.Column("to_unit_id", postgresql.UUID(as_uuid=True), nullable=False),
250
+ sa.Column("link_type", sa.Text(), nullable=False),
251
+ sa.Column("entity_id", postgresql.UUID(as_uuid=True), nullable=True),
252
+ sa.Column("weight", sa.Float(), server_default="1.0", nullable=False),
253
+ sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
254
+ sa.ForeignKeyConstraint(
255
+ ["entity_id"], ["entities.id"], name=op.f("fk_memory_links_entity_id_entities"), ondelete="CASCADE"
256
+ ),
257
+ sa.ForeignKeyConstraint(
258
+ ["from_unit_id"],
259
+ ["memory_units.id"],
260
+ name=op.f("fk_memory_links_from_unit_id_memory_units"),
261
+ ondelete="CASCADE",
262
+ ),
263
+ sa.ForeignKeyConstraint(
264
+ ["to_unit_id"],
265
+ ["memory_units.id"],
266
+ name=op.f("fk_memory_links_to_unit_id_memory_units"),
267
+ ondelete="CASCADE",
268
+ ),
269
+ sa.CheckConstraint(
270
+ "link_type IN ('temporal', 'semantic', 'entity', 'causes', 'caused_by', 'enables', 'prevents')",
271
+ name="memory_links_link_type_check",
272
+ ),
273
+ sa.CheckConstraint("weight >= 0.0 AND weight <= 1.0", name="memory_links_weight_check"),
194
274
  )
195
275
  # Create unique constraint using COALESCE for nullable entity_id
196
- op.execute("CREATE UNIQUE INDEX idx_memory_links_unique ON memory_links (from_unit_id, to_unit_id, link_type, COALESCE(entity_id, '00000000-0000-0000-0000-000000000000'::uuid))")
197
- op.create_index('idx_memory_links_from_unit', 'memory_links', ['from_unit_id'])
198
- op.create_index('idx_memory_links_to_unit', 'memory_links', ['to_unit_id'])
199
- op.create_index('idx_memory_links_entity', 'memory_links', ['entity_id'])
200
- op.create_index('idx_memory_links_link_type', 'memory_links', ['link_type'])
276
+ op.execute(
277
+ "CREATE UNIQUE INDEX idx_memory_links_unique ON memory_links (from_unit_id, to_unit_id, link_type, COALESCE(entity_id, '00000000-0000-0000-0000-000000000000'::uuid))"
278
+ )
279
+ op.create_index("idx_memory_links_from_unit", "memory_links", ["from_unit_id"])
280
+ op.create_index("idx_memory_links_to_unit", "memory_links", ["to_unit_id"])
281
+ op.create_index("idx_memory_links_entity", "memory_links", ["entity_id"])
282
+ op.create_index("idx_memory_links_link_type", "memory_links", ["link_type"])
201
283
 
202
284
  # Create unit_entities table
203
285
  op.create_table(
204
- 'unit_entities',
205
- sa.Column('unit_id', postgresql.UUID(as_uuid=True), nullable=False),
206
- sa.Column('entity_id', postgresql.UUID(as_uuid=True), nullable=False),
207
- sa.ForeignKeyConstraint(['entity_id'], ['entities.id'], name=op.f('fk_unit_entities_entity_id_entities'), ondelete='CASCADE'),
208
- sa.ForeignKeyConstraint(['unit_id'], ['memory_units.id'], name=op.f('fk_unit_entities_unit_id_memory_units'), ondelete='CASCADE'),
209
- sa.PrimaryKeyConstraint('unit_id', 'entity_id', name=op.f('pk_unit_entities'))
286
+ "unit_entities",
287
+ sa.Column("unit_id", postgresql.UUID(as_uuid=True), nullable=False),
288
+ sa.Column("entity_id", postgresql.UUID(as_uuid=True), nullable=False),
289
+ sa.ForeignKeyConstraint(
290
+ ["entity_id"], ["entities.id"], name=op.f("fk_unit_entities_entity_id_entities"), ondelete="CASCADE"
291
+ ),
292
+ sa.ForeignKeyConstraint(
293
+ ["unit_id"], ["memory_units.id"], name=op.f("fk_unit_entities_unit_id_memory_units"), ondelete="CASCADE"
294
+ ),
295
+ sa.PrimaryKeyConstraint("unit_id", "entity_id", name=op.f("pk_unit_entities")),
210
296
  )
211
- op.create_index('idx_unit_entities_unit', 'unit_entities', ['unit_id'])
212
- op.create_index('idx_unit_entities_entity', 'unit_entities', ['entity_id'])
297
+ op.create_index("idx_unit_entities_unit", "unit_entities", ["unit_id"])
298
+ op.create_index("idx_unit_entities_entity", "unit_entities", ["entity_id"])
213
299
 
214
300
 
215
301
  def downgrade() -> None:
216
302
  """Downgrade schema - drop all tables."""
217
303
 
218
304
  # Drop tables in reverse dependency order
219
- op.drop_index('idx_unit_entities_entity', table_name='unit_entities')
220
- op.drop_index('idx_unit_entities_unit', table_name='unit_entities')
221
- op.drop_table('unit_entities')
222
-
223
- op.drop_index('idx_memory_links_link_type', table_name='memory_links')
224
- op.drop_index('idx_memory_links_entity', table_name='memory_links')
225
- op.drop_index('idx_memory_links_to_unit', table_name='memory_links')
226
- op.drop_index('idx_memory_links_from_unit', table_name='memory_links')
227
- op.execute('DROP INDEX IF EXISTS idx_memory_links_unique')
228
- op.drop_table('memory_links')
229
-
230
- op.drop_index('idx_entity_cooccurrences_count', table_name='entity_cooccurrences')
231
- op.drop_index('idx_entity_cooccurrences_entity2', table_name='entity_cooccurrences')
232
- op.drop_index('idx_entity_cooccurrences_entity1', table_name='entity_cooccurrences')
233
- op.drop_table('entity_cooccurrences')
305
+ op.drop_index("idx_unit_entities_entity", table_name="unit_entities")
306
+ op.drop_index("idx_unit_entities_unit", table_name="unit_entities")
307
+ op.drop_table("unit_entities")
308
+
309
+ op.drop_index("idx_memory_links_link_type", table_name="memory_links")
310
+ op.drop_index("idx_memory_links_entity", table_name="memory_links")
311
+ op.drop_index("idx_memory_links_to_unit", table_name="memory_links")
312
+ op.drop_index("idx_memory_links_from_unit", table_name="memory_links")
313
+ op.execute("DROP INDEX IF EXISTS idx_memory_links_unique")
314
+ op.drop_table("memory_links")
315
+
316
+ op.drop_index("idx_entity_cooccurrences_count", table_name="entity_cooccurrences")
317
+ op.drop_index("idx_entity_cooccurrences_entity2", table_name="entity_cooccurrences")
318
+ op.drop_index("idx_entity_cooccurrences_entity1", table_name="entity_cooccurrences")
319
+ op.drop_table("entity_cooccurrences")
234
320
 
235
321
  # Drop BM25 materialized view and index
236
- op.drop_index('idx_memory_units_bm25_text_vector', table_name='memory_units_bm25')
237
- op.drop_index('idx_memory_units_bm25_bank', table_name='memory_units_bm25')
238
- op.execute('DROP MATERIALIZED VIEW IF EXISTS memory_units_bm25')
239
-
240
- op.drop_index('idx_memory_units_embedding', table_name='memory_units')
241
- op.drop_index('idx_memory_units_observation_date', table_name='memory_units')
242
- op.drop_index('idx_memory_units_opinion_date', table_name='memory_units')
243
- op.drop_index('idx_memory_units_opinion_confidence', table_name='memory_units')
244
- op.drop_index('idx_memory_units_bank_type_date', table_name='memory_units')
245
- op.drop_index('idx_memory_units_bank_fact_type', table_name='memory_units')
246
- op.drop_index('idx_memory_units_fact_type', table_name='memory_units')
247
- op.drop_index('idx_memory_units_access_count', table_name='memory_units')
248
- op.drop_index('idx_memory_units_bank_date', table_name='memory_units')
249
- op.drop_index('idx_memory_units_event_date', table_name='memory_units')
250
- op.drop_index('idx_memory_units_document_id', table_name='memory_units')
251
- op.drop_index('idx_memory_units_bank_id', table_name='memory_units')
252
- op.execute('DROP INDEX IF EXISTS idx_memory_units_text_search')
253
- op.drop_table('memory_units')
254
-
255
- op.execute('DROP INDEX IF EXISTS idx_entities_bank_lower_name')
256
- op.drop_index('idx_entities_bank_name', table_name='entities')
257
- op.drop_index('idx_entities_canonical_name', table_name='entities')
258
- op.drop_index('idx_entities_bank_id', table_name='entities')
259
- op.drop_table('entities')
260
-
261
- op.drop_index('idx_async_operations_bank_status', table_name='async_operations')
262
- op.drop_index('idx_async_operations_status', table_name='async_operations')
263
- op.drop_index('idx_async_operations_bank_id', table_name='async_operations')
264
- op.drop_table('async_operations')
265
-
266
- op.drop_index('idx_documents_content_hash', table_name='documents')
267
- op.drop_index('idx_documents_bank_id', table_name='documents')
268
- op.drop_table('documents')
269
-
270
- op.drop_table('banks')
322
+ op.drop_index("idx_memory_units_bm25_text_vector", table_name="memory_units_bm25")
323
+ op.drop_index("idx_memory_units_bm25_bank", table_name="memory_units_bm25")
324
+ op.execute("DROP MATERIALIZED VIEW IF EXISTS memory_units_bm25")
325
+
326
+ op.drop_index("idx_memory_units_embedding", table_name="memory_units")
327
+ op.drop_index("idx_memory_units_observation_date", table_name="memory_units")
328
+ op.drop_index("idx_memory_units_opinion_date", table_name="memory_units")
329
+ op.drop_index("idx_memory_units_opinion_confidence", table_name="memory_units")
330
+ op.drop_index("idx_memory_units_bank_type_date", table_name="memory_units")
331
+ op.drop_index("idx_memory_units_bank_fact_type", table_name="memory_units")
332
+ op.drop_index("idx_memory_units_fact_type", table_name="memory_units")
333
+ op.drop_index("idx_memory_units_access_count", table_name="memory_units")
334
+ op.drop_index("idx_memory_units_bank_date", table_name="memory_units")
335
+ op.drop_index("idx_memory_units_event_date", table_name="memory_units")
336
+ op.drop_index("idx_memory_units_document_id", table_name="memory_units")
337
+ op.drop_index("idx_memory_units_bank_id", table_name="memory_units")
338
+ op.execute("DROP INDEX IF EXISTS idx_memory_units_text_search")
339
+ op.drop_table("memory_units")
340
+
341
+ op.execute("DROP INDEX IF EXISTS idx_entities_bank_lower_name")
342
+ op.drop_index("idx_entities_bank_name", table_name="entities")
343
+ op.drop_index("idx_entities_canonical_name", table_name="entities")
344
+ op.drop_index("idx_entities_bank_id", table_name="entities")
345
+ op.drop_table("entities")
346
+
347
+ op.drop_index("idx_async_operations_bank_status", table_name="async_operations")
348
+ op.drop_index("idx_async_operations_status", table_name="async_operations")
349
+ op.drop_index("idx_async_operations_bank_id", table_name="async_operations")
350
+ op.drop_table("async_operations")
351
+
352
+ op.drop_index("idx_documents_content_hash", table_name="documents")
353
+ op.drop_index("idx_documents_bank_id", table_name="documents")
354
+ op.drop_table("documents")
355
+
356
+ op.drop_table("banks")
271
357
 
272
358
  # Drop extensions (optional - left commented out so the extensions are kept; uncomment to drop them)
273
359
  # op.execute('DROP EXTENSION IF EXISTS vector')