hindsight-api 0.1.4__tar.gz → 0.1.6__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (70)
  1. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/.gitignore +6 -0
  2. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/PKG-INFO +6 -5
  3. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/__init__.py +10 -9
  4. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/alembic/env.py +5 -8
  5. hindsight_api-0.1.6/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +360 -0
  6. hindsight_api-0.1.6/hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
  7. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
  8. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
  9. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
  10. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
  11. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/api/__init__.py +10 -10
  12. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/api/http.py +575 -593
  13. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/api/mcp.py +31 -33
  14. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/banner.py +13 -6
  15. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/config.py +17 -12
  16. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/__init__.py +9 -9
  17. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/cross_encoder.py +23 -27
  18. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/db_utils.py +5 -4
  19. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/embeddings.py +22 -21
  20. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/entity_resolver.py +81 -75
  21. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/llm_wrapper.py +74 -88
  22. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/memory_engine.py +663 -673
  23. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/query_analyzer.py +100 -97
  24. hindsight_api-0.1.6/hindsight_api/engine/response_models.py +219 -0
  25. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/__init__.py +9 -16
  26. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/bank_utils.py +34 -58
  27. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/chunk_storage.py +4 -12
  28. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/deduplication.py +9 -28
  29. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/embedding_processing.py +4 -11
  30. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/embedding_utils.py +3 -4
  31. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/entity_processing.py +7 -17
  32. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/fact_extraction.py +155 -165
  33. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/fact_storage.py +11 -23
  34. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/link_creation.py +11 -39
  35. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/link_utils.py +166 -95
  36. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/observation_regeneration.py +39 -52
  37. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/orchestrator.py +72 -62
  38. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/retain/types.py +49 -43
  39. hindsight_api-0.1.6/hindsight_api/engine/search/__init__.py +29 -0
  40. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/search/fusion.py +6 -15
  41. hindsight_api-0.1.6/hindsight_api/engine/search/graph_retrieval.py +234 -0
  42. hindsight_api-0.1.6/hindsight_api/engine/search/mpfp_retrieval.py +438 -0
  43. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/search/observation_utils.py +9 -16
  44. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/search/reranking.py +4 -7
  45. hindsight_api-0.1.6/hindsight_api/engine/search/retrieval.py +698 -0
  46. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/search/scoring.py +5 -7
  47. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/search/temporal_extraction.py +8 -11
  48. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/search/think_utils.py +115 -39
  49. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/search/trace.py +68 -38
  50. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/search/tracer.py +49 -35
  51. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/search/types.py +22 -16
  52. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/task_backend.py +21 -26
  53. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/engine/utils.py +25 -10
  54. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/main.py +21 -40
  55. hindsight_api-0.1.6/hindsight_api/mcp_local.py +190 -0
  56. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/metrics.py +44 -30
  57. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/migrations.py +10 -8
  58. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/models.py +60 -72
  59. hindsight_api-0.1.6/hindsight_api/pg0.py +134 -0
  60. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/server.py +3 -6
  61. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/pyproject.toml +36 -5
  62. hindsight_api-0.1.4/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +0 -274
  63. hindsight_api-0.1.4/hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +0 -70
  64. hindsight_api-0.1.4/hindsight_api/engine/response_models.py +0 -220
  65. hindsight_api-0.1.4/hindsight_api/engine/search/__init__.py +0 -15
  66. hindsight_api-0.1.4/hindsight_api/engine/search/retrieval.py +0 -503
  67. hindsight_api-0.1.4/hindsight_api/pg0.py +0 -407
  68. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/README.md +0 -0
  69. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/alembic/README +0 -0
  70. {hindsight_api-0.1.4 → hindsight_api-0.1.6}/hindsight_api/alembic/script.py.mako +0 -0
@@ -9,6 +9,9 @@ wheels/
 # Virtual environments
 .venv
 
+# Node
+node_modules/
+
 # Environment variables
 .env
 
@@ -29,6 +32,9 @@ logs/
 
 .DS_Store
 
+# Generated docs files
+hindsight-docs/static/llms-full.txt
+
 
 hindsight-dev/benchmarks/locomo/results/
 hindsight-dev/benchmarks/longmemeval/results/
@@ -1,13 +1,13 @@
 Metadata-Version: 2.4
 Name: hindsight-api
-Version: 0.1.4
+Version: 0.1.6
 Summary: Temporal + Semantic + Entity Memory System for AI agents using PostgreSQL
 Requires-Python: >=3.11
 Requires-Dist: alembic>=1.17.1
 Requires-Dist: asyncpg>=0.29.0
 Requires-Dist: dateparser>=1.2.2
 Requires-Dist: fastapi[standard]>=0.120.3
-Requires-Dist: fastmcp>=2.0.0
+Requires-Dist: fastmcp>=2.3.0
 Requires-Dist: google-genai>=1.0.0
 Requires-Dist: greenlet>=3.2.4
 Requires-Dist: httpx>=0.27.0
@@ -17,17 +17,18 @@ Requires-Dist: opentelemetry-api>=1.20.0
 Requires-Dist: opentelemetry-exporter-prometheus>=0.41b0
 Requires-Dist: opentelemetry-instrumentation-fastapi>=0.41b0
 Requires-Dist: opentelemetry-sdk>=1.20.0
+Requires-Dist: pg0-embedded>=0.11.0
 Requires-Dist: pgvector>=0.4.1
 Requires-Dist: psycopg2-binary>=2.9.11
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: python-dateutil>=2.8.0
 Requires-Dist: python-dotenv>=1.0.0
 Requires-Dist: rich>=13.0.0
-Requires-Dist: sentence-transformers>=3.0.0
+Requires-Dist: sentence-transformers<3.3.0,>=3.0.0
 Requires-Dist: sqlalchemy>=2.0.44
 Requires-Dist: tiktoken>=0.12.0
-Requires-Dist: torch>=2.0.0
-Requires-Dist: transformers>=4.30.0
+Requires-Dist: torch<2.6.0,>=2.0.0
+Requires-Dist: transformers<4.46.0,>=4.30.0
 Requires-Dist: uvicorn>=0.38.0
 Requires-Dist: wsproto>=1.0.0
 Provides-Extra: test
@@ -3,23 +3,24 @@ Memory System for AI Agents.
 
 Temporal + Semantic Memory Architecture using PostgreSQL with pgvector.
 """
+
+from .config import HindsightConfig, get_config
+from .engine.cross_encoder import CrossEncoderModel, LocalSTCrossEncoder, RemoteTEICrossEncoder
+from .engine.embeddings import Embeddings, LocalSTEmbeddings, RemoteTEIEmbeddings
+from .engine.llm_wrapper import LLMConfig
 from .engine.memory_engine import MemoryEngine
 from .engine.search.trace import (
-    SearchTrace,
-    QueryInfo,
     EntryPoint,
-    NodeVisit,
-    WeightComponents,
     LinkInfo,
+    NodeVisit,
     PruningDecision,
-    SearchSummary,
+    QueryInfo,
     SearchPhaseMetrics,
+    SearchSummary,
+    SearchTrace,
+    WeightComponents,
 )
 from .engine.search.tracer import SearchTracer
-from .engine.embeddings import Embeddings, LocalSTEmbeddings, RemoteTEIEmbeddings
-from .engine.cross_encoder import CrossEncoderModel, LocalSTCrossEncoder, RemoteTEICrossEncoder
-from .engine.llm_wrapper import LLMConfig
-from .config import HindsightConfig, get_config
 
 __all__ = [
     "MemoryEngine",
@@ -2,20 +2,19 @@
 Alembic environment configuration for SQLAlchemy with pgvector.
 Uses synchronous psycopg2 driver for migrations to avoid pgbouncer issues.
 """
+
 import logging
 import os
-import sys
 from pathlib import Path
 
-from sqlalchemy import pool, engine_from_config
-from sqlalchemy.engine import Connection
-
 from alembic import context
 from dotenv import load_dotenv
+from sqlalchemy import engine_from_config, pool
 
 # Import your models here
 from hindsight_api.models import Base
 
+
 # Load environment variables based on HINDSIGHT_API_DATABASE_URL env var or default to local
 def load_env():
     """Load environment variables from .env"""
@@ -30,6 +29,7 @@ def load_env():
     if env_file.exists():
         load_dotenv(env_file)
 
+
 load_env()
 
 # this is the Alembic Config object, which provides
@@ -128,10 +128,7 @@ def run_migrations_online() -> None:
         connection.execute(text("SET SESSION CHARACTERISTICS AS TRANSACTION READ WRITE"))
         connection.commit()  # Commit the SET command
 
-        context.configure(
-            connection=connection,
-            target_metadata=target_metadata
-        )
+        context.configure(connection=connection, target_metadata=target_metadata)
 
         with context.begin_transaction():
             context.run_migrations()
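
The remaining files in this diff are ordinary Alembic revisions. For orientation, a minimal sketch of applying them programmatically (the alembic.ini path here is an assumption; the package also ships its own migrations.py runner, which changed in this release):

    # Sketch only — standard Alembic command API, config path assumed.
    from alembic import command
    from alembic.config import Config

    cfg = Config("alembic.ini")
    command.upgrade(cfg, "head")  # applies all revisions up to head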
@@ -0,0 +1,360 @@
+"""initial_schema
+
+Revision ID: 5a366d414dce
+Revises:
+Create Date: 2025-11-27 11:54:19.228030
+
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+from pgvector.sqlalchemy import Vector
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision: str = "5a366d414dce"
+down_revision: str | Sequence[str] | None = None
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Upgrade schema - create all tables from scratch."""
+
+    # Enable required extensions
+    op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+
+    # Create banks table
+    op.create_table(
+        "banks",
+        sa.Column("bank_id", sa.Text(), nullable=False),
+        sa.Column("name", sa.Text(), nullable=True),
+        sa.Column(
+            "personality",
+            postgresql.JSONB(astext_type=sa.Text()),
+            server_default=sa.text("'{}'::jsonb"),
+            nullable=False,
+        ),
+        sa.Column("background", sa.Text(), nullable=True),
+        sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.Column("updated_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.PrimaryKeyConstraint("bank_id", name=op.f("pk_banks")),
+    )
+
+    # Create documents table
+    op.create_table(
+        "documents",
+        sa.Column("id", sa.Text(), nullable=False),
+        sa.Column("bank_id", sa.Text(), nullable=False),
+        sa.Column("original_text", sa.Text(), nullable=True),
+        sa.Column("content_hash", sa.Text(), nullable=True),
+        sa.Column(
+            "metadata", postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False
+        ),
+        sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.Column("updated_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.PrimaryKeyConstraint("id", "bank_id", name=op.f("pk_documents")),
+    )
+    op.create_index("idx_documents_bank_id", "documents", ["bank_id"])
+    op.create_index("idx_documents_content_hash", "documents", ["content_hash"])
+
+    # Create async_operations table
+    op.create_table(
+        "async_operations",
+        sa.Column(
+            "operation_id", postgresql.UUID(as_uuid=True), server_default=sa.text("gen_random_uuid()"), nullable=False
+        ),
+        sa.Column("bank_id", sa.Text(), nullable=False),
+        sa.Column("operation_type", sa.Text(), nullable=False),
+        sa.Column("status", sa.Text(), server_default="pending", nullable=False),
+        sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.Column("updated_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.Column("completed_at", postgresql.TIMESTAMP(timezone=True), nullable=True),
+        sa.Column("error_message", sa.Text(), nullable=True),
+        sa.Column(
+            "result_metadata",
+            postgresql.JSONB(astext_type=sa.Text()),
+            server_default=sa.text("'{}'::jsonb"),
+            nullable=False,
+        ),
+        sa.PrimaryKeyConstraint("operation_id", name=op.f("pk_async_operations")),
+        sa.CheckConstraint(
+            "status IN ('pending', 'processing', 'completed', 'failed')", name="async_operations_status_check"
+        ),
+    )
+    op.create_index("idx_async_operations_bank_id", "async_operations", ["bank_id"])
+    op.create_index("idx_async_operations_status", "async_operations", ["status"])
+    op.create_index("idx_async_operations_bank_status", "async_operations", ["bank_id", "status"])
+
+    # Create entities table
+    op.create_table(
+        "entities",
+        sa.Column("id", postgresql.UUID(as_uuid=True), server_default=sa.text("gen_random_uuid()"), nullable=False),
+        sa.Column("canonical_name", sa.Text(), nullable=False),
+        sa.Column("bank_id", sa.Text(), nullable=False),
+        sa.Column(
+            "metadata", postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False
+        ),
+        sa.Column("first_seen", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.Column("last_seen", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.Column("mention_count", sa.Integer(), server_default="1", nullable=False),
+        sa.PrimaryKeyConstraint("id", name=op.f("pk_entities")),
+    )
+    op.create_index("idx_entities_bank_id", "entities", ["bank_id"])
+    op.create_index("idx_entities_canonical_name", "entities", ["canonical_name"])
+    op.create_index("idx_entities_bank_name", "entities", ["bank_id", "canonical_name"])
+    # Create unique index on (bank_id, LOWER(canonical_name)) for entity resolution
+    op.execute("CREATE UNIQUE INDEX idx_entities_bank_lower_name ON entities (bank_id, LOWER(canonical_name))")
+
+    # Create memory_units table
+    op.create_table(
+        "memory_units",
+        sa.Column("id", postgresql.UUID(as_uuid=True), server_default=sa.text("gen_random_uuid()"), nullable=False),
+        sa.Column("bank_id", sa.Text(), nullable=False),
+        sa.Column("document_id", sa.Text(), nullable=True),
+        sa.Column("text", sa.Text(), nullable=False),
+        sa.Column("embedding", Vector(384), nullable=True),
+        sa.Column("context", sa.Text(), nullable=True),
+        sa.Column("event_date", postgresql.TIMESTAMP(timezone=True), nullable=False),
+        sa.Column("occurred_start", postgresql.TIMESTAMP(timezone=True), nullable=True),
+        sa.Column("occurred_end", postgresql.TIMESTAMP(timezone=True), nullable=True),
+        sa.Column("mentioned_at", postgresql.TIMESTAMP(timezone=True), nullable=True),
+        sa.Column("fact_type", sa.Text(), server_default="world", nullable=False),
+        sa.Column("confidence_score", sa.Float(), nullable=True),
+        sa.Column("access_count", sa.Integer(), server_default="0", nullable=False),
+        sa.Column(
+            "metadata", postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False
+        ),
+        sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.Column("updated_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["document_id", "bank_id"],
+            ["documents.id", "documents.bank_id"],
+            name="memory_units_document_fkey",
+            ondelete="CASCADE",
+        ),
+        sa.PrimaryKeyConstraint("id", name=op.f("pk_memory_units")),
+        sa.CheckConstraint(
+            "fact_type IN ('world', 'bank', 'opinion', 'observation')", name="memory_units_fact_type_check"
+        ),
+        sa.CheckConstraint(
+            "confidence_score IS NULL OR (confidence_score >= 0.0 AND confidence_score <= 1.0)",
+            name="memory_units_confidence_range_check",
+        ),
+        sa.CheckConstraint(
+            "(fact_type = 'opinion' AND confidence_score IS NOT NULL) OR "
+            "(fact_type = 'observation') OR "
+            "(fact_type NOT IN ('opinion', 'observation') AND confidence_score IS NULL)",
+            name="confidence_score_fact_type_check",
+        ),
+    )
+
+    # Add search_vector column for full-text search
+    op.execute("""
+        ALTER TABLE memory_units
+        ADD COLUMN search_vector tsvector
+        GENERATED ALWAYS AS (to_tsvector('english', COALESCE(text, '') || ' ' || COALESCE(context, ''))) STORED
+    """)
+
+    op.create_index("idx_memory_units_bank_id", "memory_units", ["bank_id"])
+    op.create_index("idx_memory_units_document_id", "memory_units", ["document_id"])
+    op.create_index("idx_memory_units_event_date", "memory_units", [sa.text("event_date DESC")])
+    op.create_index("idx_memory_units_bank_date", "memory_units", ["bank_id", sa.text("event_date DESC")])
+    op.create_index("idx_memory_units_access_count", "memory_units", [sa.text("access_count DESC")])
+    op.create_index("idx_memory_units_fact_type", "memory_units", ["fact_type"])
+    op.create_index("idx_memory_units_bank_fact_type", "memory_units", ["bank_id", "fact_type"])
+    op.create_index(
+        "idx_memory_units_bank_type_date", "memory_units", ["bank_id", "fact_type", sa.text("event_date DESC")]
+    )
+    op.create_index(
+        "idx_memory_units_opinion_confidence",
+        "memory_units",
+        ["bank_id", sa.text("confidence_score DESC")],
+        postgresql_where=sa.text("fact_type = 'opinion'"),
+    )
+    op.create_index(
+        "idx_memory_units_opinion_date",
+        "memory_units",
+        ["bank_id", sa.text("event_date DESC")],
+        postgresql_where=sa.text("fact_type = 'opinion'"),
+    )
+    op.create_index(
+        "idx_memory_units_observation_date",
+        "memory_units",
+        ["bank_id", sa.text("event_date DESC")],
+        postgresql_where=sa.text("fact_type = 'observation'"),
+    )
+    op.create_index(
+        "idx_memory_units_embedding",
+        "memory_units",
+        ["embedding"],
+        postgresql_using="hnsw",
+        postgresql_ops={"embedding": "vector_cosine_ops"},
+    )
+
+    # Create BM25 full-text search index on search_vector
+    op.execute("""
+        CREATE INDEX idx_memory_units_text_search ON memory_units
+        USING gin(search_vector)
+    """)
+
+    op.execute("""
+        CREATE MATERIALIZED VIEW memory_units_bm25 AS
+        SELECT
+            id,
+            bank_id,
+            text,
+            to_tsvector('english', text) AS text_vector,
+            log(1.0 + length(text)::float / (SELECT avg(length(text)) FROM memory_units)) AS doc_length_factor
+        FROM memory_units
+    """)
+
+    op.create_index("idx_memory_units_bm25_bank", "memory_units_bm25", ["bank_id"])
+    op.create_index("idx_memory_units_bm25_text_vector", "memory_units_bm25", ["text_vector"], postgresql_using="gin")
+
+    # Create entity_cooccurrences table
+    op.create_table(
+        "entity_cooccurrences",
+        sa.Column("entity_id_1", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("entity_id_2", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("cooccurrence_count", sa.Integer(), server_default="1", nullable=False),
+        sa.Column(
+            "last_cooccurred", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False
+        ),
+        sa.ForeignKeyConstraint(
+            ["entity_id_1"],
+            ["entities.id"],
+            name=op.f("fk_entity_cooccurrences_entity_id_1_entities"),
+            ondelete="CASCADE",
+        ),
+        sa.ForeignKeyConstraint(
+            ["entity_id_2"],
+            ["entities.id"],
+            name=op.f("fk_entity_cooccurrences_entity_id_2_entities"),
+            ondelete="CASCADE",
+        ),
+        sa.PrimaryKeyConstraint("entity_id_1", "entity_id_2", name=op.f("pk_entity_cooccurrences")),
+        sa.CheckConstraint("entity_id_1 < entity_id_2", name="entity_cooccurrence_order_check"),
+    )
+    op.create_index("idx_entity_cooccurrences_entity1", "entity_cooccurrences", ["entity_id_1"])
+    op.create_index("idx_entity_cooccurrences_entity2", "entity_cooccurrences", ["entity_id_2"])
+    op.create_index("idx_entity_cooccurrences_count", "entity_cooccurrences", [sa.text("cooccurrence_count DESC")])
+
+    # Create memory_links table
+    op.create_table(
+        "memory_links",
+        sa.Column("from_unit_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("to_unit_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("link_type", sa.Text(), nullable=False),
+        sa.Column("entity_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.Column("weight", sa.Float(), server_default="1.0", nullable=False),
+        sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["entity_id"], ["entities.id"], name=op.f("fk_memory_links_entity_id_entities"), ondelete="CASCADE"
+        ),
+        sa.ForeignKeyConstraint(
+            ["from_unit_id"],
+            ["memory_units.id"],
+            name=op.f("fk_memory_links_from_unit_id_memory_units"),
+            ondelete="CASCADE",
+        ),
+        sa.ForeignKeyConstraint(
+            ["to_unit_id"],
+            ["memory_units.id"],
+            name=op.f("fk_memory_links_to_unit_id_memory_units"),
+            ondelete="CASCADE",
+        ),
+        sa.CheckConstraint(
+            "link_type IN ('temporal', 'semantic', 'entity', 'causes', 'caused_by', 'enables', 'prevents')",
+            name="memory_links_link_type_check",
+        ),
+        sa.CheckConstraint("weight >= 0.0 AND weight <= 1.0", name="memory_links_weight_check"),
+    )
+    # Create unique constraint using COALESCE for nullable entity_id
+    op.execute(
+        "CREATE UNIQUE INDEX idx_memory_links_unique ON memory_links (from_unit_id, to_unit_id, link_type, COALESCE(entity_id, '00000000-0000-0000-0000-000000000000'::uuid))"
+    )
+    op.create_index("idx_memory_links_from_unit", "memory_links", ["from_unit_id"])
+    op.create_index("idx_memory_links_to_unit", "memory_links", ["to_unit_id"])
+    op.create_index("idx_memory_links_entity", "memory_links", ["entity_id"])
+    op.create_index("idx_memory_links_link_type", "memory_links", ["link_type"])
+
+    # Create unit_entities table
+    op.create_table(
+        "unit_entities",
+        sa.Column("unit_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("entity_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["entity_id"], ["entities.id"], name=op.f("fk_unit_entities_entity_id_entities"), ondelete="CASCADE"
+        ),
+        sa.ForeignKeyConstraint(
+            ["unit_id"], ["memory_units.id"], name=op.f("fk_unit_entities_unit_id_memory_units"), ondelete="CASCADE"
+        ),
+        sa.PrimaryKeyConstraint("unit_id", "entity_id", name=op.f("pk_unit_entities")),
+    )
+    op.create_index("idx_unit_entities_unit", "unit_entities", ["unit_id"])
+    op.create_index("idx_unit_entities_entity", "unit_entities", ["entity_id"])
+
+
+def downgrade() -> None:
+    """Downgrade schema - drop all tables."""
+
+    # Drop tables in reverse dependency order
+    op.drop_index("idx_unit_entities_entity", table_name="unit_entities")
+    op.drop_index("idx_unit_entities_unit", table_name="unit_entities")
+    op.drop_table("unit_entities")
+
+    op.drop_index("idx_memory_links_link_type", table_name="memory_links")
+    op.drop_index("idx_memory_links_entity", table_name="memory_links")
+    op.drop_index("idx_memory_links_to_unit", table_name="memory_links")
+    op.drop_index("idx_memory_links_from_unit", table_name="memory_links")
+    op.execute("DROP INDEX IF EXISTS idx_memory_links_unique")
+    op.drop_table("memory_links")
+
+    op.drop_index("idx_entity_cooccurrences_count", table_name="entity_cooccurrences")
+    op.drop_index("idx_entity_cooccurrences_entity2", table_name="entity_cooccurrences")
+    op.drop_index("idx_entity_cooccurrences_entity1", table_name="entity_cooccurrences")
+    op.drop_table("entity_cooccurrences")
+
+    # Drop BM25 materialized view and index
+    op.drop_index("idx_memory_units_bm25_text_vector", table_name="memory_units_bm25")
+    op.drop_index("idx_memory_units_bm25_bank", table_name="memory_units_bm25")
+    op.execute("DROP MATERIALIZED VIEW IF EXISTS memory_units_bm25")
+
+    op.drop_index("idx_memory_units_embedding", table_name="memory_units")
+    op.drop_index("idx_memory_units_observation_date", table_name="memory_units")
+    op.drop_index("idx_memory_units_opinion_date", table_name="memory_units")
+    op.drop_index("idx_memory_units_opinion_confidence", table_name="memory_units")
+    op.drop_index("idx_memory_units_bank_type_date", table_name="memory_units")
+    op.drop_index("idx_memory_units_bank_fact_type", table_name="memory_units")
+    op.drop_index("idx_memory_units_fact_type", table_name="memory_units")
+    op.drop_index("idx_memory_units_access_count", table_name="memory_units")
+    op.drop_index("idx_memory_units_bank_date", table_name="memory_units")
+    op.drop_index("idx_memory_units_event_date", table_name="memory_units")
+    op.drop_index("idx_memory_units_document_id", table_name="memory_units")
+    op.drop_index("idx_memory_units_bank_id", table_name="memory_units")
+    op.execute("DROP INDEX IF EXISTS idx_memory_units_text_search")
+    op.drop_table("memory_units")
+
+    op.execute("DROP INDEX IF EXISTS idx_entities_bank_lower_name")
+    op.drop_index("idx_entities_bank_name", table_name="entities")
+    op.drop_index("idx_entities_canonical_name", table_name="entities")
+    op.drop_index("idx_entities_bank_id", table_name="entities")
+    op.drop_table("entities")
+
+    op.drop_index("idx_async_operations_bank_status", table_name="async_operations")
+    op.drop_index("idx_async_operations_status", table_name="async_operations")
+    op.drop_index("idx_async_operations_bank_id", table_name="async_operations")
+    op.drop_table("async_operations")
+
+    op.drop_index("idx_documents_content_hash", table_name="documents")
+    op.drop_index("idx_documents_bank_id", table_name="documents")
+    op.drop_table("documents")
+
+    op.drop_table("banks")
+
+    # Drop extensions (optional - comment out if you want to keep them)
+    # op.execute('DROP EXTENSION IF EXISTS vector')
+    # op.execute('DROP EXTENSION IF EXISTS "uuid-ossp"')
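
The HNSW index created above uses vector_cosine_ops on the 384-dimension embedding column, which serves pgvector's <=> cosine-distance operator. A minimal query sketch, not part of the package (connection handling and the query embedding are assumed):

    # Sketch only — nearest memory_units by cosine distance via asyncpg.
    import asyncpg

    async def nearest_units(conn: asyncpg.Connection, bank_id: str, query_vec: list[float], k: int = 10):
        vec = "[" + ",".join(str(x) for x in query_vec) + "]"  # pgvector text literal
        return await conn.fetch(
            """
            SELECT id, text, embedding <=> $2::vector AS cosine_distance
            FROM memory_units
            WHERE bank_id = $1
            ORDER BY embedding <=> $2::vector
            LIMIT $3
            """,
            bank_id,
            vec,
            k,
        )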
@@ -0,0 +1,70 @@
+"""add_chunks_table
+
+Revision ID: b7c4d8e9f1a2
+Revises: 5a366d414dce
+Create Date: 2025-11-28 00:00:00.000000
+
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision: str = "b7c4d8e9f1a2"
+down_revision: str | Sequence[str] | None = "5a366d414dce"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Add chunks table and link memory_units to chunks."""
+
+    # Create chunks table with single text PK (bank_id_document_id_chunk_index)
+    op.create_table(
+        "chunks",
+        sa.Column("chunk_id", sa.Text(), nullable=False),
+        sa.Column("document_id", sa.Text(), nullable=False),
+        sa.Column("bank_id", sa.Text(), nullable=False),
+        sa.Column("chunk_index", sa.Integer(), nullable=False),
+        sa.Column("chunk_text", sa.Text(), nullable=False),
+        sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["document_id", "bank_id"],
+            ["documents.id", "documents.bank_id"],
+            name="chunks_document_fkey",
+            ondelete="CASCADE",
+        ),
+        sa.PrimaryKeyConstraint("chunk_id", name=op.f("pk_chunks")),
+    )
+
+    # Add indexes for efficient queries
+    op.create_index("idx_chunks_document_id", "chunks", ["document_id"])
+    op.create_index("idx_chunks_bank_id", "chunks", ["bank_id"])
+
+    # Add chunk_id column to memory_units (nullable, as existing records won't have chunks)
+    op.add_column("memory_units", sa.Column("chunk_id", sa.Text(), nullable=True))
+
+    # Add foreign key constraint to chunks table
+    op.create_foreign_key(
+        "memory_units_chunk_fkey", "memory_units", "chunks", ["chunk_id"], ["chunk_id"], ondelete="SET NULL"
+    )
+
+    # Add index on chunk_id for efficient lookups
+    op.create_index("idx_memory_units_chunk_id", "memory_units", ["chunk_id"])
+
+
+def downgrade() -> None:
+    """Remove chunks table and chunk_id from memory_units."""
+
+    # Drop index and foreign key from memory_units
+    op.drop_index("idx_memory_units_chunk_id", table_name="memory_units")
+    op.drop_constraint("memory_units_chunk_fkey", "memory_units", type_="foreignkey")
+    op.drop_column("memory_units", "chunk_id")
+
+    # Drop chunks table indexes and table
+    op.drop_index("idx_chunks_bank_id", table_name="chunks")
+    op.drop_index("idx_chunks_document_id", table_name="chunks")
+    op.drop_table("chunks")
@@ -5,35 +5,35 @@ Revises: b7c4d8e9f1a2
 Create Date: 2025-12-02 00:00:00.000000
 
 """
-from typing import Sequence, Union
 
-from alembic import op
+from collections.abc import Sequence
+
 import sqlalchemy as sa
+from alembic import op
 from sqlalchemy.dialects import postgresql
 
-
 # revision identifiers, used by Alembic.
-revision: str = 'c8e5f2a3b4d1'
-down_revision: Union[str, Sequence[str], None] = 'b7c4d8e9f1a2'
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+revision: str = "c8e5f2a3b4d1"
+down_revision: str | Sequence[str] | None = "b7c4d8e9f1a2"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
     """Add retain_params JSONB column to documents table."""
 
     # Add retain_params column to store parameters passed during retain
-    op.add_column('documents', sa.Column('retain_params', postgresql.JSONB(), nullable=True))
+    op.add_column("documents", sa.Column("retain_params", postgresql.JSONB(), nullable=True))
 
     # Add index for efficient queries on retain_params
-    op.create_index('idx_documents_retain_params', 'documents', ['retain_params'], postgresql_using='gin')
+    op.create_index("idx_documents_retain_params", "documents", ["retain_params"], postgresql_using="gin")
 
 
 def downgrade() -> None:
     """Remove retain_params column from documents table."""
 
     # Drop index
-    op.drop_index('idx_documents_retain_params', table_name='documents')
+    op.drop_index("idx_documents_retain_params", table_name="documents")
 
     # Drop column
-    op.drop_column('documents', 'retain_params')
+    op.drop_column("documents", "retain_params")
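
The GIN index added above serves JSONB containment queries on documents.retain_params. A minimal sketch of such a query (engine URL and filter payload are illustrative):

    # Sketch only — JSONB containment lookup using the GIN index.
    from sqlalchemy import create_engine, text

    engine = create_engine("postgresql+psycopg2://localhost/hindsight")  # URL assumed
    with engine.connect() as conn:
        rows = conn.execute(
            text("SELECT id FROM documents WHERE retain_params @> CAST(:f AS jsonb)"),
            {"f": '{"some_param": true}'},
        ).fetchall()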
@@ -5,20 +5,19 @@ Revises: c8e5f2a3b4d1
 Create Date: 2024-12-04 15:00:00.000000
 
 """
-from alembic import op
-import sqlalchemy as sa
 
+from alembic import op
 
 # revision identifiers, used by Alembic.
-revision = 'd9f6a3b4c5e2'
-down_revision = 'c8e5f2a3b4d1'
+revision = "d9f6a3b4c5e2"
+down_revision = "c8e5f2a3b4d1"
 branch_labels = None
 depends_on = None
 
 
 def upgrade():
     # Drop old check constraint FIRST (before updating data)
-    op.drop_constraint('memory_units_fact_type_check', 'memory_units', type_='check')
+    op.drop_constraint("memory_units_fact_type_check", "memory_units", type_="check")
 
     # Update existing 'bank' values to 'experience'
     op.execute("UPDATE memory_units SET fact_type = 'experience' WHERE fact_type = 'bank'")
@@ -27,22 +26,18 @@ def upgrade():
 
     # Create new check constraint with 'experience' instead of 'bank'
     op.create_check_constraint(
-        'memory_units_fact_type_check',
-        'memory_units',
-        "fact_type IN ('world', 'experience', 'opinion', 'observation')"
+        "memory_units_fact_type_check", "memory_units", "fact_type IN ('world', 'experience', 'opinion', 'observation')"
     )
 
 
 def downgrade():
     # Drop new check constraint FIRST
-    op.drop_constraint('memory_units_fact_type_check', 'memory_units', type_='check')
+    op.drop_constraint("memory_units_fact_type_check", "memory_units", type_="check")
 
     # Update 'experience' back to 'bank'
    op.execute("UPDATE memory_units SET fact_type = 'bank' WHERE fact_type = 'experience'")
 
     # Recreate old check constraint
     op.create_check_constraint(
-        'memory_units_fact_type_check',
-        'memory_units',
-        "fact_type IN ('world', 'bank', 'opinion', 'observation')"
+        "memory_units_fact_type_check", "memory_units", "fact_type IN ('world', 'bank', 'opinion', 'observation')"
     )