hindsight-api 0.0.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. hindsight_api-0.0.13/.gitignore +34 -0
  2. hindsight_api-0.0.13/PKG-INFO +41 -0
  3. hindsight_api-0.0.13/README.md +1 -0
  4. hindsight_api-0.0.13/alembic/README +1 -0
  5. hindsight_api-0.0.13/alembic/env.py +129 -0
  6. hindsight_api-0.0.13/alembic/script.py.mako +28 -0
  7. hindsight_api-0.0.13/alembic/versions/5a366d414dce_initial_schema.py +275 -0
  8. hindsight_api-0.0.13/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
  9. hindsight_api-0.0.13/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +39 -0
  10. hindsight_api-0.0.13/hindsight_api/__init__.py +38 -0
  11. hindsight_api-0.0.13/hindsight_api/api/__init__.py +105 -0
  12. hindsight_api-0.0.13/hindsight_api/api/http.py +1872 -0
  13. hindsight_api-0.0.13/hindsight_api/api/mcp.py +157 -0
  14. hindsight_api-0.0.13/hindsight_api/engine/__init__.py +47 -0
  15. hindsight_api-0.0.13/hindsight_api/engine/cross_encoder.py +97 -0
  16. hindsight_api-0.0.13/hindsight_api/engine/db_utils.py +93 -0
  17. hindsight_api-0.0.13/hindsight_api/engine/embeddings.py +113 -0
  18. hindsight_api-0.0.13/hindsight_api/engine/entity_resolver.py +575 -0
  19. hindsight_api-0.0.13/hindsight_api/engine/llm_wrapper.py +269 -0
  20. hindsight_api-0.0.13/hindsight_api/engine/memory_engine.py +3095 -0
  21. hindsight_api-0.0.13/hindsight_api/engine/query_analyzer.py +519 -0
  22. hindsight_api-0.0.13/hindsight_api/engine/response_models.py +222 -0
  23. hindsight_api-0.0.13/hindsight_api/engine/retain/__init__.py +50 -0
  24. hindsight_api-0.0.13/hindsight_api/engine/retain/bank_utils.py +423 -0
  25. hindsight_api-0.0.13/hindsight_api/engine/retain/chunk_storage.py +82 -0
  26. hindsight_api-0.0.13/hindsight_api/engine/retain/deduplication.py +104 -0
  27. hindsight_api-0.0.13/hindsight_api/engine/retain/embedding_processing.py +62 -0
  28. hindsight_api-0.0.13/hindsight_api/engine/retain/embedding_utils.py +54 -0
  29. hindsight_api-0.0.13/hindsight_api/engine/retain/entity_processing.py +90 -0
  30. hindsight_api-0.0.13/hindsight_api/engine/retain/fact_extraction.py +1027 -0
  31. hindsight_api-0.0.13/hindsight_api/engine/retain/fact_storage.py +176 -0
  32. hindsight_api-0.0.13/hindsight_api/engine/retain/link_creation.py +121 -0
  33. hindsight_api-0.0.13/hindsight_api/engine/retain/link_utils.py +651 -0
  34. hindsight_api-0.0.13/hindsight_api/engine/retain/orchestrator.py +405 -0
  35. hindsight_api-0.0.13/hindsight_api/engine/retain/types.py +206 -0
  36. hindsight_api-0.0.13/hindsight_api/engine/search/__init__.py +15 -0
  37. hindsight_api-0.0.13/hindsight_api/engine/search/fusion.py +122 -0
  38. hindsight_api-0.0.13/hindsight_api/engine/search/observation_utils.py +132 -0
  39. hindsight_api-0.0.13/hindsight_api/engine/search/reranking.py +103 -0
  40. hindsight_api-0.0.13/hindsight_api/engine/search/retrieval.py +503 -0
  41. hindsight_api-0.0.13/hindsight_api/engine/search/scoring.py +161 -0
  42. hindsight_api-0.0.13/hindsight_api/engine/search/temporal_extraction.py +64 -0
  43. hindsight_api-0.0.13/hindsight_api/engine/search/think_utils.py +255 -0
  44. hindsight_api-0.0.13/hindsight_api/engine/search/trace.py +215 -0
  45. hindsight_api-0.0.13/hindsight_api/engine/search/tracer.py +447 -0
  46. hindsight_api-0.0.13/hindsight_api/engine/search/types.py +160 -0
  47. hindsight_api-0.0.13/hindsight_api/engine/task_backend.py +223 -0
  48. hindsight_api-0.0.13/hindsight_api/engine/utils.py +203 -0
  49. hindsight_api-0.0.13/hindsight_api/metrics.py +227 -0
  50. hindsight_api-0.0.13/hindsight_api/migrations.py +163 -0
  51. hindsight_api-0.0.13/hindsight_api/models.py +309 -0
  52. hindsight_api-0.0.13/hindsight_api/pg0.py +425 -0
  53. hindsight_api-0.0.13/hindsight_api/web/__init__.py +12 -0
  54. hindsight_api-0.0.13/hindsight_api/web/server.py +143 -0
  55. hindsight_api-0.0.13/pyproject.toml +76 -0
  56. hindsight_api-0.0.13/test_chunks_debug.py +46 -0
  57. hindsight_api-0.0.13/test_mentioned_at.py +69 -0
  58. hindsight_api-0.0.13/tests/RETAIN_TEST_COVERAGE_PLAN.md +302 -0
  59. hindsight_api-0.0.13/tests/__init__.py +1 -0
  60. hindsight_api-0.0.13/tests/conftest.py +130 -0
  61. hindsight_api-0.0.13/tests/fixtures/README.md +19 -0
  62. hindsight_api-0.0.13/tests/fixtures/locomo_conversation_sample.json +5271 -0
  63. hindsight_api-0.0.13/tests/test_agents_api.py +260 -0
  64. hindsight_api-0.0.13/tests/test_batch_chunking.py +58 -0
  65. hindsight_api-0.0.13/tests/test_chunking.py +60 -0
  66. hindsight_api-0.0.13/tests/test_document_tracking.py +131 -0
  67. hindsight_api-0.0.13/tests/test_fact_extraction_quality.py +1168 -0
  68. hindsight_api-0.0.13/tests/test_fact_ordering.py +183 -0
  69. hindsight_api-0.0.13/tests/test_http_api_integration.py +460 -0
  70. hindsight_api-0.0.13/tests/test_link_utils.py +256 -0
  71. hindsight_api-0.0.13/tests/test_mcp_api_integration.py +178 -0
  72. hindsight_api-0.0.13/tests/test_observations.py +339 -0
  73. hindsight_api-0.0.13/tests/test_query_analyzer.py +285 -0
  74. hindsight_api-0.0.13/tests/test_retain.py +1597 -0
  75. hindsight_api-0.0.13/tests/test_search_trace.py +140 -0
  76. hindsight_api-0.0.13/tests/test_temporal_ranges.py +146 -0
  77. hindsight_api-0.0.13/tests/test_think.py +150 -0
@@ -0,0 +1,34 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ # Environment variables
13
+ .env
14
+
15
+ # IDE
16
+ .idea/
17
+ .vscode/
18
+ *.swp
19
+ *.swo
20
+
21
+ # NLTK data (will be downloaded automatically)
22
+ nltk_data/
23
+
24
+ # Large benchmark datasets (will be downloaded automatically)
25
+ **/longmemeval_s_cleaned.json
26
+
27
+ # Debug logs
28
+ logs/
29
+
30
+ .DS_Store
31
+
32
+
33
+ hindsight-dev/benchmarks/locomo/results/
34
+ hindsight-dev/benchmarks/longmemeval/results/
@@ -0,0 +1,41 @@
1
+ Metadata-Version: 2.4
2
+ Name: hindsight-api
3
+ Version: 0.0.13
4
+ Summary: Temporal + Semantic + Entity Memory System for AI agents using PostgreSQL
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: alembic>=1.17.1
7
+ Requires-Dist: asyncpg>=0.29.0
8
+ Requires-Dist: dateparser>=1.2.2
9
+ Requires-Dist: fastapi[standard]>=0.120.3
10
+ Requires-Dist: fastmcp>=2.0.0
11
+ Requires-Dist: greenlet>=3.2.4
12
+ Requires-Dist: httpx>=0.27.0
13
+ Requires-Dist: langchain-text-splitters>=0.3.0
14
+ Requires-Dist: openai>=1.0.0
15
+ Requires-Dist: opentelemetry-api>=1.20.0
16
+ Requires-Dist: opentelemetry-exporter-prometheus>=0.41b0
17
+ Requires-Dist: opentelemetry-instrumentation-fastapi>=0.41b0
18
+ Requires-Dist: opentelemetry-sdk>=1.20.0
19
+ Requires-Dist: pgvector>=0.4.1
20
+ Requires-Dist: psycopg2-binary>=2.9.11
21
+ Requires-Dist: pydantic>=2.0.0
22
+ Requires-Dist: python-dateutil>=2.8.0
23
+ Requires-Dist: python-dotenv>=1.0.0
24
+ Requires-Dist: rich>=13.0.0
25
+ Requires-Dist: sentence-transformers>=2.2.0
26
+ Requires-Dist: sqlalchemy>=2.0.44
27
+ Requires-Dist: tiktoken>=0.12.0
28
+ Requires-Dist: torch>=2.0.0
29
+ Requires-Dist: transformers>=4.30.0
30
+ Requires-Dist: uvicorn>=0.38.0
31
+ Requires-Dist: wsproto>=1.0.0
32
+ Provides-Extra: test
33
+ Requires-Dist: filelock>=3.0.0; extra == 'test'
34
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'test'
35
+ Requires-Dist: pytest-timeout>=2.4.0; extra == 'test'
36
+ Requires-Dist: pytest-xdist>=3.0.0; extra == 'test'
37
+ Requires-Dist: pytest>=7.0.0; extra == 'test'
38
+ Requires-Dist: testcontainers[postgres]>=4.0.0; extra == 'test'
39
+ Description-Content-Type: text/markdown
40
+
41
+ # Memory
@@ -0,0 +1 @@
1
+ # Memory
@@ -0,0 +1 @@
1
+ Generic single-database configuration.
@@ -0,0 +1,129 @@
1
+ """
2
+ Alembic environment configuration for SQLAlchemy with pgvector.
3
+ Uses synchronous psycopg2 driver for migrations to avoid pgbouncer issues.
4
+ """
5
+ import logging
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ from sqlalchemy import pool, engine_from_config
11
+ from sqlalchemy.engine import Connection
12
+
13
+ from alembic import context
14
+ from dotenv import load_dotenv
15
+
16
+ # Import your models here
17
+ from hindsight_api.models import Base
18
+
19
# Load environment variables based on HINDSIGHT_API_DATABASE_URL env var or default to local
def load_env():
    """Load environment variables from a workspace-level ``.env`` file.

    Does nothing when ``HINDSIGHT_API_DATABASE_URL`` is already present in
    the environment (e.g. injected by CI/CD), so an explicit setting always
    wins over the ``.env`` file.
    """
    # An explicitly provided database URL means there is nothing to load.
    if os.getenv("HINDSIGHT_API_DATABASE_URL"):
        return

    # The .env file is expected at the workspace root, three levels up
    # from this file (alembic/env.py -> alembic/ -> package root -> workspace).
    workspace_root = Path(__file__).parent.parent.parent
    dotenv_path = workspace_root / ".env"

    if dotenv_path.exists():
        load_dotenv(dotenv_path)

load_env()
34
+
35
+ # this is the Alembic Config object, which provides
36
+ # access to the values within the .ini file in use.
37
+ config = context.config
38
+
39
+ # Note: We don't call fileConfig() here to avoid overriding the application's logging configuration.
40
+ # Alembic will use the existing logging configuration from the application.
41
+
42
+ # add your model's MetaData object here
43
+ # for 'autogenerate' support
44
+ target_metadata = Base.metadata
45
+
46
+ # other values from the config, defined by the needs of env.py,
47
+ # can be acquired:
48
+ # my_important_option = config.get_main_option("my_important_option")
49
+ # ... etc.
50
+
51
+
52
def get_database_url() -> str:
    """
    Get and process the database URL from config or environment.

    Resolution order: the programmatically set ``sqlalchemy.url`` config
    option, then the ``HINDSIGHT_API_DATABASE_URL`` environment variable.

    Returns the URL with the correct driver (psycopg2) for migrations, and
    writes the processed URL back into the Alembic config so that
    ``engine_from_config`` picks it up.

    Raises:
        ValueError: if no URL is found in either location.
    """
    # Get database URL from config (set programmatically) or environment
    database_url = config.get_main_option("sqlalchemy.url")
    if not database_url:
        database_url = os.getenv("HINDSIGHT_API_DATABASE_URL")
    if not database_url:
        raise ValueError(
            "Database URL not found. "
            "Set HINDSIGHT_API_DATABASE_URL environment variable or pass database_url to run_migrations()."
        )

    # For migrations, use psycopg2 (sync driver) to avoid pgbouncer prepared statement issues
    if database_url.startswith("postgresql+asyncpg://"):
        database_url = database_url.replace("postgresql+asyncpg://", "postgresql://", 1)
    elif database_url.startswith("postgres+asyncpg://"):
        database_url = database_url.replace("postgres+asyncpg://", "postgresql://", 1)
    elif database_url.startswith("postgres://"):
        # Legacy scheme emitted by some providers (e.g. Heroku); SQLAlchemy 2.x
        # no longer accepts the "postgres" dialect name, so normalize it.
        database_url = database_url.replace("postgres://", "postgresql://", 1)

    # Update config with processed URL for engine_from_config to use
    config.set_main_option("sqlalchemy.url", database_url)

    return database_url
78
+
79
+
80
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    Configures the Alembic context with only a URL — no Engine is created,
    so no DBAPI needs to be importable. Any context.execute() calls emit
    SQL text to the script output instead of hitting a database.
    """
    logging.info("running offline")
    url = get_database_url()

    offline_options = {
        "url": url,
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
104
+
105
+
106
def run_migrations_online() -> None:
    """Run migrations in 'online' mode with synchronous engine."""
    # Normalizes the URL (asyncpg -> psycopg2) and stores it in the config.
    get_database_url()

    ini_section = config.get_section(config.config_ini_section, {})
    connectable = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as conn:
        context.configure(connection=conn, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
124
+
125
+
126
# Alembic entry point: choose SQL-script emission (offline) or a live
# database connection (online) depending on how alembic was invoked.
_runner = run_migrations_offline if context.is_offline_mode() else run_migrations_online
_runner()
@@ -0,0 +1,28 @@
1
+ """${message}
2
+
3
+ Revision ID: ${up_revision}
4
+ Revises: ${down_revision | comma,n}
5
+ Create Date: ${create_date}
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ ${imports if imports else ""}
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = ${repr(up_revision)}
16
+ down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
17
+ branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
18
+ depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
19
+
20
+
21
+ def upgrade() -> None:
22
+ """Upgrade schema."""
23
+ ${upgrades if upgrades else "pass"}
24
+
25
+
26
+ def downgrade() -> None:
27
+ """Downgrade schema."""
28
+ ${downgrades if downgrades else "pass"}
@@ -0,0 +1,275 @@
1
+ """initial_schema
2
+
3
+ Revision ID: 5a366d414dce
4
+ Revises:
5
+ Create Date: 2025-11-27 11:54:19.228030
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ from sqlalchemy.dialects import postgresql
13
+ from pgvector.sqlalchemy import Vector
14
+
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '5a366d414dce'
18
+ down_revision: Union[str, Sequence[str], None] = None
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
def upgrade() -> None:
    """Upgrade schema - create all tables from scratch.

    Creates the required Postgres extensions, the core tables (banks,
    documents, async_operations, entities, memory_units,
    entity_cooccurrences, memory_links, unit_entities), a generated
    tsvector column + GIN index for full-text search, an HNSW index for
    vector similarity, and the memory_units_bm25 materialized view.
    """

    # Enable required extensions
    op.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp"')
    op.execute('CREATE EXTENSION IF NOT EXISTS vector')

    # Create banks table
    op.create_table(
        'banks',
        sa.Column('bank_id', sa.Text(), nullable=False),
        sa.Column('name', sa.Text(), nullable=True),
        sa.Column('personality', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
        sa.Column('background', sa.Text(), nullable=True),
        sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.PrimaryKeyConstraint('bank_id', name=op.f('pk_banks'))
    )

    # Create documents table (composite PK: a document id is scoped to its bank)
    op.create_table(
        'documents',
        sa.Column('id', sa.Text(), nullable=False),
        sa.Column('bank_id', sa.Text(), nullable=False),
        sa.Column('original_text', sa.Text(), nullable=True),
        sa.Column('content_hash', sa.Text(), nullable=True),
        sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
        sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.PrimaryKeyConstraint('id', 'bank_id', name=op.f('pk_documents'))
    )
    op.create_index('idx_documents_bank_id', 'documents', ['bank_id'])
    op.create_index('idx_documents_content_hash', 'documents', ['content_hash'])

    # Create async_operations table
    op.create_table(
        'async_operations',
        sa.Column('operation_id', postgresql.UUID(as_uuid=True), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('bank_id', sa.Text(), nullable=False),
        sa.Column('operation_type', sa.Text(), nullable=False),
        sa.Column('status', sa.Text(), server_default='pending', nullable=False),
        sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('completed_at', postgresql.TIMESTAMP(timezone=True), nullable=True),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('result_metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
        sa.PrimaryKeyConstraint('operation_id', name=op.f('pk_async_operations')),
        sa.CheckConstraint("status IN ('pending', 'processing', 'completed', 'failed')", name='async_operations_status_check')
    )
    op.create_index('idx_async_operations_bank_id', 'async_operations', ['bank_id'])
    op.create_index('idx_async_operations_status', 'async_operations', ['status'])
    op.create_index('idx_async_operations_bank_status', 'async_operations', ['bank_id', 'status'])

    # Create entities table
    op.create_table(
        'entities',
        sa.Column('id', postgresql.UUID(as_uuid=True), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('canonical_name', sa.Text(), nullable=False),
        sa.Column('bank_id', sa.Text(), nullable=False),
        sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
        sa.Column('first_seen', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('last_seen', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('mention_count', sa.Integer(), server_default='1', nullable=False),
        sa.PrimaryKeyConstraint('id', name=op.f('pk_entities'))
    )
    op.create_index('idx_entities_bank_id', 'entities', ['bank_id'])
    op.create_index('idx_entities_canonical_name', 'entities', ['canonical_name'])
    op.create_index('idx_entities_bank_name', 'entities', ['bank_id', 'canonical_name'])
    # Create unique index on (bank_id, LOWER(canonical_name)) for entity resolution
    # (raw SQL: alembic's create_index has no portable expression-index support here)
    op.execute('CREATE UNIQUE INDEX idx_entities_bank_lower_name ON entities (bank_id, LOWER(canonical_name))')

    # Create memory_units table
    op.create_table(
        'memory_units',
        sa.Column('id', postgresql.UUID(as_uuid=True), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('bank_id', sa.Text(), nullable=False),
        sa.Column('document_id', sa.Text(), nullable=True),
        sa.Column('text', sa.Text(), nullable=False),
        # 384-dim embeddings — presumably sized for the configured
        # sentence-transformers model; confirm against embeddings.py.
        sa.Column('embedding', Vector(384), nullable=True),
        sa.Column('context', sa.Text(), nullable=True),
        sa.Column('event_date', postgresql.TIMESTAMP(timezone=True), nullable=False),
        sa.Column('occurred_start', postgresql.TIMESTAMP(timezone=True), nullable=True),
        sa.Column('occurred_end', postgresql.TIMESTAMP(timezone=True), nullable=True),
        sa.Column('mentioned_at', postgresql.TIMESTAMP(timezone=True), nullable=True),
        sa.Column('fact_type', sa.Text(), server_default='world', nullable=False),
        sa.Column('confidence_score', sa.Float(), nullable=True),
        sa.Column('access_count', sa.Integer(), server_default='0', nullable=False),
        sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
        sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.ForeignKeyConstraint(['document_id', 'bank_id'], ['documents.id', 'documents.bank_id'], name='memory_units_document_fkey', ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id', name=op.f('pk_memory_units')),
        sa.CheckConstraint("fact_type IN ('world', 'bank', 'opinion', 'observation')", name='memory_units_fact_type_check'),
        sa.CheckConstraint("confidence_score IS NULL OR (confidence_score >= 0.0 AND confidence_score <= 1.0)", name='memory_units_confidence_range_check'),
        # Opinions must carry a confidence score; observations may or may not;
        # all other fact types must not.
        sa.CheckConstraint(
            "(fact_type = 'opinion' AND confidence_score IS NOT NULL) OR "
            "(fact_type = 'observation') OR "
            "(fact_type NOT IN ('opinion', 'observation') AND confidence_score IS NULL)",
            name='confidence_score_fact_type_check'
        )
    )

    # Add search_vector column for full-text search
    op.execute("""
        ALTER TABLE memory_units
        ADD COLUMN search_vector tsvector
        GENERATED ALWAYS AS (to_tsvector('english', COALESCE(text, '') || ' ' || COALESCE(context, ''))) STORED
    """)

    op.create_index('idx_memory_units_bank_id', 'memory_units', ['bank_id'])
    op.create_index('idx_memory_units_document_id', 'memory_units', ['document_id'])
    op.create_index('idx_memory_units_event_date', 'memory_units', [sa.text('event_date DESC')])
    op.create_index('idx_memory_units_bank_date', 'memory_units', ['bank_id', sa.text('event_date DESC')])
    op.create_index('idx_memory_units_access_count', 'memory_units', [sa.text('access_count DESC')])
    op.create_index('idx_memory_units_fact_type', 'memory_units', ['fact_type'])
    op.create_index('idx_memory_units_bank_fact_type', 'memory_units', ['bank_id', 'fact_type'])
    op.create_index('idx_memory_units_bank_type_date', 'memory_units', ['bank_id', 'fact_type', sa.text('event_date DESC')])
    # Partial indexes serving opinion/observation-specific query paths
    op.create_index('idx_memory_units_opinion_confidence', 'memory_units', ['bank_id', sa.text('confidence_score DESC')], postgresql_where=sa.text("fact_type = 'opinion'"))
    op.create_index('idx_memory_units_opinion_date', 'memory_units', ['bank_id', sa.text('event_date DESC')], postgresql_where=sa.text("fact_type = 'opinion'"))
    op.create_index('idx_memory_units_observation_date', 'memory_units', ['bank_id', sa.text('event_date DESC')], postgresql_where=sa.text("fact_type = 'observation'"))
    # HNSW approximate-nearest-neighbour index with cosine distance (pgvector)
    op.create_index('idx_memory_units_embedding', 'memory_units', ['embedding'], postgresql_using='hnsw', postgresql_ops={'embedding': 'vector_cosine_ops'})

    # Create BM25 full-text search index on search_vector
    op.execute("""
        CREATE INDEX idx_memory_units_text_search ON memory_units
        USING gin(search_vector)
    """)

    # NOTE(review): doc_length_factor is computed against the table's average
    # text length at view (re)creation/refresh time — on an empty table the
    # subquery yields NULL, but then the view has no rows anyway.
    op.execute("""
        CREATE MATERIALIZED VIEW memory_units_bm25 AS
        SELECT
            id,
            bank_id,
            text,
            to_tsvector('english', text) AS text_vector,
            log(1.0 + length(text)::float / (SELECT avg(length(text)) FROM memory_units)) AS doc_length_factor
        FROM memory_units
    """)

    op.create_index('idx_memory_units_bm25_bank', 'memory_units_bm25', ['bank_id'])
    op.create_index('idx_memory_units_bm25_text_vector', 'memory_units_bm25', ['text_vector'], postgresql_using='gin')

    # Create entity_cooccurrences table (unordered pair stored as id1 < id2)
    op.create_table(
        'entity_cooccurrences',
        sa.Column('entity_id_1', postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column('entity_id_2', postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column('cooccurrence_count', sa.Integer(), server_default='1', nullable=False),
        sa.Column('last_cooccurred', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.ForeignKeyConstraint(['entity_id_1'], ['entities.id'], name=op.f('fk_entity_cooccurrences_entity_id_1_entities'), ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['entity_id_2'], ['entities.id'], name=op.f('fk_entity_cooccurrences_entity_id_2_entities'), ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('entity_id_1', 'entity_id_2', name=op.f('pk_entity_cooccurrences')),
        sa.CheckConstraint('entity_id_1 < entity_id_2', name='entity_cooccurrence_order_check')
    )
    op.create_index('idx_entity_cooccurrences_entity1', 'entity_cooccurrences', ['entity_id_1'])
    op.create_index('idx_entity_cooccurrences_entity2', 'entity_cooccurrences', ['entity_id_2'])
    op.create_index('idx_entity_cooccurrences_count', 'entity_cooccurrences', [sa.text('cooccurrence_count DESC')])

    # Create memory_links table (typed edges between memory units)
    op.create_table(
        'memory_links',
        sa.Column('from_unit_id', postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column('to_unit_id', postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column('link_type', sa.Text(), nullable=False),
        sa.Column('entity_id', postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column('weight', sa.Float(), server_default='1.0', nullable=False),
        sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.ForeignKeyConstraint(['entity_id'], ['entities.id'], name=op.f('fk_memory_links_entity_id_entities'), ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['from_unit_id'], ['memory_units.id'], name=op.f('fk_memory_links_from_unit_id_memory_units'), ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['to_unit_id'], ['memory_units.id'], name=op.f('fk_memory_links_to_unit_id_memory_units'), ondelete='CASCADE'),
        sa.CheckConstraint("link_type IN ('temporal', 'semantic', 'entity', 'causes', 'caused_by', 'enables', 'prevents')", name='memory_links_link_type_check'),
        sa.CheckConstraint('weight >= 0.0 AND weight <= 1.0', name='memory_links_weight_check')
    )
    # Create unique constraint using COALESCE for nullable entity_id
    # (NULLs are never equal, so a plain unique index would allow duplicates)
    op.execute("CREATE UNIQUE INDEX idx_memory_links_unique ON memory_links (from_unit_id, to_unit_id, link_type, COALESCE(entity_id, '00000000-0000-0000-0000-000000000000'::uuid))")
    op.create_index('idx_memory_links_from_unit', 'memory_links', ['from_unit_id'])
    op.create_index('idx_memory_links_to_unit', 'memory_links', ['to_unit_id'])
    op.create_index('idx_memory_links_entity', 'memory_links', ['entity_id'])
    op.create_index('idx_memory_links_link_type', 'memory_links', ['link_type'])

    # Create unit_entities table (many-to-many: memory_units <-> entities)
    op.create_table(
        'unit_entities',
        sa.Column('unit_id', postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column('entity_id', postgresql.UUID(as_uuid=True), nullable=False),
        sa.ForeignKeyConstraint(['entity_id'], ['entities.id'], name=op.f('fk_unit_entities_entity_id_entities'), ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['unit_id'], ['memory_units.id'], name=op.f('fk_unit_entities_unit_id_memory_units'), ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('unit_id', 'entity_id', name=op.f('pk_unit_entities'))
    )
    op.create_index('idx_unit_entities_unit', 'unit_entities', ['unit_id'])
    op.create_index('idx_unit_entities_entity', 'unit_entities', ['entity_id'])
+ op.create_index('idx_unit_entities_entity', 'unit_entities', ['entity_id'])
214
+
215
+
216
+ def downgrade() -> None:
217
+ """Downgrade schema - drop all tables."""
218
+
219
+ # Drop tables in reverse dependency order
220
+ op.drop_index('idx_unit_entities_entity', table_name='unit_entities')
221
+ op.drop_index('idx_unit_entities_unit', table_name='unit_entities')
222
+ op.drop_table('unit_entities')
223
+
224
+ op.drop_index('idx_memory_links_link_type', table_name='memory_links')
225
+ op.drop_index('idx_memory_links_entity', table_name='memory_links')
226
+ op.drop_index('idx_memory_links_to_unit', table_name='memory_links')
227
+ op.drop_index('idx_memory_links_from_unit', table_name='memory_links')
228
+ op.execute('DROP INDEX IF EXISTS idx_memory_links_unique')
229
+ op.drop_table('memory_links')
230
+
231
+ op.drop_index('idx_entity_cooccurrences_count', table_name='entity_cooccurrences')
232
+ op.drop_index('idx_entity_cooccurrences_entity2', table_name='entity_cooccurrences')
233
+ op.drop_index('idx_entity_cooccurrences_entity1', table_name='entity_cooccurrences')
234
+ op.drop_table('entity_cooccurrences')
235
+
236
+ # Drop BM25 materialized view and index
237
+ op.drop_index('idx_memory_units_bm25_text_vector', table_name='memory_units_bm25')
238
+ op.drop_index('idx_memory_units_bm25_bank', table_name='memory_units_bm25')
239
+ op.execute('DROP MATERIALIZED VIEW IF EXISTS memory_units_bm25')
240
+
241
+ op.drop_index('idx_memory_units_embedding', table_name='memory_units')
242
+ op.drop_index('idx_memory_units_observation_date', table_name='memory_units')
243
+ op.drop_index('idx_memory_units_opinion_date', table_name='memory_units')
244
+ op.drop_index('idx_memory_units_opinion_confidence', table_name='memory_units')
245
+ op.drop_index('idx_memory_units_bank_type_date', table_name='memory_units')
246
+ op.drop_index('idx_memory_units_bank_fact_type', table_name='memory_units')
247
+ op.drop_index('idx_memory_units_fact_type', table_name='memory_units')
248
+ op.drop_index('idx_memory_units_access_count', table_name='memory_units')
249
+ op.drop_index('idx_memory_units_bank_date', table_name='memory_units')
250
+ op.drop_index('idx_memory_units_event_date', table_name='memory_units')
251
+ op.drop_index('idx_memory_units_document_id', table_name='memory_units')
252
+ op.drop_index('idx_memory_units_bank_id', table_name='memory_units')
253
+ op.execute('DROP INDEX IF EXISTS idx_memory_units_text_search')
254
+ op.drop_table('memory_units')
255
+
256
+ op.execute('DROP INDEX IF EXISTS idx_entities_bank_lower_name')
257
+ op.drop_index('idx_entities_bank_name', table_name='entities')
258
+ op.drop_index('idx_entities_canonical_name', table_name='entities')
259
+ op.drop_index('idx_entities_bank_id', table_name='entities')
260
+ op.drop_table('entities')
261
+
262
+ op.drop_index('idx_async_operations_bank_status', table_name='async_operations')
263
+ op.drop_index('idx_async_operations_status', table_name='async_operations')
264
+ op.drop_index('idx_async_operations_bank_id', table_name='async_operations')
265
+ op.drop_table('async_operations')
266
+
267
+ op.drop_index('idx_documents_content_hash', table_name='documents')
268
+ op.drop_index('idx_documents_bank_id', table_name='documents')
269
+ op.drop_table('documents')
270
+
271
+ op.drop_table('banks')
272
+
273
+ # Drop extensions (optional - comment out if you want to keep them)
274
+ # op.execute('DROP EXTENSION IF EXISTS vector')
275
+ # op.execute('DROP EXTENSION IF EXISTS "uuid-ossp"')
@@ -0,0 +1,70 @@
1
+ """add_chunks_table
2
+
3
+ Revision ID: b7c4d8e9f1a2
4
+ Revises: 5a366d414dce
5
+ Create Date: 2025-11-28 00:00:00.000000
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ from sqlalchemy.dialects import postgresql
13
+
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision: str = 'b7c4d8e9f1a2'
17
+ down_revision: Union[str, Sequence[str], None] = '5a366d414dce'
18
+ branch_labels: Union[str, Sequence[str], None] = None
19
+ depends_on: Union[str, Sequence[str], None] = None
20
+
21
+
22
def upgrade() -> None:
    """Add chunks table and link memory_units to chunks.

    Introduces a chunks table (one row per text chunk of a document) and a
    nullable memory_units.chunk_id FK so each fact can point back at the
    chunk it was extracted from.
    """

    # Create chunks table with single text PK (bank_id_document_id_chunk_index)
    op.create_table(
        'chunks',
        sa.Column('chunk_id', sa.Text(), nullable=False),
        sa.Column('document_id', sa.Text(), nullable=False),
        sa.Column('bank_id', sa.Text(), nullable=False),
        sa.Column('chunk_index', sa.Integer(), nullable=False),
        sa.Column('chunk_text', sa.Text(), nullable=False),
        sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        # Composite FK matches documents' composite PK; chunks die with their document
        sa.ForeignKeyConstraint(['document_id', 'bank_id'], ['documents.id', 'documents.bank_id'], name='chunks_document_fkey', ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('chunk_id', name=op.f('pk_chunks'))
    )

    # Add indexes for efficient queries
    op.create_index('idx_chunks_document_id', 'chunks', ['document_id'])
    op.create_index('idx_chunks_bank_id', 'chunks', ['bank_id'])

    # Add chunk_id column to memory_units (nullable, as existing records won't have chunks)
    op.add_column('memory_units', sa.Column('chunk_id', sa.Text(), nullable=True))

    # Add foreign key constraint to chunks table
    # (SET NULL: deleting a chunk orphans, but keeps, its memory units)
    op.create_foreign_key(
        'memory_units_chunk_fkey',
        'memory_units',
        'chunks',
        ['chunk_id'],
        ['chunk_id'],
        ondelete='SET NULL'
    )

    # Add index on chunk_id for efficient lookups
    op.create_index('idx_memory_units_chunk_id', 'memory_units', ['chunk_id'])
57
+
58
+
59
def downgrade() -> None:
    """Remove chunks table and chunk_id from memory_units.

    Reverses upgrade(): the memory_units-side additions go first so the
    chunks table has no remaining inbound references when dropped.
    """

    # Drop index and foreign key from memory_units
    op.drop_index('idx_memory_units_chunk_id', table_name='memory_units')
    op.drop_constraint('memory_units_chunk_fkey', 'memory_units', type_='foreignkey')
    op.drop_column('memory_units', 'chunk_id')

    # Drop chunks table indexes and table
    op.drop_index('idx_chunks_bank_id', table_name='chunks')
    op.drop_index('idx_chunks_document_id', table_name='chunks')
    op.drop_table('chunks')
@@ -0,0 +1,39 @@
1
+ """add_retain_params_to_documents
2
+
3
+ Revision ID: c8e5f2a3b4d1
4
+ Revises: b7c4d8e9f1a2
5
+ Create Date: 2025-12-02 00:00:00.000000
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ from sqlalchemy.dialects import postgresql
13
+
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision: str = 'c8e5f2a3b4d1'
17
+ down_revision: Union[str, Sequence[str], None] = 'b7c4d8e9f1a2'
18
+ branch_labels: Union[str, Sequence[str], None] = None
19
+ depends_on: Union[str, Sequence[str], None] = None
20
+
21
+
22
+ def upgrade() -> None:
23
+ """Add retain_params JSONB column to documents table."""
24
+
25
+ # Add retain_params column to store parameters passed during retain
26
+ op.add_column('documents', sa.Column('retain_params', postgresql.JSONB(), nullable=True))
27
+
28
+ # Add index for efficient queries on retain_params
29
+ op.create_index('idx_documents_retain_params', 'documents', ['retain_params'], postgresql_using='gin')
30
+
31
+
32
+ def downgrade() -> None:
33
+ """Remove retain_params column from documents table."""
34
+
35
+ # Drop index
36
+ op.drop_index('idx_documents_retain_params', table_name='documents')
37
+
38
+ # Drop column
39
+ op.drop_column('documents', 'retain_params')
@@ -0,0 +1,38 @@
1
+ """
2
+ Memory System for AI Agents.
3
+
4
+ Temporal + Semantic Memory Architecture using PostgreSQL with pgvector.
5
+ """
6
+ from .engine.memory_engine import MemoryEngine
7
+ from .engine.search.trace import (
8
+ SearchTrace,
9
+ QueryInfo,
10
+ EntryPoint,
11
+ NodeVisit,
12
+ WeightComponents,
13
+ LinkInfo,
14
+ PruningDecision,
15
+ SearchSummary,
16
+ SearchPhaseMetrics,
17
+ )
18
+ from .engine.search.tracer import SearchTracer
19
+ from .engine.embeddings import Embeddings, SentenceTransformersEmbeddings
20
+ from .engine.llm_wrapper import LLMConfig
21
+
22
+ __all__ = [
23
+ "MemoryEngine",
24
+ "SearchTrace",
25
+ "SearchTracer",
26
+ "QueryInfo",
27
+ "EntryPoint",
28
+ "NodeVisit",
29
+ "WeightComponents",
30
+ "LinkInfo",
31
+ "PruningDecision",
32
+ "SearchSummary",
33
+ "SearchPhaseMetrics",
34
+ "Embeddings",
35
+ "SentenceTransformersEmbeddings",
36
+ "LLMConfig",
37
+ ]
38
+ __version__ = "0.1.0"