hindsight-api 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package as they appear in their public registry. It is provided for informational purposes only.
Files changed (32)
  1. hindsight_api/__init__.py +10 -2
  2. hindsight_api/alembic/README +1 -0
  3. hindsight_api/alembic/env.py +146 -0
  4. hindsight_api/alembic/script.py.mako +28 -0
  5. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +274 -0
  6. hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
  7. hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +39 -0
  8. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +48 -0
  9. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +62 -0
  10. hindsight_api/alembic/versions/rename_personality_to_disposition.py +65 -0
  11. hindsight_api/api/http.py +84 -86
  12. hindsight_api/config.py +154 -0
  13. hindsight_api/engine/__init__.py +7 -2
  14. hindsight_api/engine/cross_encoder.py +219 -15
  15. hindsight_api/engine/embeddings.py +192 -18
  16. hindsight_api/engine/llm_wrapper.py +88 -139
  17. hindsight_api/engine/memory_engine.py +71 -51
  18. hindsight_api/engine/retain/bank_utils.py +2 -2
  19. hindsight_api/engine/retain/fact_extraction.py +1 -1
  20. hindsight_api/engine/search/reranking.py +6 -10
  21. hindsight_api/engine/search/tracer.py +1 -1
  22. hindsight_api/main.py +201 -0
  23. hindsight_api/migrations.py +7 -7
  24. hindsight_api/server.py +43 -0
  25. {hindsight_api-0.1.0.dist-info → hindsight_api-0.1.1.dist-info}/METADATA +1 -1
  26. {hindsight_api-0.1.0.dist-info → hindsight_api-0.1.1.dist-info}/RECORD +28 -19
  27. hindsight_api-0.1.1.dist-info/entry_points.txt +2 -0
  28. hindsight_api/cli.py +0 -127
  29. hindsight_api/web/__init__.py +0 -12
  30. hindsight_api/web/server.py +0 -109
  31. hindsight_api-0.1.0.dist-info/entry_points.txt +0 -2
  32. {hindsight_api-0.1.0.dist-info → hindsight_api-0.1.1.dist-info}/WHEEL +0 -0
hindsight_api/__init__.py CHANGED
@@ -16,11 +16,15 @@ from .engine.search.trace import (
      SearchPhaseMetrics,
  )
  from .engine.search.tracer import SearchTracer
- from .engine.embeddings import Embeddings, SentenceTransformersEmbeddings
+ from .engine.embeddings import Embeddings, LocalSTEmbeddings, RemoteTEIEmbeddings
+ from .engine.cross_encoder import CrossEncoderModel, LocalSTCrossEncoder, RemoteTEICrossEncoder
  from .engine.llm_wrapper import LLMConfig
+ from .config import HindsightConfig, get_config

  __all__ = [
      "MemoryEngine",
+     "HindsightConfig",
+     "get_config",
      "SearchTrace",
      "SearchTracer",
      "QueryInfo",
@@ -32,7 +36,11 @@ __all__ = [
      "SearchSummary",
      "SearchPhaseMetrics",
      "Embeddings",
-     "SentenceTransformersEmbeddings",
+     "LocalSTEmbeddings",
+     "RemoteTEIEmbeddings",
+     "CrossEncoderModel",
+     "LocalSTCrossEncoder",
+     "RemoteTEICrossEncoder",
      "LLMConfig",
  ]
  __version__ = "0.1.0"
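
The __init__.py hunks above amount to a public-API rename: the single SentenceTransformersEmbeddings export is replaced by backend-specific embedding and cross-encoder classes, and the new config module is re-exported. Code that imported the old name from 0.1.0 will fail with ImportError against 0.1.1. A minimal sketch of imports against 0.1.1 follows; the names are taken from the __all__ list above, and nothing beyond the import itself (no constructor signatures) is assumed from this diff:

# hindsight-api 0.1.0:
# from hindsight_api import SentenceTransformersEmbeddings

# hindsight-api 0.1.1 (names from the updated __all__; illustrative only):
from hindsight_api import (
    Embeddings,             # still exported
    LocalSTEmbeddings,      # local sentence-transformers backend, per the name
    RemoteTEIEmbeddings,    # remote text-embeddings-inference backend, per the name
    CrossEncoderModel,
    LocalSTCrossEncoder,
    RemoteTEICrossEncoder,
    HindsightConfig,
    get_config,
)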
hindsight_api/alembic/README ADDED
@@ -0,0 +1 @@
+ Generic single-database configuration.
hindsight_api/alembic/env.py ADDED
@@ -0,0 +1,146 @@
+ """
+ Alembic environment configuration for SQLAlchemy with pgvector.
+ Uses synchronous psycopg2 driver for migrations to avoid pgbouncer issues.
+ """
+ import logging
+ import os
+ import sys
+ from pathlib import Path
+
+ from sqlalchemy import pool, engine_from_config
+ from sqlalchemy.engine import Connection
+
+ from alembic import context
+ from dotenv import load_dotenv
+
+ # Import your models here
+ from hindsight_api.models import Base
+
+ # Load environment variables based on HINDSIGHT_API_DATABASE_URL env var or default to local
+ def load_env():
+     """Load environment variables from .env"""
+     # Check if HINDSIGHT_API_DATABASE_URL is already set (e.g., by CI/CD)
+     if os.getenv("HINDSIGHT_API_DATABASE_URL"):
+         return
+
+     # Look for .env file in the parent directory (root of the workspace)
+     root_dir = Path(__file__).parent.parent.parent
+     env_file = root_dir / ".env"
+
+     if env_file.exists():
+         load_dotenv(env_file)
+
+ load_env()
+
+ # this is the Alembic Config object, which provides
+ # access to the values within the .ini file in use.
+ config = context.config
+
+ # Note: We don't call fileConfig() here to avoid overriding the application's logging configuration.
+ # Alembic will use the existing logging configuration from the application.
+
+ # add your model's MetaData object here
+ # for 'autogenerate' support
+ target_metadata = Base.metadata
+
+ # other values from the config, defined by the needs of env.py,
+ # can be acquired:
+ # my_important_option = config.get_main_option("my_important_option")
+ # ... etc.
+
+
+ def get_database_url() -> str:
+     """
+     Get and process the database URL from config or environment.
+
+     Returns the URL with the correct driver (psycopg2) for migrations.
+     """
+     # Get database URL from config (set programmatically) or environment
+     database_url = config.get_main_option("sqlalchemy.url")
+     if not database_url:
+         database_url = os.getenv("HINDSIGHT_API_DATABASE_URL")
+     if not database_url:
+         raise ValueError(
+             "Database URL not found. "
+             "Set HINDSIGHT_API_DATABASE_URL environment variable or pass database_url to run_migrations()."
+         )
+
+     # For migrations, use psycopg2 (sync driver) to avoid pgbouncer prepared statement issues
+     if database_url.startswith("postgresql+asyncpg://"):
+         database_url = database_url.replace("postgresql+asyncpg://", "postgresql://", 1)
+     elif database_url.startswith("postgres+asyncpg://"):
+         database_url = database_url.replace("postgres+asyncpg://", "postgresql://", 1)
+
+     # Update config with processed URL for engine_from_config to use
+     config.set_main_option("sqlalchemy.url", database_url)
+
+     return database_url
+
+
+ def run_migrations_offline() -> None:
+     """Run migrations in 'offline' mode.
+
+     This configures the context with just a URL
+     and not an Engine, though an Engine is acceptable
+     here as well. By skipping the Engine creation
+     we don't even need a DBAPI to be available.
+
+     Calls to context.execute() here emit the given string to the
+     script output.
+
+     """
+     logging.info("running offline")
+     database_url = get_database_url()
+
+     context.configure(
+         url=database_url,
+         target_metadata=target_metadata,
+         literal_binds=True,
+         dialect_opts={"paramstyle": "named"},
+     )
+
+     with context.begin_transaction():
+         context.run_migrations()
+
+
+ def run_migrations_online() -> None:
+     """Run migrations in 'online' mode with synchronous engine."""
+     from sqlalchemy import event, text
+
+     get_database_url()  # Process and set the database URL in config
+
+     connectable = engine_from_config(
+         config.get_section(config.config_ini_section, {}),
+         prefix="sqlalchemy.",
+         poolclass=pool.NullPool,
+     )
+
+     # Add event listener to ensure connection is in read-write mode
+     # This is needed for Supabase which may start connections in read-only mode
+     @event.listens_for(connectable, "connect")
+     def set_read_write_mode(dbapi_connection, connection_record):
+         cursor = dbapi_connection.cursor()
+         cursor.execute("SET SESSION CHARACTERISTICS AS TRANSACTION READ WRITE")
+         cursor.close()
+
+     with connectable.connect() as connection:
+         # Also explicitly set read-write mode on this connection
+         connection.execute(text("SET SESSION CHARACTERISTICS AS TRANSACTION READ WRITE"))
+         connection.commit()  # Commit the SET command
+
+         context.configure(
+             connection=connection,
+             target_metadata=target_metadata
+         )
+
+         with context.begin_transaction():
+             context.run_migrations()
+
+         # Explicit commit to ensure changes are persisted (especially for Supabase)
+         connection.commit()
+
+
+ if context.is_offline_mode():
+     run_migrations_offline()
+ else:
+     run_migrations_online()
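
get_database_url() normalizes the URL scheme before Alembic builds its engine, so the application can keep an asyncpg URL while migrations run on the synchronous psycopg2 driver. The snippet below is not part of the package; it is a standalone sketch of the same prefix rewrite, with a hypothetical URL used only to illustrate the scheme change:

def to_sync_url(database_url: str) -> str:
    """Rewrite an async SQLAlchemy URL to the sync psycopg2 form, mirroring env.py."""
    if database_url.startswith("postgresql+asyncpg://"):
        return database_url.replace("postgresql+asyncpg://", "postgresql://", 1)
    if database_url.startswith("postgres+asyncpg://"):
        return database_url.replace("postgres+asyncpg://", "postgresql://", 1)
    return database_url


# Hypothetical URL, shown only to illustrate the transformation:
print(to_sync_url("postgresql+asyncpg://user:secret@localhost:5432/hindsight"))
# -> postgresql://user:secret@localhost:5432/hindsight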
hindsight_api/alembic/script.py.mako ADDED
@@ -0,0 +1,28 @@
+ """${message}
+
+ Revision ID: ${up_revision}
+ Revises: ${down_revision | comma,n}
+ Create Date: ${create_date}
+
+ """
+ from typing import Sequence, Union
+
+ from alembic import op
+ import sqlalchemy as sa
+ ${imports if imports else ""}
+
+ # revision identifiers, used by Alembic.
+ revision: str = ${repr(up_revision)}
+ down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
+ branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+ depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+ def upgrade() -> None:
+     """Upgrade schema."""
+     ${upgrades if upgrades else "pass"}
+
+
+ def downgrade() -> None:
+     """Downgrade schema."""
+     ${downgrades if downgrades else "pass"}
hindsight_api/alembic/versions/5a366d414dce_initial_schema.py ADDED
@@ -0,0 +1,274 @@
+ """initial_schema
+
+ Revision ID: 5a366d414dce
+ Revises:
+ Create Date: 2025-11-27 11:54:19.228030
+
+ """
+ from typing import Sequence, Union
+
+ from alembic import op
+ import sqlalchemy as sa
+ from sqlalchemy.dialects import postgresql
+ from pgvector.sqlalchemy import Vector
+
+
+ # revision identifiers, used by Alembic.
+ revision: str = '5a366d414dce'
+ down_revision: Union[str, Sequence[str], None] = None
+ branch_labels: Union[str, Sequence[str], None] = None
+ depends_on: Union[str, Sequence[str], None] = None
+
+
+ def upgrade() -> None:
+     """Upgrade schema - create all tables from scratch."""
+
+     # Enable required extensions
+     op.execute('CREATE EXTENSION IF NOT EXISTS vector')
+
+     # Create banks table
+     op.create_table(
+         'banks',
+         sa.Column('bank_id', sa.Text(), nullable=False),
+         sa.Column('name', sa.Text(), nullable=True),
+         sa.Column('personality', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
+         sa.Column('background', sa.Text(), nullable=True),
+         sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.PrimaryKeyConstraint('bank_id', name=op.f('pk_banks'))
+     )
+
+     # Create documents table
+     op.create_table(
+         'documents',
+         sa.Column('id', sa.Text(), nullable=False),
+         sa.Column('bank_id', sa.Text(), nullable=False),
+         sa.Column('original_text', sa.Text(), nullable=True),
+         sa.Column('content_hash', sa.Text(), nullable=True),
+         sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
+         sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.PrimaryKeyConstraint('id', 'bank_id', name=op.f('pk_documents'))
+     )
+     op.create_index('idx_documents_bank_id', 'documents', ['bank_id'])
+     op.create_index('idx_documents_content_hash', 'documents', ['content_hash'])
+
+     # Create async_operations table
+     op.create_table(
+         'async_operations',
+         sa.Column('operation_id', postgresql.UUID(as_uuid=True), server_default=sa.text('gen_random_uuid()'), nullable=False),
+         sa.Column('bank_id', sa.Text(), nullable=False),
+         sa.Column('operation_type', sa.Text(), nullable=False),
+         sa.Column('status', sa.Text(), server_default='pending', nullable=False),
+         sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('completed_at', postgresql.TIMESTAMP(timezone=True), nullable=True),
+         sa.Column('error_message', sa.Text(), nullable=True),
+         sa.Column('result_metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
+         sa.PrimaryKeyConstraint('operation_id', name=op.f('pk_async_operations')),
+         sa.CheckConstraint("status IN ('pending', 'processing', 'completed', 'failed')", name='async_operations_status_check')
+     )
+     op.create_index('idx_async_operations_bank_id', 'async_operations', ['bank_id'])
+     op.create_index('idx_async_operations_status', 'async_operations', ['status'])
+     op.create_index('idx_async_operations_bank_status', 'async_operations', ['bank_id', 'status'])
+
+     # Create entities table
+     op.create_table(
+         'entities',
+         sa.Column('id', postgresql.UUID(as_uuid=True), server_default=sa.text('gen_random_uuid()'), nullable=False),
+         sa.Column('canonical_name', sa.Text(), nullable=False),
+         sa.Column('bank_id', sa.Text(), nullable=False),
+         sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
+         sa.Column('first_seen', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('last_seen', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('mention_count', sa.Integer(), server_default='1', nullable=False),
+         sa.PrimaryKeyConstraint('id', name=op.f('pk_entities'))
+     )
+     op.create_index('idx_entities_bank_id', 'entities', ['bank_id'])
+     op.create_index('idx_entities_canonical_name', 'entities', ['canonical_name'])
+     op.create_index('idx_entities_bank_name', 'entities', ['bank_id', 'canonical_name'])
+     # Create unique index on (bank_id, LOWER(canonical_name)) for entity resolution
+     op.execute('CREATE UNIQUE INDEX idx_entities_bank_lower_name ON entities (bank_id, LOWER(canonical_name))')
+
+     # Create memory_units table
+     op.create_table(
+         'memory_units',
+         sa.Column('id', postgresql.UUID(as_uuid=True), server_default=sa.text('gen_random_uuid()'), nullable=False),
+         sa.Column('bank_id', sa.Text(), nullable=False),
+         sa.Column('document_id', sa.Text(), nullable=True),
+         sa.Column('text', sa.Text(), nullable=False),
+         sa.Column('embedding', Vector(384), nullable=True),
+         sa.Column('context', sa.Text(), nullable=True),
+         sa.Column('event_date', postgresql.TIMESTAMP(timezone=True), nullable=False),
+         sa.Column('occurred_start', postgresql.TIMESTAMP(timezone=True), nullable=True),
+         sa.Column('occurred_end', postgresql.TIMESTAMP(timezone=True), nullable=True),
+         sa.Column('mentioned_at', postgresql.TIMESTAMP(timezone=True), nullable=True),
+         sa.Column('fact_type', sa.Text(), server_default='world', nullable=False),
+         sa.Column('confidence_score', sa.Float(), nullable=True),
+         sa.Column('access_count', sa.Integer(), server_default='0', nullable=False),
+         sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
+         sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.ForeignKeyConstraint(['document_id', 'bank_id'], ['documents.id', 'documents.bank_id'], name='memory_units_document_fkey', ondelete='CASCADE'),
+         sa.PrimaryKeyConstraint('id', name=op.f('pk_memory_units')),
+         sa.CheckConstraint("fact_type IN ('world', 'bank', 'opinion', 'observation')", name='memory_units_fact_type_check'),
+         sa.CheckConstraint("confidence_score IS NULL OR (confidence_score >= 0.0 AND confidence_score <= 1.0)", name='memory_units_confidence_range_check'),
+         sa.CheckConstraint(
+             "(fact_type = 'opinion' AND confidence_score IS NOT NULL) OR "
+             "(fact_type = 'observation') OR "
+             "(fact_type NOT IN ('opinion', 'observation') AND confidence_score IS NULL)",
+             name='confidence_score_fact_type_check'
+         )
+     )
+
+     # Add search_vector column for full-text search
+     op.execute("""
+         ALTER TABLE memory_units
+         ADD COLUMN search_vector tsvector
+         GENERATED ALWAYS AS (to_tsvector('english', COALESCE(text, '') || ' ' || COALESCE(context, ''))) STORED
+     """)
+
+     op.create_index('idx_memory_units_bank_id', 'memory_units', ['bank_id'])
+     op.create_index('idx_memory_units_document_id', 'memory_units', ['document_id'])
+     op.create_index('idx_memory_units_event_date', 'memory_units', [sa.text('event_date DESC')])
+     op.create_index('idx_memory_units_bank_date', 'memory_units', ['bank_id', sa.text('event_date DESC')])
+     op.create_index('idx_memory_units_access_count', 'memory_units', [sa.text('access_count DESC')])
+     op.create_index('idx_memory_units_fact_type', 'memory_units', ['fact_type'])
+     op.create_index('idx_memory_units_bank_fact_type', 'memory_units', ['bank_id', 'fact_type'])
+     op.create_index('idx_memory_units_bank_type_date', 'memory_units', ['bank_id', 'fact_type', sa.text('event_date DESC')])
+     op.create_index('idx_memory_units_opinion_confidence', 'memory_units', ['bank_id', sa.text('confidence_score DESC')], postgresql_where=sa.text("fact_type = 'opinion'"))
+     op.create_index('idx_memory_units_opinion_date', 'memory_units', ['bank_id', sa.text('event_date DESC')], postgresql_where=sa.text("fact_type = 'opinion'"))
+     op.create_index('idx_memory_units_observation_date', 'memory_units', ['bank_id', sa.text('event_date DESC')], postgresql_where=sa.text("fact_type = 'observation'"))
+     op.create_index('idx_memory_units_embedding', 'memory_units', ['embedding'], postgresql_using='hnsw', postgresql_ops={'embedding': 'vector_cosine_ops'})
+
+     # Create BM25 full-text search index on search_vector
+     op.execute("""
+         CREATE INDEX idx_memory_units_text_search ON memory_units
+         USING gin(search_vector)
+     """)
+
+     op.execute("""
+         CREATE MATERIALIZED VIEW memory_units_bm25 AS
+         SELECT
+             id,
+             bank_id,
+             text,
+             to_tsvector('english', text) AS text_vector,
+             log(1.0 + length(text)::float / (SELECT avg(length(text)) FROM memory_units)) AS doc_length_factor
+         FROM memory_units
+     """)
+
+     op.create_index('idx_memory_units_bm25_bank', 'memory_units_bm25', ['bank_id'])
+     op.create_index('idx_memory_units_bm25_text_vector', 'memory_units_bm25', ['text_vector'], postgresql_using='gin')
+
+     # Create entity_cooccurrences table
+     op.create_table(
+         'entity_cooccurrences',
+         sa.Column('entity_id_1', postgresql.UUID(as_uuid=True), nullable=False),
+         sa.Column('entity_id_2', postgresql.UUID(as_uuid=True), nullable=False),
+         sa.Column('cooccurrence_count', sa.Integer(), server_default='1', nullable=False),
+         sa.Column('last_cooccurred', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.ForeignKeyConstraint(['entity_id_1'], ['entities.id'], name=op.f('fk_entity_cooccurrences_entity_id_1_entities'), ondelete='CASCADE'),
+         sa.ForeignKeyConstraint(['entity_id_2'], ['entities.id'], name=op.f('fk_entity_cooccurrences_entity_id_2_entities'), ondelete='CASCADE'),
+         sa.PrimaryKeyConstraint('entity_id_1', 'entity_id_2', name=op.f('pk_entity_cooccurrences')),
+         sa.CheckConstraint('entity_id_1 < entity_id_2', name='entity_cooccurrence_order_check')
+     )
+     op.create_index('idx_entity_cooccurrences_entity1', 'entity_cooccurrences', ['entity_id_1'])
+     op.create_index('idx_entity_cooccurrences_entity2', 'entity_cooccurrences', ['entity_id_2'])
+     op.create_index('idx_entity_cooccurrences_count', 'entity_cooccurrences', [sa.text('cooccurrence_count DESC')])
+
+     # Create memory_links table
+     op.create_table(
+         'memory_links',
+         sa.Column('from_unit_id', postgresql.UUID(as_uuid=True), nullable=False),
+         sa.Column('to_unit_id', postgresql.UUID(as_uuid=True), nullable=False),
+         sa.Column('link_type', sa.Text(), nullable=False),
+         sa.Column('entity_id', postgresql.UUID(as_uuid=True), nullable=True),
+         sa.Column('weight', sa.Float(), server_default='1.0', nullable=False),
+         sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.ForeignKeyConstraint(['entity_id'], ['entities.id'], name=op.f('fk_memory_links_entity_id_entities'), ondelete='CASCADE'),
+         sa.ForeignKeyConstraint(['from_unit_id'], ['memory_units.id'], name=op.f('fk_memory_links_from_unit_id_memory_units'), ondelete='CASCADE'),
+         sa.ForeignKeyConstraint(['to_unit_id'], ['memory_units.id'], name=op.f('fk_memory_links_to_unit_id_memory_units'), ondelete='CASCADE'),
+         sa.CheckConstraint("link_type IN ('temporal', 'semantic', 'entity', 'causes', 'caused_by', 'enables', 'prevents')", name='memory_links_link_type_check'),
+         sa.CheckConstraint('weight >= 0.0 AND weight <= 1.0', name='memory_links_weight_check')
+     )
+     # Create unique constraint using COALESCE for nullable entity_id
+     op.execute("CREATE UNIQUE INDEX idx_memory_links_unique ON memory_links (from_unit_id, to_unit_id, link_type, COALESCE(entity_id, '00000000-0000-0000-0000-000000000000'::uuid))")
+     op.create_index('idx_memory_links_from_unit', 'memory_links', ['from_unit_id'])
+     op.create_index('idx_memory_links_to_unit', 'memory_links', ['to_unit_id'])
+     op.create_index('idx_memory_links_entity', 'memory_links', ['entity_id'])
+     op.create_index('idx_memory_links_link_type', 'memory_links', ['link_type'])
+
+     # Create unit_entities table
+     op.create_table(
+         'unit_entities',
+         sa.Column('unit_id', postgresql.UUID(as_uuid=True), nullable=False),
+         sa.Column('entity_id', postgresql.UUID(as_uuid=True), nullable=False),
+         sa.ForeignKeyConstraint(['entity_id'], ['entities.id'], name=op.f('fk_unit_entities_entity_id_entities'), ondelete='CASCADE'),
+         sa.ForeignKeyConstraint(['unit_id'], ['memory_units.id'], name=op.f('fk_unit_entities_unit_id_memory_units'), ondelete='CASCADE'),
+         sa.PrimaryKeyConstraint('unit_id', 'entity_id', name=op.f('pk_unit_entities'))
+     )
+     op.create_index('idx_unit_entities_unit', 'unit_entities', ['unit_id'])
+     op.create_index('idx_unit_entities_entity', 'unit_entities', ['entity_id'])
+
+
+ def downgrade() -> None:
+     """Downgrade schema - drop all tables."""
+
+     # Drop tables in reverse dependency order
+     op.drop_index('idx_unit_entities_entity', table_name='unit_entities')
+     op.drop_index('idx_unit_entities_unit', table_name='unit_entities')
+     op.drop_table('unit_entities')
+
+     op.drop_index('idx_memory_links_link_type', table_name='memory_links')
+     op.drop_index('idx_memory_links_entity', table_name='memory_links')
+     op.drop_index('idx_memory_links_to_unit', table_name='memory_links')
+     op.drop_index('idx_memory_links_from_unit', table_name='memory_links')
+     op.execute('DROP INDEX IF EXISTS idx_memory_links_unique')
+     op.drop_table('memory_links')
+
+     op.drop_index('idx_entity_cooccurrences_count', table_name='entity_cooccurrences')
+     op.drop_index('idx_entity_cooccurrences_entity2', table_name='entity_cooccurrences')
+     op.drop_index('idx_entity_cooccurrences_entity1', table_name='entity_cooccurrences')
+     op.drop_table('entity_cooccurrences')
+
+     # Drop BM25 materialized view and index
+     op.drop_index('idx_memory_units_bm25_text_vector', table_name='memory_units_bm25')
+     op.drop_index('idx_memory_units_bm25_bank', table_name='memory_units_bm25')
+     op.execute('DROP MATERIALIZED VIEW IF EXISTS memory_units_bm25')
+
+     op.drop_index('idx_memory_units_embedding', table_name='memory_units')
+     op.drop_index('idx_memory_units_observation_date', table_name='memory_units')
+     op.drop_index('idx_memory_units_opinion_date', table_name='memory_units')
+     op.drop_index('idx_memory_units_opinion_confidence', table_name='memory_units')
+     op.drop_index('idx_memory_units_bank_type_date', table_name='memory_units')
+     op.drop_index('idx_memory_units_bank_fact_type', table_name='memory_units')
+     op.drop_index('idx_memory_units_fact_type', table_name='memory_units')
+     op.drop_index('idx_memory_units_access_count', table_name='memory_units')
+     op.drop_index('idx_memory_units_bank_date', table_name='memory_units')
+     op.drop_index('idx_memory_units_event_date', table_name='memory_units')
+     op.drop_index('idx_memory_units_document_id', table_name='memory_units')
+     op.drop_index('idx_memory_units_bank_id', table_name='memory_units')
+     op.execute('DROP INDEX IF EXISTS idx_memory_units_text_search')
+     op.drop_table('memory_units')
+
+     op.execute('DROP INDEX IF EXISTS idx_entities_bank_lower_name')
+     op.drop_index('idx_entities_bank_name', table_name='entities')
+     op.drop_index('idx_entities_canonical_name', table_name='entities')
+     op.drop_index('idx_entities_bank_id', table_name='entities')
+     op.drop_table('entities')
+
+     op.drop_index('idx_async_operations_bank_status', table_name='async_operations')
+     op.drop_index('idx_async_operations_status', table_name='async_operations')
+     op.drop_index('idx_async_operations_bank_id', table_name='async_operations')
+     op.drop_table('async_operations')
+
+     op.drop_index('idx_documents_content_hash', table_name='documents')
+     op.drop_index('idx_documents_bank_id', table_name='documents')
+     op.drop_table('documents')
+
+     op.drop_table('banks')
+
+     # Drop extensions (optional - comment out if you want to keep them)
+     # op.execute('DROP EXTENSION IF EXISTS vector')
+     # op.execute('DROP EXTENSION IF EXISTS "uuid-ossp"')
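
The memory_units table created above carries two retrieval paths: an HNSW index over the 384-dimensional embedding column with vector_cosine_ops, and a GIN index over the generated search_vector column. The snippet below is not code from the package; it is a hedged sketch of queries that would exercise those indexes, with a placeholder query vector and bank_id:

from sqlalchemy import create_engine, text

# Hypothetical connection URL; any psycopg2-compatible URL would do.
engine = create_engine("postgresql://user:secret@localhost:5432/hindsight")

# Placeholder 384-dimensional vector in pgvector's text form; a real caller would
# pass the query embedding produced by the embeddings backend.
query_embedding = "[" + ",".join(["0.1"] * 384) + "]"

with engine.connect() as conn:
    # Vector search: '<=>' is pgvector's cosine-distance operator,
    # served by idx_memory_units_embedding.
    nearest = conn.execute(
        text(
            "SELECT id, text, embedding <=> CAST(:q AS vector) AS distance "
            "FROM memory_units WHERE bank_id = :bank "
            "ORDER BY embedding <=> CAST(:q AS vector) LIMIT 10"
        ),
        {"q": query_embedding, "bank": "demo-bank"},
    ).fetchall()

    # Full-text search: match the generated search_vector column against a tsquery
    # and rank with ts_rank, served by idx_memory_units_text_search.
    keyword_hits = conn.execute(
        text(
            "SELECT id, text, ts_rank(search_vector, plainto_tsquery('english', :q)) AS rank "
            "FROM memory_units "
            "WHERE bank_id = :bank AND search_vector @@ plainto_tsquery('english', :q) "
            "ORDER BY rank DESC LIMIT 10"
        ),
        {"q": "project deadline", "bank": "demo-bank"},
    ).fetchall()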
hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py ADDED
@@ -0,0 +1,70 @@
+ """add_chunks_table
+
+ Revision ID: b7c4d8e9f1a2
+ Revises: 5a366d414dce
+ Create Date: 2025-11-28 00:00:00.000000
+
+ """
+ from typing import Sequence, Union
+
+ from alembic import op
+ import sqlalchemy as sa
+ from sqlalchemy.dialects import postgresql
+
+
+ # revision identifiers, used by Alembic.
+ revision: str = 'b7c4d8e9f1a2'
+ down_revision: Union[str, Sequence[str], None] = '5a366d414dce'
+ branch_labels: Union[str, Sequence[str], None] = None
+ depends_on: Union[str, Sequence[str], None] = None
+
+
+ def upgrade() -> None:
+     """Add chunks table and link memory_units to chunks."""
+
+     # Create chunks table with single text PK (bank_id_document_id_chunk_index)
+     op.create_table(
+         'chunks',
+         sa.Column('chunk_id', sa.Text(), nullable=False),
+         sa.Column('document_id', sa.Text(), nullable=False),
+         sa.Column('bank_id', sa.Text(), nullable=False),
+         sa.Column('chunk_index', sa.Integer(), nullable=False),
+         sa.Column('chunk_text', sa.Text(), nullable=False),
+         sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
+         sa.ForeignKeyConstraint(['document_id', 'bank_id'], ['documents.id', 'documents.bank_id'], name='chunks_document_fkey', ondelete='CASCADE'),
+         sa.PrimaryKeyConstraint('chunk_id', name=op.f('pk_chunks'))
+     )
+
+     # Add indexes for efficient queries
+     op.create_index('idx_chunks_document_id', 'chunks', ['document_id'])
+     op.create_index('idx_chunks_bank_id', 'chunks', ['bank_id'])
+
+     # Add chunk_id column to memory_units (nullable, as existing records won't have chunks)
+     op.add_column('memory_units', sa.Column('chunk_id', sa.Text(), nullable=True))
+
+     # Add foreign key constraint to chunks table
+     op.create_foreign_key(
+         'memory_units_chunk_fkey',
+         'memory_units',
+         'chunks',
+         ['chunk_id'],
+         ['chunk_id'],
+         ondelete='SET NULL'
+     )
+
+     # Add index on chunk_id for efficient lookups
+     op.create_index('idx_memory_units_chunk_id', 'memory_units', ['chunk_id'])
+
+
+ def downgrade() -> None:
+     """Remove chunks table and chunk_id from memory_units."""
+
+     # Drop index and foreign key from memory_units
+     op.drop_index('idx_memory_units_chunk_id', table_name='memory_units')
+     op.drop_constraint('memory_units_chunk_fkey', 'memory_units', type_='foreignkey')
+     op.drop_column('memory_units', 'chunk_id')
+
+     # Drop chunks table indexes and table
+     op.drop_index('idx_chunks_bank_id', table_name='chunks')
+     op.drop_index('idx_chunks_document_id', table_name='chunks')
+     op.drop_table('chunks')
hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py ADDED
@@ -0,0 +1,39 @@
+ """add_retain_params_to_documents
+
+ Revision ID: c8e5f2a3b4d1
+ Revises: b7c4d8e9f1a2
+ Create Date: 2025-12-02 00:00:00.000000
+
+ """
+ from typing import Sequence, Union
+
+ from alembic import op
+ import sqlalchemy as sa
+ from sqlalchemy.dialects import postgresql
+
+
+ # revision identifiers, used by Alembic.
+ revision: str = 'c8e5f2a3b4d1'
+ down_revision: Union[str, Sequence[str], None] = 'b7c4d8e9f1a2'
+ branch_labels: Union[str, Sequence[str], None] = None
+ depends_on: Union[str, Sequence[str], None] = None
+
+
+ def upgrade() -> None:
+     """Add retain_params JSONB column to documents table."""
+
+     # Add retain_params column to store parameters passed during retain
+     op.add_column('documents', sa.Column('retain_params', postgresql.JSONB(), nullable=True))
+
+     # Add index for efficient queries on retain_params
+     op.create_index('idx_documents_retain_params', 'documents', ['retain_params'], postgresql_using='gin')
+
+
+ def downgrade() -> None:
+     """Remove retain_params column from documents table."""
+
+     # Drop index
+     op.drop_index('idx_documents_retain_params', table_name='documents')
+
+     # Drop column
+     op.drop_column('documents', 'retain_params')
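
Because retain_params is a JSONB column behind a GIN index, containment filters are the natural way to query it. The snippet below is only illustrative; the key stored inside retain_params is hypothetical, since the diff does not show what the application writes there:

from sqlalchemy import create_engine, text

# Hypothetical connection URL.
engine = create_engine("postgresql://user:secret@localhost:5432/hindsight")

with engine.connect() as conn:
    # The '@>' containment operator can be served by the GIN index idx_documents_retain_params.
    rows = conn.execute(
        text("SELECT id, bank_id FROM documents WHERE retain_params @> CAST(:params AS jsonb)"),
        {"params": '{"source": "import"}'},  # hypothetical retain parameter
    ).fetchall()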
hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py ADDED
@@ -0,0 +1,48 @@
+ """Rename fact_type 'bank' to 'experience'
+
+ Revision ID: d9f6a3b4c5e2
+ Revises: c8e5f2a3b4d1
+ Create Date: 2024-12-04 15:00:00.000000
+
+ """
+ from alembic import op
+ import sqlalchemy as sa
+
+
+ # revision identifiers, used by Alembic.
+ revision = 'd9f6a3b4c5e2'
+ down_revision = 'c8e5f2a3b4d1'
+ branch_labels = None
+ depends_on = None
+
+
+ def upgrade():
+     # Drop old check constraint FIRST (before updating data)
+     op.drop_constraint('memory_units_fact_type_check', 'memory_units', type_='check')
+
+     # Update existing 'bank' values to 'experience'
+     op.execute("UPDATE memory_units SET fact_type = 'experience' WHERE fact_type = 'bank'")
+     # Also update any 'interactions' values (in case of partial migration)
+     op.execute("UPDATE memory_units SET fact_type = 'experience' WHERE fact_type = 'interactions'")
+
+     # Create new check constraint with 'experience' instead of 'bank'
+     op.create_check_constraint(
+         'memory_units_fact_type_check',
+         'memory_units',
+         "fact_type IN ('world', 'experience', 'opinion', 'observation')"
+     )
+
+
+ def downgrade():
+     # Drop new check constraint FIRST
+     op.drop_constraint('memory_units_fact_type_check', 'memory_units', type_='check')
+
+     # Update 'experience' back to 'bank'
+     op.execute("UPDATE memory_units SET fact_type = 'bank' WHERE fact_type = 'experience'")
+
+     # Recreate old check constraint
+     op.create_check_constraint(
+         'memory_units_fact_type_check',
+         'memory_units',
+         "fact_type IN ('world', 'bank', 'opinion', 'observation')"
+     )