hindsight-api 0.0.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. hindsight_api-0.0.13/.gitignore +34 -0
  2. hindsight_api-0.0.13/PKG-INFO +41 -0
  3. hindsight_api-0.0.13/README.md +1 -0
  4. hindsight_api-0.0.13/alembic/README +1 -0
  5. hindsight_api-0.0.13/alembic/env.py +129 -0
  6. hindsight_api-0.0.13/alembic/script.py.mako +28 -0
  7. hindsight_api-0.0.13/alembic/versions/5a366d414dce_initial_schema.py +275 -0
  8. hindsight_api-0.0.13/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
  9. hindsight_api-0.0.13/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +39 -0
  10. hindsight_api-0.0.13/hindsight_api/__init__.py +38 -0
  11. hindsight_api-0.0.13/hindsight_api/api/__init__.py +105 -0
  12. hindsight_api-0.0.13/hindsight_api/api/http.py +1872 -0
  13. hindsight_api-0.0.13/hindsight_api/api/mcp.py +157 -0
  14. hindsight_api-0.0.13/hindsight_api/engine/__init__.py +47 -0
  15. hindsight_api-0.0.13/hindsight_api/engine/cross_encoder.py +97 -0
  16. hindsight_api-0.0.13/hindsight_api/engine/db_utils.py +93 -0
  17. hindsight_api-0.0.13/hindsight_api/engine/embeddings.py +113 -0
  18. hindsight_api-0.0.13/hindsight_api/engine/entity_resolver.py +575 -0
  19. hindsight_api-0.0.13/hindsight_api/engine/llm_wrapper.py +269 -0
  20. hindsight_api-0.0.13/hindsight_api/engine/memory_engine.py +3095 -0
  21. hindsight_api-0.0.13/hindsight_api/engine/query_analyzer.py +519 -0
  22. hindsight_api-0.0.13/hindsight_api/engine/response_models.py +222 -0
  23. hindsight_api-0.0.13/hindsight_api/engine/retain/__init__.py +50 -0
  24. hindsight_api-0.0.13/hindsight_api/engine/retain/bank_utils.py +423 -0
  25. hindsight_api-0.0.13/hindsight_api/engine/retain/chunk_storage.py +82 -0
  26. hindsight_api-0.0.13/hindsight_api/engine/retain/deduplication.py +104 -0
  27. hindsight_api-0.0.13/hindsight_api/engine/retain/embedding_processing.py +62 -0
  28. hindsight_api-0.0.13/hindsight_api/engine/retain/embedding_utils.py +54 -0
  29. hindsight_api-0.0.13/hindsight_api/engine/retain/entity_processing.py +90 -0
  30. hindsight_api-0.0.13/hindsight_api/engine/retain/fact_extraction.py +1027 -0
  31. hindsight_api-0.0.13/hindsight_api/engine/retain/fact_storage.py +176 -0
  32. hindsight_api-0.0.13/hindsight_api/engine/retain/link_creation.py +121 -0
  33. hindsight_api-0.0.13/hindsight_api/engine/retain/link_utils.py +651 -0
  34. hindsight_api-0.0.13/hindsight_api/engine/retain/orchestrator.py +405 -0
  35. hindsight_api-0.0.13/hindsight_api/engine/retain/types.py +206 -0
  36. hindsight_api-0.0.13/hindsight_api/engine/search/__init__.py +15 -0
  37. hindsight_api-0.0.13/hindsight_api/engine/search/fusion.py +122 -0
  38. hindsight_api-0.0.13/hindsight_api/engine/search/observation_utils.py +132 -0
  39. hindsight_api-0.0.13/hindsight_api/engine/search/reranking.py +103 -0
  40. hindsight_api-0.0.13/hindsight_api/engine/search/retrieval.py +503 -0
  41. hindsight_api-0.0.13/hindsight_api/engine/search/scoring.py +161 -0
  42. hindsight_api-0.0.13/hindsight_api/engine/search/temporal_extraction.py +64 -0
  43. hindsight_api-0.0.13/hindsight_api/engine/search/think_utils.py +255 -0
  44. hindsight_api-0.0.13/hindsight_api/engine/search/trace.py +215 -0
  45. hindsight_api-0.0.13/hindsight_api/engine/search/tracer.py +447 -0
  46. hindsight_api-0.0.13/hindsight_api/engine/search/types.py +160 -0
  47. hindsight_api-0.0.13/hindsight_api/engine/task_backend.py +223 -0
  48. hindsight_api-0.0.13/hindsight_api/engine/utils.py +203 -0
  49. hindsight_api-0.0.13/hindsight_api/metrics.py +227 -0
  50. hindsight_api-0.0.13/hindsight_api/migrations.py +163 -0
  51. hindsight_api-0.0.13/hindsight_api/models.py +309 -0
  52. hindsight_api-0.0.13/hindsight_api/pg0.py +425 -0
  53. hindsight_api-0.0.13/hindsight_api/web/__init__.py +12 -0
  54. hindsight_api-0.0.13/hindsight_api/web/server.py +143 -0
  55. hindsight_api-0.0.13/pyproject.toml +76 -0
  56. hindsight_api-0.0.13/test_chunks_debug.py +46 -0
  57. hindsight_api-0.0.13/test_mentioned_at.py +69 -0
  58. hindsight_api-0.0.13/tests/RETAIN_TEST_COVERAGE_PLAN.md +302 -0
  59. hindsight_api-0.0.13/tests/__init__.py +1 -0
  60. hindsight_api-0.0.13/tests/conftest.py +130 -0
  61. hindsight_api-0.0.13/tests/fixtures/README.md +19 -0
  62. hindsight_api-0.0.13/tests/fixtures/locomo_conversation_sample.json +5271 -0
  63. hindsight_api-0.0.13/tests/test_agents_api.py +260 -0
  64. hindsight_api-0.0.13/tests/test_batch_chunking.py +58 -0
  65. hindsight_api-0.0.13/tests/test_chunking.py +60 -0
  66. hindsight_api-0.0.13/tests/test_document_tracking.py +131 -0
  67. hindsight_api-0.0.13/tests/test_fact_extraction_quality.py +1168 -0
  68. hindsight_api-0.0.13/tests/test_fact_ordering.py +183 -0
  69. hindsight_api-0.0.13/tests/test_http_api_integration.py +460 -0
  70. hindsight_api-0.0.13/tests/test_link_utils.py +256 -0
  71. hindsight_api-0.0.13/tests/test_mcp_api_integration.py +178 -0
  72. hindsight_api-0.0.13/tests/test_observations.py +339 -0
  73. hindsight_api-0.0.13/tests/test_query_analyzer.py +285 -0
  74. hindsight_api-0.0.13/tests/test_retain.py +1597 -0
  75. hindsight_api-0.0.13/tests/test_search_trace.py +140 -0
  76. hindsight_api-0.0.13/tests/test_temporal_ranges.py +146 -0
  77. hindsight_api-0.0.13/tests/test_think.py +150 -0
@@ -0,0 +1,34 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ # Environment variables
13
+ .env
14
+
15
+ # IDE
16
+ .idea/
17
+ .vscode/
18
+ *.swp
19
+ *.swo
20
+
21
+ # NLTK data (will be downloaded automatically)
22
+ nltk_data/
23
+
24
+ # Large benchmark datasets (will be downloaded automatically)
25
+ **/longmemeval_s_cleaned.json
26
+
27
+ # Debug logs
28
+ logs/
29
+
30
+ .DS_Store
31
+
32
+
33
+ hindsight-dev/benchmarks/locomo/results/
34
+ hindsight-dev/benchmarks/longmemeval/results/
@@ -0,0 +1,41 @@
1
+ Metadata-Version: 2.4
2
+ Name: hindsight-api
3
+ Version: 0.0.13
4
+ Summary: Temporal + Semantic + Entity Memory System for AI agents using PostgreSQL
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: alembic>=1.17.1
7
+ Requires-Dist: asyncpg>=0.29.0
8
+ Requires-Dist: dateparser>=1.2.2
9
+ Requires-Dist: fastapi[standard]>=0.120.3
10
+ Requires-Dist: fastmcp>=2.0.0
11
+ Requires-Dist: greenlet>=3.2.4
12
+ Requires-Dist: httpx>=0.27.0
13
+ Requires-Dist: langchain-text-splitters>=0.3.0
14
+ Requires-Dist: openai>=1.0.0
15
+ Requires-Dist: opentelemetry-api>=1.20.0
16
+ Requires-Dist: opentelemetry-exporter-prometheus>=0.41b0
17
+ Requires-Dist: opentelemetry-instrumentation-fastapi>=0.41b0
18
+ Requires-Dist: opentelemetry-sdk>=1.20.0
19
+ Requires-Dist: pgvector>=0.4.1
20
+ Requires-Dist: psycopg2-binary>=2.9.11
21
+ Requires-Dist: pydantic>=2.0.0
22
+ Requires-Dist: python-dateutil>=2.8.0
23
+ Requires-Dist: python-dotenv>=1.0.0
24
+ Requires-Dist: rich>=13.0.0
25
+ Requires-Dist: sentence-transformers>=2.2.0
26
+ Requires-Dist: sqlalchemy>=2.0.44
27
+ Requires-Dist: tiktoken>=0.12.0
28
+ Requires-Dist: torch>=2.0.0
29
+ Requires-Dist: transformers>=4.30.0
30
+ Requires-Dist: uvicorn>=0.38.0
31
+ Requires-Dist: wsproto>=1.0.0
32
+ Provides-Extra: test
33
+ Requires-Dist: filelock>=3.0.0; extra == 'test'
34
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'test'
35
+ Requires-Dist: pytest-timeout>=2.4.0; extra == 'test'
36
+ Requires-Dist: pytest-xdist>=3.0.0; extra == 'test'
37
+ Requires-Dist: pytest>=7.0.0; extra == 'test'
38
+ Requires-Dist: testcontainers[postgres]>=4.0.0; extra == 'test'
39
+ Description-Content-Type: text/markdown
40
+
41
+ # Memory
@@ -0,0 +1 @@
1
+ # Memory
@@ -0,0 +1 @@
1
+ Generic single-database configuration.
@@ -0,0 +1,129 @@
1
+ """
2
+ Alembic environment configuration for SQLAlchemy with pgvector.
3
+ Uses synchronous psycopg2 driver for migrations to avoid pgbouncer issues.
4
+ """
5
+ import logging
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ from sqlalchemy import pool, engine_from_config
11
+ from sqlalchemy.engine import Connection
12
+
13
+ from alembic import context
14
+ from dotenv import load_dotenv
15
+
16
+ # Import your models here
17
+ from hindsight_api.models import Base
18
+
19
# Load environment variables based on HINDSIGHT_API_DATABASE_URL env var or default to local
def load_env():
    """Load environment variables from a workspace-level ``.env`` file.

    Does nothing when ``HINDSIGHT_API_DATABASE_URL`` is already present in
    the environment (e.g. injected by CI/CD), so an explicit setting always
    wins over the ``.env`` file.
    """
    # An explicitly provided database URL means there is nothing to load.
    if os.getenv("HINDSIGHT_API_DATABASE_URL"):
        return

    # The .env file is expected at the workspace root, three levels up
    # from this file (alembic/env.py -> alembic/ -> package root -> workspace).
    workspace_root = Path(__file__).parent.parent.parent
    dotenv_path = workspace_root / ".env"

    if dotenv_path.exists():
        load_dotenv(dotenv_path)

load_env()
34
+
35
+ # this is the Alembic Config object, which provides
36
+ # access to the values within the .ini file in use.
37
+ config = context.config
38
+
39
+ # Note: We don't call fileConfig() here to avoid overriding the application's logging configuration.
40
+ # Alembic will use the existing logging configuration from the application.
41
+
42
+ # add your model's MetaData object here
43
+ # for 'autogenerate' support
44
+ target_metadata = Base.metadata
45
+
46
+ # other values from the config, defined by the needs of env.py,
47
+ # can be acquired:
48
+ # my_important_option = config.get_main_option("my_important_option")
49
+ # ... etc.
50
+
51
+
52
def get_database_url() -> str:
    """
    Get and process the database URL from config or environment.

    Resolution order: the programmatically set ``sqlalchemy.url`` config
    option, then the ``HINDSIGHT_API_DATABASE_URL`` environment variable.

    Returns the URL with the correct driver (psycopg2) for migrations, and
    writes the processed URL back into the Alembic config so that
    ``engine_from_config`` picks it up.

    Raises:
        ValueError: if no URL is found in either location.
    """
    # Get database URL from config (set programmatically) or environment
    database_url = config.get_main_option("sqlalchemy.url")
    if not database_url:
        database_url = os.getenv("HINDSIGHT_API_DATABASE_URL")
    if not database_url:
        raise ValueError(
            "Database URL not found. "
            "Set HINDSIGHT_API_DATABASE_URL environment variable or pass database_url to run_migrations()."
        )

    # For migrations, use psycopg2 (sync driver) to avoid pgbouncer prepared statement issues
    if database_url.startswith("postgresql+asyncpg://"):
        database_url = database_url.replace("postgresql+asyncpg://", "postgresql://", 1)
    elif database_url.startswith("postgres+asyncpg://"):
        database_url = database_url.replace("postgres+asyncpg://", "postgresql://", 1)
    elif database_url.startswith("postgres://"):
        # Legacy scheme emitted by some providers (e.g. Heroku); SQLAlchemy 2.x
        # no longer accepts the "postgres" dialect name, so normalize it.
        database_url = database_url.replace("postgres://", "postgresql://", 1)

    # Update config with processed URL for engine_from_config to use
    config.set_main_option("sqlalchemy.url", database_url)

    return database_url
78
+
79
+
80
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    Configures the Alembic context with only a URL — no Engine is created,
    so no DBAPI needs to be importable. Any context.execute() calls emit
    SQL text to the script output instead of hitting a database.
    """
    logging.info("running offline")
    url = get_database_url()

    offline_options = {
        "url": url,
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
104
+
105
+
106
def run_migrations_online() -> None:
    """Run migrations in 'online' mode with synchronous engine."""
    # Normalizes the URL (asyncpg -> psycopg2) and stores it in the config.
    get_database_url()

    ini_section = config.get_section(config.config_ini_section, {})
    connectable = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as conn:
        context.configure(connection=conn, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
124
+
125
+
126
# Alembic entry point: choose SQL-script emission (offline) or a live
# database connection (online) depending on how alembic was invoked.
_runner = run_migrations_offline if context.is_offline_mode() else run_migrations_online
_runner()
@@ -0,0 +1,28 @@
1
+ """${message}
2
+
3
+ Revision ID: ${up_revision}
4
+ Revises: ${down_revision | comma,n}
5
+ Create Date: ${create_date}
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ ${imports if imports else ""}
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = ${repr(up_revision)}
16
+ down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
17
+ branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
18
+ depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
19
+
20
+
21
+ def upgrade() -> None:
22
+ """Upgrade schema."""
23
+ ${upgrades if upgrades else "pass"}
24
+
25
+
26
+ def downgrade() -> None:
27
+ """Downgrade schema."""
28
+ ${downgrades if downgrades else "pass"}
@@ -0,0 +1,275 @@
1
+ """initial_schema
2
+
3
+ Revision ID: 5a366d414dce
4
+ Revises:
5
+ Create Date: 2025-11-27 11:54:19.228030
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ from sqlalchemy.dialects import postgresql
13
+ from pgvector.sqlalchemy import Vector
14
+
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '5a366d414dce'
18
+ down_revision: Union[str, Sequence[str], None] = None
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
def upgrade() -> None:
    """Upgrade schema - create all tables from scratch.

    Creates the required Postgres extensions, the core tables (banks,
    documents, async_operations, entities, memory_units,
    entity_cooccurrences, memory_links, unit_entities), a generated
    tsvector column + GIN index for full-text search, an HNSW index for
    vector similarity, and the memory_units_bm25 materialized view.
    """

    # Enable required extensions
    op.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp"')
    op.execute('CREATE EXTENSION IF NOT EXISTS vector')

    # Create banks table
    op.create_table(
        'banks',
        sa.Column('bank_id', sa.Text(), nullable=False),
        sa.Column('name', sa.Text(), nullable=True),
        sa.Column('personality', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
        sa.Column('background', sa.Text(), nullable=True),
        sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.PrimaryKeyConstraint('bank_id', name=op.f('pk_banks'))
    )

    # Create documents table (composite PK: a document id is scoped to its bank)
    op.create_table(
        'documents',
        sa.Column('id', sa.Text(), nullable=False),
        sa.Column('bank_id', sa.Text(), nullable=False),
        sa.Column('original_text', sa.Text(), nullable=True),
        sa.Column('content_hash', sa.Text(), nullable=True),
        sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
        sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.PrimaryKeyConstraint('id', 'bank_id', name=op.f('pk_documents'))
    )
    op.create_index('idx_documents_bank_id', 'documents', ['bank_id'])
    op.create_index('idx_documents_content_hash', 'documents', ['content_hash'])

    # Create async_operations table
    op.create_table(
        'async_operations',
        sa.Column('operation_id', postgresql.UUID(as_uuid=True), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('bank_id', sa.Text(), nullable=False),
        sa.Column('operation_type', sa.Text(), nullable=False),
        sa.Column('status', sa.Text(), server_default='pending', nullable=False),
        sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('completed_at', postgresql.TIMESTAMP(timezone=True), nullable=True),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('result_metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
        sa.PrimaryKeyConstraint('operation_id', name=op.f('pk_async_operations')),
        sa.CheckConstraint("status IN ('pending', 'processing', 'completed', 'failed')", name='async_operations_status_check')
    )
    op.create_index('idx_async_operations_bank_id', 'async_operations', ['bank_id'])
    op.create_index('idx_async_operations_status', 'async_operations', ['status'])
    op.create_index('idx_async_operations_bank_status', 'async_operations', ['bank_id', 'status'])

    # Create entities table
    op.create_table(
        'entities',
        sa.Column('id', postgresql.UUID(as_uuid=True), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('canonical_name', sa.Text(), nullable=False),
        sa.Column('bank_id', sa.Text(), nullable=False),
        sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
        sa.Column('first_seen', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('last_seen', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('mention_count', sa.Integer(), server_default='1', nullable=False),
        sa.PrimaryKeyConstraint('id', name=op.f('pk_entities'))
    )
    op.create_index('idx_entities_bank_id', 'entities', ['bank_id'])
    op.create_index('idx_entities_canonical_name', 'entities', ['canonical_name'])
    op.create_index('idx_entities_bank_name', 'entities', ['bank_id', 'canonical_name'])
    # Create unique index on (bank_id, LOWER(canonical_name)) for entity resolution
    # (raw SQL: alembic's create_index has no portable expression-index support here)
    op.execute('CREATE UNIQUE INDEX idx_entities_bank_lower_name ON entities (bank_id, LOWER(canonical_name))')

    # Create memory_units table
    op.create_table(
        'memory_units',
        sa.Column('id', postgresql.UUID(as_uuid=True), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('bank_id', sa.Text(), nullable=False),
        sa.Column('document_id', sa.Text(), nullable=True),
        sa.Column('text', sa.Text(), nullable=False),
        # 384-dim embeddings — presumably sized for the configured
        # sentence-transformers model; confirm against embeddings.py.
        sa.Column('embedding', Vector(384), nullable=True),
        sa.Column('context', sa.Text(), nullable=True),
        sa.Column('event_date', postgresql.TIMESTAMP(timezone=True), nullable=False),
        sa.Column('occurred_start', postgresql.TIMESTAMP(timezone=True), nullable=True),
        sa.Column('occurred_end', postgresql.TIMESTAMP(timezone=True), nullable=True),
        sa.Column('mentioned_at', postgresql.TIMESTAMP(timezone=True), nullable=True),
        sa.Column('fact_type', sa.Text(), server_default='world', nullable=False),
        sa.Column('confidence_score', sa.Float(), nullable=True),
        sa.Column('access_count', sa.Integer(), server_default='0', nullable=False),
        sa.Column('metadata', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'{}'::jsonb"), nullable=False),
        sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.ForeignKeyConstraint(['document_id', 'bank_id'], ['documents.id', 'documents.bank_id'], name='memory_units_document_fkey', ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id', name=op.f('pk_memory_units')),
        sa.CheckConstraint("fact_type IN ('world', 'bank', 'opinion', 'observation')", name='memory_units_fact_type_check'),
        sa.CheckConstraint("confidence_score IS NULL OR (confidence_score >= 0.0 AND confidence_score <= 1.0)", name='memory_units_confidence_range_check'),
        # Opinions must carry a confidence score; observations may or may not;
        # all other fact types must not.
        sa.CheckConstraint(
            "(fact_type = 'opinion' AND confidence_score IS NOT NULL) OR "
            "(fact_type = 'observation') OR "
            "(fact_type NOT IN ('opinion', 'observation') AND confidence_score IS NULL)",
            name='confidence_score_fact_type_check'
        )
    )

    # Add search_vector column for full-text search
    op.execute("""
        ALTER TABLE memory_units
        ADD COLUMN search_vector tsvector
        GENERATED ALWAYS AS (to_tsvector('english', COALESCE(text, '') || ' ' || COALESCE(context, ''))) STORED
    """)

    op.create_index('idx_memory_units_bank_id', 'memory_units', ['bank_id'])
    op.create_index('idx_memory_units_document_id', 'memory_units', ['document_id'])
    op.create_index('idx_memory_units_event_date', 'memory_units', [sa.text('event_date DESC')])
    op.create_index('idx_memory_units_bank_date', 'memory_units', ['bank_id', sa.text('event_date DESC')])
    op.create_index('idx_memory_units_access_count', 'memory_units', [sa.text('access_count DESC')])
    op.create_index('idx_memory_units_fact_type', 'memory_units', ['fact_type'])
    op.create_index('idx_memory_units_bank_fact_type', 'memory_units', ['bank_id', 'fact_type'])
    op.create_index('idx_memory_units_bank_type_date', 'memory_units', ['bank_id', 'fact_type', sa.text('event_date DESC')])
    # Partial indexes serving opinion/observation-specific query paths
    op.create_index('idx_memory_units_opinion_confidence', 'memory_units', ['bank_id', sa.text('confidence_score DESC')], postgresql_where=sa.text("fact_type = 'opinion'"))
    op.create_index('idx_memory_units_opinion_date', 'memory_units', ['bank_id', sa.text('event_date DESC')], postgresql_where=sa.text("fact_type = 'opinion'"))
    op.create_index('idx_memory_units_observation_date', 'memory_units', ['bank_id', sa.text('event_date DESC')], postgresql_where=sa.text("fact_type = 'observation'"))
    # HNSW approximate-nearest-neighbour index with cosine distance (pgvector)
    op.create_index('idx_memory_units_embedding', 'memory_units', ['embedding'], postgresql_using='hnsw', postgresql_ops={'embedding': 'vector_cosine_ops'})

    # Create BM25 full-text search index on search_vector
    op.execute("""
        CREATE INDEX idx_memory_units_text_search ON memory_units
        USING gin(search_vector)
    """)

    # NOTE(review): doc_length_factor is computed against the table's average
    # text length at view (re)creation/refresh time — on an empty table the
    # subquery yields NULL, but then the view has no rows anyway.
    op.execute("""
        CREATE MATERIALIZED VIEW memory_units_bm25 AS
        SELECT
            id,
            bank_id,
            text,
            to_tsvector('english', text) AS text_vector,
            log(1.0 + length(text)::float / (SELECT avg(length(text)) FROM memory_units)) AS doc_length_factor
        FROM memory_units
    """)

    op.create_index('idx_memory_units_bm25_bank', 'memory_units_bm25', ['bank_id'])
    op.create_index('idx_memory_units_bm25_text_vector', 'memory_units_bm25', ['text_vector'], postgresql_using='gin')

    # Create entity_cooccurrences table (unordered pair stored as id1 < id2)
    op.create_table(
        'entity_cooccurrences',
        sa.Column('entity_id_1', postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column('entity_id_2', postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column('cooccurrence_count', sa.Integer(), server_default='1', nullable=False),
        sa.Column('last_cooccurred', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.ForeignKeyConstraint(['entity_id_1'], ['entities.id'], name=op.f('fk_entity_cooccurrences_entity_id_1_entities'), ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['entity_id_2'], ['entities.id'], name=op.f('fk_entity_cooccurrences_entity_id_2_entities'), ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('entity_id_1', 'entity_id_2', name=op.f('pk_entity_cooccurrences')),
        sa.CheckConstraint('entity_id_1 < entity_id_2', name='entity_cooccurrence_order_check')
    )
    op.create_index('idx_entity_cooccurrences_entity1', 'entity_cooccurrences', ['entity_id_1'])
    op.create_index('idx_entity_cooccurrences_entity2', 'entity_cooccurrences', ['entity_id_2'])
    op.create_index('idx_entity_cooccurrences_count', 'entity_cooccurrences', [sa.text('cooccurrence_count DESC')])

    # Create memory_links table (typed edges between memory units)
    op.create_table(
        'memory_links',
        sa.Column('from_unit_id', postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column('to_unit_id', postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column('link_type', sa.Text(), nullable=False),
        sa.Column('entity_id', postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column('weight', sa.Float(), server_default='1.0', nullable=False),
        sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        sa.ForeignKeyConstraint(['entity_id'], ['entities.id'], name=op.f('fk_memory_links_entity_id_entities'), ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['from_unit_id'], ['memory_units.id'], name=op.f('fk_memory_links_from_unit_id_memory_units'), ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['to_unit_id'], ['memory_units.id'], name=op.f('fk_memory_links_to_unit_id_memory_units'), ondelete='CASCADE'),
        sa.CheckConstraint("link_type IN ('temporal', 'semantic', 'entity', 'causes', 'caused_by', 'enables', 'prevents')", name='memory_links_link_type_check'),
        sa.CheckConstraint('weight >= 0.0 AND weight <= 1.0', name='memory_links_weight_check')
    )
    # Create unique constraint using COALESCE for nullable entity_id
    # (NULLs are never equal, so a plain unique index would allow duplicates)
    op.execute("CREATE UNIQUE INDEX idx_memory_links_unique ON memory_links (from_unit_id, to_unit_id, link_type, COALESCE(entity_id, '00000000-0000-0000-0000-000000000000'::uuid))")
    op.create_index('idx_memory_links_from_unit', 'memory_links', ['from_unit_id'])
    op.create_index('idx_memory_links_to_unit', 'memory_links', ['to_unit_id'])
    op.create_index('idx_memory_links_entity', 'memory_links', ['entity_id'])
    op.create_index('idx_memory_links_link_type', 'memory_links', ['link_type'])

    # Create unit_entities table (many-to-many: memory_units <-> entities)
    op.create_table(
        'unit_entities',
        sa.Column('unit_id', postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column('entity_id', postgresql.UUID(as_uuid=True), nullable=False),
        sa.ForeignKeyConstraint(['entity_id'], ['entities.id'], name=op.f('fk_unit_entities_entity_id_entities'), ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['unit_id'], ['memory_units.id'], name=op.f('fk_unit_entities_unit_id_memory_units'), ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('unit_id', 'entity_id', name=op.f('pk_unit_entities'))
    )
    op.create_index('idx_unit_entities_unit', 'unit_entities', ['unit_id'])
    op.create_index('idx_unit_entities_entity', 'unit_entities', ['entity_id'])
+ op.create_index('idx_unit_entities_entity', 'unit_entities', ['entity_id'])
214
+
215
+
216
+ def downgrade() -> None:
217
+ """Downgrade schema - drop all tables."""
218
+
219
+ # Drop tables in reverse dependency order
220
+ op.drop_index('idx_unit_entities_entity', table_name='unit_entities')
221
+ op.drop_index('idx_unit_entities_unit', table_name='unit_entities')
222
+ op.drop_table('unit_entities')
223
+
224
+ op.drop_index('idx_memory_links_link_type', table_name='memory_links')
225
+ op.drop_index('idx_memory_links_entity', table_name='memory_links')
226
+ op.drop_index('idx_memory_links_to_unit', table_name='memory_links')
227
+ op.drop_index('idx_memory_links_from_unit', table_name='memory_links')
228
+ op.execute('DROP INDEX IF EXISTS idx_memory_links_unique')
229
+ op.drop_table('memory_links')
230
+
231
+ op.drop_index('idx_entity_cooccurrences_count', table_name='entity_cooccurrences')
232
+ op.drop_index('idx_entity_cooccurrences_entity2', table_name='entity_cooccurrences')
233
+ op.drop_index('idx_entity_cooccurrences_entity1', table_name='entity_cooccurrences')
234
+ op.drop_table('entity_cooccurrences')
235
+
236
+ # Drop BM25 materialized view and index
237
+ op.drop_index('idx_memory_units_bm25_text_vector', table_name='memory_units_bm25')
238
+ op.drop_index('idx_memory_units_bm25_bank', table_name='memory_units_bm25')
239
+ op.execute('DROP MATERIALIZED VIEW IF EXISTS memory_units_bm25')
240
+
241
+ op.drop_index('idx_memory_units_embedding', table_name='memory_units')
242
+ op.drop_index('idx_memory_units_observation_date', table_name='memory_units')
243
+ op.drop_index('idx_memory_units_opinion_date', table_name='memory_units')
244
+ op.drop_index('idx_memory_units_opinion_confidence', table_name='memory_units')
245
+ op.drop_index('idx_memory_units_bank_type_date', table_name='memory_units')
246
+ op.drop_index('idx_memory_units_bank_fact_type', table_name='memory_units')
247
+ op.drop_index('idx_memory_units_fact_type', table_name='memory_units')
248
+ op.drop_index('idx_memory_units_access_count', table_name='memory_units')
249
+ op.drop_index('idx_memory_units_bank_date', table_name='memory_units')
250
+ op.drop_index('idx_memory_units_event_date', table_name='memory_units')
251
+ op.drop_index('idx_memory_units_document_id', table_name='memory_units')
252
+ op.drop_index('idx_memory_units_bank_id', table_name='memory_units')
253
+ op.execute('DROP INDEX IF EXISTS idx_memory_units_text_search')
254
+ op.drop_table('memory_units')
255
+
256
+ op.execute('DROP INDEX IF EXISTS idx_entities_bank_lower_name')
257
+ op.drop_index('idx_entities_bank_name', table_name='entities')
258
+ op.drop_index('idx_entities_canonical_name', table_name='entities')
259
+ op.drop_index('idx_entities_bank_id', table_name='entities')
260
+ op.drop_table('entities')
261
+
262
+ op.drop_index('idx_async_operations_bank_status', table_name='async_operations')
263
+ op.drop_index('idx_async_operations_status', table_name='async_operations')
264
+ op.drop_index('idx_async_operations_bank_id', table_name='async_operations')
265
+ op.drop_table('async_operations')
266
+
267
+ op.drop_index('idx_documents_content_hash', table_name='documents')
268
+ op.drop_index('idx_documents_bank_id', table_name='documents')
269
+ op.drop_table('documents')
270
+
271
+ op.drop_table('banks')
272
+
273
+ # Drop extensions (optional - comment out if you want to keep them)
274
+ # op.execute('DROP EXTENSION IF EXISTS vector')
275
+ # op.execute('DROP EXTENSION IF EXISTS "uuid-ossp"')
@@ -0,0 +1,70 @@
1
+ """add_chunks_table
2
+
3
+ Revision ID: b7c4d8e9f1a2
4
+ Revises: 5a366d414dce
5
+ Create Date: 2025-11-28 00:00:00.000000
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ from sqlalchemy.dialects import postgresql
13
+
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision: str = 'b7c4d8e9f1a2'
17
+ down_revision: Union[str, Sequence[str], None] = '5a366d414dce'
18
+ branch_labels: Union[str, Sequence[str], None] = None
19
+ depends_on: Union[str, Sequence[str], None] = None
20
+
21
+
22
def upgrade() -> None:
    """Add chunks table and link memory_units to chunks.

    Introduces a chunks table (one row per text chunk of a document) and a
    nullable memory_units.chunk_id FK so each fact can point back at the
    chunk it was extracted from.
    """

    # Create chunks table with single text PK (bank_id_document_id_chunk_index)
    op.create_table(
        'chunks',
        sa.Column('chunk_id', sa.Text(), nullable=False),
        sa.Column('document_id', sa.Text(), nullable=False),
        sa.Column('bank_id', sa.Text(), nullable=False),
        sa.Column('chunk_index', sa.Integer(), nullable=False),
        sa.Column('chunk_text', sa.Text(), nullable=False),
        sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False),
        # Composite FK matches documents' composite PK; chunks die with their document
        sa.ForeignKeyConstraint(['document_id', 'bank_id'], ['documents.id', 'documents.bank_id'], name='chunks_document_fkey', ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('chunk_id', name=op.f('pk_chunks'))
    )

    # Add indexes for efficient queries
    op.create_index('idx_chunks_document_id', 'chunks', ['document_id'])
    op.create_index('idx_chunks_bank_id', 'chunks', ['bank_id'])

    # Add chunk_id column to memory_units (nullable, as existing records won't have chunks)
    op.add_column('memory_units', sa.Column('chunk_id', sa.Text(), nullable=True))

    # Add foreign key constraint to chunks table
    # (SET NULL: deleting a chunk orphans, but keeps, its memory units)
    op.create_foreign_key(
        'memory_units_chunk_fkey',
        'memory_units',
        'chunks',
        ['chunk_id'],
        ['chunk_id'],
        ondelete='SET NULL'
    )

    # Add index on chunk_id for efficient lookups
    op.create_index('idx_memory_units_chunk_id', 'memory_units', ['chunk_id'])
57
+
58
+
59
def downgrade() -> None:
    """Remove chunks table and chunk_id from memory_units.

    Reverses upgrade(): the memory_units-side additions go first so the
    chunks table has no remaining inbound references when dropped.
    """

    # Drop index and foreign key from memory_units
    op.drop_index('idx_memory_units_chunk_id', table_name='memory_units')
    op.drop_constraint('memory_units_chunk_fkey', 'memory_units', type_='foreignkey')
    op.drop_column('memory_units', 'chunk_id')

    # Drop chunks table indexes and table
    op.drop_index('idx_chunks_bank_id', table_name='chunks')
    op.drop_index('idx_chunks_document_id', table_name='chunks')
    op.drop_table('chunks')
@@ -0,0 +1,39 @@
1
+ """add_retain_params_to_documents
2
+
3
+ Revision ID: c8e5f2a3b4d1
4
+ Revises: b7c4d8e9f1a2
5
+ Create Date: 2025-12-02 00:00:00.000000
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ from sqlalchemy.dialects import postgresql
13
+
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision: str = 'c8e5f2a3b4d1'
17
+ down_revision: Union[str, Sequence[str], None] = 'b7c4d8e9f1a2'
18
+ branch_labels: Union[str, Sequence[str], None] = None
19
+ depends_on: Union[str, Sequence[str], None] = None
20
+
21
+
22
+ def upgrade() -> None:
23
+ """Add retain_params JSONB column to documents table."""
24
+
25
+ # Add retain_params column to store parameters passed during retain
26
+ op.add_column('documents', sa.Column('retain_params', postgresql.JSONB(), nullable=True))
27
+
28
+ # Add index for efficient queries on retain_params
29
+ op.create_index('idx_documents_retain_params', 'documents', ['retain_params'], postgresql_using='gin')
30
+
31
+
32
+ def downgrade() -> None:
33
+ """Remove retain_params column from documents table."""
34
+
35
+ # Drop index
36
+ op.drop_index('idx_documents_retain_params', table_name='documents')
37
+
38
+ # Drop column
39
+ op.drop_column('documents', 'retain_params')
@@ -0,0 +1,38 @@
1
+ """
2
+ Memory System for AI Agents.
3
+
4
+ Temporal + Semantic Memory Architecture using PostgreSQL with pgvector.
5
+ """
6
+ from .engine.memory_engine import MemoryEngine
7
+ from .engine.search.trace import (
8
+ SearchTrace,
9
+ QueryInfo,
10
+ EntryPoint,
11
+ NodeVisit,
12
+ WeightComponents,
13
+ LinkInfo,
14
+ PruningDecision,
15
+ SearchSummary,
16
+ SearchPhaseMetrics,
17
+ )
18
+ from .engine.search.tracer import SearchTracer
19
+ from .engine.embeddings import Embeddings, SentenceTransformersEmbeddings
20
+ from .engine.llm_wrapper import LLMConfig
21
+
22
+ __all__ = [
23
+ "MemoryEngine",
24
+ "SearchTrace",
25
+ "SearchTracer",
26
+ "QueryInfo",
27
+ "EntryPoint",
28
+ "NodeVisit",
29
+ "WeightComponents",
30
+ "LinkInfo",
31
+ "PruningDecision",
32
+ "SearchSummary",
33
+ "SearchPhaseMetrics",
34
+ "Embeddings",
35
+ "SentenceTransformersEmbeddings",
36
+ "LLMConfig",
37
+ ]
38
+ __version__ = "0.1.0"